Posted By

webonomic on 04/13/12


Tagged

xml twitter sas hashtag


Versions (?)

Creating a report of Twitter hashtag activity


 / Published in: SAS
 

URL: http://www.sascommunity.org/wiki/Twitter_SAS_program_example

The program uses the XML LIBNAME engine, FILENAME URL, SGPLOT procedure, and a simple PROC PRINT to create a report of recent Twitter activity around a specified hashtag.

  1. /* This part needs to run just once: it writes the XML map */
  2. /* to the temporary fileref TWSEARCH, which the XML LIBNAME */
  3. /* engine later uses to map the Twitter XML response into a SAS data set */
  4. filename twsearch temp;
  5.  
  6. /** the in-stream XML map below describes how each /feed/entry element
  7.   of the Atom search feed becomes one row of a SAS data set **/
  8. data _null_; /* copies the DATALINES4 records, up to the ;;;; line, into TWSEARCH */
  9. infile datalines truncover; /* a short record is read as-is instead of flowing onto the next one */
  10. file twsearch;
  11. input line $1000.;
  12. put line;
  13. datalines4;
  14. <?xml version="1.0" encoding="windows-1252"?>
  15. <!-- ############################################################ -->
  16. <!-- 2008-10-03T11:35:31 -->
  17. <!-- SAS XML Libname Engine Map -->
  18. <!-- Generated by XML Mapper, 902000.2.1.20080911191346_v920 -->
  19. <!-- ############################################################ -->
  20. <SXLEMAP name="SXLEMAP" version="1.2">
  21. <!-- ############################################################ -->
  22. <TABLE name="entry">
  23. <TABLE-PATH syntax="XPath">/feed/entry</TABLE-PATH>
  24. <COLUMN name="id">
  25. <PATH syntax="XPath">/feed/entry/id</PATH>
  26. <TYPE>character</TYPE>
  27. <DATATYPE>string</DATATYPE>
  28. <LENGTH>50</LENGTH>
  29. </COLUMN>
  30. <COLUMN name="published">
  31. <PATH syntax="XPath">/feed/entry/published</PATH>
  32. <TYPE>numeric</TYPE>
  33. <DATATYPE>datetime</DATATYPE>
  34. <FORMAT width="19">IS8601DT</FORMAT>
  35. <INFORMAT width="19">IS8601DT</INFORMAT>
  36. </COLUMN>
  37. <COLUMN name="title">
  38. <PATH syntax="XPath">/feed/entry/title</PATH>
  39. <TYPE>character</TYPE>
  40. <DATATYPE>string</DATATYPE>
  41. <LENGTH>200</LENGTH>
  42. </COLUMN>
  43. <COLUMN name="content">
  44. <PATH syntax="XPath">/feed/entry/content</PATH>
  45. <TYPE>character</TYPE>
  46. <DATATYPE>string</DATATYPE>
  47. <LENGTH>800</LENGTH>
  48. </COLUMN>
  49. <COLUMN name="updated">
  50. <PATH syntax="XPath">/feed/entry/updated</PATH>
  51. <TYPE>numeric</TYPE>
  52. <DATATYPE>datetime</DATATYPE>
  53. <FORMAT width="19">IS8601DT</FORMAT>
  54. <INFORMAT width="19">IS8601DT</INFORMAT>
  55. </COLUMN>
  56. <COLUMN name="authorName">
  57. <PATH syntax="XPath">/feed/entry/author/name</PATH>
  58. <TYPE>character</TYPE>
  59. <DATATYPE>string</DATATYPE>
  60. <LENGTH>50</LENGTH>
  61. </COLUMN>
  62. <COLUMN name="authorUri">
  63. <PATH syntax="XPath">/feed/entry/author/uri</PATH>
  64. <TYPE>character</TYPE>
  65. <DATATYPE>string</DATATYPE>
  66. <LENGTH>50</LENGTH>
  67. </COLUMN>
  68. </TABLE>
  69. </SXLEMAP>
  70. ;;;;
  71.  
  72. /** getTweets: fetch several "pages" of tweets for a hashtag, then chart and list them **/
  73. /**   pages=   number of result pages to request **/
  74. /**   hashtag= hashtag text, without the leading # **/
  75. /**   scale=   MINUTES, HOURS or DAYS - the tweet-age unit that is charted; **/
  76. /**            case-insensitive, any other value falls back to HOURS **/
  77. %macro getTweets(pages=5,hashtag=sasgf11,scale=HOURS);
  78. /* keep the working macro variables out of the calling scope */
  79. %local scaleVar pgNo feed;
  80. %let scale = %upcase(&scale);
  81. %if &scale = MINUTES %then %let scaleVar = minutesAgo;
  82. %else %if &scale = DAYS %then %let scaleVar = daysAgo;
  83. %else %let scaleVar = hoursAgo;
  84.  
  85. /* create initial dataset so the first SET in the loop has something to append to */
  86. data work.feed;
  87. run;
  88.  
  89. %do pgNo=1 %to &pages;
  90. /* used %NRSTR() to escape the ampersands that occur in this URL query string */
  91. /* &hashtag and &pgNo are macro variables that are resolved at run time */
  92. %let feed="http://search.twitter.com/search.atom?lang=en%nrstr(&q)=%23&hashtag.%nrstr(&page)=&pgNo";
  93. filename twit URL &feed
  94. /* if you need to specify a proxy server to get to the internet */
  95. /* proxy="http://your.proxy.com" */
  96. ;
  97. /* use the XML library engine */
  98. libname tf XML xmlfileref=twit xmlmap=twsearch;
  99.  
  100. data work.feed;
  101. /* when run in SAS Enterprise Guide, SYSECHO will */
  102. /* update the task status with this message */
  103. sysecho "Fetching tweet page &pgNo of &pages";
  104. set work.feed tf.entry;
  105. run;
  106. /* release the libref and fileref so they do not stay assigned after the macro ends */
  107. libname tf clear;
  108. filename twit clear;
  109. %end;
  110.  
  111. data work.feed;
  112. set work.feed;
  113. length hoursAgo 8 minutesAgo 8 daysAgo 8;
  114. label hoursAgo = "Hours ago"
  115. minutesAgo = "Minutes ago"
  116. daysAgo = "Days ago";
  117. if published not = .; /* drops the empty seed row and any entry without a timestamp */
  118. published=published+gmtoff(); /* NOTE(review): presumably shifts GMT to local time; GMTOFF looks undocumented - confirm */
  119. daysAgo = datdif(datepart(published),today(),'act/act');
  120. hoursAgo = int( (datetime()-published) / 3600 );
  121. minutesAgo = int( (datetime()-published) / 60 );
  122. run;
  123.  
  124. title "Report of #&hashtag. hashtag activity";
  125. title2 "as of %TRIM(%QSYSFUNC(DATE(), NLDATE20.)) at %TRIM(%SYSFUNC(TIME(), TIMEAMPM12.))";
  126. ods graphics / height=500 width=800;
  127. proc sgplot data=work.feed;
  128. vbar &scaleVar;
  129. yaxis LABEL="Number of tweets";
  130. xaxis discreteorder=data;
  131. run;
  132.  
  133. proc print data=work.feed
  134. obs="Row Number"
  135. label
  136. ;
  137. format published dateampm20.;
  138. var published authorname title;
  139. run;
  140. title; /* clear TITLE and TITLE2 so they do not carry over into later output */
  141.  
  142. %mend;
  143.  
  144. /* scale= picks the time unit of the chart: a busy hashtag */
  145. /* reads better with MINUTES or HOURS than with DAYS */
  146.  
  147. /* sample invocation: ten pages of #SASGF11 tweets, charted by day */
  148. %getTweets(hashtag=sasgf11, pages=10, scale=DAYS);

Report this snippet  

You need to log in to post a comment.