Posted By

karlhorky on 09/30/09


Tagged

regex rss feed remote file regularexpressions xml save request ASP local modify retrieve vbscript


Versions (?)

ASP (vbscript): Retrieve Remote XML RSS Feed, Modify Using Regular Expression, and then Save to Local XML File


 / Published in: ASP
 

This script retrieves a remote XML RSS feed, replaces the publication date of the first item with a date extracted from a remote HTML document (using a regular expression), and saves the modified feed to a local file.

  1. <%
  2. ' ASP (vbscript): Retrieve Remote XML RSS Feed, Modify, and then Save to Local XML File
  3. ' Author: Karl Horky
  4. ' Creation Date: Sept 30, 2009
  5. ' Last Modified: Sept 30, 2009
  6. '
  7. '
  8. '
  9. '
  10. ' Sample Input:
  11. '
  12. ' <?xml version="1.0" encoding="UTF-8"?>
  13. '
  14. ' <rss version="2.0">
  15. '
  16. ' <channel>
  17. '
  18. ' <title>News</title>
  19. '
  20. ' <link>http://www.news.com/</link>
  21. '
  22. ' <description>The latest headlines</description>
  23. '
  24. ' <language>en-us</language>
  25. '
  26. ' <copyright>Copyright © 2009</copyright>
  27. '
  28. ' <ttl>5</ttl>
  29. '
  30. ' <item>
  31. '
  32. ' <title>News Item 1</title>
  33. '
  34. ' <description>Item Description 1</description>
  35. '
  36. ' <link>http://www.news.com/news_release_1.htm</link>
  37. '
  38. ' <pubDate>Tue, 29 Sep 2009 17:47:42 GMT</pubDate>
  39. '
  40. ' </item>
  41. '
  42. '
  43. '
  44. ' <item>
  45. '
  46. ' <title>News Item 2</title>
  47. '
  48. ' <description>Item Description 2</description>
  49. '
  50. ' <link>http://www.news.com/news_release_2.htm</link>
  51. '
  52. ' <pubDate>Mon, 24 Aug 2009 07:00:00 GMT</pubDate>
  53. '
  54. ' </item>
  55. '
  56. ' </channel>
  57. '
  58. ' </rss>
  59. '
  60. '
  61. '
  62. '
  63. '
  64. '
  65. '
  66. ' Sample Output file (note the changed date on first item):
  67. '
  68. ' <rss version="2.0">
  69. '
  70. ' <channel>
  71. '
  72. ' <title>News</title>
  73. '
  74. ' <link>http://www.news.com/</link>
  75. '
  76. ' <description>The latest headlines</description>
  77. '
  78. ' <language>en-us</language>
  79. '
  80. ' <copyright>Copyright © 2009</copyright>
  81. '
  82. ' <ttl>5</ttl>
  83. '
  84. ' <item>
  85. '
  86. ' <title>News Item 1</title>
  87. '
  88. ' <description>Item Description 1</description>
  89. '
  90. ' <link>http://www.news.com/news_release_1.htm</link>
  91. '
  92. ' <pubDate>Wed, 30 Sep 2009 12:21:49 GMT</pubDate>
  93. '
  94. ' </item>
  95. '
  96. '
  97. '
  98. ' <item>
  99. '
  100. ' <title>News Item 2</title>
  101. '
  102. ' <description>Item Description 2</description>
  103. '
  104. ' <link>http://www.news.com/news_release_2.htm</link>
  105. '
  106. ' <pubDate>Mon, 24 Aug 2009 07:00:00 GMT</pubDate>
  107. '
  108. ' </item>
  109. '
  110. ' </channel>
  111. '
  112. ' </rss>
  113. '
  114. '
  115. ' Leading Zero
  ' Converts N to a string, prefixing a single "0" when 0 <= N < 10.
  ' Values outside that range (negative, or 10 and above) are returned
  ' as their plain string form with no padding.
  Function LdgZ(ByVal N)
    Dim padded
    padded = "" & N
    If (N >= 0) And (N < 10) Then
      padded = "0" & N
    End If
    LdgZ = padded
  End Function
  119.  
  ' Downloads the news release page, extracts the text of its "Update:"
  ' paragraph (expected form: "<hour> p.m. PDT, <date>"), converts it from
  ' PDT to GMT and returns it as an RFC 822 style string, for example
  ' "Wed, 30 Sep 2009 19:00:00 GMT".
  '
  ' Returns: String suitable for an RSS <pubDate> element.
  ' Raises:  vbObjectError + 1 when the page request does not return HTTP 200,
  '          vbObjectError + 2 when the "Update:" paragraph is not found,
  '          vbObjectError + 3 when the update text is not in the expected
  '          "<hour> a.m./p.m. PDT, <date>" form.
  ' NOTE(review): WeekdayName/MonthName produce names in the server's locale;
  ' RFC 822 expects English abbreviations - confirm the server locale.
  Function GetNewsDate()
    Const NEWS_URL = "http://www.news.com/news_release_1.html"
    ' PDT is UTC-7, so 7 hours are added to reach GMT (8 would be PST).
    Const PDT_TO_GMT_HOURS = 7

    Dim NewsHttp, httpStatus, NewsContent
    Dim re, reMatches, NewsDate, adjustedDate

    ' Create connection and get content
    Set NewsHttp = Server.CreateObject("MSXML2.ServerXMLHTTP.3.0")
    NewsHttp.Open "GET", NEWS_URL, False
    NewsHttp.Send
    httpStatus = NewsHttp.Status
    NewsContent = NewsHttp.ResponseText
    Set NewsHttp = Nothing

    If httpStatus <> 200 Then
      Err.Raise vbObjectError + 1, "GetNewsDate", _
        "Request for " & NEWS_URL & " returned HTTP status " & httpStatus
    End If

    ' Match the Updated date paragraph; the first capture group is the date text
    Set re = New RegExp
    re.Pattern = "<p class=MsoNormal align=center style='text-align:center'><b><span lang=EN-CA\s*style='mso-ansi-language:EN-CA;mso-fareast-language:EN-CA'>Update:\s*([^<]+)</span></b></p>"
    Set reMatches = re.Execute(NewsContent)

    If reMatches.Count = 0 Then
      Set reMatches = Nothing
      Set re = Nothing
      Err.Raise vbObjectError + 2, "GetNewsDate", _
        "No ""Update:"" paragraph found in " & NEWS_URL
    End If

    ' Strip out only the date from the paragraph
    NewsDate = reMatches(0).SubMatches(0)
    Set reMatches = Nothing

    ' Strip out the PDT part of the date and reorder it into "<date> <hour> am/pm"
    ' so that it can be parsed as a date value
    re.Pattern = "^(\d*)(\s?[pa])\.m\.\sPDT,\s*(.*)"
    If Not re.Test(NewsDate) Then
      Set re = Nothing
      Err.Raise vbObjectError + 3, "GetNewsDate", _
        "Unexpected update date format: " & NewsDate
    End If
    NewsDate = re.Replace(NewsDate, "$3 $1$2m")
    Set re = Nothing

    ' Shift from PDT to GMT
    adjustedDate = DateAdd("h", PDT_TO_GMT_HOURS, CDate(NewsDate))

    ' Format the date as "Ddd, D Mmm YYYY HH:MM:SS GMT". The first day of the
    ' week is passed explicitly so Weekday and WeekdayName always agree.
    GetNewsDate = WeekdayName(Weekday(adjustedDate, vbSunday), True, vbSunday) & ", " & _
                  Day(adjustedDate) & " " & _
                  MonthName(Month(adjustedDate), True) & " " & _
                  Year(adjustedDate) & " " & _
                  LdgZ(Hour(adjustedDate)) & ":" & _
                  LdgZ(Minute(adjustedDate)) & ":" & _
                  LdgZ(Second(adjustedDate)) & " GMT"
  End Function
  158.  
  ' Main script: download the remote RSS feed, replace the first item's
  ' <pubDate> with the date returned by GetNewsDate(), and save the result
  ' to a local XML file. Nothing is saved when the download fails, the feed
  ' does not parse, or the feed has no item with a <pubDate>.

  ' Make the response expire immediately
  Response.Expires = -1

  URLToRSS = "http://www.news.com/yourfeed.xml"
  xmlFile = Server.MapPath("/location_to/yourlocalfeed.xml")

  ' Create HTTP request
  Set xmlHttp = Server.CreateObject("MSXML2.ServerXMLHTTP.3.0")
  xmlHttp.Open "GET", URLToRSS, False
  xmlHttp.Send

  ' Only try to parse a successful response; an error page is not the feed
  If xmlHttp.Status = 200 Then
    RSSXML = xmlHttp.ResponseText

    ' Parse out XML DOM
    Set xmlDOM = Server.CreateObject("MSXML2.DomDocument.3.0")
    xmlDOM.async = False
    xmlDOM.validateOnParse = False
    xmlDOM.resolveExternals = False

    ' If no errors occur parsing the xml, update the date and save the file
    If xmlDOM.LoadXml(RSSXML) Then
      ' Target the first <item>'s pubDate explicitly, so that a channel-level
      ' <pubDate> appearing earlier in the document is left untouched
      Set pubDateNode = xmlDOM.selectSingleNode("/rss/channel/item/pubDate")
      If Not pubDateNode Is Nothing Then
        ' Assigning .text also works when the element is currently empty
        pubDateNode.text = GetNewsDate()
        xmlDOM.Save xmlFile
      End If
      Set pubDateNode = Nothing
    End If
  End If

  ' Clear variables
  Set xmlHttp = Nothing
  Set xmlDOM = Nothing
  186. %>

Report this snippet  

You need to login to post a comment.