Posted By

adkatrit on 10/16/10


Tagged

search new news article York times


Versions (?)

Who likes this?

2 people have marked this snippet as a favorite

Cory
adkatrit


new york times article api function


 / Published in: PHP
 

URL: http://developer.nytimes.com/docs/article_search_api/

  1. <?php
  2. //ini_set('display_errors',3);
  3. //error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
  4. include_once("simple_html_dom.php");
  5.  
  6.  
  7.  
  8. //NYT_query("json","javascript","[article search key]",false);
  9. // With the 4th parameter set to false, it only prints the total number of articles matching your query.
  10.  
  // Example run: for each query, print the total number of matches and archive at most 80 articles.
  11. NYT_query("json","jay-z","[article search key]",true,80);
  12. NYT_query("json","eminem","[article search key]",true,80);
  13.  
  14. //NYT_query("json","javascript","[article search key]",true,10);
  15. // With the 4th parameter set to true, it prints the total number and also writes each article to a file.
  16. // The .txt files go in the current working directory, under archive/{your query} (in this case, archive/javascript).
  17. // Some files will come out as zero KB; I'm not sure what is going on with that (NYT server error, status 500), but you can get most articles this way.
  18. // The 5th parameter specifies the maximum number of articles to write to file. This is pretty helpful when queries like "computer" have over 120000 results.
  19.  
  20. // In the future I'll probably refine the search parameters a bit to get more accurate results, such as the ability to search for all articles containing "computer" in the "technology" section of the NYTimes.
  21.  
  22.  
  23.  
  /**
   * Search the New York Times Article Search API (v1) and optionally archive the hits.
   *
   * Always echoes the total number of matching articles followed by two
   * newlines. When $writetoFile is true, result pages are walked ten articles
   * at a time; each article's body is scraped with extractArticle() and saved
   * through writeToFile() as "archive/{query}/{title}.txt".
   *
   * If the API response cannot be fetched or decoded, an E_USER_WARNING is
   * raised and nothing is printed or written.
   *
   * @param string   $format      Value for the API's "format" parameter. The response is decoded as JSON, so pass "json".
   * @param string   $query       Search terms; also names the output directory under "archive/".
   * @param string   $apikey      Article Search API key.
   * @param bool     $writetoFile False: only print the total. True: also write the articles to disk.
   * @param int|null $max         Upper bound on the number of articles written; null (the default) means every match.
   * @return void
   */
  function NYT_query($format, $query, $apikey, $writetoFile, $max = null)
  {
      // Page 0 carries both the total hit count and the first ten results,
      // so it is fetched once and reused below.
      $firstPage = NYT_fetchPage($format, $query, $apikey, 0);
      if ($firstPage === null || !isset($firstPage->total)) {
          trigger_error("NYT_query: no usable API response for query '" . $query . "'", E_USER_WARNING);
          return;
      }

      $total = (int) $firstPage->total;
      echo $total . "\n\n";

      if (!$writetoFile) {
          return;
      }

      $limit = $total;
      if ($max !== null && (int) $max < $total) {
          $limit = (int) $max;
      }
      if ($limit <= 0) {
          return;
      }

      // Create the output directory once, including a missing "archive/" parent.
      $directory = "archive/" . $query;
      if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
          trigger_error("NYT_query: cannot create directory '" . $directory . "'", E_USER_WARNING);
          return;
      }

      // ceil() keeps the last, partially needed page; $written enforces the
      // exact article limit inside that page.
      $pageCount = (int) ceil($limit / 10);
      $written = 0;
      for ($page = 0; $page < $pageCount && $written < $limit; $page++) {
          $response = ($page === 0) ? $firstPage : NYT_fetchPage($format, $query, $apikey, $page);
          if ($response === null || empty($response->results)) {
              // A failed page should not abort the remaining ones.
              continue;
          }

          foreach ($response->results as $article) {
              if ($written >= $limit) {
                  break;
              }
              if (!isset($article->url)) {
                  continue;
              }

              $title = isset($article->title) ? urldecode($article->title) : '';
              $artBody = html_entity_decode(extractArticle($article->url));

              writeToFile($directory . "/" . NYT_safeFileName($title) . ".txt", $artBody);
              $written++;
          }
      }
  }

  /**
   * Fetch and decode one page of Article Search results.
   *
   * @internal Helper for NYT_query().
   *
   * @param string $format API "format" parameter.
   * @param string $query  Search terms.
   * @param string $apikey API key.
   * @param int    $offset Zero-based page number sent as the "offset" parameter.
   * @return object|null Decoded response, or null when the request fails or the body is not a JSON object.
   */
  function NYT_fetchPage($format, $query, $apikey, $offset)
  {
      $url = "http://api.nytimes.com/svc/search/v1/article"
          . "?format=" . NYT_encodeQueryValue($format)
          . "&query=" . NYT_encodeQueryValue($query)
          . "&offset=" . (int) $offset
          . "&api-key=" . NYT_encodeQueryValue($apikey);

      // The response is JSON, so read it as raw text instead of running it
      // through the HTML parser.
      $raw = file_get_contents($url);
      if ($raw === false) {
          return null;
      }

      $decoded = json_decode($raw);

      return is_object($decoded) ? $decoded : null;
  }

  /**
   * Percent-encode the characters that cannot appear literally in a URL query value.
   *
   * "%" and "+" are left untouched so that values a caller has already
   * URL-encoded keep working exactly as before.
   *
   * @internal Helper for NYT_fetchPage().
   *
   * @param string $value Raw or already-encoded query value.
   * @return string Value that is safe to place in the query string.
   */
  function NYT_encodeQueryValue($value)
  {
      return preg_replace_callback(
          '/[^A-Za-z0-9\-._~%+:]/',
          function ($match) {
              return rawurlencode($match[0]);
          },
          (string) $value
      );
  }

  /**
   * Turn an article title into a usable file name.
   *
   * Directory separators and NUL bytes would break the target path, so they
   * are replaced with "-". An empty title becomes "untitled".
   *
   * @internal Helper for NYT_query().
   *
   * @param string $title Article title.
   * @return string File name without extension.
   */
  function NYT_safeFileName($title)
  {
      $name = str_replace(array('/', '\\', "\0"), '-', (string) $title);

      return $name === '' ? 'untitled' : $name;
  }
  60.  
  /**
   * Download an article page and return the plain text of its body.
   *
   * Concatenates the plaintext of every element matching the ".articleBody"
   * selector, each followed by a newline.
   *
   * @param string $url Address of the article page.
   * @return string Extracted text; "" when the page cannot be loaded or contains no ".articleBody" element.
   */
  function extractArticle($url)
  {
      $html = file_get_html($url);

      // file_get_html() does not return a DOM object when the page cannot be
      // loaded (e.g. the server answers with an error). Calling find() on that
      // value would be a fatal error, so report "no text" instead.
      if (!is_object($html)) {
          return "";
      }

      $body = "";
      foreach ($html->find('.articleBody') as $element) {
          $body .= $element->plaintext . "\n";
      }

      // Release the parsed DOM before returning.
      $html->clear();
      unset($html);

      return $body;
  }
  /**
   * Write a string to a file, replacing any previous contents.
   *
   * Terminates the script with the message "can't open file" when the file
   * cannot be opened for writing.
   *
   * @param string $filename Path of the file to create or overwrite.
   * @param string $body     Text to store in the file.
   */
  function writeToFile($filename, $body)
  {
      $handle = fopen($filename, 'w');
      if (!$handle) {
          die("can't open file");
      }

      fwrite($handle, $body);
      fclose($handle);
  }

Report this snippet  

You need to login to post a comment.