Revision: 34029
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at October 16, 2010 06:07 by adkatrit
Initial Code
<?php
//ini_set('display_errors',3);
//error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
include_once("simple_html_dom.php");
//NYT_query("json","javascript","[article search key]",false);
// With the 4th parameter set to false, it only prints the total number of articles matching your query.
// Run the same archiving query for every search term below, passing 80 as
// the article cap (5th argument) and true to have the articles written to disk.
$searchTerms = array("jay-z", "eminem");
foreach ($searchTerms as $searchTerm) {
    NYT_query("json", $searchTerm, "[article search key]", true, 80);
}
//NYT_query("json","javascript","[article search key]",true,10);
// With the 4th parameter set to true, it prints the total number and also writes each article to a file.
// The .txt files are written below the current working directory, in archive/{your query}/ (for the calls above: archive/jay-z and archive/eminem).
// Some files come out at zero KB. The cause is unclear (possibly the NYT server answering with HTTP status 500), but most articles can be retrieved this way.
// The 5th parameter specifies the maximum number of articles to write to file. This is helpful when a query like "computer" has over 120000 results.
// In the future I'll probably refine the search parameters to get more accurate results, such as the ability to search for all articles containing "computer" in the "technology" section of the NYTimes.
/**
 * Fetch one Article Search response and decode it.
 *
 * @param string $url Fully built API request URL.
 * @return object|null Decoded response object, or null when the download or
 *                     the JSON decoding fails.
 */
function _NYT_fetchPage($url)
{
    // The API answers with JSON, so read the raw body directly instead of
    // building (and never freeing) an HTML DOM for it.
    $raw = file_get_contents($url);
    if ($raw === false) {
        return null;
    }
    $decoded = json_decode($raw);
    return is_object($decoded) ? $decoded : null;
}

/**
 * Query the NYT Article Search API (v1) and optionally archive the matches.
 *
 * Always prints the total number of matching articles followed by a blank
 * line (0 when the lookup fails). When $writetoFile is truthy, every result
 * is downloaded through extractArticle() and stored with writeToFile() as
 * archive/{query}/{title}.txt, relative to the current working directory.
 *
 * @param string   $format      Value for the API "format" parameter. The response
 *                              is decoded with json_decode(), so only JSON works.
 * @param string   $query       Search terms; also used as the archive sub-directory name.
 * @param string   $apikey      Article Search API key.
 * @param bool     $writetoFile Whether to download and store the articles.
 * @param int|null $max         Maximum number of articles to store; null (or
 *                              omitting the argument) stores every match.
 * @return void
 */
function NYT_query($format, $query, $apikey, $writetoFile, $max = null)
{
    // The paging arithmetic treats one "offset" step as a block of 10 results.
    $perPage = 10;

    // Encode the search terms so spaces and reserved characters survive the URL.
    $baseUrl = "http://api.nytimes.com/svc/search/v1/article?format=$format&query=" . urlencode($query);

    $first = _NYT_fetchPage($baseUrl . "&api-key=$apikey");
    $total = ($first !== null && isset($first->total)) ? (int) $first->total : 0;
    echo $total . "\n\n";

    if (!$writetoFile) {
        return;
    }

    // Never try to store more articles than the API reports.
    $limit = $total;
    if ($max !== null && $max < $total) {
        $limit = (int) $max;
    }

    $dir = "archive/" . $query;
    $saved = 0;
    // ceil() so that a trailing partial page (or a limit below 10) is still fetched;
    // the $saved counter below keeps the stored count at $limit.
    $pages = (int) ceil($limit / $perPage);

    for ($page = 0; $page < $pages; $page++) {
        $result = _NYT_fetchPage($baseUrl . "&offset=$page&api-key=$apikey");
        if ($result === null || !isset($result->results) || !is_array($result->results)) {
            // A failed page should not abort the remaining ones.
            continue;
        }

        foreach ($result->results as $article) {
            if ($saved >= $limit) {
                break 2;
            }
            if (!isset($article->url)) {
                continue;
            }

            $title = isset($article->title) ? urldecode($article->title) : "";
            // Path separators (and NUL bytes) in a title would point the file
            // into another directory or make fopen() fail.
            $fileName = str_replace(array("/", "\\", "\0"), "_", $title);

            $artBody = html_entity_decode(extractArticle($article->url));

            if (!is_dir($dir)) {
                // Recursive so the parent "archive" directory is created as well.
                mkdir($dir, 0777, true);
            }
            writeToFile($dir . "/" . $fileName . ".txt", $artBody);
            $saved++;
        }
    }
}
/**
 * Download a page and return the text of its ".articleBody" elements.
 *
 * Each matching element contributes its plain text followed by a newline.
 *
 * @param string $url Address of the article page.
 * @return string Concatenated text, or an empty string when the page could
 *                not be loaded or contains no ".articleBody" element.
 */
function extractArticle($url)
{
    $html = file_get_html($url);

    // Depending on the simple_html_dom version, a failed or empty download
    // yields false instead of a DOM object; calling find() on it would be fatal.
    if (!is_object($html)) {
        return "";
    }

    $body = "";
    foreach ($html->find('.articleBody') as $element) {
        $body .= $element->plaintext . "\n";
    }

    // Release the DOM explicitly before dropping the reference.
    $html->clear();
    unset($html);

    return $body;
}
/**
 * Write a string to a file, creating it or truncating any previous content.
 *
 * Terminates the script with "can't open file" when the file cannot be
 * opened for writing.
 *
 * @param string $filename Path of the file to write.
 * @param string $body     Content to store.
 * @return void
 */
function writeToFile($filename, $body)
{
    $handle = fopen($filename, 'w');
    if (!$handle) {
        die("can't open file");
    }

    fwrite($handle, $body);
    fclose($handle);
}
Initial URL
http://developer.nytimes.com/docs/article_search_api/
Initial Description
Initial Title
new york times article api function
Initial Tags
search
Initial Language
PHP