Revision: 69126
Updated Code
at April 23, 2015 07:05 by Firsh
Updated Code
/**
 * Routes a YouTube source URL to the matching scraper method.
 *
 * URLs containing "gdata.youtube.com" or "youtube.com/user/" are handled by
 * scrape_youtube_channel(); URLs containing "list=" are handled by
 * scrape_youtube_playlist().
 *
 * @param string     $rss_url Channel feed/user URL, or a URL carrying a "list=" parameter.
 * @param int|string $limit   Maximum number of items to return; 0 means "no limit".
 * @return mixed Whatever the selected scraper returns, or a translated
 *               error message when the source type is not recognised.
 */
function scrape_youtube($rss_url, $limit){
    // Normalise before testing for zero: the callers are not visible here, and
    // a limit passed as the string "0" would slip past a strict === 0 check and
    // then make the scrapers stop before collecting a single item.
    $limit = (int) $limit;
    if($limit === 0){
        // The scrapers break when their counter equals the limit; a counter
        // that starts at 0 and only increments never equals -1.
        $limit = -1;
    }

    $is_channel = stripos($rss_url, 'gdata.youtube.com') !== false
        || stripos($rss_url, 'youtube.com/user/') !== false;
    if($is_channel){
        return $this->scrape_youtube_channel($rss_url, $limit);
    }

    if(stripos($rss_url, 'list=') !== false){
        return $this->scrape_youtube_playlist($rss_url, $limit);
    }

    return __('YouTube source could not be determined.', 'jig_td');
}
/**
 * Scrapes a YouTube playlist page and returns its videos as feed-like items.
 *
 * Each returned item is a JIGstdClass with get_title, get_description,
 * get_date, get_enclosures[0]->get_link (thumbnail) and get_permalink set.
 *
 * @param string $rss_url Any URL containing a "list=<playlist id>" parameter.
 * @param int    $limit   Maximum number of items; a value the counter never
 *                        reaches (e.g. -1) means "no limit".
 * @return array|string Array of items (possibly empty), or a translated error
 *                      message when no playlist ID can be extracted.
 */
function scrape_youtube_playlist($rss_url, $limit){
    // Require at least one character so an empty "list=" is reported as an
    // error instead of requesting a playlist page without an ID.
    if (!preg_match('/(?<=list=)[^&#?\s]+/i', $rss_url, $regs)) {
        return __('YouTube playlist ID could not be determined.', 'jig_td');
    }
    // NOTE(review): hl=en presumably forces the English page so the
    // "[Private Video]" / "[Deleted Video]" titles below match - confirm.
    $url = "https://www.youtube.com/playlist?list=".$regs[0]."&hl=en";

    // Links handed back to the site follow the scheme of the current request.
    $host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

    $rss_items = array();

    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // Nothing was fetched. DOMDocument::loadHTML() rejects an empty string
        // (a ValueError on PHP 8 that @ cannot suppress), so stop here.
        return $rss_items;
    }
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');

    $doc = new DOMDocument();
    // Real-world markup triggers libxml parse warnings; they are silenced on purpose.
    @$doc->loadHTML($html);
    $xpath = new DOMXpath($doc);

    $videos = $xpath->query('//tr[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if ($count == $limit) {
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // Without the watch link there is neither a title nor a permalink.
            continue;
        }

        $title = trim($anchor->nodeValue);
        if ($title == "[Private Video]" || $title == "[Deleted Video]") {
            continue;
        }

        $description = '';
        $ownerAnchor = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " pl-video-owner ")]/a', $video)->item(0);
        if ($ownerAnchor instanceof DOMElement) {
            // The owner name and link come from a remote page: escape them
            // before they are embedded in HTML.
            $owner_href = htmlspecialchars($host.$ownerAnchor->getAttribute('href'), ENT_QUOTES, 'UTF-8');
            $owner_name = htmlspecialchars(trim($ownerAnchor->nodeValue), ENT_QUOTES, 'UTF-8');
            $description = __('by','jig_td').' <a href="'.$owner_href.'" target="_blank">'.$owner_name.'</a>';
        }

        // A row without an <img> yields an empty thumbnail link rather than a fatal error.
        $img = $xpath->query('.//img', $video)->item(0);
        $thumb = ($img instanceof DOMElement) ? $img->getAttribute('data-thumb') : '';

        $enclosure = new JIGstdClass();
        // Swap the default thumbnail file name for the maxresdefault variant.
        $enclosure->get_link = str_replace('/default.jpg', '/maxresdefault.jpg', $thumb);

        $rss_item = new JIGstdClass();
        $rss_item->get_title = $title;
        $rss_item->get_description = $description;
        $rss_item->get_date = __("No date available.","jig_td");
        $rss_item->get_enclosures = array($enclosure);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}
/**
 * Scrapes a YouTube user's "videos" page and returns the uploads as feed-like items.
 *
 * Each returned item is a JIGstdClass with get_title, get_description,
 * get_date, get_enclosures[0]->get_link (thumbnail) and get_permalink set.
 *
 * @param string $rss_url Legacy gdata feed URL (".../feeds/base/users/<name>/...")
 *                        or a "youtube.com/user/<name>" URL.
 * @param int    $limit   Maximum number of items; a value the counter never
 *                        reaches (e.g. -1) means "no limit".
 * @return array|string Array of items (possibly empty), or a translated error
 *                      message when no username can be extracted.
 */
function scrape_youtube_channel($rss_url, $limit){
    // Example: http://gdata.youtube.com/feeds/base/users/MAKO0MAKO0/uploads?max-results=50
    // The username is the single path segment after the marker. Stopping at
    // "/", "?", "#" or whitespace keeps query strings and further path
    // segments out of it, and "+" rejects an empty name.
    if (!preg_match('%(?<=/feeds/base/users/)[^/?#\s]+%i', $rss_url, $regs)
        && !preg_match('%(?<=youtube\.com/user/)[^/?#\s]+%i', $rss_url, $regs)) {
        return __('YouTube username could not be determined.', 'jig_td');
    }
    $url = "https://www.youtube.com/user/".$regs[0]."/videos?flow=list&sort=dd&hl=en";

    // Links handed back to the site follow the scheme of the current request.
    $host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

    $rss_items = array();

    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // Nothing was fetched. DOMDocument::loadHTML() rejects an empty string
        // (a ValueError on PHP 8 that @ cannot suppress), so stop here.
        return $rss_items;
    }
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');

    $doc = new DOMDocument();
    // Real-world markup triggers libxml parse warnings; they are silenced on purpose.
    @$doc->loadHTML($html);
    $xpath = new DOMXpath($doc);

    $videos = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " feed-item-container ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if ($count == $limit) {
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // Without the watch link there is neither a title nor a permalink.
            continue;
        }

        // The remaining nodes are optional: a missing one produces a fallback
        // value instead of a fatal "member function on null" error.
        $descNode = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-description ")]', $video)->item(0);
        $dateNode = $xpath->query('.//ul[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-meta-info ")]/*[1]', $video)->item(0);
        $img = $xpath->query('.//img', $video)->item(0);
        $thumb = ($img instanceof DOMElement) ? $img->getAttribute('data-thumb') : '';

        $enclosure = new JIGstdClass();
        // Swap the mqdefault thumbnail file name for the maxresdefault variant.
        $enclosure->get_link = str_replace('/mqdefault.jpg', '/maxresdefault.jpg', $thumb);

        $rss_item = new JIGstdClass();
        $rss_item->get_title = trim($anchor->getAttribute('title'));
        $rss_item->get_description = $descNode ? trim($descNode->nodeValue) : '';
        $rss_item->get_date = $dateNode ? $dateNode->nodeValue : __("No date available.","jig_td");
        $rss_item->get_enclosures = array($enclosure);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}
Revision: 69125
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at April 23, 2015 07:04 by Firsh
Initial Code
/**
 * Routes a YouTube source URL to the matching scraper method.
 *
 * URLs containing "gdata.youtube.com" or "youtube.com/user/" are handled by
 * scrape_youtube_channel(); URLs containing "list=" are handled by
 * scrape_youtube_playlist().
 *
 * @param string     $rss_url Channel feed/user URL, or a URL carrying a "list=" parameter.
 * @param int|string $limit   Maximum number of items to return; 0 means "no limit".
 * @return mixed Whatever the selected scraper returns, or a translated
 *               error message when the source type is not recognised.
 */
function scrape_youtube($rss_url, $limit){
    // Normalise before testing for zero: the callers are not visible here, and
    // a limit passed as the string "0" would slip past a strict === 0 check and
    // then make the scrapers stop before collecting a single item.
    $limit = (int) $limit;
    if($limit === 0){
        // The scrapers break when their counter equals the limit; a counter
        // that starts at 0 and only increments never equals -1.
        $limit = -1;
    }

    $is_channel = stripos($rss_url, 'gdata.youtube.com') !== false
        || stripos($rss_url, 'youtube.com/user/') !== false;
    if($is_channel){
        return $this->scrape_youtube_channel($rss_url, $limit);
    }

    if(stripos($rss_url, 'list=') !== false){
        return $this->scrape_youtube_playlist($rss_url, $limit);
    }

    return __('YouTube source could not be determined.', 'jig_td');
}
/**
 * Scrapes a YouTube playlist page and returns its videos as feed-like items.
 *
 * Each returned item is a JIGstdClass with get_title, get_description,
 * get_date, get_enclosures[0]->get_link (thumbnail) and get_permalink set.
 *
 * @param string $rss_url Any URL containing a "list=<playlist id>" parameter.
 * @param int    $limit   Maximum number of items; a value the counter never
 *                        reaches (e.g. -1) means "no limit".
 * @return array|string Array of items (possibly empty), or a translated error
 *                      message when no playlist ID can be extracted.
 */
function scrape_youtube_playlist($rss_url, $limit){
    // Require at least one character so an empty "list=" is reported as an
    // error instead of requesting a playlist page without an ID.
    if (!preg_match('/(?<=list=)[^&#?\s]+/i', $rss_url, $regs)) {
        return __('YouTube playlist ID could not be determined.', 'jig_td');
    }
    // NOTE(review): hl=en presumably forces the English page so the
    // "[Private Video]" / "[Deleted Video]" titles below match - confirm.
    $url = "https://www.youtube.com/playlist?list=".$regs[0]."&hl=en";

    // Links handed back to the site follow the scheme of the current request.
    $host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

    $rss_items = array();

    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // Nothing was fetched. DOMDocument::loadHTML() rejects an empty string
        // (a ValueError on PHP 8 that @ cannot suppress), so stop here.
        return $rss_items;
    }
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');

    $doc = new DOMDocument();
    // Real-world markup triggers libxml parse warnings; they are silenced on purpose.
    @$doc->loadHTML($html);
    $xpath = new DOMXpath($doc);

    $videos = $xpath->query('//tr[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if ($count == $limit) {
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // Without the watch link there is neither a title nor a permalink.
            continue;
        }

        $title = trim($anchor->nodeValue);
        if ($title == "[Private Video]" || $title == "[Deleted Video]") {
            continue;
        }

        $description = '';
        $ownerAnchor = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " pl-video-owner ")]/a', $video)->item(0);
        if ($ownerAnchor instanceof DOMElement) {
            // The owner name and link come from a remote page: escape them
            // before they are embedded in HTML.
            $owner_href = htmlspecialchars($host.$ownerAnchor->getAttribute('href'), ENT_QUOTES, 'UTF-8');
            $owner_name = htmlspecialchars(trim($ownerAnchor->nodeValue), ENT_QUOTES, 'UTF-8');
            $description = __('by','jig_td').' <a href="'.$owner_href.'" target="_blank">'.$owner_name.'</a>';
        }

        // A row without an <img> yields an empty thumbnail link rather than a fatal error.
        $img = $xpath->query('.//img', $video)->item(0);
        $thumb = ($img instanceof DOMElement) ? $img->getAttribute('data-thumb') : '';

        $enclosure = new JIGstdClass();
        // Swap the default thumbnail file name for the maxresdefault variant.
        $enclosure->get_link = str_replace('/default.jpg', '/maxresdefault.jpg', $thumb);

        $rss_item = new JIGstdClass();
        $rss_item->get_title = $title;
        $rss_item->get_description = $description;
        $rss_item->get_date = __("No date available.","jig_td");
        $rss_item->get_enclosures = array($enclosure);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}
/**
 * Scrapes a YouTube user's "videos" page and returns the uploads as feed-like items.
 *
 * Each returned item is a JIGstdClass with get_title, get_description,
 * get_date, get_enclosures[0]->get_link (thumbnail) and get_permalink set.
 *
 * @param string $rss_url Legacy gdata feed URL (".../feeds/base/users/<name>/...")
 *                        or a "youtube.com/user/<name>" URL.
 * @param int    $limit   Maximum number of items; a value the counter never
 *                        reaches (e.g. -1) means "no limit".
 * @return array|string Array of items (possibly empty), or a translated error
 *                      message when no username can be extracted.
 */
function scrape_youtube_channel($rss_url, $limit){
    // Example: http://gdata.youtube.com/feeds/base/users/MAKO0MAKO0/uploads?max-results=50
    // The username is the single path segment after the marker. Stopping at
    // "/", "?", "#" or whitespace keeps query strings and further path
    // segments out of it, and "+" rejects an empty name.
    if (!preg_match('%(?<=/feeds/base/users/)[^/?#\s]+%i', $rss_url, $regs)
        && !preg_match('%(?<=youtube\.com/user/)[^/?#\s]+%i', $rss_url, $regs)) {
        return __('YouTube username could not be determined.', 'jig_td');
    }
    $url = "https://www.youtube.com/user/".$regs[0]."/videos?flow=list&sort=dd&hl=en";

    // Links handed back to the site follow the scheme of the current request.
    $host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

    $rss_items = array();

    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // Nothing was fetched. DOMDocument::loadHTML() rejects an empty string
        // (a ValueError on PHP 8 that @ cannot suppress), so stop here.
        return $rss_items;
    }
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');

    $doc = new DOMDocument();
    // Real-world markup triggers libxml parse warnings; they are silenced on purpose.
    @$doc->loadHTML($html);
    $xpath = new DOMXpath($doc);

    $videos = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " feed-item-container ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if ($count == $limit) {
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // Without the watch link there is neither a title nor a permalink.
            continue;
        }

        // The remaining nodes are optional: a missing one produces a fallback
        // value instead of a fatal "member function on null" error.
        $descNode = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-description ")]', $video)->item(0);
        $dateNode = $xpath->query('.//ul[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-meta-info ")]/*[1]', $video)->item(0);
        $img = $xpath->query('.//img', $video)->item(0);
        $thumb = ($img instanceof DOMElement) ? $img->getAttribute('data-thumb') : '';

        $enclosure = new JIGstdClass();
        // Swap the mqdefault thumbnail file name for the maxresdefault variant.
        $enclosure->get_link = str_replace('/mqdefault.jpg', '/maxresdefault.jpg', $thumb);

        $rss_item = new JIGstdClass();
        $rss_item->get_title = trim($anchor->getAttribute('title'));
        $rss_item->get_description = $descNode ? trim($descNode->nodeValue) : '';
        $rss_item->get_date = $dateNode ? $dateNode->nodeValue : __("No date available.","jig_td");
        $rss_item->get_enclosures = array($enclosure);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}
Initial URL
http://justifiedgrid.com/
Initial Description
http://stackoverflow.com/questions/29752447/how-to-get-a-youtube-channel-rss-feed-after-2015-april-20-without-v3-api
Initial Title
Improved YouTube scrapers
Initial Tags
Initial Language
PHP