Return to Snippet

Revision: 69126
at April 23, 2015 07:05 by Firsh


Updated Code
/**
 * Picks the scraper that matches the kind of YouTube URL given.
 *
 * @param string $rss_url A gdata.youtube.com feed URL, a youtube.com/user/... URL, or any URL containing "list=".
 * @param mixed  $limit   Maximum number of items; 0 in any form (0, "0", null, non-numeric) means no limit.
 * @return mixed The chosen scraper's return value, or a translated error string when the URL matches neither kind.
 */
function scrape_youtube($rss_url, $limit){
	// Normalise before testing for zero: the scrapers stop when their counter loosely equals $limit,
	// so a "0" or null that slipped past a strict `=== 0` test would end their loop before the first item.
	$limit = is_scalar($limit) ? (int) $limit : 0;
	if($limit === 0){
		// A counter that starts at 0 and only grows never reaches -1, i.e. no limit.
		$limit = -1;
	}

	if(stripos($rss_url, 'gdata.youtube.com') !== false || stripos($rss_url, 'youtube.com/user/') !== false){
		return $this->scrape_youtube_channel($rss_url, $limit);
	}
	if(stripos($rss_url, 'list=') !== false){
		return $this->scrape_youtube_playlist($rss_url, $limit);
	}
	return __('YouTube source could not be determined.', 'jig_td');
}
/**
 * Scrapes a public YouTube playlist page into feed-like items.
 *
 * @param string $rss_url Any URL that carries a "list=<playlist id>" parameter.
 * @param int    $limit   Stop after this many items; a value the counter never reaches (e.g. -1) means no limit.
 * @return array|string JIGstdClass items (possibly none), or a translated error message when no playlist ID is found.
 */
function scrape_youtube_playlist($rss_url, $limit){
	// "+" instead of "*": an empty "list=" is reported as an error rather than requested.
	if (!preg_match('/(?<=list=)[^&#?\s]+/im', $rss_url, $regs)) {
		return __('YouTube playlist ID could not be determined.', 'jig_td');
	}
	$url = "https://www.youtube.com/playlist?list=".$regs[0]."&hl=en";

	// Generated links use http or https depending on is_ssl().
	$host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

	$rss_items = array();

	$html = $this->file_get_contents_curl($url);
	// Nothing usable came back (presumably a failed request): DOMDocument::loadHTML()
	// must not be handed an empty document, and "@" would not silence the resulting error.
	if (!is_string($html) || trim($html) === '') {
		return $rss_items;
	}
	// Turn non-ASCII characters into entities so the parser does not misread the UTF-8 markup.
	$html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
	$doc = new DOMDocument();
	@$doc->loadHTML($html);
	$xpath = new DOMXpath($doc);

	$videos = $xpath->query('//tr[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile ")]');
	$count = 0;
	if (!empty($videos)) {
		foreach ($videos as $video) {
			if($count == $limit){
				break;
			}
			$anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]',$video)->item(0);
			$image = $xpath->query('.//img',$video)->item(0);
			// item(0) is null when the row lacks the expected markup; skip it instead of dereferencing null.
			if ($anchor === null || $image === null) {
				continue;
			}

			$title = trim($anchor->nodeValue);
			// Placeholder rows for unavailable videos are dropped and do not count towards the limit.
			if($title == "[Private Video]" || $title == "[Deleted Video]"){
				continue;
			}

			$ownerAnchor = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " pl-video-owner ")]/a',$video)->item(0);

			$rss_item = new JIGstdClass();
			$rss_item->get_title = $title;
			$rss_item->get_description = '';
			if ($ownerAnchor !== null) {
				// The owner name and link come from the scraped page, so escape them before building HTML.
				$owner_href = htmlspecialchars($host.$ownerAnchor->getAttribute('href'), ENT_QUOTES, 'UTF-8');
				$owner_name = htmlspecialchars(trim($ownerAnchor->nodeValue), ENT_QUOTES, 'UTF-8');
				$rss_item->get_description = __('by','jig_td').' <a href="'.$owner_href.'" target="_blank">'.$owner_name.'</a>';
			}
			$rss_item->get_date = __("No date available.","jig_td");
			$rss_item->get_enclosures = array();
			$rss_item->get_enclosures[] = new JIGstdClass();
			// Swap the default-size thumbnail file name for the maxres one.
			$rss_item->get_enclosures[0]->get_link = str_replace('/default.jpg', '/maxresdefault.jpg', $image->getAttribute('data-thumb'));

			$rss_item->get_permalink = $host.$anchor->getAttribute('href');

			$rss_items[] = $rss_item;
			$count++;
		}
	}
	return $rss_items;
}


/**
 * Scrapes a YouTube user's "videos" page into feed-like items.
 *
 * Accepts either an old gdata feed URL such as
 * http://gdata.youtube.com/feeds/base/users/MAKO0MAKO0/uploads?max-results=50
 * or a youtube.com/user/<name> URL.
 *
 * @param string $rss_url URL the username is extracted from.
 * @param int    $limit   Stop after this many items; a value the counter never reaches (e.g. -1) means no limit.
 * @return array|string JIGstdClass items (possibly none), or a translated error message when no username is found.
 */
function scrape_youtube_channel($rss_url, $limit){
	// The username is a single path segment: stop at the next "/", and for the
	// /user/ form also at "?" or "#", so trailing path or query text is not captured.
	if (preg_match('%(?<=/feeds/base/users/)[^/]+(?=/)%im', $rss_url, $regs)
		|| preg_match('%(?<=youtube\.com/user/)[^/?#\s]+%im', $rss_url, $regs)) {
		$url = "https://www.youtube.com/user/".$regs[0]."/videos?flow=list&sort=dd&hl=en";
	}else{
		return __('YouTube username could not be determined.', 'jig_td');
	}

	// Generated links use http or https depending on is_ssl().
	$host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

	$rss_items = array();

	$html = $this->file_get_contents_curl($url);
	// Nothing usable came back (presumably a failed request): DOMDocument::loadHTML()
	// must not be handed an empty document, and "@" would not silence the resulting error.
	if (!is_string($html) || trim($html) === '') {
		return $rss_items;
	}
	// Turn non-ASCII characters into entities so the parser does not misread the UTF-8 markup.
	$html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
	$doc = new DOMDocument();
	@$doc->loadHTML($html);
	$xpath = new DOMXpath($doc);

	$videos = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " feed-item-container ")]');
	$count = 0;

	if (!empty($videos)) {
		foreach ($videos as $video) {
			if($count == $limit){
				break;
			}
			$anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]',$video)->item(0);
			$image = $xpath->query('.//img',$video)->item(0);
			// item(0) is null when the entry lacks the expected markup; skip it instead of dereferencing null.
			if ($anchor === null || $image === null) {
				continue;
			}

			// Description and date are optional extras: fall back instead of failing when their nodes are absent.
			$descriptionNode = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-description ")]',$video)->item(0);
			$dateNode = $xpath->query('.//ul[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-meta-info ")]/*[1]',$video)->item(0);

			$rss_item = new JIGstdClass();
			$rss_item->get_title = trim($anchor->getAttribute('title'));
			$rss_item->get_description = $descriptionNode !== null ? trim($descriptionNode->nodeValue) : '';
			$rss_item->get_date = $dateNode !== null ? $dateNode->nodeValue : __("No date available.","jig_td");
			$rss_item->get_enclosures = array();
			$rss_item->get_enclosures[] = new JIGstdClass();
			// Swap the medium-quality thumbnail file name for the maxres one.
			$rss_item->get_enclosures[0]->get_link = str_replace('/mqdefault.jpg', '/maxresdefault.jpg', $image->getAttribute('data-thumb'));

			$rss_item->get_permalink = $host.$anchor->getAttribute('href');

			$rss_items[] = $rss_item;
			$count++;
		}
	}
	return $rss_items;

}

Revision: 69125
at April 23, 2015 07:04 by Firsh


Initial Code
/**
 * Picks the scraper that matches the kind of YouTube URL given.
 *
 * @param string $rss_url A gdata.youtube.com feed URL, a youtube.com/user/... URL, or any URL containing "list=".
 * @param mixed  $limit   Maximum number of items; 0 in any form (0, "0", null, non-numeric) means no limit.
 * @return mixed The chosen scraper's return value, or a translated error string when the URL matches neither kind.
 */
function scrape_youtube($rss_url, $limit){
			// Normalise before testing for zero: the scrapers stop when their counter loosely equals $limit,
			// so a "0" or null that slipped past a strict `=== 0` test would end their loop before the first item.
			$limit = is_scalar($limit) ? (int) $limit : 0;
			if($limit === 0){
				// A counter that starts at 0 and only grows never reaches -1, i.e. no limit.
				$limit = -1;
			}

			if(stripos($rss_url, 'gdata.youtube.com') !== false || stripos($rss_url, 'youtube.com/user/') !== false){
				return $this->scrape_youtube_channel($rss_url, $limit);
			}
			if(stripos($rss_url, 'list=') !== false){
				return $this->scrape_youtube_playlist($rss_url, $limit);
			}
			return __('YouTube source could not be determined.', 'jig_td');
		}
		/**
		 * Scrapes a public YouTube playlist page into feed-like items.
		 *
		 * @param string $rss_url Any URL that carries a "list=<playlist id>" parameter.
		 * @param int    $limit   Stop after this many items; a value the counter never reaches (e.g. -1) means no limit.
		 * @return array|string JIGstdClass items (possibly none), or a translated error message when no playlist ID is found.
		 */
		function scrape_youtube_playlist($rss_url, $limit){
			// "+" instead of "*": an empty "list=" is reported as an error rather than requested.
			if (!preg_match('/(?<=list=)[^&#?\s]+/im', $rss_url, $regs)) {
				return __('YouTube playlist ID could not be determined.', 'jig_td');
			}
			$url = "https://www.youtube.com/playlist?list=".$regs[0]."&hl=en";

			// Generated links use http or https depending on is_ssl().
			$host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

			$rss_items = array();

			$html = $this->file_get_contents_curl($url);
			// Nothing usable came back (presumably a failed request): DOMDocument::loadHTML()
			// must not be handed an empty document, and "@" would not silence the resulting error.
			if (!is_string($html) || trim($html) === '') {
				return $rss_items;
			}
			// Turn non-ASCII characters into entities so the parser does not misread the UTF-8 markup.
			$html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
			$doc = new DOMDocument();
			@$doc->loadHTML($html);
			$xpath = new DOMXpath($doc);

			$videos = $xpath->query('//tr[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile ")]');
			$count = 0;
			if (!empty($videos)) {
				foreach ($videos as $video) {
					if($count == $limit){
						break;
					}
					$anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]',$video)->item(0);
					$image = $xpath->query('.//img',$video)->item(0);
					// item(0) is null when the row lacks the expected markup; skip it instead of dereferencing null.
					if ($anchor === null || $image === null) {
						continue;
					}

					$title = trim($anchor->nodeValue);
					// Placeholder rows for unavailable videos are dropped and do not count towards the limit.
					if($title == "[Private Video]" || $title == "[Deleted Video]"){
						continue;
					}

					$ownerAnchor = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " pl-video-owner ")]/a',$video)->item(0);

					$rss_item = new JIGstdClass();
					$rss_item->get_title = $title;
					$rss_item->get_description = '';
					if ($ownerAnchor !== null) {
						// The owner name and link come from the scraped page, so escape them before building HTML.
						$owner_href = htmlspecialchars($host.$ownerAnchor->getAttribute('href'), ENT_QUOTES, 'UTF-8');
						$owner_name = htmlspecialchars(trim($ownerAnchor->nodeValue), ENT_QUOTES, 'UTF-8');
						$rss_item->get_description = __('by','jig_td').' <a href="'.$owner_href.'" target="_blank">'.$owner_name.'</a>';
					}
					$rss_item->get_date = __("No date available.","jig_td");
					$rss_item->get_enclosures = array();
					$rss_item->get_enclosures[] = new JIGstdClass();
					// Swap the default-size thumbnail file name for the maxres one.
					$rss_item->get_enclosures[0]->get_link = str_replace('/default.jpg', '/maxresdefault.jpg', $image->getAttribute('data-thumb'));

					$rss_item->get_permalink = $host.$anchor->getAttribute('href');

					$rss_items[] = $rss_item;
					$count++;
				}
			}
			return $rss_items;
		}


		/**
		 * Scrapes a YouTube user's "videos" page into feed-like items.
		 *
		 * Accepts either an old gdata feed URL such as
		 * http://gdata.youtube.com/feeds/base/users/MAKO0MAKO0/uploads?max-results=50
		 * or a youtube.com/user/<name> URL.
		 *
		 * @param string $rss_url URL the username is extracted from.
		 * @param int    $limit   Stop after this many items; a value the counter never reaches (e.g. -1) means no limit.
		 * @return array|string JIGstdClass items (possibly none), or a translated error message when no username is found.
		 */
		function scrape_youtube_channel($rss_url, $limit){
			// The username is a single path segment: stop at the next "/", and for the
			// /user/ form also at "?" or "#", so trailing path or query text is not captured.
			if (preg_match('%(?<=/feeds/base/users/)[^/]+(?=/)%im', $rss_url, $regs)
				|| preg_match('%(?<=youtube\.com/user/)[^/?#\s]+%im', $rss_url, $regs)) {
				$url = "https://www.youtube.com/user/".$regs[0]."/videos?flow=list&sort=dd&hl=en";
			}else{
				return __('YouTube username could not be determined.', 'jig_td');
			}

			// Generated links use http or https depending on is_ssl().
			$host = !is_ssl() ? 'http://www.youtube.com' : 'https://www.youtube.com';

			$rss_items = array();

			$html = $this->file_get_contents_curl($url);
			// Nothing usable came back (presumably a failed request): DOMDocument::loadHTML()
			// must not be handed an empty document, and "@" would not silence the resulting error.
			if (!is_string($html) || trim($html) === '') {
				return $rss_items;
			}
			// Turn non-ASCII characters into entities so the parser does not misread the UTF-8 markup.
			$html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
			$doc = new DOMDocument();
			@$doc->loadHTML($html);
			$xpath = new DOMXpath($doc);

			$videos = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " feed-item-container ")]');
			$count = 0;

			if (!empty($videos)) {
				foreach ($videos as $video) {
					if($count == $limit){
						break;
					}
					$anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]',$video)->item(0);
					$image = $xpath->query('.//img',$video)->item(0);
					// item(0) is null when the entry lacks the expected markup; skip it instead of dereferencing null.
					if ($anchor === null || $image === null) {
						continue;
					}

					// Description and date are optional extras: fall back instead of failing when their nodes are absent.
					$descriptionNode = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-description ")]',$video)->item(0);
					$dateNode = $xpath->query('.//ul[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-meta-info ")]/*[1]',$video)->item(0);

					$rss_item = new JIGstdClass();
					$rss_item->get_title = trim($anchor->getAttribute('title'));
					$rss_item->get_description = $descriptionNode !== null ? trim($descriptionNode->nodeValue) : '';
					$rss_item->get_date = $dateNode !== null ? $dateNode->nodeValue : __("No date available.","jig_td");
					$rss_item->get_enclosures = array();
					$rss_item->get_enclosures[] = new JIGstdClass();
					// Swap the medium-quality thumbnail file name for the maxres one.
					$rss_item->get_enclosures[0]->get_link = str_replace('/mqdefault.jpg', '/maxresdefault.jpg', $image->getAttribute('data-thumb'));

					$rss_item->get_permalink = $host.$anchor->getAttribute('href');

					$rss_items[] = $rss_item;
					$count++;
				}
			}
			return $rss_items;

		}

Initial URL
http://justifiedgrid.com/

Initial Description
http://stackoverflow.com/questions/29752447/how-to-get-a-youtube-channel-rss-feed-after-2015-april-20-without-v3-api

Initial Title
Improved YouTube scrapers

Initial Tags


Initial Language
PHP