JavaScript Search Indexer - JavaScript Snipplr Social Repository

Revision: 56217

at March 16, 2012 06:58 by tpryan

Updated Code

importPackage(java.io);
load("./rhino/lib.js");

// Driver: arguments[0] is taken as the root folder of the site to index.
var rootPath = arguments[0];
var startPath = new File(rootPath);

// List everything under the root, then keep only non-directory entries whose
// extension is "html".
var fileList = filterDirectoryList(directoryList(startPath.getCanonicalPath()), true, "html");
var searchIndex = indexFiles(fileList, rootPath);

// Two copies are written: a pretty-printed one at a path relative to the
// working directory, and a compact one underneath the site root.
serializeToDisk(searchIndex, "./search/searchindex.js", true);
serializeToDisk(searchIndex, rootPath + "/search/searchindex.js");

/**
 * Builds the search index from a list of file descriptors.
 *
 * A file is indexed when its path contains "/articles/" or "/tags/" and does
 * not contain "/bad/".
 *
 * @param {Array} fileList   Entries exposing a 'path' property.
 * @param {String} rootPath  Handed through unchanged to indexContentPage.
 * @returns {Array} One record per indexed page, with 'filepath' and
 *                  'contents' removed and 'lastModified' turned into a Date.
 */
function indexFiles(fileList, rootPath){
	var searchIndex = [];
	for (var i = 0; i < fileList.length; i++){
		var fileToRead = fileList[i]['path'];
		var pathText = String(fileToRead);

		// Test each marker on its own. Summing the two indexOf() results (as an
		// earlier version did) misclassifies paths where a marker sits at
		// index 0 or 1, because -1 for the missing marker cancels it out.
		var isContent = pathText.indexOf("/articles/") >= 0 || pathText.indexOf("/tags/") >= 0;
		var isBad = pathText.indexOf("/bad/") >= 0;
		if (!isContent || isBad){
			continue;
		}

		var resultSet = indexContentPage(fileToRead, rootPath);

		//TODO: Go back and make this stuff optional in indexContentPage,
		//Adding stuff in the indexer required me to kill it from search json.
		delete resultSet['filepath'];
		delete resultSet['contents'];
		resultSet['lastModified'] = new Date(resultSet['lastModified']);

		searchIndex.push(resultSet);
	}
	return searchIndex;
}

/**
 * Writes `content` to `location` as JSON.
 *
 * @param {*} content           Value to serialise with JSON.stringify.
 * @param {String} location     Path of the file to write.
 * @param {Boolean} [prettyify] When truthy the JSON is indented by 3 spaces;
 *                              otherwise (or when omitted) it is compact.
 */
function serializeToDisk(content, location, prettyify){
	// Serialise before opening the file, so a JSON.stringify failure never
	// reaches the point where the target file is opened for writing.
	var json = prettyify ? JSON.stringify(content, null, 3) : JSON.stringify(content);

	var out = new BufferedWriter(new FileWriter(location));
	try {
		out.write(json);
	} finally {
		// Always release the file handle, even if the write itself fails.
		out.close();
	}
}


importPackage(java.io);

/**
 * Recursively lists every non-directory entry below `startPath`.
 *
 * @param {String} startPath Folder to walk.
 * @returns {Array} One descriptor per file with the keys 'path', 'name',
 *                  'parent', 'hidden', 'dir', 'lastModified' and 'ext'
 *                  (text after the last '.' in the name, or '' if none).
 *                  Empty when `startPath` cannot be listed.
 */
function directoryList(startPath){
	var results = [];
	var list = new File(startPath).list();

	// java.io.File.list() yields null when the path is not a directory or an
	// I/O error occurs; treat that as "nothing to list" instead of crashing
	// on list.length.
	if (list == null){
		return results;
	}

	for (var i = 0; i < list.length; i++){
		var child = new File(startPath + "/" + list[i]);

		if (child.isDirectory()){
			// Descend and merge the sub-folder's files into this level's result.
			results = results.concat(directoryList(child.getCanonicalPath()));
			continue;
		}

		var fileArray = {};
		fileArray['path'] = child.getCanonicalPath();
		fileArray['name'] = child.getName();
		fileArray['parent'] = child.getParent();
		fileArray['hidden'] = child.isHidden();
		fileArray['dir'] = child.isDirectory();
		fileArray['lastModified'] = child.lastModified();

		var pos = fileArray['name'].lastIndexOf('.');
		fileArray['ext'] = pos < 0 ? '' : fileArray['name'].substring(pos + 1);

		results.push(fileArray);
	}
	return results;
}

/**
 * Returns a new array holding the entries of `directoryList` that pass every
 * active filter.
 *
 * @param {Array} directoryList      File descriptors with 'dir', 'ext' and 'path'.
 * @param {Boolean} [filesOnly]      When truthy, entries flagged as 'dir' are dropped. Defaults to false.
 * @param {String} [extension]       When non-empty, only entries whose 'ext' equals it are kept. Defaults to ''.
 * @param {String} [folderToTarget]  When non-empty, only entries whose 'path' contains it are kept. Defaults to ''.
 * @returns {Array} The surviving entries, in their original order.
 */
function filterDirectoryList(directoryList, filesOnly, extension, folderToTarget){
	if (typeof filesOnly === 'undefined'){ filesOnly = false; }
	if (typeof extension === 'undefined'){ extension = ''; }
	if (typeof folderToTarget === 'undefined'){ folderToTarget = ''; }

	return directoryList.filter(function(entry){
		// Each clause is a rejection rule; an entry survives only if none fires.
		// NOTE(review): the extension check stays a loose comparison on purpose;
		// 'ext' may not be a primitive JS string under Rhino - confirm before tightening.
		return !(filesOnly && entry.dir)
			&& !(extension.length > 0 && (entry.ext != extension))
			&& !(folderToTarget.length > 0 && entry.path.indexOf(folderToTarget) < 0);
	});
}

/**
 * Debug helper: prints the 'path' and then the 'lastModified' value of every
 * entry in `fileList`, one value per print() call.
 *
 * @param {Array} fileList File descriptors to dump.
 */
function displayDirectoryList(fileList){
	var index = 0;
	while (index < fileList.length){
		var entry = fileList[index];
		print(entry['path']);
		print(entry['lastModified']);
		index += 1;
	}
}

/**
 * Debug helper: prints the 'url' and then the 'title' of every record in
 * `indexList`, one value per print() call.
 *
 * @param {Array} indexList Search-index records to dump.
 */
function displayIndexList(indexList){
	var index = 0;
	while (index < indexList.length){
		var record = indexList[index];
		print(record['url']);
		print(record['title']);
		index += 1;
	}
}

/**
 * Derives a page URL from a file path by applying a fixed series of
 * replacements, in order: drop `rootPath`, drop "index.html", and collapse
 * the "/tags/" and "/articles/" folder names to "/".
 *
 * @param {String} filePath Path of the page on disk.
 * @param {String} rootPath Site root to remove from the path.
 * @returns {String} The rewritten path.
 */
function createURLPath(filePath, rootPath){
	var substitutions = [
		[rootPath, ""],
		["index.html", ""],
		["/tags/", "/"],
		["/articles/", "/"]
	];

	var url = filePath;
	for (var i = 0; i < substitutions.length; i++){
		url = url.replace(substitutions[i][0], substitutions[i][1]);
	}
	return url;
}

/**
 * Returns the markup between the first opening `<tag ...>` and the next
 * `</tag` in `html`.
 *
 * The opening tag may carry attributes, and a longer tag name that merely
 * starts with `tag` (e.g. "<pre" when looking for "p") is not accepted as a
 * match. Matching is case-sensitive.
 *
 * @param {String} html  Markup to search.
 * @param {String} [tag] Tag name without angle brackets; defaults to "p".
 * @returns {String} The inner markup, or "" when no complete opening/closing
 *                   pair is found.
 */
function grabBettwenTags(html, tag){
	if (typeof tag === 'undefined'){
		tag = "p";
	}
	var text = String(html);
	var opener = "<" + tag;
	var searchFrom = 0;

	while (true){
		var openAt = text.indexOf(opener, searchFrom);
		if (openAt < 0){
			return "";
		}

		// The character after the tag name tells a real "<p>" / "<p class=..>"
		// apart from a longer name such as "<pre>".
		var afterName = text.charAt(openAt + opener.length);
		if (afterName === ">" || /\s/.test(afterName)){
			// Skip past any attributes to the end of the opening tag.
			var openEnd = text.indexOf(">", openAt + opener.length);
			if (openEnd < 0){
				return "";
			}
			var contentStart = openEnd + 1;
			var closeAt = text.indexOf("</" + tag, contentStart);
			if (closeAt < 0){
				return "";
			}
			return text.slice(contentStart, closeAt);
		}

		searchFrom = openAt + opener.length;
	}
}

/**
 * Reads one HTML page and extracts the fields stored in the search index.
 *
 * @param {String} filePath Path of the page to read.
 * @param {String} rootPath Site root, forwarded to createURLPath.
 * @returns {Object} Record with the keys, in this order: 'filepath', 'url',
 *                   'title' (inner markup of the first h1), 'titleContents'
 *                   (title with tags stripped), 'rawContents' (whole page with
 *                   tags stripped), 'summary' (inner markup of the first p),
 *                   'lastModified' (value of File.lastModified()) and
 *                   'contents' (the unmodified page text).
 */
function indexContentPage(filePath, rootPath){
	// Matches any "<...>" run, including ones spanning line breaks.
	var anyTag = /<(?:.|\n)*?>/gm;

	var fileContents = readFile(filePath);
	var resultSet = {};
	resultSet['filepath'] = String(filePath);
	resultSet['url'] = String(createURLPath(filePath, rootPath));
	resultSet['title'] = String(grabBettwenTags(fileContents, "h1"));
	resultSet['titleContents'] = resultSet['title'].replace(anyTag, '');
	resultSet['rawContents'] = String(fileContents).replace(anyTag, '');
	resultSet['summary'] = String(grabBettwenTags(fileContents, "p"));
	// Construct the File explicitly with `new`, as everywhere else in this file.
	resultSet['lastModified'] = new File(filePath).lastModified();
	resultSet['contents'] = fileContents;
	return resultSet;
}

Revision: 56216

at March 16, 2012 06:55 by tpryan

Initial Code

importPackage(java.io);
load("./rhino/lib.js");

// Driver: arguments[0] is taken as the root folder of the site to index.
var rootPath = arguments[0];
var startPath = new File(rootPath);

// List everything under the root, then keep only non-directory entries whose
// extension is "html".
var fileList = filterDirectoryList(directoryList(startPath.getCanonicalPath()), true, "html");
var searchIndex = indexFiles(fileList, rootPath);

// Two copies are written: a pretty-printed one at a path relative to the
// working directory, and a compact one underneath the site root.
serializeToDisk(searchIndex, "./search/searchindex.js", true);
serializeToDisk(searchIndex, rootPath + "/search/searchindex.js");

/**
 * Builds the search index from a list of file descriptors.
 *
 * A file is indexed when its path contains "/articles/" or "/tags/" and does
 * not contain "/bad/".
 *
 * @param {Array} fileList   Entries exposing a 'path' property.
 * @param {String} rootPath  Handed through unchanged to indexContentPage.
 * @returns {Array} One record per indexed page, with 'filepath' and
 *                  'contents' removed and 'lastModified' turned into a Date.
 */
function indexFiles(fileList, rootPath){
	var searchIndex = [];
	for (var i = 0; i < fileList.length; i++){
		var fileToRead = fileList[i]['path'];
		var pathText = String(fileToRead);

		// Test each marker on its own. Summing the two indexOf() results (as an
		// earlier version did) misclassifies paths where a marker sits at
		// index 0 or 1, because -1 for the missing marker cancels it out.
		var isContent = pathText.indexOf("/articles/") >= 0 || pathText.indexOf("/tags/") >= 0;
		var isBad = pathText.indexOf("/bad/") >= 0;
		if (!isContent || isBad){
			continue;
		}

		var resultSet = indexContentPage(fileToRead, rootPath);

		//TODO: Go back and make this stuff optional in indexContentPage,
		//Adding stuff in the indexer required me to kill it from search json.
		delete resultSet['filepath'];
		delete resultSet['contents'];
		resultSet['lastModified'] = new Date(resultSet['lastModified']);

		searchIndex.push(resultSet);
	}
	return searchIndex;
}

/**
 * Writes `content` to `location` as JSON.
 *
 * @param {*} content           Value to serialise with JSON.stringify.
 * @param {String} location     Path of the file to write.
 * @param {Boolean} [prettyify] When truthy the JSON is indented by 3 spaces;
 *                              otherwise (or when omitted) it is compact.
 */
function serializeToDisk(content, location, prettyify){
	// Serialise before opening the file, so a JSON.stringify failure never
	// reaches the point where the target file is opened for writing.
	var json = prettyify ? JSON.stringify(content, null, 3) : JSON.stringify(content);

	var out = new BufferedWriter(new FileWriter(location));
	try {
		out.write(json);
	} finally {
		// Always release the file handle, even if the write itself fails.
		out.close();
	}
}

Initial URL

Initial Description

A JavaScript search indexer for whichElement.com

Initial Title

JavaScript Search Indexer

Initial Tags

Initial Language

JavaScript

Choose a language for easy browsing: