Revision: 56217
Updated Code
at March 16, 2012 06:58 by tpryan
Updated Code
importPackage(java.io);
load("./rhino/lib.js");
// Entry point: the first command-line argument is the root folder of the site to index.
var rootPath = arguments[0];
var startPath = new File(rootPath);

// Walk the tree under the root, keeping only non-directory entries whose extension is "html".
var fileList = filterDirectoryList(
    directoryList(startPath.getCanonicalPath()),
    true,
    "html"
);

// Build one index record per content page.
var searchIndex = indexFiles(fileList, rootPath);

// Write a pretty-printed copy relative to the working directory,
// and a compact copy under the site root.
serializeToDisk(searchIndex, "./search/searchindex.js", true);
serializeToDisk(searchIndex, rootPath + "/search/searchindex.js");
/**
 * Builds the search index from a list of file descriptors.
 *
 * A file is indexed when its path contains "/articles/" or "/tags/" and does
 * not contain "/bad/". Each record comes from indexContentPage(), with the
 * bulky 'filepath' and 'contents' fields removed and 'lastModified' converted
 * to a Date.
 *
 * @param {Array} fileList - Entries with a 'path' property (as produced by directoryList).
 * @param {string} rootPath - Site root, forwarded to indexContentPage.
 * @returns {Array} One index record per matching file, in input order.
 */
function indexFiles(fileList, rootPath){
    var searchIndex = [];
    for (var i = 0; i < fileList.length; i++){
        // Left untouched (may be a Rhino-wrapped Java string); indexOf works on both kinds.
        var fileToRead = fileList[i]['path'];

        // Test each marker on its own. Summing the two indexOf() results and
        // comparing with 0 misclassifies paths where a marker sits at index 0 or 1.
        var isContent = fileToRead.indexOf("/articles/") >= 0 ||
            fileToRead.indexOf("/tags/") >= 0;
        // indexOf() returns 0 for a match at the very start, so "not found" is < 0, not < 1.
        var isBad = fileToRead.indexOf("/bad/") >= 0;
        if (!isContent || isBad){
            continue;
        }

        var resultSet = indexContentPage(fileToRead, rootPath);
        //TODO: Go back and make this stuff optional in indexContentPage,
        //Adding stuff in the indexer required me to kill it from search json.
        delete resultSet['filepath'];
        delete resultSet['contents'];
        resultSet['lastModified'] = new Date(resultSet['lastModified']);
        searchIndex.push(resultSet);
    }
    return searchIndex;
}
/**
 * Serializes a value as JSON and writes it to a file.
 *
 * @param {*} content - Value handed to JSON.stringify.
 * @param {string} location - Path of the file to write (passed to java.io.FileWriter).
 * @param {boolean} [prettyify=false] - When truthy, the JSON is indented by 3 spaces.
 */
function serializeToDisk(content, location, prettyify){
    // Serialize before opening the file, so a stringify failure (e.g. a circular
    // structure) cannot leave an opened, truncated file behind.
    var json = prettyify ? JSON.stringify(content, null, 3) : JSON.stringify(content);

    var out = new BufferedWriter(new FileWriter(location));
    try {
        out.write(json);
    } finally {
        // Always release the file handle, even when the write fails.
        out.close();
    }
}
importPackage(java.io);
/**
 * Recursively lists every non-directory entry beneath a folder.
 *
 * Each entry is described by an object with the properties 'path' (canonical
 * path), 'name', 'parent', 'hidden', 'dir', 'lastModified' and 'ext' (the text
 * after the last '.' in the name, or '' when the name has no '.').
 *
 * @param {string} startPath - Folder to walk.
 * @returns {Array} Descriptors for all files found; empty when startPath
 *     cannot be listed.
 */
function directoryList(startPath){
    var results = [];
    var fileObject = new File(startPath);
    var list = fileObject.list();

    // java.io.File.list() returns null when the path is not a directory or an
    // I/O error occurs; treat that as "nothing to list" instead of crashing.
    if (!list){
        return results;
    }

    for (var i = 0; i < list.length; i++) {
        var child = new File(startPath + "/" + list[i]);
        if (child.isDirectory()){
            // Descend and splice the sub-tree's files into this level's results.
            results = results.concat(directoryList(child.getCanonicalPath()));
            continue;
        }

        var fileArray = {};
        fileArray['path'] = child.getCanonicalPath();
        fileArray['name'] = child.getName();
        fileArray['parent'] = child.getParent();
        fileArray['hidden'] = child.isHidden();
        fileArray['dir'] = child.isDirectory();
        fileArray['lastModified'] = child.lastModified();

        var pos = fileArray['name'].lastIndexOf('.');
        fileArray['ext'] = pos < 0 ? '' : fileArray['name'].substring(pos + 1);

        results.push(fileArray);
    }
    return results;
}
/**
 * Narrows a file listing by up to three optional criteria.
 *
 * @param {Array} directoryList - Entries with 'dir', 'ext' and 'path' properties.
 * @param {boolean} [filesOnly=false] - Drop entries whose 'dir' flag is truthy.
 * @param {string} [extension=''] - When non-empty, keep only entries with this 'ext'.
 * @param {string} [folderToTarget=''] - When non-empty, keep only entries whose
 *     'path' contains this text.
 * @returns {Array} The entries that pass every active criterion, in input order.
 */
function filterDirectoryList(directoryList, filesOnly, extension, folderToTarget){
    if (typeof filesOnly === 'undefined'){
        filesOnly = false;
    }
    if (typeof extension === 'undefined'){
        extension = '';
    }
    if (typeof folderToTarget === 'undefined'){
        folderToTarget = '';
    }

    // True when the entry fails any active criterion. The checks short-circuit
    // in the order: directory flag, extension, folder.
    function isRejected(entry){
        // Loose comparison on purpose: 'ext' may not be a primitive JS string.
        return (filesOnly && entry.dir) ||
            (extension.length > 0 && (entry.ext != extension)) ||
            (folderToTarget.length > 0 && entry.path.indexOf(folderToTarget) < 0);
    }

    var kept = [];
    var index = 0;
    while (index < directoryList.length) {
        var candidate = directoryList[index];
        index += 1;
        if (!isRejected(candidate)){
            kept.push(candidate);
        }
    }
    return kept;
}
/**
 * Debug helper: prints the path and last-modified value of every entry,
 * one value per print() call.
 *
 * @param {Array} fileList - Entries with 'path' and 'lastModified' properties.
 */
function displayDirectoryList(fileList){
    var position = 0;
    while (position < fileList.length) {
        var entry = fileList[position];
        print(entry.path);
        print(entry.lastModified);
        position += 1;
    }
}
/**
 * Debug helper: prints the url and title of every index record,
 * one value per print() call.
 *
 * @param {Array} indexList - Records with 'url' and 'title' properties.
 */
function displayIndexList(indexList){
    var position = 0;
    while (position < indexList.length) {
        var record = indexList[position];
        print(record.url);
        print(record.title);
        position += 1;
    }
}
/**
 * Turns a file path into a site URL path.
 *
 * Applies four replace() calls in order: removes rootPath, removes
 * "index.html", then turns "/tags/" and "/articles/" into "/".
 *
 * @param {string} filePath - Path of the page on disk.
 * @param {string} rootPath - Site root to strip from the path.
 * @returns {string} The resulting URL path.
 */
function createURLPath(filePath, rootPath){
    // [search, replacement] pairs, applied top to bottom.
    var substitutions = [
        [rootPath, ""],
        ["index.html", ""],
        ["/tags/", "/"],
        ["/articles/", "/"]
    ];

    var urlPath = filePath;
    for (var step = 0; step < substitutions.length; step++) {
        urlPath = urlPath.replace(substitutions[step][0], substitutions[step][1]);
    }
    return urlPath;
}
/**
 * Returns the markup between the first opening <tag ...> and the next </tag.
 *
 * The opening tag may carry attributes. A longer tag name that merely starts
 * with the requested one (e.g. <pre> when looking for <p>) is skipped.
 *
 * @param {string} html - Markup to search.
 * @param {string} [tag="p"] - Tag name, without angle brackets.
 * @returns {string} The inner markup, or "" when no complete tag pair is found.
 */
function grabBettwenTags(html, tag){
    var tag = typeof tag !== 'undefined' ? tag : "p";
    var source = String(html);
    var opener = "<" + tag;

    // Find the first occurrence of the opener that is the whole tag name:
    // it must be followed by '>', '/' or whitespace.
    var openAt = source.indexOf(opener);
    while (openAt >= 0) {
        var following = source.charAt(openAt + opener.length);
        if (following === ">" || following === "/" || /\s/.test(following)) {
            break;
        }
        openAt = source.indexOf(opener, openAt + opener.length);
    }
    if (openAt < 0) {
        return "";
    }

    // Content starts after the '>' that closes the opening tag, so attributes
    // never leak into the result.
    var openEnd = source.indexOf(">", openAt);
    if (openEnd < 0) {
        return "";
    }
    var start = openEnd + 1;

    var end = source.indexOf("</" + tag, start);
    if (end < 0) {
        return "";
    }
    return source.slice(start, end);
}
/**
 * Reads one HTML page and builds its search-index record.
 *
 * @param {string} filePath - Path of the page to read.
 * @param {string} rootPath - Site root, used to derive the page URL.
 * @returns {Object} Record with the properties 'filepath', 'url', 'title'
 *     (inner markup of the first h1), 'titleContents' (title with tags
 *     stripped), 'rawContents' (whole file with tags stripped), 'summary'
 *     (inner markup of the first p), 'lastModified' (value reported by
 *     java.io.File) and 'contents' (the unmodified file text).
 */
function indexContentPage(filePath, rootPath){
    // Matches any <...> tag, including ones that span line breaks.
    var tagPattern = /<(?:.|\n)*?>/gm;

    var fileContents = readFile(filePath);
    var resultSet = {};
    resultSet['filepath'] = String(filePath);
    resultSet['url'] = String(createURLPath(filePath, rootPath));
    resultSet['title'] = String(grabBettwenTags(fileContents, "h1"));
    resultSet['titleContents'] = resultSet['title'].replace(tagPattern, '');
    resultSet['rawContents'] = String(fileContents).replace(tagPattern, '');
    resultSet['summary'] = String(grabBettwenTags(fileContents, "p"));
    // Construct the File explicitly with `new`, like every other use in this file.
    resultSet['lastModified'] = new File(filePath).lastModified();
    resultSet['contents'] = fileContents;
    return resultSet;
}
Revision: 56216
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at March 16, 2012 06:55 by tpryan
Initial Code
importPackage(java.io);
load("./rhino/lib.js");
// Entry point: the first command-line argument is the root folder of the site to index.
var rootPath = arguments[0];
var startPath = new File(rootPath);

// Walk the tree under the root, keeping only non-directory entries whose extension is "html".
var fileList = filterDirectoryList(
    directoryList(startPath.getCanonicalPath()),
    true,
    "html"
);

// Build one index record per content page.
var searchIndex = indexFiles(fileList, rootPath);

// Write a pretty-printed copy relative to the working directory,
// and a compact copy under the site root.
serializeToDisk(searchIndex, "./search/searchindex.js", true);
serializeToDisk(searchIndex, rootPath + "/search/searchindex.js");
/**
 * Builds the search index from a list of file descriptors.
 *
 * A file is indexed when its path contains "/articles/" or "/tags/" and does
 * not contain "/bad/". Each record comes from indexContentPage(), with the
 * bulky 'filepath' and 'contents' fields removed and 'lastModified' converted
 * to a Date.
 *
 * @param {Array} fileList - Entries with a 'path' property.
 * @param {string} rootPath - Site root, forwarded to indexContentPage.
 * @returns {Array} One index record per matching file, in input order.
 */
function indexFiles(fileList, rootPath){
    var searchIndex = [];
    for (var i = 0; i < fileList.length; i++){
        // Left untouched (may be a Rhino-wrapped Java string); indexOf works on both kinds.
        var fileToRead = fileList[i]['path'];

        // Test each marker on its own. Summing the two indexOf() results and
        // comparing with 0 misclassifies paths where a marker sits at index 0 or 1.
        var isContent = fileToRead.indexOf("/articles/") >= 0 ||
            fileToRead.indexOf("/tags/") >= 0;
        // indexOf() returns 0 for a match at the very start, so "not found" is < 0, not < 1.
        var isBad = fileToRead.indexOf("/bad/") >= 0;
        if (!isContent || isBad){
            continue;
        }

        var resultSet = indexContentPage(fileToRead, rootPath);
        //TODO: Go back and make this stuff optional in indexContentPage,
        //Adding stuff in the indexer required me to kill it from search json.
        delete resultSet['filepath'];
        delete resultSet['contents'];
        resultSet['lastModified'] = new Date(resultSet['lastModified']);
        searchIndex.push(resultSet);
    }
    return searchIndex;
}
/**
 * Serializes a value as JSON and writes it to a file.
 *
 * @param {*} content - Value handed to JSON.stringify.
 * @param {string} location - Path of the file to write (passed to java.io.FileWriter).
 * @param {boolean} [prettyify=false] - When truthy, the JSON is indented by 3 spaces.
 */
function serializeToDisk(content, location, prettyify){
    // Serialize before opening the file, so a stringify failure (e.g. a circular
    // structure) cannot leave an opened, truncated file behind.
    var json = prettyify ? JSON.stringify(content, null, 3) : JSON.stringify(content);

    var out = new BufferedWriter(new FileWriter(location));
    try {
        out.write(json);
    } finally {
        // Always release the file handle, even when the write fails.
        out.close();
    }
}
Initial URL
Initial Description
A JavaScript search indexer for whichElement.com
Initial Title
JavaScript Search Indexer
Initial Tags
Initial Language
JavaScript