/ Published in: JavaScript

A JavaScript search indexer for whichElement.com
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
/*
 * Search indexer for a static site, intended to run in the Rhino shell.
 *
 * Usage (Rhino): pass the site root directory as the first script argument.
 * The script walks the root, indexes every ".html" page found under an
 * "/articles/" or "/tags/" folder (and not under "/bad/"), and writes the
 * resulting JSON index to "./search/searchindex.js" (pretty-printed) and to
 * "<root>/search/searchindex.js" (compact).
 */

// Only run the indexer when the Java bridge is available (Rhino exposes the
// top-level `java` package object). This keeps the helper functions below
// loadable in other JavaScript hosts without triggering any file I/O.
if (typeof java !== "undefined") {
    importPackage(java.io);
    load("./rhino/lib.js");

    var rootPath = arguments[0];
    if (typeof rootPath === "undefined") {
        throw new Error("Missing argument: path to the site root directory");
    }

    // Every indexed file path is canonical, so the root used for building
    // URLs must be canonical as well; otherwise a relative root or a trailing
    // slash would leave the root prefix (or a broken path) in the URL.
    var startPath = new File(rootPath);
    var canonicalRoot = String(startPath.getCanonicalPath());

    var fileList = filterDirectoryList(directoryList(canonicalRoot), true, "html");
    var searchIndex = indexFiles(fileList, canonicalRoot);

    serializeToDisk(searchIndex, "./search/searchindex.js", true);
    serializeToDisk(searchIndex, canonicalRoot + "/search/searchindex.js");
}

/**
 * Builds the search index for a list of files.
 *
 * A file is indexed when its path contains "/articles/" or "/tags/" and does
 * not contain "/bad/". The "filepath" and "contents" fields produced by
 * indexContentPage are removed, and "lastModified" is converted to a Date.
 *
 * @param {Array} fileList - entries with a "path" property (see directoryList).
 * @param {string} rootPath - site root, stripped from paths to form URLs.
 * @returns {Array} one index record per accepted page.
 */
function indexFiles(fileList, rootPath){
    var searchIndex = [];
    for (var i = 0; i < fileList.length; i++){
        var fileToRead = String(fileList[i]['path']);

        // Test each marker on its own: adding indexOf results together (as an
        // earlier version did) misjudges matches at the start of the path.
        var isContent = fileToRead.indexOf("/articles/") !== -1 ||
                        fileToRead.indexOf("/tags/") !== -1;
        var isBad = fileToRead.indexOf("/bad/") !== -1;
        if (!isContent || isBad){
            continue;
        }

        var resultSet = indexContentPage(fileToRead, rootPath);
        // TODO: make these fields optional in indexContentPage; for now they
        // are dropped here so they do not end up in the search JSON.
        delete resultSet['filepath'];
        delete resultSet['contents'];
        resultSet['lastModified'] = new Date(resultSet['lastModified']);
        searchIndex.push(resultSet);
    }
    return searchIndex;
}

/**
 * Writes `content` as JSON to the file at `location`.
 *
 * @param {*} content - value passed to JSON.stringify.
 * @param {string} location - path of the file to write.
 * @param {boolean} [prettyify=false] - when truthy, indent the JSON by 3 spaces.
 */
function serializeToDisk(content, location, prettyify){
    var json = prettyify ? JSON.stringify(content, null, 3)
                         : JSON.stringify(content);
    var out = new BufferedWriter(new FileWriter(location));
    try {
        out.write(json);
    } finally {
        // Always release the file handle, even when the write fails.
        out.close();
    }
}

/**
 * Recursively lists all files (not directories) below `startPath`.
 *
 * @param {string} startPath - directory to walk.
 * @returns {Array} one record per file with the keys "path", "name",
 *     "parent", "hidden", "dir", "lastModified" and "ext" (extension without
 *     the dot, or '' when the name has no dot).
 */
function directoryList(startPath){
    var fileObject = new File(startPath);
    var list = fileObject.list();
    var results = [];

    // java.io.File.list() yields null when the path is not a directory or
    // cannot be read; treat that as "nothing to list" instead of failing.
    if (list == null){
        return results;
    }

    for (var i = 0; i < list.length; i++) {
        var child = new File(startPath + "/" + list[i]);
        if (child.isDirectory()){
            results = results.concat(directoryList(child.getCanonicalPath()));
            continue;
        }

        var fileArray = {};
        fileArray['path'] = child.getCanonicalPath();
        fileArray['name'] = child.getName();
        fileArray['parent'] = child.getParent();
        fileArray['hidden'] = child.isHidden();
        fileArray['dir'] = child.isDirectory();
        fileArray['lastModified'] = child.lastModified();

        var pos = fileArray['name'].lastIndexOf('.');
        fileArray['ext'] = pos < 0 ? '' : fileArray['name'].substring(pos + 1);
        results.push(fileArray);
    }
    return results;
}

/**
 * Filters a list produced by directoryList.
 *
 * @param {Array} directoryList - file records to filter.
 * @param {boolean} [filesOnly=false] - drop records whose "dir" flag is set.
 * @param {string} [extension=''] - when non-empty, keep only this extension.
 * @param {string} [folderToTarget=''] - when non-empty, keep only records
 *     whose path contains this text.
 * @returns {Array} the records that pass every active filter.
 */
function filterDirectoryList(directoryList, filesOnly, extension, folderToTarget){
    var onlyFiles = typeof filesOnly !== 'undefined' ? filesOnly : false;
    var wantedExt = typeof extension !== 'undefined' ? String(extension) : '';
    var wantedFolder = typeof folderToTarget !== 'undefined' ? String(folderToTarget) : '';

    var results = [];
    for (var i = 0; i < directoryList.length; i++) {
        var file = directoryList[i];
        if (onlyFiles && file.dir){
            continue;
        }
        // String() so that values coming from Java compare by content.
        if (wantedExt.length > 0 && String(file.ext) !== wantedExt){
            continue;
        }
        if (wantedFolder.length > 0 && String(file.path).indexOf(wantedFolder) < 0){
            continue;
        }
        results.push(file);
    }
    return results;
}

/**
 * Prints the path and last-modified value of every record in `fileList`.
 *
 * @param {Array} fileList - records with "path" and "lastModified".
 */
function displayDirectoryList(fileList){
    for (var i = 0; i < fileList.length; i++) {
        print(fileList[i]['path']);
        print(fileList[i]['lastModified']);
    }
}

/**
 * Prints the url and title of every record in `indexList`.
 *
 * @param {Array} indexList - records with "url" and "title".
 */
function displayIndexList(indexList){
    for (var i = 0; i < indexList.length; i++) {
        print(indexList[i]['url']);
        print(indexList[i]['title']);
    }
}

/**
 * Converts a file path into a site URL path.
 *
 * Strips the leading `rootPath`, a trailing "index.html", and the first
 * "/tags/" and "/articles/" folder segments.
 *
 * @param {string} filePath - path of the page.
 * @param {string} rootPath - site root the path is expected to start with.
 * @returns {string} the URL path.
 */
function createURLPath(filePath, rootPath){
    var base = String(filePath);
    var root = String(rootPath);

    // Remove the root only as a prefix so that the same text appearing deeper
    // in the path is left untouched.
    if (base.indexOf(root) === 0){
        base = base.substring(root.length);
    }
    base = base.replace(/index\.html$/, "");
    base = base.replace("/tags/", "/");
    base = base.replace("/articles/", "/");
    return base;
}

/**
 * Returns the markup between the first `<tag ...>` and the following
 * `</tag`.
 *
 * The opening tag may carry attributes, and a longer tag name sharing the
 * same prefix (e.g. "<pre" when looking for "p") is not treated as a match.
 * Matching is case-sensitive.
 *
 * @param {string} html - markup to search.
 * @param {string} [tag="p"] - tag name to look for.
 * @returns {string} the inner markup, or "" when the opening or closing tag
 *     cannot be found.
 */
function grabBettwenTags(html, tag){
    var tagName = typeof tag !== 'undefined' ? tag : "p";
    var text = String(html);
    var opener = "<" + tagName;

    // Find the first occurrence where the tag name really ends.
    var open = text.indexOf(opener);
    while (open !== -1 && !/[\s\/>]/.test(text.charAt(open + opener.length))) {
        open = text.indexOf(opener, open + opener.length);
    }
    if (open === -1){
        return "";
    }

    // Skip past the attributes to the end of the opening tag.
    var openEnd = text.indexOf(">", open);
    if (openEnd === -1){
        return "";
    }
    var start = openEnd + 1;

    var end = text.indexOf("</" + tagName, start);
    if (end === -1){
        return "";
    }
    return text.slice(start, end);
}

/**
 * Reads one page and extracts the fields used by the search index.
 *
 * @param {string} filePath - path of the HTML file to read.
 * @param {string} rootPath - site root, used to derive the page URL.
 * @returns {Object} record with "filepath", "url", "title" (inner markup of
 *     the first h1), "titleContents" (title without tags), "rawContents"
 *     (whole file without tags), "summary" (inner markup of the first p),
 *     "lastModified" (value of File.lastModified()) and "contents".
 */
function indexContentPage(filePath, rootPath){
    var fileContents = readFile(filePath);
    var stripTags = /<[^>]*>/g;

    var resultSet = {};
    resultSet['filepath'] = String(filePath);
    resultSet['url'] = String(createURLPath(filePath, rootPath));
    resultSet['title'] = String(grabBettwenTags(fileContents, "h1"));
    resultSet['titleContents'] = resultSet['title'].replace(stripTags, '');
    resultSet['rawContents'] = String(fileContents).replace(stripTags, '');
    resultSet['summary'] = String(grabBettwenTags(fileContents, "p"));
    resultSet['lastModified'] = new File(filePath).lastModified();
    resultSet['contents'] = fileContents;
    return resultSet;
}
Comments
