JavaScript Search Indexer


Published in: JavaScript
Save to your folder(s)

A JavaScript search indexer for whichElement.com


Copy this code and paste it in your HTML
  1. importPackage(java.io);
  2. load("./rhino/lib.js");
  3.  
  // Entry point: index every .html file under the site root given as the first
  // script argument and write the resulting search index to disk twice.
  var rootPath = arguments[0];
  if (typeof rootPath === 'undefined' || String(rootPath).length === 0){
    throw new Error("Missing argument: path to the site root directory");
  }

  var startPath = new File(rootPath);
  // directoryList() reports canonical paths, so the root handed to the indexer
  // must be canonical too; otherwise a relative root (e.g. "./site") never
  // matches and cannot be stripped when URLs are derived from file paths.
  var canonicalRoot = String(startPath.getCanonicalPath());
  var fileList = filterDirectoryList(directoryList(canonicalRoot), true, "html");
  var searchIndex = indexFiles(fileList, canonicalRoot);

  // Pretty-printed copy relative to the working directory ...
  serializeToDisk(searchIndex, "./search/searchindex.js", true);
  // ... and a compact copy inside the site root itself.
  serializeToDisk(searchIndex, rootPath + "/search/searchindex.js");
  13.  
  /**
   * Builds the search index for a list of files.
   *
   * Only files whose path contains "/articles/" or "/tags/" and does not
   * contain "/bad/" are indexed. Each file is passed to indexContentPage();
   * the 'filepath' and 'contents' fields are then removed and 'lastModified'
   * is converted to a Date before the record is added to the index.
   *
   * @param {Array} fileList  entries with a 'path' property (as produced by directoryList)
   * @param {string} rootPath site root, forwarded to indexContentPage
   * @returns {Array} one record per indexed page
   */
  function indexFiles(fileList, rootPath){
    var searchIndex = [];
    for (var i = 0; i < fileList.length; i++){
      var fileToRead = fileList[i]['path'];

      // Test each marker on its own. Summing indexOf() results (as was done
      // before) misclassifies matches at index 0 or 1, and "< 1" lets a path
      // starting with "/bad/" through.
      // indexOf is used instead of includes() because the path may be a Java
      // string when it comes from directoryList under Rhino -- TODO confirm.
      var isContent = fileToRead.indexOf("/articles/") >= 0 ||
                      fileToRead.indexOf("/tags/") >= 0;
      var isBad = fileToRead.indexOf("/bad/") >= 0;
      if (!isContent || isBad){
        continue;
      }

      var resultSet = indexContentPage(fileToRead, rootPath);

      //TODO: Go back and make this stuff optional in indexContentPage,
      //Adding stuff in the indexer required me to kill it from search json.
      delete resultSet['filepath'];
      delete resultSet['contents'];
      resultSet['lastModified'] = new Date(resultSet['lastModified']);

      searchIndex.push(resultSet);
    }
    return searchIndex;
  }
  36.  
  /**
   * Writes `content` to `location` as JSON.
   *
   * @param {*} content        value to serialize with JSON.stringify
   * @param {string} location  path of the file to (over)write
   * @param {boolean} [prettyify=false] when truthy, indent the JSON by 3 spaces
   */
  function serializeToDisk(content, location, prettyify){
    // Serialize before opening the file: FileWriter truncates the target, so a
    // failure in JSON.stringify must not leave an emptied file behind.
    var serialized = prettyify ? JSON.stringify(content, null, 3)
                               : JSON.stringify(content);

    var out = new BufferedWriter(new FileWriter(location));
    try {
      out.write(serialized);
    } finally {
      // Always release the file handle, even when the write fails.
      out.close();
    }
  }
  51.  
  52.  
  53. importPackage(java.io);
  54.  
  /**
   * Recursively lists every non-directory entry below `startPath`.
   *
   * Directories are descended into but are not themselves included in the
   * result. Each entry is an object with the fields: path (canonical path),
   * name, parent, hidden, dir, lastModified and ext (text after the last '.'
   * in the name, or '' when there is none).
   *
   * @param {string} startPath directory to walk
   * @returns {Array} flat list of file descriptors; empty when `startPath`
   *                  cannot be listed
   */
  function directoryList(startPath){
    var results = [];
    var list = new File(startPath).list();

    // java.io.File.list() returns null when the path is not a directory or an
    // I/O error occurs; treat that as "nothing to list" instead of crashing
    // the whole walk on list.length.
    if (list == null){
      return results;
    }

    for (var i = 0; i < list.length; i++){
      var child = new File(startPath + "/" + list[i]);

      if (child.isDirectory()){
        results = results.concat(directoryList(child.getCanonicalPath()));
        continue;
      }

      var fileArray = {};
      fileArray['path'] = child.getCanonicalPath();
      fileArray['name'] = child.getName();
      fileArray['parent'] = child.getParent();
      fileArray['hidden'] = child.isHidden();
      fileArray['dir'] = child.isDirectory();
      fileArray['lastModified'] = child.lastModified();

      var pos = fileArray['name'].lastIndexOf('.');
      fileArray['ext'] = pos < 0 ? '' : fileArray['name'].substring(pos + 1);

      results.push(fileArray);
    }
    return results;
  }
  88.  
  /**
   * Returns the entries of `directoryList` that pass every active filter.
   *
   * @param {Array} directoryList     file descriptors (fields used: dir, ext, path)
   * @param {boolean} [filesOnly=false] drop entries whose `dir` flag is truthy
   * @param {string} [extension='']   when non-empty, keep only entries with this `ext`
   * @param {string} [folderToTarget=''] when non-empty, keep only entries whose
   *                                  path contains this text
   * @returns {Array} the surviving entries, in their original order
   */
  function filterDirectoryList(directoryList, filesOnly, extension, folderToTarget){
    var skipDirectories = (typeof filesOnly === 'undefined') ? false : filesOnly;
    var wantedExt = (typeof extension === 'undefined') ? '' : extension;
    var wantedFolder = (typeof folderToTarget === 'undefined') ? '' : folderToTarget;

    var kept = [];
    var idx = 0;

    while (idx < directoryList.length){
      var entry = directoryList[idx];
      idx += 1;

      // Checks short-circuit in the same order as the individual filters.
      // The extension comparison stays loose (!=) on purpose.
      var rejected = (skipDirectories && entry.dir) ||
                     (wantedExt.length > 0 && (entry.ext != wantedExt)) ||
                     (wantedFolder.length > 0 && entry.path.indexOf(wantedFolder) < 0);

      if (!rejected){
        kept.push(entry);
      }
    }
    return kept;
  }
  116.  
  /**
   * Debug helper: prints the path and the last-modified value of every entry,
   * each on its own print() call.
   *
   * @param {Array} fileList entries with 'path' and 'lastModified' fields
   */
  function displayDirectoryList(fileList){
    var position = 0;
    while (position < fileList.length){
      print(fileList[position]['path']);
      print(fileList[position]['lastModified']);
      position += 1;
    }
  }
  124.  
  /**
   * Debug helper: prints the url and the title of every index record, each on
   * its own print() call.
   *
   * @param {Array} indexList records with 'url' and 'title' fields
   */
  function displayIndexList(indexList){
    var position = 0;
    while (position < indexList.length){
      print(indexList[position]['url']);
      print(indexList[position]['title']);
      position += 1;
    }
  }
  131.  
  /**
   * Derives a site-relative URL from a file path.
   *
   * Steps: strip `rootPath` when it is the leading part of `filePath`, strip a
   * trailing "index.html" file name, then remove the first "/tags/" and the
   * first "/articles/" segment.
   *
   * @param {string} filePath path of the page file
   * @param {string} rootPath site root to strip from the front of the path
   * @returns {string} the derived URL path
   */
  function createURLPath(filePath, rootPath){
    // Coerce to JS strings so replace() has JS semantics (first occurrence
    // only) even if a Java string is passed in under Rhino.
    var base = String(filePath);
    var root = String(rootPath);

    // Only strip the root as a prefix; an unanchored replace would also cut
    // the same text out of the middle of a path.
    if (root.length > 0 && base.indexOf(root) === 0){
      base = base.slice(root.length);
    }

    // Only strip "index.html" when it is the complete file name at the end.
    var indexName = "index.html";
    var tail = base.length - indexName.length;
    if (tail >= 0 && base.lastIndexOf(indexName) === tail &&
        (tail === 0 || base.charAt(tail - 1) === "/")){
      base = base.slice(0, tail);
    }

    base = base.replace("/tags/", "/");
    base = base.replace("/articles/", "/");

    return base;
  }
  140.  
  /**
   * Returns the text between the first opening `<tag ...>` and the next
   * closing `</tag>` in `html`.
   *
   * The opening tag may carry attributes. A tag name only matches when it is
   * followed by ">" or whitespace, so "p" does not match "<pre>". Matching is
   * case-sensitive.
   *
   * @param {string} html  markup to search
   * @param {string} [tag="p"] tag name to look for
   * @returns {string} the inner markup, or "" when no complete tag pair is found
   */
  function grabBettwenTags(html, tag){
    var tagName = typeof tag !== 'undefined' ? tag : "p";
    var source = String(html);

    // Finds `marker` at or after `from`, accepting only hits where the tag
    // name ends right there (next char is ">" or whitespace).
    function findMarker(marker, from){
      var at = source.indexOf(marker, from);
      while (at >= 0){
        var next = source.charAt(at + marker.length);
        if (next === ">" || /\s/.test(next)){
          return at;
        }
        at = source.indexOf(marker, at + marker.length);
      }
      return -1;
    }

    var openAt = findMarker("<" + tagName, 0);
    if (openAt < 0){
      return "";
    }

    // Skip past the whole opening tag, attributes included.
    var openEnd = source.indexOf(">", openAt);
    if (openEnd < 0){
      return "";
    }

    var closeAt = findMarker("</" + tagName, openEnd + 1);
    if (closeAt < 0){
      return "";
    }

    return source.slice(openEnd + 1, closeAt);
  }
  147.  
  /**
   * Reads one HTML page and builds its index record.
   *
   * Fields of the returned record:
   *   filepath      - the path as a string
   *   url           - result of createURLPath(filePath, rootPath)
   *   title         - markup between the first <h1> tags
   *   titleContents - title with tags stripped
   *   rawContents   - whole file with tags stripped
   *   summary       - markup between the first <p> tags
   *   lastModified  - File.lastModified() of the page
   *   contents      - the file contents as returned by readFile
   *
   * @param {string} filePath path of the page to read
   * @param {string} rootPath site root used to derive the URL
   * @returns {Object} the index record
   */
  function indexContentPage(filePath, rootPath){
    // Matches any <...> tag, including tags that span several lines.
    var tagPattern = /<(?:.|\n)*?>/gm;

    // readFile is not defined in this file; presumably the Rhino shell
    // built-in -- TODO confirm.
    var fileContents = readFile(filePath);

    var resultSet = {};
    resultSet['filepath'] = String(filePath);
    resultSet['url'] = String(createURLPath(filePath, rootPath));
    resultSet['title'] = String(grabBettwenTags(fileContents, "h1"));
    resultSet['titleContents'] = resultSet['title'].replace(tagPattern, '');
    resultSet['rawContents'] = String(fileContents).replace(tagPattern, '');
    resultSet['summary'] = String(grabBettwenTags(fileContents, "p"));
    // Construct the File with `new`, consistent with every other use in this file.
    resultSet['lastModified'] = new File(filePath).lastModified();
    resultSet['contents'] = fileContents;
    return resultSet;
  }

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.