介绍下Lucene建立索引的过程

2023-07-06   


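建立索引的基本过程是:先创建 IndexWriter,然后递归遍历数据目录,把每个需要索引的文件封装成一个 Document(包含 path、contents、name 三个 Field),通过 addDocument 加入索引,最后调用 optimize 并关闭 IndexWriter。代码如下: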
  
   package utils;

   import java.io.File;
   import java.io.FileReader;
   import java.io.IOException;
   import java.io.Reader;

   import org.apache.lucene.analysis.standard.StandardAnalyzer;
   import org.apache.lucene.document.Document;
   import org.apache.lucene.document.Field;
   import org.apache.lucene.index.IndexWriter;

   /**
    * Builds a Lucene index from the {@code .java} and {@code .txt} files found
    * under a data directory (sub-directories are visited recursively).
    *
    * <p>Each indexed file becomes one {@link Document} with three fields:
    * {@code path} (canonical path, stored, not tokenized), {@code contents}
    * (the file text, supplied through a {@link Reader}) and {@code name}
    * (file name, stored, not tokenized).
    */
   public class Indexer {

       /**
        * Indexes every eligible file below {@code dataDir} into {@code indexDir}.
        *
        * @param indexDir directory in which the index is written
        * @param dataDir  directory whose files are to be indexed
        * @return the document count reported by the writer after indexing
        * @throws IOException if {@code dataDir} does not exist, is not a
        *                     directory, or reading/writing fails
        */
       public int index(String indexDir, String dataDir) throws IOException {
           File indexDirFile = new File(indexDir);
           File dataDirFile = new File(dataDir);
           // The original listing computed this value and then returned 0.
           return index(indexDirFile, dataDirFile);
       }

       /**
        * Opens an {@link IndexWriter} on {@code indexDirFile}, indexes the data
        * directory, optimizes the index and closes the writer.
        *
        * @param indexDirFile directory in which the index is written
        * @param dataDirFile  directory whose files are to be indexed
        * @return the value of {@code writer.docCount()} after indexing
        * @throws IOException if {@code dataDirFile} is missing or not a
        *                     directory, or if indexing fails
        */
       private int index(File indexDirFile, File dataDirFile) throws IOException {
           if (!dataDirFile.exists() || !dataDirFile.isDirectory()) {
               throw new IOException(dataDirFile + " does not exist or is not a directory");
           }

           // Third argument is Lucene's "create" flag: start a fresh index.
           IndexWriter writer = new IndexWriter(indexDirFile, new StandardAnalyzer(), true);
           try {
               writer.setUseCompoundFile(false);
               indexDirectory(writer, dataDirFile);

               int numIndexed = writer.docCount();
               writer.optimize();
               return numIndexed;
           } finally {
               // Always release the writer, even when indexing fails part-way.
               writer.close();
           }
       }

       /**
        * Walks {@code dataDirFile} recursively and indexes every file whose name
        * ends with {@code .java} or {@code .txt}.
        *
        * @param writer      open writer that receives the documents
        * @param dataDirFile directory to scan
        * @throws IOException if the directory cannot be listed or a file cannot
        *                     be indexed
        */
       private void indexDirectory(IndexWriter writer, File dataDirFile) throws IOException {
           File[] files = dataDirFile.listFiles();
           if (files == null) {
               // listFiles() returns null when the path is not a directory or an I/O error occurs.
               throw new IOException("Cannot list files in " + dataDirFile);
           }
           for (int i = 0; i < files.length; i++) {
               File f = files[i];
               if (f.isDirectory()) {
                   indexDirectory(writer, f);
               } else if (f.getName().endsWith(".java") || f.getName().endsWith(".txt")) {
                   // File types that should be indexed.
                   indexFile(writer, f);
               }
           }
       }

       /**
        * Adds one file to the index as a single document. Hidden, missing or
        * unreadable files are skipped silently.
        *
        * @param writer open writer that receives the document
        * @param f      file to index
        * @throws IOException if the file cannot be read or the document cannot
        *                     be added
        */
       private void indexFile(IndexWriter writer, File f) throws IOException {
           if (f.isHidden() || !f.exists() || !f.canRead()) {
               return;
           }

           System.out.println("Indexing" + f.getCanonicalPath());
           Document doc = new Document();
           Reader txtReader = new FileReader(f);
           try {
               doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
               doc.add(new Field("contents", txtReader));
               doc.add(new Field("name", f.getName(), Field.Store.YES, Field.Index.UN_TOKENIZED));
               writer.addDocument(doc);
           } finally {
               // NOTE(review): assumes addDocument has finished reading the
               // contents by the time it returns - confirm for the Lucene
               // version in use. Closing here avoids leaking the file handle
               // if addDocument throws; a second close is harmless.
               txtReader.close();
           }
       }
   }
   调用的代码如下:
   1. String filesRepoDir = “C:/workspace-2.0″;//需要被索引的目录
   2. String indexDir = “C:/apache-tomcat-6.0.18/webapps/index”;//存放索引的目录
   3. Indexer indexer= new Indexer();
   4. indexer.index(indexDir, filesRepoDir);


相关内容:

  1. 如何用Lucene索引数据库
  2. 介绍一下SQL Server里面的索引视图
  3. 简述索引存取方法的作用和建立索引的原则
  4. Lucene推荐的分页方式是什么?
  5. 当我正在为表建立索引的时候,SQL Server 会禁止对表的访问吗
  6. 哪些情况下不应该使用索引