Posted on 2008-06-01 05:11
HanLab 阅读(220)
评论(0) 编辑 收藏 所属分类:
Lucene
package indexer;
//package ch2.lucenedemo.process;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import parameters.Param;
import pretreat.FileControl;
/**
 * Builds a Lucene index on the file system for every regular file found in a
 * root directory and in all of its (recursively discovered) sub-directories.
 *
 * <p>The index is stored at {@code Param.INDEX_STORE_PATH}. Text is analyzed
 * with {@code MMAnalyzer}. Each indexed file becomes one document with the
 * fields {@code filename}, {@code content} and {@code filepath}.
 *
 * <p>Failures while opening or writing the index are logged and do not
 * propagate to the caller.
 */
public class IndexOnFS implements IIndexTool{
    private static final Logger LOGGER = Logger.getLogger(IndexOnFS.class.getName());

    // Location where the generated index files are stored.
    private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
    // Root directory whose files are to be indexed.
    private String INDEX_WANTED_PATH = "e:\\";
    // Every sub-directory found (recursively) below the root directory.
    private ArrayList<String> DirList = new ArrayList<String>();
    // Lucene directory backing the index; stays null when it could not be opened.
    private Directory dir = null;

    /**
     * Opens the index directory, seeds an empty index store if necessary and
     * collects all sub-directories of {@code path}.
     *
     * @param path root directory whose files will be indexed
     */
    public IndexOnFS(String path){
        try {
            dir = FSDirectory.getDirectory(INDEX_STORE_PATH);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        INDEX_WANTED_PATH = path;
        makeSegments();
        searchDirectorys(path);
    }

    /**
     * Recursively walks {@code rootDir} and records every sub-directory in the
     * internal directory list. This is done before indexing so that the index
     * can afterwards be written one directory at a time.
     *
     * @param rootDir directory to scan; silently ignored when it cannot be listed
     */
    public void searchDirectorys(String rootDir){
        File[] children = new File(rootDir).listFiles();
        if (children == null) {
            // Not a directory, or it cannot be read: nothing to record.
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                DirList.add(child.getPath());
                searchDirectorys(child.getPath());
            }
        }
    }

    /** Prints every collected sub-directory path to standard output, one per line. */
    public void printAllDirectorys(){
        for (String path : DirList) {
            System.out.println(path);
        }
    }

    /** Indexes the files of the root directory, then those of every collected sub-directory. */
    public void createIndexs() {
        createIndex(INDEX_WANTED_PATH);
        for (String subDir : DirList) {
            createIndex(subDir);
        }
    }

    /**
     * Builds the Lucene document describing one file.
     *
     * @param file file to describe
     * @return a document holding the file name (stored), the file content as
     *         produced by {@code FileControl.fileToString} (indexed but not
     *         stored) and the file path (stored)
     */
    public Document preIndexWrite(File file){
        Document doc = new Document();
        // Field for the file name.
        doc.add(new Field("filename", file.getName(),
                Field.Store.YES, Field.Index.TOKENIZED));
        // Field for the file content; text extraction is delegated to FileControl.
        doc.add(new Field("content", FileControl.fileToString(file),
                Field.Store.NO, Field.Index.TOKENIZED));
        // Field for the file path.
        doc.add(new Field("filepath", file.getPath(),
                Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Adds every regular file directly inside {@code inputDir} to the existing
     * index and optimizes the index afterwards. Sub-directories are not
     * descended into. Any failure is logged and aborts the remaining files of
     * this directory.
     *
     * @param inputDir directory whose files are indexed
     */
    public void createIndex(String inputDir) {
        if (dir == null) {
            LOGGER.log(Level.SEVERE, "Index directory is not available; skipping {0}", inputDir);
            return;
        }
        // A local writer guarantees that the finally block closes exactly the
        // writer opened here, never a stale one left over from an earlier call.
        IndexWriter indexWriter = null;
        try {
            // create == false: append to the index prepared by makeSegments().
            indexWriter = new IndexWriter(dir, new MMAnalyzer(), false);
            File[] files = new File(inputDir).listFiles();
            if (files != null) {
                for (File candidate : files) {
                    if (candidate.isFile()) {
                        indexWriter.addDocument(preIndexWrite(candidate));
                        System.out.println(candidate.getPath());
                    }
                }
            }
            indexWriter.optimize();
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, null, e);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Initializes an empty index store: when the index location contains no
     * files yet, a new index is created and seeded with the document built
     * from {@code Param.INITFILE_PATH}, so that {@link #createIndex(String)}
     * can later open it in append mode. Does nothing when the location already
     * contains files or cannot be listed.
     */
    public void makeSegments(){
        if (dir == null) {
            LOGGER.log(Level.SEVERE, "Index directory is not available; cannot initialize index");
            return;
        }
        String[] existing = new File(INDEX_STORE_PATH).list();
        if (existing == null) {
            // list() returns null when the path is not a readable directory.
            // Do not create an index in that case: create == true would wipe
            // whatever index might actually be there.
            LOGGER.log(Level.WARNING, "Cannot list index location {0}", INDEX_STORE_PATH);
            return;
        }
        if (existing.length != 0) {
            return;
        }
        IndexWriter seedWriter = null;
        try {
            seedWriter = new IndexWriter(dir, new MMAnalyzer(), true);
            seedWriter.addDocument(preIndexWrite(new File(Param.INITFILE_PATH)));
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } finally {
            // Closing releases the index write lock needed by createIndex().
            closeWriter(seedWriter);
        }
    }

    /** Closes {@code w} if it was opened, logging (not propagating) any failure. */
    private void closeWriter(IndexWriter w) {
        if (w == null) {
            return;
        }
        try {
            w.close();
        } catch (Exception ee) {
            LOGGER.log(Level.SEVERE, null, ee);
        }
    }

    /**
     * Returns the collected sub-directory paths.
     *
     * @return the internal (live, mutable) list of sub-directory paths
     */
    public ArrayList getDirs(){
        return this.DirList;
    }

    /** Makes sure the index store is initialized, then indexes all directories. */
    public void startIndex() {
        makeSegments();
        createIndexs();
    }

    /** Demo entry point: indexes a hard-coded directory. */
    public static void main(String[] args) {
        IndexOnFS processor = new IndexOnFS("e:\\毕业论文");
        //processor.searchDirectorys("e:\\1");
        processor.startIndex();
    }
}