posts - 8, comments - 0, trackbacks - 0, articles - 0

2008年4月6日

 

package indexer;
//package ch2.lucenedemo.process;

import java.io.File;

import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import parameters.Param;
import pretreat.FileControl;

public class IndexOnFS implements IIndexTool{
    
// 成员变量存储创建的索引文件存放的位置
    private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
        
//建立索引的目标文件
        private String INDEX_WANTED_PATH = "e:\\";
        
//目录数组总数
        private int NumOfDir = 0;
        
//存放根目录下的所有子目录
        private ArrayList<String> DirList = new ArrayList<String>();
        
//地址映射
        private Directory dir = null;
        
        
private IndexWriter writer;
        
        
public IndexOnFS(String path){
                
try {
                    dir 
= FSDirectory.getDirectory(INDEX_STORE_PATH);
                } 
catch (IOException ex) {
                    Logger.getLogger(IndexOnFS.
class.getName()).log(Level.SEVERE, null, ex);
                }
                INDEX_WANTED_PATH 
= path;
                makeSegments();
                searchDirectorys(path);
        }

        
//建立索引之前遍历所有目录并存放,这是为了迎合IndexWriter的同步机制
        public void searchDirectorys(String rootDir){
        
                File rootfile 
= new File(rootDir);
                File[] files 
= rootfile.listFiles();
                
if(files!=null)
                
for (int i = 0; i < files.length; i++){
                    
if(files[i].isDirectory()){
                       DirList.add(files[i].getPath());
                       searchDirectorys(files[i].getPath()); 
                    }
                }
        }
        
public void printAllDirectorys(){
                
for(int i = 0;i<DirList.size();i++)
                       System.out.println(DirList.get(i));
        }
        
public void createIndexs() {
                createIndex(INDEX_WANTED_PATH);
                
for(int k = 0;k<DirList.size();k++)
                    createIndex(DirList.get(k));
        }
        
public Document preIndexWrite(File file){
                
// 创建一个新的Document
            Document doc = new Document();
            
// 文件名对应的Field
            Field field = new Field("filename", file.getName(), 
                                 Field.Store.YES, Field.Index.TOKENIZED); 
        doc.add(field);
        
// 文件内容对应的Filed
        field = new Field("content", FileControl.fileToString(file),//转到控制器
                   Field.Store.NO, Field.Index.TOKENIZED);
        doc.add(field);
                
//文件路径对应的Filed
                field = new Field("filepath", file.getPath(), 
                           Field.Store.YES, Field.Index.TOKENIZED); 
        doc.add(field);
                
                
return doc;
        }
    
/*单目录创建索引*/
    
public void createIndex(String inputDir) {
        
try {
            
/*MMAnalyzer作为分词工具创建一个IndexWriter*/
                writer 
= new IndexWriter(dir,new MMAnalyzer(), false); /*第一次创建索引时为true*/
            File filesDir 
= new File(inputDir);
            
/*取得所有需要建立索引的文件数组*/
            File[] files 
= filesDir.listFiles();
            
/*遍历数组*/
                        
if(files!=null)
            
for (int i = 0; i < files.length; i++) { 
                              
/*判断是否为文件*/
                              
if(files[i].isFile()){ 
                    
/*把Document加入IndexWriter*/
                    writer.addDocument(preIndexWrite(files[i]));  
                                        System.out.println( files[i].getPath());
                        }
                                    } 
                        writer.optimize(); 
/*索引优化*/

        } 
catch (Exception e) { e.printStackTrace(); }
                
                
finally
                    
try{writer.close();
                    }
catch(Exception ee){ ee.printStackTrace(); }
                }

    }
        
//初始化空索引库
        public void makeSegments(){
            
if(new File(INDEX_STORE_PATH).list().length==0){
                
try {
                    IndexWriter iw 
= new IndexWriter(dir, new MMAnalyzer(), true);
                    writer.addDocument(preIndexWrite(
new File(Param.INITFILE_PATH)));
                } 
catch (Exception ex) { ex.printStackTrace(); } 
                
                
finally
                      
try{writer.close();
                      }
catch(Exception ee){ ee.printStackTrace(); }
                  }
            }
        }
        
public ArrayList getDirs(){
            
return this.DirList;
        }
        
        
public void startIndex() {
            makeSegments();
            createIndexs();
        }

    
public static void main(String[] args) {
        IndexOnFS processor 
= new IndexOnFS("e:\\毕业论文");
                
//processor.searchDirectorys("e:\\1");
                processor.startIndex();
    }


}

posted @ 2008-06-01 05:11 HanLab 阅读(221) | 评论 (0)编辑 收藏

这是我做毕业设计时画的,感觉还可以就放过来共享,有什么不足的地方,请多指点。

看Lucene代码也算是一种享受,根据下图可以先看看关键类的代码。
Lucene2.3.1发布不久,源代码下载地址:http://apache.mirror.phpchina.com/lucene/java/


 

 

 org.apache.lucene.search/

 搜索入口

 org.apache.lucene.index/

 索引入口

 org.apache.lucene.analysis/

 语言分析器

 org.apache.lucene.queryParser/

查询分析器

 org.apache.lucene.document/

 存储结构

 org.apache.lucene.store/ 

 底层IO/存储结构

 org.apache.lucene.util/

 一些公用的数据结构


 

 

posted @ 2008-04-06 17:02 HanLab 阅读(407) | 评论 (0)编辑 收藏