唯美古典
Java入门,Struts,Hibernate,Spring,Groovy,Grails
posts - 7,comments - 10,trackbacks - 0

1、首先从lucene官网上下载lucene2.4.0(也可以点击直接下载,我这里用的是这个版本,现在最高版本是3.0)

2、从极易软件下载极易分词器jar包(为中文内容“世界,你好”提供分词支持)

3、Eclipse中新建Java工程,并将所需jar包(lucene-core-2.4.0.jar,lucene-analyzer-2.4.0.jar,lucene-highlighter-2.4.0.jar,je-analysis-1.5.3.jar)加入工程

4、差不多该开始了,在开始之前还需要建立两个文件夹,我这里是luceneDatasource(存放用来建立索引库的源文件)和luceneIndexs(存放索引库的位置),最终的结构是:

5、好,我们开始,首先建立HelloWorld类,类里有两个方法createIndex和search,分别用于创建索引库和搜索,搜索出来的结果高亮显示,具体实现为:

package com.lucene.helloworld;

import java.util.logging.SimpleFormatter;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

import org.apache.lucene.queryParser.MultiFieldQueryParser;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Filter;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.search.highlight.Formatter;

import org.apache.lucene.search.highlight.Fragmenter;

import org.apache.lucene.search.highlight.Highlighter;

import org.apache.lucene.search.highlight.QueryScorer;

import org.apache.lucene.search.highlight.Scorer;

import org.apache.lucene.search.highlight.SimpleFragmenter;

import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import org.junit.Test;

import com.lucene.util.File2DocumentUtils;

public class HelloWorld {

    String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界,你好.txt";

    String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt";

    String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs";

// Analyzer analyzer = new StandardAnalyzer();

    Analyzer mmAnalyzer = new MMAnalyzer(); // 词库分析,极易分词

    /**

     * 创建索引

     *

     * @throws Exception

     *

     */

    @Test

    public void createIndex() throws Exception {

       IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED);

//     Document doc = File2DocumentUtils.file2Document(filePath);

       Document zhDoc = File2DocumentUtils.file2Document(zhFilePath);

//     indexWriter.addDocument(doc);

       indexWriter.addDocument(zhDoc);

       indexWriter.close();

    }

    /**

     * 从索引库搜索

     *

     * @throws Exception

     */

    @Test

    public void search() throws Exception {

       // String queryString = "hello world";

       String queryString = "世界,你好";

       // 1、将搜索文件解析为Query对象

       String[] fields = { "name", "content" };

       QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer);

       Query query = queryParser.parse(queryString);

       // 2、查询

       IndexSearcher indexSearcher = new IndexSearcher(indexPath);

       Filter filter = null;

       TopDocs topDocs = indexSearcher.search(query, filter, 10000);

       System.out.println("总共有【" + topDocs.totalHits + "】条结果匹配");

       // start 准备高亮器

       Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>");

       Scorer scorer = new QueryScorer(query);

       Highlighter highlighter = new Highlighter(formatter, scorer);

       Fragmenter fragmenter = new SimpleFragmenter(50);

       highlighter.setTextFragmenter(fragmenter);

       // end 结束高亮器

       // 3、打印输出结果

       for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

           int docSn = scoreDoc.doc;

           Document doc = indexSearcher.doc(docSn);

           // start 高亮

           // 返回高亮后的结果,如果当前属性值中没有出现关键字,会返回 null

           String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content"));

           if (hc == null) {

              String content = doc.get("content");

              int endIndex = Math.min(50, content.length());

              hc = content.substring(0, endIndex);

           }

           doc.getField("content").setValue(hc);

           // end 高亮

           File2DocumentUtils.printDocumentInfo(doc);

       }

    }

}

该类需要有一个工具类支持,来将file转换为Document,具体实现如下:

package com.lucene.util;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.NumberTools;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

publicclass File2DocumentUtils {

    publicstatic Document file2Document(String path) {

       File file = new File(path);

       Document doc = new Document();

       doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));

       doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));

       doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));

       doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

       return doc;

    }

    // public static void document2File(Document doc ){

    //    

    // }

    publicstatic String readFileContent(File file) {

       try {

           BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

           StringBuffer content = new StringBuffer();

           for (String line = null; (line = reader.readLine()) != null;) {

              content.append(line).append(""n");

           }

           return content.toString();

       } catch (Exception e) {

           thrownew RuntimeException(e);

       }

    }

   

    publicstaticvoid printDocumentInfo(Document doc) {

       // Field f = doc.getField("name");

       // f.stringValue();

       System.out.println("------------------------------");

       System.out.println("name     = " + doc.get("name"));

       System.out.println("content = " + doc.get("content"));

       System.out.println("size     = " + NumberTools.stringToLong(doc.get("size")));

       System.out.println("path     = " + doc.get("path"));

    }

}

6、到此我们结束,看下成果。英文版的我就不写了,相对来说比较容易,来看下中文版的结果:




唯美古典的工作室
posted on 2009-12-03 15:27 唯美古典 阅读(2885) 评论(0)  编辑  收藏 所属分类: Java入门lucene

只有注册用户登录后才能发表评论。


网站导航: