简单lucene搜索实现(搜索引擎)

首先下载 Lucene 相关 jar 包（本文代码使用的是 Lucene 2.x 的 API，例如 Hits、Field.Index.TOKENIZED，较新版本已不再提供这些类），下载方式这里不再赘述，可自行在网上查找。

在 Eclipse 下建立 Web 工程 luceneTest

将jar包加载到你的web工程里面

新建类Index.java,代码如下：

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;

/*
* Create Date:2007-10-26 下午02:52:53
*
* Author:dingkm
*
* Version: V1.0
*
* Description：对进行修改的功能进行描述
*
*
*/

public class Index {

/**
* @Description 方法实现功能描述
* @param args
*            void
* @throws 抛出异常说明
*/
public static void main(String[] args) {
  // TODO Auto-generated method stub
  try {
   new Index().index();
   System.out.println("create index success!!!");
  } catch (CorruptIndexException e) {
   e.printStackTrace();
  } catch (LockObtainFailedException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
}

public void index() throws CorruptIndexException, LockObtainFailedException, IOException{
   long start = System.currentTimeMillis();

  // 建立索引的路径
     String path = "c:\\index2";
  Document doc1 = new Document();
        doc1.add( new Field("name", "中华人民共和国",Field.Store.YES,Field.Index.TOKENIZED));
        doc1.add( new Field("content", "标题或正文包括",Field.Store.YES,Field.Index.TOKENIZED));
        doc1.add( new Field("time", "20080715",Field.Store.YES,Field.Index.TOKENIZED));
        Document doc2 = new Document();
        doc2.add(new Field("name", "大中国中国",Field.Store.YES,Field.Index.TOKENIZED));
        IndexWriter writer = new IndexWriter(FSDirectory.getDirectory(path, true), new StandardAnalyzer(), true);
        writer.setMaxMergeDocs(10);
        writer.setMaxFieldLength(3);
        writer.addDocument(doc1);
        writer.setMaxFieldLength(3);
        writer.addDocument(doc2);
        writer.close();


        System.out.println("=========================");
        System.out.print(System.currentTimeMillis() - start);
  System.out.println("total milliseconds");
  System.out.println("=========================");

}

执行这个类，可以看到结果：

=========================
375total milliseconds
=========================
create index success!!!

可以看到索引创建成功。

下面我们来创建搜索类，Search.java

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

/*
* Create Date:2007-10-26 下午02:56:12
*
* Author:dingkm
*
* Version: V1.0
*
* Description：对进行修改的功能进行描述
*
*
*/

public class Search {

    /**
     * Command-line entry point: runs the demo query against the index stored
     * under {@code c:\index2}. Failures are reported as a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String path = "c:\\index2";
        try {
            new Search().search(path);
        } catch (IOException e) {
            // CorruptIndexException is an IOException subclass in Lucene, so
            // this handler covers it as well.
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the {@code name} field of the index at {@code path} for the
     * term "中" and prints, for every hit, the stored {@code name},
     * {@code content} and {@code time} values plus the hit score as a
     * percentage.
     *
     * @param path file-system location of the index to search
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           on any other low-level I/O failure
     * @throws ParseException        if the query string cannot be parsed
     */
    public void search(String path) throws CorruptIndexException, IOException, ParseException {
        IndexSearcher searcher = new IndexSearcher(path);
        try {
            QueryParser qp = new QueryParser("name", new StandardAnalyzer());
            Query query = qp.parse("中");
            Hits hits = searcher.search(query);

            java.text.NumberFormat format = java.text.NumberFormat.getNumberInstance();
            System.out.println("查找到共" + hits.length() + "个结果");
            for (int i = 0; i < hits.length(); i++) {
                // Print the stored fields of each hit; fields a document does
                // not have come back as null.
                Document doc = hits.doc(i);
                System.out.println(doc.get("name"));
                System.out.println("content=" + doc.get("content"));
                System.out.println("time=" + doc.get("time"));
                System.out.println("准确度为：" + format.format(hits.score(i) * 100.0) + "%");
            }
        } finally {
            // Hits loads documents through the searcher, so it is closed only
            // after all results have been printed, and also on failure.
            searcher.close();
        }
    }

}

执行它，会得到以下结果：

查找到共2个结果
中华人民共和国
content=标题或正文包括
time=20080715
准确度为：29.727%
大中国中国
content=null
time=null
准确度为：29.727%

这样就完成了我们的程序

这是我第一次发表文章
说的比较简单，可能很多地方说的不清楚
希望大家多多支持

有什么不明白的欢迎留言。

posted on 2008-09-04 13:06 老丁阅读(535) 评论(0) 编辑收藏所属分类: 搜索引擎 lucene

留言簿(4)

我参与的团队

文章分类(50)

文章档案(48)

相册

朋友

搜索

积分与排名

最新评论


只有注册用户登录后才能发表评论。




网站导航: 博客园 IT新闻 Chat2DB C++博客博问管理
相关文章: lucene增量索引的简单实现 lucene索引word/pdf/html/txt文件及检索(搜索引擎) Lucene的查询语法！(搜索引擎) lucene介绍(搜索引擎) 简单lucene搜索实现(搜索引擎)