平台:Lucene 2.1.0,JRE 1.4,Oracle 10g,IBM WebSphere。
数据表:Article。字段:ID(自动增长),Title(String),Content(String)。共有550000条记录。
对Article建立索引:
1
import org.apache.lucene.analysis.*;
2
import org.apache.lucene.analysis.cn.*;
3
import org.apache.lucene.document.*;
4
import org.apache.lucene.index.*;
5
import java.sql.*;
6
import oracle.jdbc.pool.*;
7
8
public class Index
{
9
private String url="jdbc:oracle:thin:@//192.168.0.l:1521/Test";
10
private String user="terry";
11
private String password="dev";
12
private Connection con=null;
13
private Statement st=null;
14
private ResultSet rs=null;
15
private String indexUrl="E:\\ArticleIndex";
16
17
private ResultSet getResult() throws Exception
{
18
OracleDataSource ods=new OracleDataSource();
19
20
ods.setURL(this.url);
21
ods.setUser(this.user);
22
ods.setPassword(this.password);
23
24
this.con=ods.getConnection();
25
this.st=this.con.createStatement();
26
this.rs=this.st.executeQuery("SELECT * FROM Article");
27
28
return this.rs;
29
}
30
31
public void createIndex() throws Exception
{
32
ResultSet rs=this.getResult();
33
34
Analyzer chineseAnalyzer=new ChineseAnalyzer();
35
IndexWriter indexWriter=new IndexWriter(this.indexUrl,chineseAnalyzer,true);
36
indexWriter.setMergeFactor(100);
37
indexWriter.setMaxBufferedDocs(100);
38
39
java.util.Date startDate=new java.util.Date();
40
41
System.out.println("开始索引时间:"+startDate);
42
43
executeIndex(rs,indexWriter);
44
45
indexWriter.optimize();
46
47
indexWriter.close();
48
49
java.util.Date endDate=new java.util.Date();
50
51
System.out.println("索引结束时间:"+endDate);
52
System.out.println("共花费:"+(endDate.getTime()-startDate.getTime())+"ms");
53
}
54
55
private void executeIndex(ResultSet rs,IndexWriter indexWriter) throws Exception
{
56
int i=0;
57
58
while(rs.next())
{
59
int id=rs.getInt("ID");
60
String title=rs.getString("TITLE");
61
String info=rs.getString("CONTENT");
62
63
Document doc=new Document();
64
65
Field idField=new Field("ID",Integer.toString(id),Field.Store.YES,Field.Index.NO,Field.TermVector.NO);
66
Field titleField=new Field("Title",title,Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.YES);
67
Field infoField=new Field("Content",title,Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.YES);
68
69
doc.add(idField);
70
doc.add(titleField);
71
doc.add(infoField);
72
73
indexWriter.addDocument(doc);
74
75
i++;
76
}
77
78
this.close();
79
80
System.out.println("共处理记录:"+i);
81
}
82
83
private void close() throws Exception
{
84
this.rs.close();
85
this.st.close();
86
this.con.close();
87
}
88
}
查找:
1
import java.io.*;
2
import org.apache.lucene.analysis.cn.*;
3
import org.apache.lucene.search.*;
4
import org.apache.lucene.store.*;
5
import org.apache.lucene.document.*;
6
import org.apache.lucene.queryParser.QueryParser;
7
8
import java.util.*;
9
10
public class Search
{
11
12
private static final String indexUrl="E:\\ArticleIndex";
13
14
public static void main(String[] args) throws Exception
{
15
/**//*建立索引代码,查找时注释*/
16
//Index index=new Index();
17
18
//index.createIndex();
19
20
21
22
23
File indexDir=new File(indexUrl);
24
FSDirectory fdir=FSDirectory.getDirectory(indexDir);
25
26
IndexSearcher searcher=new IndexSearcher(fdir);
27
28
//对中文建立解析(必须)
29
QueryParser parser=new QueryParser("Title",new ChineseAnalyzer());
30
Query query=parser.parse("李湘");
31
32
Date startDate=new Date();
33
System.out.println("检索开始时间:"+startDate);
34
35
Hits result=searcher.search(query);
36
37
for(int i=0;i<result.length();i++)
{
38
Document doc=result.doc(i);
39
40
System.out.println("内容:"+doc.get("Content"));
41
}
42
43
Date endDate=new Date();
44
45
System.out.println("共有记录:"+result.length());
46
System.out.println("共花费:"+(endDate.getTime()-startDate.getTime()));
47
}
48
49
}
经测试,建立索引文件大概花了11分钟。一般情况下,检索所花的时间和用SQL执行LIKE查询差不多。
当然,这只是我的粗略测试。最近一阶段,我会对Lucene进行代码深入研究。
posted on 2007-04-30 16:43
Terry Liang 阅读(2137)
评论(3) 编辑 收藏 所属分类:
Lucene 2.1研究