|
Posted on 2010-10-07 15:53 penngo 阅读(3327) 评论(4) 编辑 收藏 所属分类: Java
有些网站的搜索功能是直接使用 like '%关键词%' 的方式对数据库进行关键词查找,不过这种方式速度比较慢,而且会影响数据库服务器的性能。
其实我们可以先把数据从数据库查询出来,利用lucene建立索引。以后每次查找都从索引中查找,可以提高查询速度和减轻服务器负担。
本篇用到的技术:lucene3.0.2,IKAnalyzer3.2.5
search.properties主要是配置搜索的信息,内容:
# 指定查找sql,需要建立索引的数据
sql=select iId,title,content,credate from archeive
update.field=iId
update.value=
# 搜索时的查找字段
search.condition=title,content
# 索引的保存地址
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index
# 更新索引的时间间隔(毫秒)
period=10000
com.search.util.SearchConfig主要是读取search.properties的信息。
 /**
  * Typed accessors for the settings stored in {@code search.properties}.
  *
  * <p>The properties are loaded once, when the instance is constructed, through
  * {@code PropertiesUtil.getProperties}. Every getter falls back to a default
  * (empty string, or {@code "0"} for the period) when its key is absent.
  */
 public class SearchConfig {
     private Properties searchPro;
     private String searchFile = "search.properties";
     private String SQL = "sql";
     private String CONDITION = "search.condition";
     private String INDEX = "index.path";

     /** Creates the configuration and immediately loads the properties file. */
     public SearchConfig() {
         initSearch();
     }

     /** (Re)loads the properties from {@code search.properties}. */
     public void initSearch() {
         searchPro = PropertiesUtil.getProperties(searchFile);
     }

     /** @return the value of the {@code sql} key, or "" when it is not set */
     public String getSql() {
         return searchPro.getProperty(SQL, "");
     }

     /** @return the value of the {@code search.condition} key, or "" when it is not set */
     public String getCondition() {
         return searchPro.getProperty(CONDITION, "");
     }

     /**
      * Returns the directory named by {@code index.path}, creating it if needed.
      *
      * @return the index directory as a {@link File}; creation is best-effort,
      *         so the returned file may still not exist if it could not be created
      */
     public File getIndexPath() {
         String path = searchPro.getProperty(INDEX, "");
         File file = new File(path);
         if (!file.exists()) {
             // mkdirs() also creates missing parent directories; mkdir() silently
             // fails for a nested path such as .../WEB-INF/classes/Index.
             file.mkdirs();
         }
         return file;
     }

     /**
      * Returns the value of the {@code period} key.
      *
      * @return the configured period, or 0 when the key is not set
      * @throws NumberFormatException if the configured value is not a whole number
      */
     public long getPeriod() {
         String period = searchPro.getProperty("period", "0");
         // Parse as long (the declared return type) and trim first: Properties
         // keeps trailing whitespace in values, which would otherwise fail to parse.
         return Long.parseLong(period.trim());
     }

     /** @return the value of the {@code update.field} key, or "" when it is not set */
     public String getUpdateField() {
         return searchPro.getProperty("update.field", "");
     }

     /** @return the value of the {@code update.value} key, or "" when it is not set */
     public String getUpdateValue() {
         return searchPro.getProperty("update.value", "");
     }

     /** Writes the current properties back through {@code PropertiesUtil.saveProperties}. */
     public void save() {
         PropertiesUtil.saveProperties(searchPro, searchFile);
     }
 }
com.search.util.LuceneUtil代码介绍,主要是生成索引和搜索。
 /**
  * Builds the Lucene index from the configured SQL query and runs keyword
  * searches against it.
  *
  * <p>All settings (index directory, SQL, searched fields, optional incremental
  * filter) are read from {@link SearchConfig} when the instance is constructed.
  */
 public class LuceneUtil {
     private File indexpath = null;
     private String sql = null;
     private String condition = null;
     private String updateField = null;
     private String updateValue = null;
     private SearchConfig sc = null;

     /**
      * Loads the configuration. When {@code update.value} is non-empty the SQL is
      * narrowed to rows whose {@code update.field} is greater than that value.
      */
     public LuceneUtil() {
         sc = new SearchConfig();
         indexpath = sc.getIndexPath();
         sql = sc.getSql();
         condition = sc.getCondition();
         updateField = sc.getUpdateField();
         updateValue = sc.getUpdateValue();
         if (!updateValue.equals("")) {
             sql = sql + " where " + updateField + " > " + updateValue;
         }
     }

     /**
      * Runs the configured SQL and writes one document per row into a freshly
      * created index (any existing index in the directory is replaced).
      *
      * <p>Column 1 is indexed as {@code id}, column 2 as {@code title} and column 3
      * as {@code content}. Failures are printed and swallowed; the writer, the
      * directory and the result set are always released.
      */
     public void createIndex() {
         System.out.println("==========正在生成数据库索引。");
         // Fetch the rows that have to be indexed.
         ResultSet rs = SQLHelper.getResultSet(sql);
         FSDirectory directory = null;
         IndexWriter writer = null;
         try {
             // Open the index directory once and hand the same instance to the
             // writer, so that closing it below releases everything that was opened.
             directory = FSDirectory.open(indexpath);
             Analyzer analyzer = new IKAnalyzer();
             writer = new IndexWriter(directory, analyzer, true,
                     IndexWriter.MaxFieldLength.LIMITED);
             while (rs.next()) {
                 Document doc = new Document();
                 doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                         Field.Store.YES, Field.Index.ANALYZED));
                 // Field rejects null values, so a NULL column is indexed as "".
                 doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                         Field.Store.YES, Field.Index.ANALYZED));
                 doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                         Field.Store.YES, Field.Index.ANALYZED));
                 writer.addDocument(doc);
             }
         } catch (Exception e) {
             e.printStackTrace();
         } finally {
             // Release in reverse order of acquisition; each close is isolated so
             // one failure cannot prevent the remaining resources from closing.
             if (writer != null) {
                 try {
                     writer.close();
                 } catch (Exception e) {
                     e.printStackTrace();
                 }
             }
             if (directory != null) {
                 try {
                     directory.close();
                 } catch (Exception e) {
                     e.printStackTrace();
                 }
             }
             if (rs != null) {
                 try {
                     rs.close();
                 } catch (Exception e) {
                     e.printStackTrace();
                 }
             }
         }
     }

     /**
      * Searches the configured fields for {@code keyword}.
      *
      * @param keyword the text to search for; {@code null} or "" yields no results
      * @return up to 10 best-scoring documents, never {@code null}; empty when the
      *         keyword is blank or the search fails (the failure is printed)
      */
     public List<Document> search(String keyword) {
         List<Document> list = new ArrayList<Document>();
         // Nothing to look up: return before any index resource is opened.
         if (keyword == null || keyword.equals("")) {
             return list;
         }
         FSDirectory directory = null;
         IndexReader reader = null;
         IndexSearcher isearcher = null;
         try {
             directory = FSDirectory.open(indexpath);
             reader = IndexReader.open(directory, true);
             isearcher = new IndexSearcher(reader);
             isearcher.setSimilarity(new IKSimilarity());
             Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

             // Fetch the 10 most relevant hits.
             TopDocs topDocs = isearcher.search(query, 10);
             ScoreDoc[] scoreDocs = topDocs.scoreDocs;

             // Iterate over the hits actually returned: totalHits counts every
             // match in the index and can exceed the length of scoreDocs.
             for (int i = 0; i < scoreDocs.length; i++) {
                 Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                 list.add(targetDoc);
             }
         } catch (Exception e) {
             e.printStackTrace();
         } finally {
             if (isearcher != null) {
                 try {
                     isearcher.close();
                 } catch (Exception e) {
                     e.printStackTrace();
                 }
             }
             // A searcher built from an existing reader does not close that
             // reader, so it has to be closed explicitly.
             if (reader != null) {
                 try {
                     reader.close();
                 } catch (Exception e) {
                     e.printStackTrace();
                 }
             }
             if (directory != null) {
                 try {
                     directory.close();
                 } catch (Exception e) {
                     e.printStackTrace();
                 }
             }
         }
         return list;
     }

     /** Maps {@code null} to the empty string and returns any other value unchanged. */
     private static String nullToEmpty(String value) {
         return value == null ? "" : value;
     }
 }
com.search.listener.Indexlistener启动索引更新程序
 /**
  * Servlet context listener that kicks off the index refresh job when the web
  * application starts.
  */
 public class Indexlistener implements ServletContextListener {

     /**
      * Called by the container on application start-up; constructing an
      * {@code IndexTask} is what schedules the index refresh.
      */
     public void contextInitialized(javax.servlet.ServletContextEvent arg0) {
         new IndexTask();
     }

     /** Called by the container on application shutdown; intentionally does nothing. */
     public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {
         // no-op
     }
 }
com.search.listener.IndexTask定时更新索引
 /**
  * Schedules the index rebuild on a {@link Timer}.
  *
  * <p>The first rebuild starts immediately. When the configured period is
  * positive the rebuild repeats at that interval; otherwise it runs only once.
  */
 public class IndexTask {
     /** Kept as a field so that the schedule can be stopped through {@link #cancel()}. */
     private final Timer timer;

     /** Reads the refresh period from {@code SearchConfig} and starts the schedule. */
     public IndexTask() {
         // Read the configuration first so that a configuration failure does not
         // leave an idle timer thread behind.
         SearchConfig sc = new SearchConfig();
         long period = sc.getPeriod();
         timer = new Timer();
         if (period > 0) {
             timer.schedule(new Task(), new Date(), period);
         } else {
             // Timer.schedule rejects a non-positive period with an
             // IllegalArgumentException; treat it as "build once, no refresh".
             timer.schedule(new Task(), new Date());
         }
     }

     /** Stops the schedule; a rebuild that is already running is allowed to finish. */
     public void cancel() {
         timer.cancel();
     }

     /** Timer job that rebuilds the index with a fresh {@code LuceneUtil}. */
     static class Task extends TimerTask {
         public void run() {
             try {
                 LuceneUtil lu = new LuceneUtil();
                 lu.createIndex();
             } catch (RuntimeException e) {
                 // An exception escaping run() terminates the timer thread and ends
                 // all future refreshes, so report it and keep the schedule alive.
                 e.printStackTrace();
             }
         }
     }
 }
com.search.util.RedHighlighter关键词高亮显示
 /**
  * Wraps the terms of a search keyword in red {@code <font>} tags inside a piece
  * of text.
  */
 public class RedHighlighter {

     /**
      * Returns the best-matching fragment (about 100 characters) of {@code word}
      * with the terms of {@code keyword} highlighted in red.
      *
      * @param keyword the search text whose terms are highlighted
      * @param field   the name of the field that {@code word} was read from
      * @param word    the stored text of that field
      * @return the highlighted fragment; {@code word} unchanged when it contains
      *         no matching term; "" when {@code word} is {@code null} or
      *         highlighting fails (the failure is printed)
      */
     public static String getBestFragment(String keyword, String field, String word) {
         if (word == null) {
             return "";
         }
         SearchConfig sc = new SearchConfig();
         String condition = sc.getCondition();
         try {
             Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
             SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                     "<font color='red'>", "</font>");
             Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                     new QueryScorer(query));
             highlighter.setTextFragmenter(new SimpleFragmenter(100));
             String fragment = highlighter.getBestFragment(new IKAnalyzer(),
                     field, word);
             // The highlighter returns null when the text contains none of the
             // query terms (e.g. the hit matched only on another field); fall back
             // to the plain text so that callers never render the string "null".
             return fragment != null ? fragment : word;
         } catch (Exception e) {
             e.printStackTrace();
         }
         return "";
     }
 }
index.jsp搜索页面
 <%@ page language="java" contentType="text/html; charset=GBK"
     pageEncoding="GBK"%>
<%@ page import="com.search.util.LuceneUtil" %>
<%@ page import="java.util.*" %>
<%@ page import="org.apache.lucene.document.Document" %>
<%@ page import="com.search.util.RedHighlighter" %>
<%@ page import="java.net.URLEncoder"%>
<%-- Search page: reads the keyword from the "w" request parameter, runs the
     Lucene search and lists the hits with the keyword highlighted. --%>
<%!
    // Escapes the characters that are significant in HTML text and attribute
    // values, so that user input can be echoed back into the page safely.
    private static String escapeHtml(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':
                    sb.append("&amp;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                case '\'':
                    sb.append("&#39;");
                    break;
                default:
                    sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title>搜索</title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    int size = 0;
    long time = 0;
    List<Document> list = null;
    if (w != null && !w.equals("")) {
        // Re-decode the GET parameter as GBK.
        // NOTE(review): this assumes the container decoded the query string as
        // ISO-8859-1 - confirm against the server's URI encoding setting.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    } else {
        w = "";
    }
    LuceneUtil lu = new LuceneUtil();
    Date start = new Date();
    list = lu.search(w);
    Date end = new Date();
    size = list.size();
    time = end.getTime() - start.getTime();
%>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<%-- The keyword is user input: escape it before echoing it into the attribute. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%=escapeHtml(w) %>"
><input type="submit"
    class="btnSearch" value="找一下"> <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span>找到相关内容<%=size%>篇,

用时<%=time%>毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
    if (list != null && list.size() > 0) {
        for (Document doc : list) {
            String title = RedHighlighter.getBestFragment(w, "title", doc.get("title"));
            String content = RedHighlighter.getBestFragment(w, "content", doc.get("content"));
%>
<div class="searchItem">
<a href="#" class="searchItemTitle" target="_blank"><%=title %></a>
<div class="searchCon">
<%=content %>
</div>
</div>
<%
        }
    }
%>
</div>
</div>
</div>
</body>
</html>
运行效果:
附件: 完整代码
评论
# re: 利用lucene给网站、系统增加搜索功能[未登录] 回复 更多评论
2010-10-07 20:40 by
如何做得更强大,更专业呀
# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论
2010-10-07 21:09 by
不错啊~ 不过要想搜索更准确点,就不能这么简单了貌似.
# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论
2010-10-07 22:08 by
@os 搜索不准的话,可以研究下分词,我用的中文分词是IKAnalyzer。
# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论
2010-10-12 15:58 by
没有数据库脚本。
|