|
Posted on 2010-10-07 15:53 penngo 阅读(3324) 评论(4) 编辑 收藏 所属分类: Java
有些网站的搜索功能都是直接使用like %关键词%方式对数据库进行关键词查找,不过这种方式速度比较慢,而且影响数据库服务器性能。
其实我们可以先把数据从数据库查询出来,利用lucene建立索引。以后每次查找都从索引中查找,可以提高查询速度和减轻服务器负担。
本篇用到的技术:lucene3.0.2,IKAnalyzer3.2.5
search.properties主要是配置搜索的信息,内容:
# 指定查找sql,需要建立索引的数据
sql=select iId,title,content,credate from archeive
update.field=iId
update.value=
# 搜索时的查找字段
search.condition=title,content
# 索引的保存地址
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index
# 更新索引的时间间隔(毫秒)
period=10000
com.search.util.SearchConfig主要是读取search.properties的信息。
/**
 * Typed accessors for the settings stored in {@code search.properties}.
 *
 * <p>The properties are loaded once, through {@code PropertiesUtil}, when the
 * object is constructed; call {@link #initSearch()} to reload them.
 */
public class SearchConfig {
    /** Property key holding the SQL statement whose rows are indexed. */
    private static final String SQL = "sql";
    /** Property key holding the comma-separated list of searchable fields. */
    private static final String CONDITION = "search.condition";
    /** Property key holding the index directory path. */
    private static final String INDEX = "index.path";

    /** Name of the properties file handed to {@code PropertiesUtil}. */
    private final String searchFile = "search.properties";
    private Properties searchPro;

    public SearchConfig() {
        initSearch();
    }

    /** (Re)loads the properties from {@code search.properties}. */
    public void initSearch() {
        searchPro = PropertiesUtil.getProperties(searchFile);
    }

    /** @return the value of {@code sql}, or an empty string when absent */
    public String getSql() {
        return searchPro.getProperty(SQL, "");
    }

    /** @return the value of {@code search.condition}, or an empty string when absent */
    public String getCondition() {
        return searchPro.getProperty(CONDITION, "");
    }

    /**
     * Returns the index directory named by {@code index.path}, creating it
     * (including any missing parent directories) when it does not exist yet.
     *
     * @return the index directory as a {@link File}
     */
    public File getIndexPath() {
        String path = searchPro.getProperty(INDEX, "").trim();
        File file = new File(path);
        if (!file.exists()) {
            // mkdirs() rather than mkdir(): the configured path is usually
            // several levels deep and the parents may not exist yet.
            file.mkdirs();
        }
        return file;
    }

    /**
     * Returns the value of {@code period}.
     *
     * @return the configured period, or 0 when the property is absent or blank
     * @throws NumberFormatException if the value is not a valid number
     */
    public long getPeriod() {
        String period = searchPro.getProperty("period", "0").trim();
        if (period.length() == 0) {
            return 0L;
        }
        // Parse as long to match the return type instead of boxing an Integer.
        return Long.parseLong(period);
    }

    /** @return the value of {@code update.field}, or an empty string when absent */
    public String getUpdateField() {
        return searchPro.getProperty("update.field", "");
    }

    /** @return the value of {@code update.value}, or an empty string when absent */
    public String getUpdateValue() {
        return searchPro.getProperty("update.value", "");
    }

    /** Writes the current properties back through {@code PropertiesUtil}. */
    public void save() {
        PropertiesUtil.saveProperties(searchPro, searchFile);
    }
}
com.search.util.LuceneUtil代码介绍,主要是生成索引和搜索。
/**
 * Builds the Lucene index from the configured SQL query and runs keyword
 * searches against it.
 *
 * <p>All settings (index directory, SQL, searchable fields) are read from
 * {@link SearchConfig} when the object is constructed.
 */
public class LuceneUtil {
    private File indexpath = null;
    private String sql = null;
    private String condition = null;
    private String updateField = null;
    private String updateValue = null;
    private SearchConfig sc = null;

    /**
     * Loads the configuration. When {@code update.value} is not empty, a
     * {@code where <update.field> > <update.value>} clause is appended to the
     * configured SQL.
     */
    public LuceneUtil() {
        sc = new SearchConfig();
        indexpath = sc.getIndexPath();
        sql = sc.getSql();
        condition = sc.getCondition();
        updateField = sc.getUpdateField();
        updateValue = sc.getUpdateValue();
        if (!updateValue.equals("")) {
            sql = sql + " where " + updateField + " > " + updateValue;
        }
    }

    /**
     * Runs the configured SQL and writes one document per row into the index.
     * Columns 1, 2 and 3 of the result set are stored as the fields
     * {@code id}, {@code title} and {@code content}.
     *
     * <p>Failures are printed to standard error; the writer, directory and
     * result set are always closed so that the index write lock is released.
     */
    public void createIndex() {
        System.out.println("==========正在生成数据库索引。");
        // Fetch the rows to be indexed from the database.
        ResultSet rs = SQLHelper.getResultSet(sql);
        FSDirectory directory = null;
        IndexWriter writer = null;
        try {
            // Open the index directory once and share it with the writer.
            directory = FSDirectory.open(indexpath);
            Analyzer analyzer = new IKAnalyzer();
            // NOTE(review): create=true replaces any existing index, while the
            // SQL may be restricted to newer rows via update.value — confirm
            // that combination is intended.
            writer = new IndexWriter(directory, analyzer, true,
                    IndexWriter.MaxFieldLength.LIMITED);
            while (rs.next()) {
                Document doc = new Document();
                doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                        Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (rs != null) {
                try {
                    rs.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the configured fields for {@code keyword} and returns the
     * stored documents of the best matches.
     *
     * @param keyword the text to search for
     * @return up to 10 matching documents, best match first; an empty list
     *         when {@code keyword} is null or empty, or when the search fails
     */
    public List<Document> search(String keyword) {
        List<Document> list = new ArrayList<Document>();
        // Nothing to look up: return before any index resource is opened.
        if (keyword == null || keyword.equals("")) {
            return list;
        }
        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher isearcher = null;
        try {
            directory = FSDirectory.open(indexpath);
            reader = IndexReader.open(directory, true);
            isearcher = new IndexSearcher(reader);
            isearcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
            // Ask for the 10 highest-scoring documents.
            TopDocs topDocs = isearcher.search(query, 10);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            // Iterate over the returned hits, not totalHits: totalHits counts
            // every match in the index and can exceed the array length.
            for (int i = 0; i < scoreDocs.length; i++) {
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                list.add(targetDoc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            // The reader was opened here, so it is closed here as well.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return list;
    }

    /** Maps a NULL column value to an empty string; Field rejects null values. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
com.search.listener.Indexlistener启动索引更新程序
/**
 * Servlet context listener that starts the index update job when the web
 * application comes up.
 */
public class Indexlistener implements ServletContextListener {

    /**
     * Creates an {@code IndexTask} once the servlet context is initialized.
     *
     * @param arg0 the context event (not used)
     */
    public void contextInitialized(javax.servlet.ServletContextEvent arg0) {
        new IndexTask();
    }

    /**
     * Called when the servlet context is destroyed; performs no work.
     *
     * @param arg0 the context event (not used)
     */
    public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {
        // intentionally empty
    }
}
com.search.listener.IndexTask定时更新索引
/**
 * Schedules the periodic rebuild of the search index.
 *
 * <p>Constructing an instance starts a timer that runs {@link Task}
 * immediately and then repeatedly at the interval returned by
 * {@code SearchConfig.getPeriod()}.
 */
public class IndexTask {

    /**
     * Starts the index update timer. When the configured period is not
     * positive the index is built once and not refreshed afterwards.
     */
    public IndexTask() {
        // Daemon timer: the background thread must not keep the JVM (or the
        // servlet container) alive once the application is shutting down.
        Timer timer = new Timer(true);
        SearchConfig sc = new SearchConfig();
        long period = sc.getPeriod();
        if (period > 0) {
            timer.schedule(new Task(), new Date(), period);
        } else {
            // Timer.schedule rejects a non-positive period, so fall back to a
            // single run instead of failing at start-up.
            timer.schedule(new Task(), new Date());
        }
    }

    /** Timer job that rebuilds the index through {@code LuceneUtil}. */
    static class Task extends TimerTask {
        public void run() {
            try {
                LuceneUtil lu = new LuceneUtil();
                lu.createIndex();
            } catch (RuntimeException e) {
                // An exception escaping run() terminates the timer thread and
                // cancels all future runs; report it and keep the schedule.
                e.printStackTrace();
            }
        }
    }
}
com.search.util.RedHighlighter关键词高亮显示
/**
 * Wraps the query terms found in a piece of text in red {@code <font>} tags.
 */
public class RedHighlighter {

    /**
     * Returns the best-scoring fragment of {@code word} with the terms of
     * {@code keyword} highlighted.
     *
     * @param keyword the search keyword entered by the user
     * @param field   name of the field the text belongs to
     * @param word    the text to highlight
     * @return the highlighted fragment; the unmodified {@code word} when it
     *         contains none of the query terms; an empty string when
     *         {@code word} is null or empty, or when highlighting fails
     */
    public static String getBestFragment(String keyword, String field, String word) {
        if (word == null || word.length() == 0) {
            return "";
        }
        SearchConfig sc = new SearchConfig();
        String condition = sc.getCondition();
        try {
            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                    "<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(100));
            String c = highlighter.getBestFragment(new IKAnalyzer(), field, word);
            // The highlighter yields null when the text holds no query term
            // (e.g. the hit came from the other field); show the plain text
            // instead of letting the caller print "null".
            return c != null ? c : word;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }
}
index.jsp搜索页面
<%@ page language="java" contentType="text/html; charset=GBK"
pageEncoding="GBK"%>
<%@page import="com.search.util.LuceneUtil" %>
<%@page import="java.util.*" %>
<%@page import="org.apache.lucene.document.Document" %>
<%@page import="com.search.util.RedHighlighter" %>
<%@page import="java.net.URLEncoder"%><!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<%-- Search page: reads the keyword from the "w" request parameter, runs the
     search through LuceneUtil and lists the highlighted hits. --%>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title>搜索</title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
//request.setCharacterEncoding("GBK");
String w = request.getParameter("w");
int size = 0;
long time = 0;
List<Document> list = null;
if(w != null && !w.equals("")){
    // Re-decode the parameter bytes as GBK.
    w = new String(w.getBytes("ISO8859-1"), "GBK");
}
else{
    w = "";
}
// HTML-escaped copy of the keyword for echoing back into the form; writing
// the raw request parameter into the page would allow script injection.
String safeW = w.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;")
        .replace("'", "&#39;");
LuceneUtil lu = new LuceneUtil();
// Time the search so the elapsed milliseconds can be shown to the user.
Date start = new Date();
list = lu.search(w);
Date end = new Date();
size = list.size();
time = end.getTime() - start.getTime();
%>
<script type="text/javascript">
function submit(){
}
</script>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<input type="text" class="txtSeach" id="w" name="w" value="<%=safeW %>"
><input type="submit"
class="btnSearch" onclick="submit" value="找一下"> <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span>找到相关内容<%=size%>篇,
用时<%=time%>毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
if(list != null && list.size() > 0){
    for(Document doc:list){
        // The fragments contain the highlighter's <font> markup, so they are
        // written out as-is.
        // NOTE(review): the stored title/content are emitted unescaped —
        // confirm the indexed data is trusted.
        String title = RedHighlighter.getBestFragment(w, "title", doc.get("title"));
        String content = RedHighlighter.getBestFragment(w, "content", doc.get
        ("content"));
%>
<div class="searchItem">
<a href="#" class="searchItemTitle" target="_blank"><%=title %></a>
<div class="searchCon">
<%=content %>
</div>
</div>
<%
    }
}
%>
</div>
</div>
</div>
</body>
</html>
运行效果:
附件: 完整代码
评论
# re: 利用lucene给网站、系统增加搜索功能[未登录] 回复 更多评论
2010-10-07 20:40 by
如何做得更强大,更专业呀
# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论
2010-10-07 21:09 by
不错啊~ 不过要想搜索更准确点,就不能这么简单了貌似.
# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论
2010-10-07 22:08 by
@os 搜索不准的话,可以研究下分词,我用的中文分词是IKAnalyzer。
# re: 利用lucene给网站、系统增加搜索功能 回复 更多评论
2010-10-12 15:58 by
没有数据库脚本。
|