package
com.lucene.helloworld;
import
java.util.logging.SimpleFormatter;
import
jeasy.analysis.MMAnalyzer;
import
org.apache.lucene.analysis.Analyzer;
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.IndexWriter.MaxFieldLength;
import
org.apache.lucene.queryParser.MultiFieldQueryParser;
import
org.apache.lucene.queryParser.QueryParser;
import
org.apache.lucene.search.Filter;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.Query;
import
org.apache.lucene.search.ScoreDoc;
import
org.apache.lucene.search.TopDocs;
import
org.apache.lucene.search.highlight.Formatter;
import
org.apache.lucene.search.highlight.Fragmenter;
import
org.apache.lucene.search.highlight.Highlighter;
import
org.apache.lucene.search.highlight.QueryScorer;
import
org.apache.lucene.search.highlight.Scorer;
import
org.apache.lucene.search.highlight.SimpleFragmenter;
import
org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import
org.junit.Test;
import
com.lucene.util.File2DocumentUtils;
public
class HelloWorld {
String zhFilePath =
"F:""java""workspaces""LuceneTest""luceneDatasource""世界,你好.txt";
String filePath =
"F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter
addDocument's a javadoc .txt";
String indexPath =
"F:""java""workspaces""LuceneTest""luceneIndexs";
// Analyzer analyzer = new StandardAnalyzer();
Analyzer mmAnalyzer = new MMAnalyzer(); //
词库分析,极易分词
/**
* 创建索引
*
*
@throws Exception
*
*/
@Test
public void createIndex() throws Exception
{
IndexWriter indexWriter = new
IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED);
// Document doc =
File2DocumentUtils.file2Document(filePath);
Document zhDoc =
File2DocumentUtils.file2Document(zhFilePath);
// indexWriter.addDocument(doc);
indexWriter.addDocument(zhDoc);
indexWriter.close();
}
/**
* 从索引库搜索
*
*
@throws Exception
*/
@Test
public void search() throws Exception {
// String queryString = "hello
world";
String queryString = "世界,你好";
// 1、将搜索文件解析为Query对象
String[] fields = { "name",
"content" };
QueryParser queryParser = new
MultiFieldQueryParser(fields, mmAnalyzer);
Query query =
queryParser.parse(queryString);
// 2、查询
IndexSearcher indexSearcher = new
IndexSearcher(indexPath);
Filter filter = null;
TopDocs topDocs =
indexSearcher.search(query, filter, 10000);
System.out.println("总共有【" + topDocs.totalHits + "】条结果匹配");
// start 准备高亮器
Formatter formatter = new
SimpleHTMLFormatter("<font color=red>",
"</font>");
Scorer scorer = new QueryScorer(query);
Highlighter highlighter = new
Highlighter(formatter, scorer);
Fragmenter fragmenter = new
SimpleFragmenter(50);
highlighter.setTextFragmenter(fragmenter);
// end 结束高亮器
// 3、打印输出结果
for (ScoreDoc scoreDoc :
topDocs.scoreDocs) {
int docSn = scoreDoc.doc;
Document doc =
indexSearcher.doc(docSn);
// start 高亮
// 返回高亮后的结果,如果当前属性值中没有出现关键字,会返回 null
String hc =
highlighter.getBestFragment(mmAnalyzer, "content",
doc.get("content"));
if (hc == null) {
String content =
doc.get("content");
int endIndex = Math.min(50,
content.length());
hc = content.substring(0,
endIndex);
}
doc.getField("content").setValue(hc);
// end 高亮
File2DocumentUtils.printDocumentInfo(doc);
}
}
}
|