Posted on 2010-01-21 23:46
小强摩羯座 阅读(1223)
评论(0) 编辑 收藏 所属分类:
算法编程
/**
* 实现测试:串匹配和词频统计功能
*/
public void largerTextExample()
{
String text = "你好abc,ac,abc,def,ac,okt, ac,dfdfe, ac , what is it 你好啊,bc";
String[] terms = {"你好","ac", "abc", "bc"};
for (int i = 0; i < terms.length; i++)
{
tree.add(terms[i].getBytes(), terms[i]);
System.out.println( terms[i]);
}
tree.prepare();
Set termsThatHit = new HashSet();
Iterator iter = tree.search(text.getBytes());
// 统计词频
Map<String, Integer> freqCount = new HashMap<String, Integer>();
for (; iter.hasNext();)
{
SearchResult result = (SearchResult) iter.next();
Set set = result.getOutputs();
System.out.println(set);
for(Iterator it = set.iterator();it.hasNext();)
{
String str = (String)it.next();
if( freqCount.get(str) == null)
freqCount.put(str, 1);
else
freqCount.put(str, freqCount.get(str)+1);
}
}
for(String key: freqCount.keySet())
{
System.out.println( "key = " + key + ", value "+ freqCount.get(key) );
}
------------结果-------------------------
你好
ac
abc
bc
[你好]
[abc, bc]
[ac]
[abc, bc]
[ac]
[ac]
[ac]
[你好]
[bc]
key = abc, value 2
key = 你好, value 2
key = ac, value 4
key = bc, value 3