Posted on 2013-01-08 15:08
沙漠中的鱼 阅读(1239)
评论(0) 编辑 收藏 所属分类: 其他、Java、数据挖掘
/**
 * Example program that clusters the sample document set
 * {@code SampleDocumentData.DOCUMENTS_DATA_MINING} twice with the same
 * controller: first with {@code LingoClusteringAlgorithm} (topic clusters),
 * then with {@code ByUrlClusteringAlgorithm} (clusters derived from URLs),
 * printing the cluster labels and member document titles to standard output.
 */
public class ClusteringFlyStoneDocument {

    /**
     * Runs both clustering passes and prints their results.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class);
        try {
            // The documents to be clustered.
            final List<Document> documents = SampleDocumentData.DOCUMENTS_DATA_MINING;

            // Attribute map handed to the controller for the topic clustering pass.
            final Map<String, Object> attributes = Maps.newHashMap();
            // Register the documents as the clustering input.
            CommonAttributesDescriptor.attributeBuilder(attributes).documents(documents);
            // Set the default language used when processing the documents.
            MultilingualClusteringDescriptor.attributeBuilder(attributes)
                    .defaultLanguage(LanguageCode.CHINESE_SIMPLIFIED);

            // Cluster by topic using the Lingo algorithm and print the outcome.
            final ProcessingResult topicResult =
                    controller.process(attributes, LingoClusteringAlgorithm.class);
            printClusters("=======聚类主题=====", "【主题 】", "--------", topicResult.getClusters());

            // Cluster the same documents by URL; no query hint is supplied.
            final ProcessingResult domainResult =
                    controller.process(documents, null, ByUrlClusteringAlgorithm.class);
            printClusters("=======URL聚类=======", "【URL】", "----", domainResult.getClusters());
        } finally {
            // Shut the controller down even when processing fails; the original
            // code never released it.
            controller.dispose();
        }
    }

    /**
     * Prints a heading, then each cluster's label followed by the titles of
     * all documents the cluster reports via {@code getAllDocuments()}.
     *
     * @param heading     line printed once before the clusters
     * @param labelPrefix text printed in front of every cluster label
     * @param titlePrefix text printed in front of every document title
     * @param clusters    clusters to print
     */
    private static void printClusters(String heading, String labelPrefix, String titlePrefix,
            List<Cluster> clusters) {
        System.out.println(heading);
        for (Cluster cluster : clusters) {
            System.out.println(labelPrefix + cluster.getLabel());
            for (Document member : cluster.getAllDocuments()) {
                System.out.println(titlePrefix + member.getTitle());
            }
        }
    }
}