CDK 提供了通过 SMARTS（或 SMILES）表达式进行子结构搜索的工具类：org.openscience.cdk.smiles.smarts.SMARTSQueryTool。示例代码如下：
package com.founder.cdk;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.openscience.cdk.ChemFile;
import org.openscience.cdk.ChemObject;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.smiles.smarts.SMARTSQueryTool;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;
/**
 * Small command-line demo of substructure searching with CDK's
 * {@link SMARTSQueryTool}: reads every molecule from an MDL SD file, keeps the
 * ones that match a SMARTS pattern, and prints the number of hits followed by
 * the "ID" property of each hit.
 */
public class SMARTSQueryToolTest {
    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_FILENAME = "H:\\molecules.sdf";

    /** Shared query tool; its pattern is replaced in {@link #main(String[])}. */
    static SMARTSQueryTool sqt;

    static {
        try {
            sqt = new SMARTSQueryTool("c2ccc1ccccc1c2");
        } catch (CDKException e) {
            // Fail loudly at class-load time instead of leaving sqt null and
            // hitting an unexplained NullPointerException later in main.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Runs the substructure search and prints the results to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the SD file to
     *             search. When absent, {@code H:\molecules.sdf} is used.
     */
    public static void main(String[] args) {
        String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_FILENAME;
        try {
            MDLV2000Reader reader = new MDLV2000Reader(new FileReader(new File(filename)));
            try {
                ChemFile chemFile = (ChemFile) reader.read((ChemObject) new ChemFile());
                List<IAtomContainer> containersList = ChemFileManipulator.getAllAtomContainers(chemFile);
                List<IAtomContainer> substructureList = new ArrayList<IAtomContainer>();

                // Replace the pattern set in the static initializer with the one to search for.
                sqt.setSmarts("c1ccc3c(c1)ccc4c2ccccc2ccc34");

                for (IAtomContainer molecule : containersList) {
                    if (sqt.matches(molecule)) {
                        substructureList.add(molecule);
                    }
                }

                System.out.println(substructureList.size());
                for (IAtomContainer molecule : substructureList) {
                    System.out.println(molecule.getProperty("ID"));
                }
            } finally {
                // Release the underlying file handle even if reading or matching fails.
                reader.close();
            }
        } catch (CDKException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
经测试，matches 方法速度较慢，匹配一个结构一般需要 200ms–1000ms 左右。
posted on 2009-10-20 08:33
周锐 阅读(570)
评论(0) 编辑 收藏 所属分类:
Java 、
CDK