近日在使用 DOM4J 的时候,遇到一个问题,现在有两个解决方法,先记下来,以便日后使用。
问题:对一个 XML 文件进行读写操作时发现,如果文件已经存在,使用 DOM4J 读进来生成的 Document 对象会根据 DTD 里的定义,被追加一些 default 属性(实际并不需要),而且读取时间也被延长。
有一个 XML 文件如下:
xml 代码
- <?xml version="1.0" encoding="UTF-8"?>
- <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">
-
- <beans>
- ....
- </beans>
使用 DOM4J 的读取文件的一般性写法:
java 代码
- SAXReader reader = new SAXReader(false);
- document = reader.read(file);
- root = document.getRootElement();
对象 document 里已有的节点会被自动追加 DTD 里定义的 default 属性,只有新增加的节点不受影响,如下所示。而且,文件的操作时间也被延长。
xml 代码
- <?xml version="1.0" encoding="UTF-8"?>
- <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">
-
- <beans default-lazy-init="false" default-autowire="no" default-dependency-check="none">
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase" lazy-init="default" autowire="default" dependency-check="default"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase"/>
- <bean id="OperateXmlByDom4jTestCase" class="test.OperateXmlByDom4jTestCase"/>
- </beans>
为了不生成我们不需要的 default 属性,并缩短文件的操作时间,我们可以调用 SAXReader.setFeature 方法来改变 DOM4J 的行为,代码片断如下:
java 代码
// Xerces feature: do not load the external DTD when parsing without validation.
// The URI is the value of
//   Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE
// written out literally, so the code does not depend on the JDK-internal
// com.sun.org.apache.xerces.internal.impl.Constants class.
saxReader.setFeature(
        "http://apache.org/xml/features/nonvalidating/load-external-dtd",
        false);
关于更多的 Feature 请参考 com.sun.org.apache.xerces.internal.impl.Constants
以下为片断代码:
java 代码
- // xerces features
-
- /** Xerces features prefix ("http://apache.org/xml/features/"). */
- public static final String XERCES_FEATURE_PREFIX = "http://apache.org/xml/features/";
-
- /** Schema validation feature ("validation/schema"). */
- public static final String SCHEMA_VALIDATION_FEATURE = "validation/schema";
-
- /** Expose schema normalized values */
- public static final String SCHEMA_NORMALIZED_VALUE = "validation/schema/normalized-value";
-
- /** Send schema default value via characters() */
- public static final String SCHEMA_ELEMENT_DEFAULT = "validation/schema/element-default";
-
- /** Schema full constraint checking ("validation/schema-full-checking"). */
- public static final String SCHEMA_FULL_CHECKING = "validation/schema-full-checking";
-
- /** Augment Post-Schema-Validation-Infoset */
- public static final String SCHEMA_AUGMENT_PSVI = "validation/schema/augment-psvi";
-
- /** Dynamic validation feature ("validation/dynamic"). */
- public static final String DYNAMIC_VALIDATION_FEATURE = "validation/dynamic";
-
- /** Warn on duplicate attribute declaration feature ("validation/warn-on-duplicate-attdef"). */
- public static final String WARN_ON_DUPLICATE_ATTDEF_FEATURE = "validation/warn-on-duplicate-attdef";
-
- /** Warn on undeclared element feature ("validation/warn-on-undeclared-elemdef"). */
- public static final String WARN_ON_UNDECLARED_ELEMDEF_FEATURE = "validation/warn-on-undeclared-elemdef";
-
- /** Warn on duplicate entity declaration feature ("warn-on-duplicate-entitydef"). */
- public static final String WARN_ON_DUPLICATE_ENTITYDEF_FEATURE = "warn-on-duplicate-entitydef";
-
- /** Allow Java encoding names feature ("allow-java-encodings"). */
- public static final String ALLOW_JAVA_ENCODINGS_FEATURE = "allow-java-encodings";
-
- /** Disallow DOCTYPE declaration feature ("disallow-doctype-decl"). */
- public static final String DISALLOW_DOCTYPE_DECL_FEATURE = "disallow-doctype-decl";
-
- /** Continue after fatal error feature ("continue-after-fatal-error"). */
- public static final String CONTINUE_AFTER_FATAL_ERROR_FEATURE = "continue-after-fatal-error";
-
- /** Load dtd grammar when nonvalidating feature ("nonvalidating/load-dtd-grammar"). */
- public static final String LOAD_DTD_GRAMMAR_FEATURE = "nonvalidating/load-dtd-grammar";
-
- /** Load external dtd when nonvalidating feature ("nonvalidating/load-external-dtd"). */
- public static final String LOAD_EXTERNAL_DTD_FEATURE = "nonvalidating/load-external-dtd";
-
- /** Defer node expansion feature ("dom/defer-node-expansion"). */
- public static final String DEFER_NODE_EXPANSION_FEATURE = "dom/defer-node-expansion";
-
- /** Create entity reference nodes feature ("dom/create-entity-ref-nodes"). */
- public static final String CREATE_ENTITY_REF_NODES_FEATURE = "dom/create-entity-ref-nodes";
-
- /** Include ignorable whitespace feature ("dom/include-ignorable-whitespace"). */
- public static final String INCLUDE_IGNORABLE_WHITESPACE = "dom/include-ignorable-whitespace";
-
- /** Default attribute values feature ("validation/default-attribute-values"). */
- public static final String DEFAULT_ATTRIBUTE_VALUES_FEATURE = "validation/default-attribute-values";
-
- /** Validate content models feature ("validation/validate-content-models"). */
- public static final String VALIDATE_CONTENT_MODELS_FEATURE = "validation/validate-content-models";
-
- /** Validate datatypes feature ("validation/validate-datatypes"). */
- public static final String VALIDATE_DATATYPES_FEATURE = "validation/validate-datatypes";
-
- /** Notify character references feature ("scanner/notify-char-refs"). */
- public static final String NOTIFY_CHAR_REFS_FEATURE = "scanner/notify-char-refs";
-
- /** Notify built-in (&amp;amp;, etc.) references feature ("scanner/notify-builtin-refs"). */
- public static final String NOTIFY_BUILTIN_REFS_FEATURE = "scanner/notify-builtin-refs";
-
- /** Standard URI conformant feature ("standard-uri-conformant"). */
- public static final String STANDARD_URI_CONFORMANT_FEATURE = "standard-uri-conformant";
-
- /** Internal performance related feature:
- * false - the parser settings (features/properties) have not changed between 2 parses
- * true - the parser settings have changed between 2 parses
- * NOTE: this feature should only be set by the parser configuration.
- */
- public static final String PARSER_SETTINGS = "internal/parser-settings";
-
- /** Feature to make XML Processor XInclude Aware */
- public static final String XINCLUDE_AWARE = "xinclude-aware";
-
- /** Ignore xsi:schemaLocation and xsi:noNamespaceSchemaLocation. */
- public static final String IGNORE_SCHEMA_LOCATION_HINTS = "validation/schema/ignore-schema-location-hints";
-
- /**
- * When true, the schema processor will change characters events
- * to ignorableWhitespaces events, when characters are expected to
- * only contain ignorable whitespaces.
- */
- public static final String CHANGE_IGNORABLE_CHARACTERS_INTO_IGNORABLE_WHITESPACES =
- "validation/change-ignorable-characters-into-ignorable-whitespaces";
除了通过上面的 SAXReader.setFeature 方法之外,我们还可以通过自己实现的 EntityResolver 来解决这个问题。
PS:这个方法是从凝香小筑的 BLOG 里一篇题为 "Do not resolve DTD files when dom4j read xml file" 的文章里看到的。地址:http://blog.csdn.net/lessoft/archive/2007/06/20/1659579.aspx
代码片断如下:
java 代码
// Answer every external entity lookup (the DOCTYPE's DTD included) with an
// empty document instead of letting the parser fetch the real resource.
saxReader.setEntityResolver(new EntityResolver() {

    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        // Hand out a fresh empty stream on every call: a stream is a
        // one-shot object, so sharing a single instance between lookups
        // would only work by accident of it being empty.
        return new InputSource(new ByteArrayInputStream(new byte[0]));
    }
});
完整的代码如下:
java 代码
- package test;
-
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;

import junit.framework.TestCase;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.dom4j.tree.DefaultDocumentType;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.sun.org.apache.xerces.internal.impl.Constants;
-
- /**
- * A test case class for read and writer a xml file by Dom4j.
- * @author X.F.Yang [2007/07/03]
- * @version 1.0
- */
- public class OperateXmlByDom4jTestCase extends TestCase {
-
- /**
- * Default way to read and writer a xml file by Dom4j.
- * @throws Exception
- */
- public void testWriteXml() throws Exception {
- XmlFileOperation operation = new XmlFileOperation();
- operation.writer(new SAXReaderWrapper() {
- public void operation(SAXReader saxReader) throws Exception {
- // Nothing to do.
- }
- });
- }
-
- /**
- * Do not resolve DTD files when dom4j read xml file via the set feature.
- * @throws Exception
- */
- public void testWriteXmlSetFeature() throws Exception {
- XmlFileOperation operation = new XmlFileOperation();
- operation.writer(new SAXReaderWrapper() {
- public void operation(SAXReader saxReader) throws Exception {
- // http://apache.org/xml/features/nonvalidating/load-external-dtd"
- saxReader.setFeature(
- Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE,
- false);
- }
- });
- }
-
- /**
- * Do not resolve DTD files when dom4j read xml file via implement {@link EntityResolver}.
- * @throws Exception
- */
- public void testWriteXmlEntityResolver() throws Exception {
- XmlFileOperation operation = new XmlFileOperation();
- operation.writer(new SAXReaderWrapper() {
- public void operation(SAXReader saxReader) throws Exception {
- saxReader.setEntityResolver(new EntityResolver() {
-
- String emptyDtd = "";
-
- ByteArrayInputStream bytels = new ByteArrayInputStream(emptyDtd.getBytes());
-
- public InputSource resolveEntity(String publicId,
- String systemId) throws SAXException, IOException {
- return new InputSource(bytels);
- }
- });
- }
- });
- }
-
- /** */
- protected interface SAXReaderWrapper {
-
- /** operation {@link SAXReader} */
- void operation(SAXReader saxReader) throws Exception;
-
- }
-
- /**
- * when the target file was existed, read and append the new element.
- * else, create a new xml file and add the new element.
- */
- protected class XmlFileOperation {
-
- /** target file */
- private File file;
-
- public XmlFileOperation() {
- // target file
- file = new File("d:""spring.xml");
- }
-
- /**
- * Write xml file
- * @param wrapper
- * @throws Exception
- * @see {@link SAXReaderWrapper}
- */
- public void writer(SAXReaderWrapper wrapper) throws Exception {
- try {
- Document document = null;
- Element root = null;
-
- // read the xml file if target file was existed
- if (file.exists()) {
- SAXReader reader = new SAXReader(false);
-
- wrapper.operation(reader);
-
- document = reader.read(file);
- root = document.getRootElement();
- // if the target file was not existed, create a new one
- } else {
- document = DocumentHelper.createDocument();
- document.setDocType(new DefaultDocumentType("beans",
- "-//SPRING//DTD BEAN//EN",
- "http://www.springframework.org/dtd/spring-beans.dtd"));
- root = document.addElement("beans");
- }
- // create the element under the root element
- root.addElement("bean")
- .addAttribute("id", "OperateXmlByDom4jTestCase")
- .addAttribute("class", "test.OperateXmlByDom4jTestCase");
-
- // writer the document
- writer(document);
- } catch (Exception e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- protected void writer(Document document) throws Exception {
- XMLWriter xmlWriter = null;
- try {
- final OutputFormat format = OutputFormat.createPrettyPrint();
- xmlWriter = new XMLWriter(new BufferedWriter(new FileWriter(file)), format);
- xmlWriter.write(document);
- } finally {
- if (null != xmlWriter) {
- xmlWriter.flush();
- xmlWriter.close();
- }
- }
- }
- }
- }