Re: Parsing XML with Dom
Arne Vajhøj wrote:
nuthinking@googlemail.com wrote:
The problem seems to be that setIgnoringElementContentWhitespace
works only if the XML refers to either an XSD or a DTD.
To some extent, I think that makes sense.
Only with a DTD or XSD is it possible to identify something
as content whitespace.
Try looking at the attached example.
Arne
====================================
package september;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;
import org.xml.sax.InputSource;
/**
 * Small demonstration that parses three variants of the same document with
 * {@code setIgnoringElementContentWhitespace(true)} and prints the text nodes
 * that end up in the DOM, so whitespace-only nodes can be seen.
 *
 * <p>The variants are: no DTD, a DTD declaring {@code all} with element-only
 * content, and a DTD declaring {@code all} with mixed content.
 */
public class XMLandWS {

    /** Document body shared by all three sample inputs. */
    private static final String SAMPLE_BODY =
            "<all>\n"
            + " <one>A</one>\n"
            + " <one>BB</one>\n"
            + " <one>CCC</one>\n"
            + "</all>\n";

    /**
     * Echoes the given XML to standard output, parses it into a DOM with
     * "ignore element content whitespace" switched on, and prints one line per
     * text node below the document element.
     *
     * <p>Each line starts with {@code =}; newlines in the node value are shown
     * as the two characters {@code \n} and spaces as {@code _}.
     *
     * @param xml the XML document text to parse
     * @throws Exception if the builder cannot be created or the document
     *         cannot be parsed
     */
    public static void parse(String xml) throws Exception {
        System.out.print(xml);

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringElementContentWhitespace(true);
        DocumentBuilder builder = factory.newDocumentBuilder();

        InputSource source = new InputSource(new StringReader(xml));
        Document document = builder.parse(source);

        // Restrict the walk to text nodes under the root element; no custom
        // filter, and entity references are not expanded.
        DocumentTraversal traversal = (DocumentTraversal) document;
        TreeWalker walker = traversal.createTreeWalker(
                document.getDocumentElement(), NodeFilter.SHOW_TEXT, null, false);

        for (Node text = walker.nextNode(); text != null; text = walker.nextNode()) {
            System.out.println("=" + visible(text.getNodeValue()));
        }
    }

    /** Makes whitespace visible: newline becomes {@code \n}, space becomes {@code _}. */
    private static String visible(String value) {
        String withNewlines = value.replace("\n", "\\n");
        return withNewlines.replace(" ", "_");
    }

    /**
     * Runs {@link #parse(String)} on the three sample documents in turn.
     *
     * @param args ignored
     * @throws Exception if any of the sample documents fails to parse
     */
    public static void main(String[] args) throws Exception {
        // 1) No DTD at all.
        parse(SAMPLE_BODY);

        // 2) DTD with element-only content for <all>.
        String elementOnlyDtd =
                "<!DOCTYPE all [\n"
                + "<!ELEMENT all (one)*>\n"
                + "<!ELEMENT one (#PCDATA)>\n"
                + "]>\n";
        parse(elementOnlyDtd + SAMPLE_BODY);

        // 3) DTD with mixed content for <all>.
        String mixedContentDtd =
                "<!DOCTYPE all [\n"
                + "<!ELEMENT all (#PCDATA|one)*>\n"
                + "<!ELEMENT one (#PCDATA)>\n"
                + "]>\n";
        parse(mixedContentDtd + SAMPLE_BODY);
    }
}