Re: Get "java.lang.OutOfMemoryError" when Parsing an XML using DOM
On Mar 24, 11:31 pm, Lew <l...@nospam.lewscanon.com> wrote:
"NeoGeoSNK" <ny1...@gmail.com> wrote:
I just used SAX to rewrite the code, and the performance improved a
lot. To my surprise, parsing the XML with DOM took more than 6 hours,
but with SAX it takes only 6 seconds :)
Andrew Thompson wrote:
Hmm... That is quite an impressive difference,
isn't it? Lew's estimate was not far off (I did
not comment at the time - but I really thought
his statement of '2 hour -> 1 to 2 seconds' was
unrealistic!).
Oh, ye of little faith! :-)
It would've been fine with me if I were wrong - I have been proven wrong in
this forum several times before. I just know how fast a good SAX
implementation can be, went out on a limb and was right this time.
I wonder if there weren't a particular problem with the DOM implementation,
though. Others in this thread have had better success with a DOM approach than
the OP did.
-- Lew
Thanks Lew
I have pasted my source code below; maybe you can point out some problems in
my DOM implementation when you are free :)
//The Set parsing(String filename) is implemented by DOM
//The Set parsing(String filename, boolean sax) is implemented by SAX
import java.io.*;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import java.util.*;
import javax.xml.xpath.*;
import org.xml.sax.helpers.*;
/**
* parsing a XML format log file and retrieval all subscribers info.
* @author yning
*
*/
class SAXhandler extends DefaultHandler{
public SAXhandler(Set subscribers){
this.subscribers = subscribers;
}
int ing;
int ed;
boolean inasub = false;
boolean callingflag = false;
boolean calledflag = false;
boolean lrnflag = false;
boolean dirflag = false;
Set subscribers;
SubInfo subscriber;
public void startElement(String namespaceURL, String lname, String
qname, Attributes attr){
if(qname.equals("string")){
//System.out.println("Sax parser = " + qname);
//System.out.println("attr = " + attr.getValue(0));
String value = attr.getValue(0);
if(value.equals("Sub_OAM_DirNumber")){
subscriber = new SubInfo();
dirflag = true;
}else{
if(value.equals("create")){
subscriber.setModifier("create");
}else{
if(value.equals("modify")){
subscriber.setModifier("modify");
}else{
if(value.equals("delete")){
subscriber.setModifier("delete");
}else{
if(value.trim().matches("dirNumberId.*")){
//System.out.println("dirNumberId = " +
value);
String dirnumber =
value.substring(value.indexOf("dirNumberId=") + 12,
value.indexOf(",sHLRSubsOrganizationId"));
String ndc =
value.substring(value.indexOf("nDCId=") + 6,
value.indexOf(",managedElementId=SHLR"));
// System.out.println("dirnumber=" +
dirnumber + ndc);
subscriber.setNDCId(ndc);
subscriber.setdirNumberId(dirnumber);
}else{
if(value.equals("callingList")){
callingflag = true;
}else{
if(callingflag == true){
if(value.equals("NULL"))
subscriber.removeCallingList();
else
subscriber.addCallingList(value);
// System.out.println("callingService = " +
value.trim());
//System.out.println("ing = " + ing++);
callingflag = false;
}else{
if(value.equals("calledList")){
calledflag = true;
}else{
if(calledflag == true){
if(value.equals("NULL"))
subscriber.removeCalledList();
else
subscriber.addCalledList(value);
// System.out.println("calledService = " +
value.trim());
// System.out.println("ed = " + ed++);
calledflag = false;
}else{
if(value.equals("lRNumberId")){
lrnflag = true;
}else{
if(lrnflag == true){
// System.out.println("lrnnumber = " + value);
subscriber.setlrnNumberId(value);
lrnflag = false;
}
}
}
}
}
}
}
}
}
}
}
}
}
public void endElement(String uri, String lname, String qname){
if(qname.equals("record") && dirflag == true){
subscribers.add(subscriber);
dirflag = false;
}
}
}
public class ParsingLog {
public Set parsing(String filename, boolean sax)throws Exception{
Set subset = new LinkedHashSet();
File f = new File(filename);
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser paser = factory.newSAXParser();
SAXhandler handler = new SAXhandler(subset);
paser.parse(f, handler);
return handler.subscribers;
}
public Set parsing(String filename) throws Exception{
Set subset = new LinkedHashSet();
File f = new File(filename);
DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(f);
Element root = doc.getDocumentElement();
XPathFactory xpfactory = XPathFactory.newInstance();
XPath path = xpfactory.newXPath();
NodeList recoredlist = (NodeList)path.evaluate("/journal/record",
doc, XPathConstants.NODESET);
// System.out.println("frameIdlist.getLength()= " +
recoredlist.getLength());
//enumerate all record in a log
for(int i = 0; i < recoredlist.getLength(); i ++){
// System.out.println("recoredlist = " + recoredlist.item(i));
Node record = recoredlist.item(i);
Element recordelement = (Element)record;
//System.out.println(recordelement.getTagName());
//get operat type
String BEtype = (String)path.evaluate("header/header_generic/domain/
@value", recordelement);
// System.out.println("operation type = " + BEtype);
if(!BEtype.equals("SHLR::Subscription"))
continue;
SubInfo subscriber = new SubInfo();
NodeList framelist = (NodeList)path.evaluate("body/frame",
recordelement, XPathConstants.NODESET);
// System.out.println("framelist = " + framelist.getLength());
//enumerate frame list in a record
for(int j = 0; j < framelist.getLength(); j++){
// System.out.println("frame = " + framelist.item(j));
NodeList attriblist = (NodeList)path.evaluate("attribute/
attribute_value/string/@value", framelist.item(j),
XPathConstants.NODESET);
for(int k = 0; k < attriblist.getLength(); k++){
//System.out.println(attriblist.item(k));
//System.out.println(attriblist.item(k).getClass());
Node attribute = attriblist.item(k);
String value = attribute.getNodeValue();
//String value = att.getAttribute("Value");
// System.out.println("Value = " + value);
if(value.equals("create")){
subscriber.setModifier("create");
}else{
if(value.equals("modify")){
subscriber.setModifier("modify");
}else{
if(value.equals("delete")){
subscriber.setModifier("delete");
}else{
if(value.trim().matches("dirNumberId.*")){
//System.out.println("dirNumberId = " +
value);
String dirnumber =
value.substring(value.indexOf("dirNumberId=") + 12,
value.indexOf(",sHLRSubsOrganizationId"));
String ndc =
value.substring(value.indexOf("nDCId=") + 6,
value.indexOf(",managedElementId=SHLR"));
// System.out.println("dirnumber=" +
dirnumber + ndc);
subscriber.setNDCId(ndc);
subscriber.setdirNumberId(dirnumber);
}else{
if(value.equals("calledList")){
Node calledattr = attriblist.item(k + 1);
String calledvalue =
calledattr.getNodeValue();
// System.out.println("calledList = " +
calledvalue);
if(calledvalue.equals("NULL"))
subscriber.removeCalledList();
else
subscriber.addCalledList(calledvalue);
}else{
if(value.equals("callingList")){
Node callingattr = attriblist.item(k + 1);
String callingvalue =
callingattr.getNodeValue();
// System.out.println("callingList = " +
callingvalue);
if(callingvalue.equals("NULL"))
subscriber.removeCallingList();
else
subscriber.addCallingList(callingvalue);
}else{
if(value.equals("lRNumberId")){
Node lrnattr = attriblist.item(k + 1);
String lrnvalue = lrnattr.getNodeValue();
subscriber.setlrnNumberId(lrnvalue);
}
}
}
}
}
}
}
}
}
if(subscriber != null)
subset.add(subscriber);
}
return subset;
}
public static void main(String[] args)throws Exception{
System.out.println("start job:" + new Date());
ParsingLog a = new ParsingLog();
Set set = a.parsing("log_R2.2.xml");
System.out.println("\n\n\ntotal subscribers = " + set.size());
Iterator iterator = set.iterator();
SubInfo sub;
while(iterator.hasNext()){
System.out.println("subscriber to write");
sub = (SubInfo)iterator.next();
System.out.println("dirnumber:" + sub.getdirNumberId());
System.out.println("Modifier:" + sub.getModifier());
System.out.println("ndc:" + sub.getNDCId());
System.out.println("called list:" + sub.getCalledList());
System.out.println("calling list:" + sub.getCallingList());
System.out.println("lrn:" + sub.getlrnNumberId());
}
System.out.println("job finished:" + new Date());
/*
Set saxset;
SubInfo sub;
ParsingLog b = new ParsingLog();
saxset = b.parsing("log_R2.2.xml", true);
System.out.println("set size = " + saxset.size());
Iterator iterator = saxset.iterator();
while(iterator.hasNext()){
System.out.println("subscriber to write");
sub = (SubInfo)iterator.next();
System.out.println("dirnumber:" + sub.getdirNumberId());
System.out.println("Modifier:" + sub.getModifier());
System.out.println("ndc:" + sub.getNDCId());
System.out.println("called list:" + sub.getCalledList());
System.out.println("calling list:" + sub.getCallingList());
System.out.println("lrn:" + sub.getlrnNumberId());
}
*/
System.out.println("job finished:" + new Date());
//saxset = b.parsing("log_R2.2.xml",true);
//System.out.println("set size = " + saxset.size());
}
}