Notes/Domino HTML parsen
Hello guys,
I want to parse a HTML Frameset (by Java) which is generated by a Lotus
Domino server. When I use a plain text HTML generated by Domino server,
my parser works fine. In contrast, then I try to parse the frameset I
get the following exception:
##########
Exception:
##########
java.io.EOFException
at java.io.DataInputStream.readFully(DataInputStream.java:295)
at java.io.DataInputStream.readUTF(DataInputStream.java:661)
at conparse.main(conparse.java:30)
at conparseTest.testMain(conparseTest.java:17)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:85)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:58)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:60)
at java.lang.reflect.Method.invoke(Method.java:391)
at junit.framework.TestCase.runTest(TestCase.java:154)
at junit.framework.TestCase.runBare(TestCase.java:127)
at junit.framework.TestResult$1.protect(TestResult.java:106)
at junit.framework.TestResult.runProtected(TestResult.java:124)
at junit.framework.TestResult.run(TestResult.java:109)
at junit.framework.TestCase.run(TestCase.java:118)
at junit.framework.TestSuite.runTest(TestSuite.java:208)
at junit.framework.TestSuite.run(TestSuite.java:203)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:478)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:344)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:196)
###########
Code
###########
public class conparse{
conparse(){
}
public void main(){
try {
URL urlobj = new URL("http://dev0004/Test/frame.htm");
HttpURLConnection uc = null;
uc = (HttpURLConnection)urlobj.openConnection();
uc.setUseCaches(false);
DataInputStream is = new DataInputStream(uc.getInputStream());
String str =is.readUTF();
System.out.println(str);
HTMLEditorKit hKit = new HTMLEditorKit();
HTMLDocument hDoc = new HTMLDocument();
hKit.read(is, hDoc, 0);
HTMLDocument.Iterator it = hDoc.getIterator(HTML.Tag.FONT);
AttributeSet attSet = it.getAttributes();
String s = (String)attSet.getAttribute(HTML.Attribute.COLOR);
System.out.println(s);
//System.out.println(attSet.getAttributeCount());
}
catch ( Exception e ) {
e.printStackTrace();
}
}
}
################
Plain Text HTML:
################
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<script language="JavaScript" type="text/javascript">
<!--
document._domino_target = "_self";
function _doClick(v, o, t, h) {
var form = document._ContentRetrival;
if (form.onsubmit) {
var retVal = form.onsubmit();
if (typeof retVal == "boolean" && retVal == false)
return false;
}
var target = document._domino_target;
if (o.href != null) {
if (o.target != null)
target = o.target;
} else {
if (t != null)
target = t;
}
form.target = target;
form.__Click.value = v;
if (h != null)
form.action += h;
form.submit();
return false;
}
// -->
</script>
</head>
<body text="#000000" bgcolor="#FFFFFF">
<form method="post"
action="/Test/HET/PerformanceTestDB.nsf/ContentRetrival?OpenForm&Seq=1"
name="_ContentRetrival">
<input type="hidden" name="__Click" value="0"><b>Test Page for Content
Retrival</b><br>
<br>
<br>
<font color="#FF0000">Hello, here is some text without a meaning. This
text should show, how a printed</font><br>
</form>
</body>
</html>
#######################################
Frameset HTML (received by Internet Explorer from the Domino Server)
#######################################
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN">
<html>
<head>
<script language="JavaScript" type="text/javascript">
<!--
self._domino_name = "_Main";
// -->
</script>
</head>
<frameset cols="45%,55%">
<frame
src="/Test/HET/PerformanceTestDB.nsf/ContentDeliveryMeasurement?OpenForm">
<frameset rows="1*,1*">
<frame src="/Test/HET/PerformanceTestDB.nsf/DocsInserted?OpenView">
<frame name="docPreviewFrame"
src="/Test/HET/PerformanceTestDB.nsf/select?OpenForm">
</frameset>
</frameset>
</html>
Regards,
Marcus