The URL Fetch Java API
Hi all, I am trying the code given at
http://code.google.com/appengine/docs/java/urlfetch/overview.html,
which is as follows:
import java.net.MalformedURLException;
import java.net.URL;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
// ...
try {
URL url = new URL("http://www.example.com/atom.xml");
BufferedReader reader = new BufferedReader(new
InputStreamReader(url.openStream()));
String line;
while ((line = reader.readLine()) != null) {
// ...
}
reader.close();
} catch (MalformedURLException e) {
// ...
} catch (IOException e) {
// ...
}
============================================================
May I know why the input stream retrieved from the URL is different
from what 'view source' shows in a browser? For example, the source of
www.google.com as shown in a browser,
<!doctype html><html><head><meta http-equiv="content-type"
content="text/html; charset=UTF-8"><title>Google</
title><script>window.google=
{kEI:"R79eSt_sGYyQ8gTNgujVDQ",kEXPI:"17259,18167,19771,20760,21106",kCSIE:"17259,18167,19771,20760,21106",kCSI:
{e:"17259,18167,19771,20760,21106",ei:"R79eSt_sGYyQ8gTNgujVDQ"},kHL:"en"};
window.google.sn="webhp";window.google.timers={load:{t:{start:(new
Date).getTime()}}};try
{window.google.pt=window.gtbExternal&&window.gtbExternal.pageT()||
window.external&&window.external.pageT}catch(b){}
window.google.jsrt_kill=1;
var _gjwl=location;function _gjuc(){var b=_gjwl.href.indexOf("#");if
(b>=0){var a=_gjwl.href.substring(b+1);if(/(^|&)q=/.test(a)&&a.indexOf
("#")==-1&&!/(^|&)cad=h($|&)/.test(a)){_gjwl.replace("/
search?"+a.replace(/(^|&)fp=[^&]*/g,"")+"&cad=h");return 1}}return 0}
function _gjp(){!(window._gjwl.hash&&window._gjuc())&&setTimeout(_gjp,
500)};
window._gjp && _gjp()</script><style>td{line-height:.8em;}.gac_m td
{line-height:17px;}form{margin-bottom:20px;}body,td,a,p,.h{font-
family:arial,sans-serif}.h{color:#36c}.q{color:#00c}.ts td{padding:
0}.ts{border-collapse:collapse}#gbar{height:22px}.gbh,.gbd{border-top:
1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:
24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:
24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-
color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:
7px !important;text-align:right}#gbar,#guser{font-size:13px;padding-
top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.
5em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.
2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-
decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</
style><script>google.y={};google.x=function(e,g){google.y[e.id]=
[e,g];return false};window.clk=function(b,c,d,e,f,g,h){if
(document.images){var a=encodeURIComponent||escape;(new Image).src=["/
url?sa=T",c?"&oi="+a(c):"",d?"&cad="+a(d):"","&ct=",a
(e||"res"),"&cd=",a(f),b?"&url="+a(b.replace(/#.*/,"")).replace(/\+/
g,"%2B"):"","&ei=","R79eSt_sGYyQ8gTNgujVDQ",g].join("")}return true};
window.gbar={qs:function(){},tg:function(e){var o={id:'gbar'};for(i in
e)o[i]=e[i];google.x(o,function(){gbar.tg(o)})}};</script></head><body
bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000
onload="document.f.q.focus();if(document.images)new Image().src='/
images/nav_logo6.png'" topmargin=3 marginheight=3><textarea id=csi
style=display:none></textarea><div id=gbar><nobr><b class=gb1>Web</b>
<a href="http://images.google.com.my/imghp?hl=en&tab=wi"
onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://
news.google.com.my/nwshp?hl=en&tab=wn" onclick=gbar.qs(this)
class=gb1>News</a> <a href="http://groups.google.com.my/grphp?
hl=en&tab=wg" onclick=gbar.qs(this) class=gb1>Groups</a> <a
href="http://books.google.com.my/bkshp?hl=en&tab=wp" onclick=gbar.qs
(this) class=gb1>Books</a> <a href="http://scholar.google.com.my/schhp?
hl=en&tab=ws" onclick=gbar.qs(this) class=gb1>Scholar</a> <a
href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a>
<a href="http://www.google.com.my/intl/en/options/" onclick="this.blur
();gbar.tg(event);return !1" aria-haspopup=true class=gb3><u>more</u>
<small>▼</small></a><div id=gbi><a href="http://
blogsearch.google.com.my/?hl=en&tab=wb" onclick=gbar.qs(this)
class=gb2>Blogs</a> <a href="http://translate.google.com.my/?
hl=en&tab=wT" onclick=gbar.qs(this) class=gb2>Translate</a> <div
class=gb2><div class=gbd></div></div><a href="http://www.google.com/
calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://
docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://
www.google.com.my/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a
href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div
class=gb2><div class=gbd></div></div><a href="http://www.google.com.my/
intl/en/options/" class=gb2>even more »</a> </div></nobr></
div><div id=guser width=100%><nobr><b>choonching5u@gmail.com</b> | <a
href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com.my/ig%3Fhl%3Den
%26source%3Diglk&usg=AFQjCNHO3HnUsn1ir-YTXQz74XNTptRnqg">iGoogle</a> |
<a href="https://www.google.com/accounts/ManageAccount">My Account</a>
| <a href="/accounts/ClearSID?continue=http://www.google.com/accounts/
Logout%3Fcontinue%3Dhttp://www.google.com.my/">Sign out</a></nobr></
div><div class=gbh style=left:0></div><div class=gbh style=right:0></
div><center><br clear=all id=lgpd><div align=left style="background:url
(/intl/en_com/images/logo_plain.png) no-repeat;height:110px;width:
276px" title="Google" id=logo onload="window.lol&&lol()"><div nowrap
style="color:#666;font-size:16px;font-weight:bold;left:
208px;position:relative;top:78px">Malaysia</div></div><br><form
action="/search" name=f><table cellpadding=0 cellspacing=0><tr
valign=top><td width=25%> </td><td align=center nowrap><input
name=hl type=hidden value=en><input autocomplete="off" maxlength 48
name=q size=55 title="Google Search" value=""><br><input name=btnG
type=submit value="Google Search"><input name=btnI type=submit
value="I'm Feeling Lucky"></td><td nowrap width=25% align=left><font
size=-2> <a href=/advanced_search?hl=en>Advanced Search</
a><br> <a href=/preferences?hl=en>Preferences</
a><br> <a href=/language_tools?hl=en>Language Tools</a></
font></td></tr><tr><td align=center colspan=3><font size=-1><span
style="text-align:left">Search: <input id=all type=radio name=meta
value="" checked><label for=all> the web </label> <input id=cty
type=radio name=meta value="cr=countryMY"><label for=cty> pages from
Malaysia </label> </span></font></td></tr></table></form><br><font
size=-1>Google.com.my offered in: <a href="http://www.google.com.my/
setprefs?sig=0_vzeTBE_Ey-H44TH3lz1V0k0f70Q=&hl=ms">Bahasa Malaysia</a>
</font><br><br><br><font size=-1><a href="/intl/en/
ads/">Advertising Programs</a> - <a href="/intl/en/
about.html">About Google</a> - <a href=http://www.google.com/ncr>Go to
Google.com</a></font><p><font size=-2>©2009 - <a href="/intl/en/
privacy.html">Privacy</a></font></p></center><div id=xjsd></div><div
id=xjsi><script>if(google.y)google.y.first=[];if(google.y)
google.y.first=[];google.dstr=[];google.rein=[];window.setTimeout
(function(){var a=document.createElement("script");a.src="/extern_js/f/
CgJlbhICbXkrMAo4GywrMA44BSwrMBY4DiwrMBc4AywrMBg4BCwrMBk4BCwrMCE4IEABLCswJTjJiAEsKzAmOAUsKzAnOAIs/
sUWFd4CAkw8.js";(document.getElementById("xjsd")||
document.body).appendChild(a)},0);
;google.y.first.push(function(){google.ac.i
(document.f,document.f.q,'','')});google.xjs&&google.j&&google.j.xi&&google.j.xi
()</script></div><script>(function(){
function a(){google.timers.load.t.ol=(new Date).getTime
();google.report&&google.report(google.timers.load,google.kCSI)}if
(window.addEventListener)window.addEventListener("load",a,false);else
if(window.attachEvent)window.attachEvent
("onload",a);google.timers.load.t.prt=(new Date).getTime();
})();
</script>
============================================================
is different from the output of the java.net API,
<!doctype html><html><head><meta http-equiv="content-type"
content="text/html; charset=ISO-8859-1"><title>Google</
title><script>window.google=
{kEI:"LZdeSpfvBo_u9gSjjK3aDQ",kEXPI:"17259,20760,21105",kCSIE:"17259,20760,21105",kCSI:
{e:"17259,20760,21105",ei:"LZdeSpfvBo_u9gSjjK3aDQ"},kHL:"en"};
window.google.sn="webhp";window.google.timers={load:{t:{start:(new
Date).getTime()}}};try
{window.google.pt=window.gtbExternal&&window.gtbExternal.pageT()||
window.external&&window.external.pageT}catch(b){}
window.google.jsrt_kill=1;
var _gjwl=location;function _gjuc(){var e=_gjwl.href.indexOf("#");if
(e>=0){var a=_gjwl.href.substring(e);if(a.indexOf("&q=")>0||a.indexOf
("#q=")>=0){a=a.substring(1);if(a.indexOf("#")==-1){for(var
c=0;c<a.length;){var d=c;if(a.charAt(d)=="&")++d;var b=a.indexOf
("&",d);if(b==-1)b=a.length;var f=a.substring(d,b);if(f.indexOf("fp=")
==0){a=a.substring(0,c)+a.substring(b,a.length);b=c}else if(f=="cad=h")
return 0;c=b}_gjwl.href="/search?"+a+"&cad=h";return 1}}}return 0}
function _gjp(){!(window._gjwl.hash&&
window._gjuc())&&setTimeout(_gjp,500)};
window._gjp && _gjp()</script><style>td{line-height:.8em;}.gac_m td
{line-height:17px;}form{margin-bottom:20px;}body,td,a,p,.h{font-
family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts
td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px}.gbh,.gbd
{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:
0;position:absolute;top:24px;width:100%}#guser{padding-bottom:7px !
important;text-align:right}#gbar,#guser{font-size:13px;padding-top:
1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.
5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb3{color:#00c !
important}.gb3{text-decoration:none}</style><script>google.y=
{};google.x=function(e,g){google.y[e.id]=[e,g];return false};</
script></head><body bgcolor=#ffffff text=#000000 link=#0000cc
vlink=#551a8b alink=#ff0000 onload="document.f.q.focus();if
(document.images)new Image().src='/images/nav_logo6.png'" topmargin=3
marginheight=3><textarea id=csi style=display:none></textarea><div
id=gbar><nobr><b class=gb1>Web</b> <a href="http://
images.google.com.my/imghp?hl=en&tab=wi" class=gb1>Images</a> <a
href="http://news.google.com.my/nwshp?hl=en&tab=wn" class=gb1>News</a>
<a href="http://groups.google.com.my/grphp?hl=en&tab=wg"
class=gb1>Groups</a> <a href="http://books.google.com.my/bkshp?
hl=en&tab=wp" class=gb1>Books</a> <a href="http://
scholar.google.com.my/schhp?hl=en&tab=ws" class=gb1>Scholar</a> <a
href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a>
<a href="http://www.google.com.my/intl/en/options/" class=gb3><u>more</
u> »</a></nobr></div><div id=guser width=100%><nobr><a href="/
url?sa=p&pref=ig&pval=3&q=http://www.google.com.my/ig%3Fhl%3Den
%26source%3Diglk&usg=AFQjCNHO3HnUsn1ir-YTXQz74XNTptRnqg">iGoogle</a> |
<a href="https://www.google.com/accounts/Login?hl=en&continue=http://
www.google.com.my/">Sign in</a></nobr></div><div class=gbh style=left:
0></div><div class=gbh style=right:0></div><center><br clear=all
id=lgpd><table cellpadding=0 cellspacing=0 border=0><tr><td
align=right valign=bottom><img src=images/hp0.gif width=158 height=78
alt="Google"></td><td valign=bottom><img src=images/hp1.gif width=50
height=78 alt=""></td><td valign=bottom><img src=images/hp2.gif
width=68 height=78 alt=""></td></tr><tr><td class=h align=right
valign=top><b></b></td><td valign=top><img src=images/hp3.gif width=50
height=32 alt=""></td><td valign=top class=h><font color=#666666
style=font-size:16px><b>Malaysia</b></font></td></tr></table><br><form
action="/search" name=f><table cellpadding=0 cellspacing=0><tr
valign=top><td width=25%> </td><td align=center nowrap><input
name=hl type=hidden value=en><input type=hidden name=ie
value="ISO-8859-1"><input autocomplete="off" maxlength 48 name=q
size=55 title="Google Search" value=""><br><input name=btnG
type=submit value="Google Search"><input name=btnI type=submit
value="I'm Feeling Lucky"></td><td nowrap width=25% align=left><font
size=-2> <a href=/advanced_search?hl=en>Advanced Search</
a><br> <a href=/preferences?hl=en>Preferences</
a><br> <a href=/language_tools?hl=en>Language Tools</a></
font></td></tr><tr><td align=center colspan=3><font size=-1><span
style="text-align:left">Search: <input id=all type=radio name=meta
value="" checked><label for=all> the web </label> <input id=cty
type=radio name=meta value="cr=countryMY"><label for=cty> pages from
Malaysia </label> </span></font></td></tr></table></form><br><font
size=-1><a href="/aclk?sa=L&ai=Cz04qt5JeSpTwI4O28gTa7dG0Aa-
Nq4YBgYPKxQzB2ZzZExABIMFUUOjw4qf8_____wFgywOqBAlP0GMDDxhI6IQ&num=1&sig=AGiWqtzH37K0ylScckK_SntwmNNrVEVATQ&q=http://
www.google.com.my/help/maps/favoriteplaces/">Explore the world</a>.
Local experts share their favourite places with you.</
font><br><br><font size=-1>Google.com.my offered in: <a href="http://
www.google.com.my/setprefs?sig=0_-feg0NlBlyb9ha4BXqg1AJVEzEQ=&hl=ms">Bahasa
Malaysia</a> </font><br><br><br><font size=-1><a href="/intl/en/
ads/">Advertising Programs</a> - <a href="/intl/en/
about.html">About Google</a> - <a href=http://www.google.com/ncr>Go to
Google.com</a></font><p><font size=-2>©2009 - <a href="/intl/en/
privacy.html">Privacy</a></font></p></center><div id=xjsd></div><div
id=xjsi><script>if(google.y)google.y.first=[];if(google.y)
google.y.first=[];google.dstr=[];google.rein=[];window.setTimeout
(function(){var a=document.createElement("script");a.src="/extern_js/f/
CgJlbhICbXkgACswCjgbLCswDjgFLCswGDgELCswJTjJiAEsKzAmOAUsKzAnOAIs/
bsJbf7z31AQ.js";(document.getElementById("xjsd")||
document.body).appendChild(a)},0);
;google.y.first.push(function(){google.ac.i
(document.f,document.f.q,'','')});google.xjs&&google.j&&google.j.xi&&google.j.xi
()</script></div><script>(function(){
function a(){google.timers.load.t.ol=(new Date).getTime
();google.report&&google.report(google.timers.load,google.kCSI)}if
(window.addEventListener)window.addEventListener("load",a,false);else
if(window.attachEvent)window.attachEvent
("onload",a);google.timers.load.t.prt=(new Date).getTime();
})();
</script>
============================================================
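For example, the charset differs (UTF-8 in the browser vs. ISO-8859-1
from java.net), and function _gjuc() starts with "var b" in the browser
source but with "var e" in the java.net output, etc. Sometimes the
java.net API returns less markup than the original page; for example,
this is the java.net API output for www.fsksm.utm.my: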
<html>
<head>
<title>FSKSM Main Frame</title>
<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
</head>
<frameset rows="0,100" frameborder="NO" border="0" framespacing="0">
<frame name="topFrame" scrolling="NO" noresize src="">
<frame name="mainFrame" src="http://web.utm.my/fsksm">
</frameset><noframes></noframes>
</html>
============================================================
I need exactly the same content that the browser's 'view source'
shows, for some analysis, but I have not been able to find out why the
two differ. I hope someone can help. Thanks in advance.