How to map your neighborhood or any USA neighborhood
If I am not mistaken, this represents a significant shift in the privacy
boundaries here in the USA - towards less privacy and more transparency
in our society.
import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.Socket;
/*
Transparent Society Program
by Andrew William Morrow
http://home.earthlink.net/~amorrow/
This program exists to iterate on http://www.zillow.com/ data records,
(which are indexed by an integer and
number in the tens of millions of American homes)
and do a reverse lookup via http://www.addresses.com/ on
the occupant and phone number of that home
and then create a tile on that location at WikiMapia,
http://www.wikimapia.org/
To use, you should visit zillow.com and find a home that interests you.
Look at its zillow number in the URL and then,
if you want to make just it, then pick a bunch size of just 1.
If you want the twenty houses around it, then subtract ten from
the zillow number and pick a bunch size of 20.
This program is intended to shift
the arbitrary boundaries of privacy in the USA.
The information about home and occupants will now be browseable.
The information is neutral (neither good nor evil),
but now it will be easier to access,
making the society of the USA more transparent.
Brief bibliography:
The Transparent Society by David Brin ISBN: 0-201-32802-X
Who Controls the Internet? by Jack Goldsmith ISBN 0-19-515266-2
HP's CoolTown (the apple of Carly Fiorina's eye)
This might provide a more open society for information to support ideas
such as
http://en.wikipedia.org/wiki/Augmented_reality
Welcome to a more transparent society.
*/
/**
 * Command-line tool that walks a range of Zillow property numbers, scrapes
 * each listing page, performs a reverse address lookup, and submits the
 * result as a new tile to WikiMapia.
 *
 * <p>All network access is done with a deliberately minimal HTTP/1.0 client
 * (see {@link #doReq}); responses are handled as raw text and scraped with
 * {@link #scrapeStr}.
 *
 * <p>NOTE(review): {@link #doit_wm} publishes occupant names and phone
 * numbers to a public site. Confirm that this complies with the terms of
 * service of the data sources and with applicable privacy law before running.
 */
public class DoWikiMap {
    /** Charset for raw HTTP traffic; it maps every byte to exactly one char. */
    private static final String WIRE_CHARSET = "ISO-8859-1";

    /** Charset used when percent-encoding form values. */
    private static final String FORM_CHARSET = "UTF-8";

    /** Status line that {@link #doReq} treats as a redirect. */
    private static final String MOVED_STATUS = "HTTP/1.1 301 Moved Permanently\r\n";

    /** Upper bound on consecutive redirects followed by {@link #doReq}. */
    private static final int MAX_REDIRECTS = 10;

    // This controls the tile size drawn. Units: micro-degree
    int tile_size = 70;

    // WikiMapia governs how many records you can submit per minute.
    // This is the delay (in seconds) to wait after submitting each new record.
    int record_delay = 15;

    // This is my account id and encrypted password at WikiMapia
    // You can use Ethereal (http://www.ethereal.com/) to figure out what
    // to use for these fields for your account
    // NOTE(review): credentials are hard-coded in source; they should be
    // supplied from outside the program (arguments, environment, config).
    String awm_uid = "9523";
    String awm_guestname = "Andrew Morrow";
    String awm_pw = "ec9087ca14fb31ce71246ca6d149b46b";

    /**
     * Extracts the text found between two delimiter strings.
     *
     * @param s     text to search; may be {@code null}
     * @param begin delimiter that precedes the wanted text
     * @param end   delimiter that follows the wanted text; it is searched for
     *              only after the end of {@code begin}
     * @return the text between the first {@code begin} and the next
     *         {@code end}, or {@code null} if {@code s} is {@code null} or
     *         either delimiter is missing
     */
    public static String scrapeStr(String s, String begin, String end) {
        if (s == null) {
            return null;
        }
        int beginAt = s.indexOf(begin);
        if (beginAt < 0) {
            return null;
        }
        int contentStart = beginAt + begin.length();
        // Start looking after the opening delimiter so that an `end` which
        // also occurs inside `begin` cannot produce a negative-length range.
        int endAt = s.indexOf(end, contentStart);
        if (endAt < 0) {
            return null;
        }
        return s.substring(contentStart, endAt);
    }

    /**
     * Replaces individual characters by their mapped strings.
     *
     * @param o     input text
     * @param cFrom characters to replace
     * @param cTo   replacement for the character at the same index in
     *              {@code cFrom}
     * @return {@code o} with every character listed in {@code cFrom} replaced;
     *         when a character is listed more than once the first entry wins
     */
    public static String convert(String o, char[] cFrom, String[] cTo) {
        StringBuilder result = new StringBuilder(o.length());
        for (int i = 0; i < o.length(); i++) {
            char current = o.charAt(i);
            String replacement = null;
            for (int j = 0; j < cFrom.length; j++) {
                if (current == cFrom[j]) {
                    replacement = cTo[j];
                    break;
                }
            }
            if (replacement == null) {
                result.append(current);
            } else {
                result.append(replacement);
            }
        }
        return result.toString();
    }

    /**
     * Escapes the characters {@code &}, {@code <} and {@code >} as HTML
     * entities.
     *
     * @param o input text
     * @return the escaped text
     */
    public static String htmlize(String o) {
        char[] cFrom = { '&', '<', '>' };
        // The entities are spelled as concatenations so that tooling which
        // decodes HTML entities in source text cannot silently turn this
        // table back into an identity mapping.
        String[] cTo = { "&" + "amp;", "&" + "lt;", "&" + "gt;" };
        return convert(o, cFrom, cTo);
    }

    /**
     * Percent-encodes a value for use inside an
     * {@code application/x-www-form-urlencoded} body. Spaces are written as
     * {@code %20}.
     *
     * @param o input text
     * @return the encoded text
     */
    public static String urlize(String o) {
        // URLEncoder writes a space as '+'; callers here expect "%20".
        // A literal '+' in the input is already encoded as "%2B" at this point.
        return formEncode(o).replace("+", "%20");
    }

    /** Form-encodes a value with {@code java.net.URLEncoder} (space becomes '+'). */
    private static String formEncode(String value) {
        try {
            return java.net.URLEncoder.encode(value, FORM_CHARSET);
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 not supported", e);
        }
    }

    // Simple data record of what to transfer to WikiMapia
    class myrec {
        int zilnum;
        int longi;
        int lat;
        String street_addr;
        String specs;
        String name;
        String phone;
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is how many Zillow entries to process,
     *             {@code args[1]} is the Zillow number to start on
     */
    public static void main(String[] args) {
        final String usage = "Usage: java DoWikiMap bunchSize startZillowNumber";
        if (args.length != 2) {
            System.out.println(usage);
            return;
        }
        int bunch;
        int zilnum;
        try {
            bunch = Integer.parseInt(args[0]);
            zilnum = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            System.out.println(usage);
            return;
        }
        System.out.println("bunch=" + bunch);
        System.out.println("zilnum=" + zilnum);
        new DoWikiMap().doit(bunch, zilnum);
    }

    /**
     * Workhorse routine: iterates over the Zillow entries
     * {@code zilstart .. zilstart + bunch - 1}, looks each one up and submits
     * it. After every successful submission it sleeps for
     * {@code record_delay} seconds. If the thread is interrupted while
     * sleeping, the interrupt flag is restored and the loop stops.
     *
     * @param bunch    number of consecutive Zillow numbers to process
     * @param zilstart first Zillow number
     */
    void doit(int bunch, int zilstart) {
        for (int i = zilstart; i < zilstart + bunch; i++) {
            System.out.println("trying zil=" + i);
            myrec m = doit_addr(i);
            String upcome = null;
            if (m == null) {
                System.out.println("no address rec!");
            } else {
                upcome = doit_wm(m);
                System.out.println("upcoming=" + upcome);
            }
            // WikiMapia has a governing limit of how many entries any one IP
            // is allowed to create per minute, so pause after each record
            // that was actually created.
            if (upcome != null) {
                System.out.println("sleep");
                try {
                    Thread.sleep(record_delay * 1000L);
                } catch (InterruptedException e) {
                    System.out.println("Interrupt e=" + e);
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }

    /**
     * Given a Zillow number, fetches the Zillow page and does a reverse
     * lookup on the street address. This merely scrapes the HTML of both
     * responses.
     *
     * <p>Only the first name reported by the reverse lookup is used.
     *
     * @param zilnum Zillow property number
     * @return the collected record, or {@code null} if the page could not be
     *         fetched or any of the required fields (specs, latitude,
     *         longitude, address) could not be scraped from it
     */
    myrec doit_addr(int zilnum) {
        // This is a simple GET, no cookies
        String resp = fetchPage("http://www.zillow.com/HomeDetails.htm?zprop=" + zilnum);
        if (resp == null) {
            return null;
        }

        // The title looks like:
        // <title>Zillow - 684 Wilshire Ave SW, Concord, NC 28027</title>
        String full_addr = scrapeStr(resp, "<title>Zillow - ", "</title>");
        String specs = scrapeStr(resp, "<span class=\"specs\">", "</span>");
        if (specs == null) {
            return null;
        }
        specs = specs.replace("\n", " ").replace("\t", " ").trim();

        // Remove the dot from lat/long and restore trailing zeros:
        // WikiMapia has a rigid format.
        // NOTE(review): padding to a fixed total width (8 / 10 characters)
        // only yields micro-degrees when the integer part has the expected
        // number of digits -- confirm against real Zillow output.
        String lat = stripDotAndPad(scrapeStr(resp, "\"latitude\" : \"", "\","), 8);
        String longi = stripDotAndPad(scrapeStr(resp, "\"longitude\" : \"", "\","), 10);
        if (lat == null || longi == null) {
            return null;
        }
        int latValue;
        int longiValue;
        try {
            latValue = Integer.parseInt(lat);
            longiValue = Integer.parseInt(longi);
        } catch (NumberFormatException e) {
            return null;
        }

        /* If we cannot find the address, then punt for now */
        if (full_addr == null || full_addr.length() < 10) {
            return null;
        }

        // Break "street, city, ST 12345" into its fields.
        // Discard the 5 digit ZIP (and the blank before it) ...
        String addr = full_addr.substring(0, full_addr.length() - 6);
        // ... take the two-letter state code, then drop ", ST".
        String state_code = addr.substring(addr.length() - 2);
        addr = addr.substring(0, addr.length() - 4);
        int comma = addr.indexOf(", ");
        if (comma < 0) {
            return null;
        }
        String nice_street_addr = addr.substring(0, comma);
        String city = addr.substring(comma + 2);

        Properties cooks = new Properties(); // Accumulate my cookies here
        String post_url = "http://reverse-address-lookup.addresses.com/redir.php";
        // Form encoding turns blanks into plus chars and escapes reserved
        // characters that would otherwise corrupt the form body.
        // NOTE(review): "SerachP.y" looks like a misspelling of "SearchP.y";
        // left unchanged because the server's expectation cannot be
        // confirmed from here.
        String args3 =
            "qa=" + formEncode(nice_street_addr)
            + "&qc=" + formEncode(city)
            + "&qs=" + formEncode(state_code)
            + "&SearchP.x=38&SerachP.y=7"
            + "&NewSearchFlag=1&ReportType=34"
            + "&refer=1271&searchform=name&sid=1&aid=&adword=ADDR%7CCRA.MOD";
        String s3 = doReq("POST", post_url, cooks, args3);

        String name = scrapeStr(s3,
            "<td class=\"F5\" nowrap><b><font color=\"#000000\">", "</td>");
        String phone = null;
        if (name != null) {
            name = capitalizeWords(name);
            if (name.length() == 0) {
                name = null;
            } else {
                // That is TWO SPACES in their HTML
                phone = scrapeStr(s3, "<td><span" + " " + " " + "class=\"F4\" nowrap>",
                    "</td>");
            }
        }

        myrec m = new myrec();
        m.zilnum = zilnum;
        m.longi = longiValue;
        m.lat = latValue;
        m.street_addr = nice_street_addr;
        m.specs = specs;
        m.name = name;
        m.phone = phone;
        return m;
    }

    /** Downloads a page as text; returns {@code null} if the download fails. */
    private String fetchPage(String urlStr) {
        InputStream in = null;
        try {
            in = new URL(urlStr).openStream();
            return inToOut(in);
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close fails.
                }
            }
        }
    }

    /**
     * Removes the first '.' from a decimal string and right-pads it with
     * zeros to at least {@code width} characters; returns {@code null} when
     * the input is {@code null} or contains no '.'.
     */
    private static String stripDotAndPad(String decimal, int width) {
        if (decimal == null) {
            return null;
        }
        int dot = decimal.indexOf('.');
        if (dot < 0) {
            return null;
        }
        StringBuilder digits = new StringBuilder(decimal);
        digits.deleteCharAt(dot);
        while (digits.length() < width) {
            digits.append('0');
        }
        return digits.toString();
    }

    /** Lower-cases and trims the text, then upper-cases the first letter of every blank-separated word. */
    private static String capitalizeWords(String raw) {
        StringBuilder sb = new StringBuilder(raw.toLowerCase(Locale.ENGLISH).trim());
        boolean wordStart = true;
        for (int k = 0; k < sb.length(); k++) {
            char c = sb.charAt(k);
            if (c == ' ') {
                wordStart = true;
            } else {
                if (wordStart) {
                    sb.setCharAt(k, Character.toUpperCase(c));
                }
                wordStart = false;
            }
        }
        return sb.toString();
    }

    /**
     * Attempts to submit the info as a new record in WikiMapia.
     *
     * @param m record produced by {@link #doit_addr}
     * @return the WikiMapia record number scraped from the response, or
     *         {@code null} if the request failed or no number was found
     */
    String doit_wm(myrec m) {
        int zilnum = m.zilnum;
        int longi = m.longi;
        int lat = m.lat;
        String street_addr = m.street_addr;
        String specs = m.specs;
        String name = m.name;
        String phone = m.phone;

        Properties cooks = new Properties(); // Accumulate my cookies here
        // Use my own account, for now
        cooks.put("uid", awm_uid);
        cooks.put("guestname", awm_guestname);
        cooks.put("pw", awm_pw);
        /* I do not yet know what this stuff is exactly */
        // Urchin Tracking Module
        // http://www.urchin.com/
        // http://www.google.com/analytics/
        // These values were captured while reverse engineering the site's
        // traffic with Ethereal.
        cooks.put("fp", "96e23dd8a6ba85725b561095cc3321ab");
        cooks.put("__utmb", "213878930");
        cooks.put("__utmz", "213878930.1166457116.320.279.utmccn=(referal)"
            + "|utmcsr=home.earthlink.net|utmcct=/~amorrow/|utmcmd=referral");

        String post_url = "http://www.wikimapia.org/save3.php?inf=1";

        /* If the person name is good, then use it,
           otherwise just use the street address */
        String some_name = urlize(name == null ? street_addr : name);

        /* "status" is the public/private enum */
        // The description is hand-assembled from pre-encoded pieces: a link
        // to the Zillow page, a link to the reverse-address site, the street
        // address and the specs, one per line ("%0A" is an encoded newline).
        StringBuilder args3 = new StringBuilder();
        args3.append("id=0&up=&xy=")
            .append(longi - tile_size).append("x").append(lat + tile_size).append("x")
            .append(longi + tile_size).append("x").append(lat - tile_size)
            .append("&langid=0")
            .append("&status=2")
            .append("&form_name=").append(some_name)
            .append("&form_description=http%3A%2F%2Fwww.zillow.com%2F")
            .append("HomeDetails.htm%3Fzprop%3D").append(zilnum)
            .append("%0A")
            .append("See%20also%20http%3A%2F%2Freverse-address-lookup.addresses.com%2F")
            .append("reverse-address.php")
            .append("%0A").append(urlize(street_addr))
            .append("%0A").append(urlize(specs));
        System.out.println("name=" + name);
        if (name != null) {
            args3.append("%0A").append(urlize(name));
            // A name can be found without a phone number; skip the phone
            // line in that case.
            if (phone != null) {
                args3.append("%0A").append(urlize(phone));
            }
            // NOTE(review): these two trailing fields are only sent when a
            // name was found -- confirm whether they should always be sent.
            args3.append("&form_tabs=&wikipedia=");
        }

        String s3 = doReq("POST", post_url, cooks, args3.toString());
        if (s3 == null) {
            return null;
        }
        // The WikiMapia record number returned is another integer key.
        // It looks like this in the response:
        // <!-- id:1020282 -->
        String wi_num = scrapeStr(s3, "<!-- id:", " -->");
        if (wi_num == null) {
            // If it failed, then show the whole response in hopes that a
            // helpful diagnostic is provided
            System.out.println("wi_num is null");
            System.out.println("s3=" + s3);
        } else {
            System.out.println("wi_num=" + wi_num);
        }
        return wi_num;
    }

    /**
     * Parses the header section of a raw HTTP response and accumulates
     * headers and cookies.
     *
     * <p>Lines without a {@code ": "} separator are skipped. A repeated
     * header overwrites the earlier value in {@code headProps}.
     *
     * @param s1        complete raw response text
     * @param targ      status line (including the trailing CRLF) the response
     *                  must start with
     * @param headProps receives every header as name/value
     * @param cooks     receives the name/value of every {@code Set-Cookie}
     *                  header; cookie attributes after the first ';' are
     *                  dropped
     */
    void parseHeader(String s1, String targ,
            Properties headProps, Properties cooks) {
        if (s1 == null || !s1.startsWith(targ)) {
            System.out.println("Could not find target!");
            return;
        }
        String afterStatus = s1.substring(targ.length());
        // The header section ends at the first empty line.
        int headerEnd = afterStatus.indexOf("\r\n\r\n");
        if (headerEnd < 0) {
            System.out.println("no double-line!");
            return;
        }
        StringTokenizer lines =
            new StringTokenizer(afterStatus.substring(0, headerEnd), "\r\n");
        while (lines.hasMoreTokens()) {
            String line = lines.nextToken();
            int sep = line.indexOf(": ");
            if (sep < 0) {
                continue;
            }
            String name = line.substring(0, sep);
            String value = line.substring(sep + 2);
            // HTTP header names are case-insensitive.
            if (name.equalsIgnoreCase("Set-Cookie")) {
                int eq = value.indexOf('=');
                if (eq > 0) {
                    String cookieName = value.substring(0, eq);
                    String cookieValue = value.substring(eq + 1);
                    int semi = cookieValue.indexOf(';');
                    // ">= 0" so that an empty value ("name=; path=/") is
                    // also cut at the ';'.
                    if (semi >= 0) {
                        cookieValue = cookieValue.substring(0, semi);
                    }
                    cooks.put(cookieName, cookieValue);
                }
            }
            headProps.put(name, value);
        }
    }

    /**
     * Does the HTTP request. This is a REALLY primitive connector: it speaks
     * HTTP/1.0 over a plain socket, reads the response until the server
     * closes the connection, and follows "301 Moved Permanently" redirects
     * (at most {@value #MAX_REDIRECTS}) with a GET.
     *
     * <p>NOTE(review): https URLs get port 443 but are still spoken in clear
     * text, so they cannot work as written.
     *
     * @param cmd       HTTP method, {@code "GET"} or {@code "POST"}
     * @param urlStr    absolute URL to request
     * @param cooks     cookies to send; cookies set by redirect responses are
     *                  added to it
     * @param post_args already-encoded form body, used only for POST
     * @return the complete raw response (status line, headers and body), or
     *         {@code null} if the connection was refused or the request
     *         failed before any response was read
     */
    String doReq(String cmd, String urlStr,
            Properties cooks, String post_args) {
        String resp = null;
        try {
            int redirects = 0;
            boolean moreRedir = true;
            while (moreRedir) {
                URL url1 = new URL(urlStr);
                int port = url1.getPort();
                if (port == -1) {
                    port = url1.getDefaultPort();
                }
                Socket my_socket;
                try {
                    my_socket = new Socket(url1.getHost(), port);
                } catch (java.net.ConnectException e) {
                    e.printStackTrace();
                    return null;
                }
                try {
                    OutputStream out = my_socket.getOutputStream();
                    out.write(buildRequest(cmd, url1, cooks, post_args));
                    out.flush();
                    // HTTP/1.0: the server closes the connection at the end
                    // of the response, so read until EOF.
                    resp = inToOut(my_socket.getInputStream());
                } finally {
                    // One socket per hop; always release it.
                    try {
                        my_socket.close();
                    } catch (IOException ignored) {
                        // Nothing useful can be done if close fails.
                    }
                }

                moreRedir = resp.startsWith(MOVED_STATUS);
                if (moreRedir) {
                    redirects++;
                    String location = scrapeStr(resp, "Location: ", "\r\n");
                    if (location == null || redirects > MAX_REDIRECTS) {
                        System.out.println("giving up on redirect");
                        break;
                    }
                    cmd = "GET";
                    // Resolves absolute, host-relative and path-relative
                    // targets against the URL just requested.
                    urlStr = new URL(url1, location.trim()).toString();
                    parseHeader(resp, MOVED_STATUS, new Properties(), cooks);
                }
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        }
        return resp;
    }

    /** Assembles the bytes of one HTTP/1.0 request (request line, headers and, for POST, the body). */
    private static byte[] buildRequest(String cmd, URL url, Properties cooks,
            String post_args) throws UnsupportedEncodingException {
        String path = url.getPath();
        if (path == null || path.length() == 0) {
            path = "/"; // an empty path is not a valid request target
        }
        StringBuilder head = new StringBuilder();
        head.append(cmd).append(' ').append(path);
        if (url.getQuery() != null) {
            head.append('?').append(url.getQuery());
        }
        head.append(" HTTP/1.0\r\n");
        head.append("Host: ").append(url.getHost());
        if (url.getPort() != -1) {
            head.append(':').append(url.getPort());
        }
        head.append("\r\n");

        // Semicolons between cookies only: none at the end
        StringBuilder cookieList = new StringBuilder();
        if (cooks != null) {
            Enumeration<?> names = cooks.propertyNames();
            while (names.hasMoreElements()) {
                String pn = (String) names.nextElement();
                cookieList.append(cookieList.length() == 0 ? " " : "; ");
                cookieList.append(pn).append('=').append(cooks.getProperty(pn));
            }
        }
        if (cookieList.length() > 0) {
            head.append("Cookie:").append(cookieList).append("\r\n");
        }

        byte[] body = new byte[0];
        if (cmd.equals("POST")) {
            body = (post_args == null ? "" : post_args).getBytes(WIRE_CHARSET);
            head.append("Content-Type: application/x-www-form-urlencoded\r\n");
            // Content-Length counts bytes, not chars.
            head.append("Content-Length: ").append(body.length).append("\r\n");
        }
        // Blank line ends the header block for every method.
        head.append("\r\n");

        byte[] headBytes = head.toString().getBytes(WIRE_CHARSET);
        byte[] request = new byte[headBytes.length + body.length];
        System.arraycopy(headBytes, 0, request, 0, headBytes.length);
        System.arraycopy(body, 0, request, headBytes.length, body.length);
        return request;
    }

    /**
     * Reads the whole stream until EOF and returns it as text, mapping each
     * byte to the char with the same value. Support for HTTP/1.0 only.
     *
     * @param in stream to drain; it is not closed
     * @return everything read from {@code in}
     * @throws IOException if reading fails
     */
    String inToOut(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int count;
        while ((count = in.read(chunk)) != -1) {
            collected.write(chunk, 0, count);
        }
        return collected.toString(WIRE_CHARSET);
    }
}