Re: HashMap vs linear table lookup
On Fri, 15 Feb 2008 20:29:30 GMT, Roedy Green
<see_website@mindprod.com.invalid> wrote, quoted or indirectly quoted
someone who said :
I wrote this code to experiment. Turns out binary search is the pits.
HashSet starts to get better around 11 items.
The sample keys are nicely scrambled to make HashSet happier than it
deserves to be.
package com.mindprod.example;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
/*
* Test relative speed of 3 methods of looking up a key to see if it is
is the list of legal keys.
* HashSet, binary search, linear search for different numbers of keys
n.
* <p/>
* composed with IntelliJ IDEA
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0, 2008-02-15
*/
public class TestFinding
{
// ------------------------------ FIELDS
------------------------------
/**
* lookup the random strings
*/
private static HashSet<String> h;
/**
* random number generator
*/
private final static Random wheel = new Random();
/**
* list of strings we want to look up
*/
private static String[] keys;
// -------------------------- STATIC METHODS
--------------------------
/**
* build lookup structure
*/
private static void build()
{
// build HashSet
h = new HashSet<String>( Arrays.asList( keys ) );
// build binary search
Arrays.sort( keys );
}
/**
* find all the keys by BinarySearch
*/
private static void findAllViaBinarySearch()
{
for ( String lookFor : keys )
{
int whereFound = Arrays.binarySearch( keys, lookFor );
// -ve means not found +ve tells where found.
}
}
/**
* find all the keys by HashSet
*/
private static void findAllViaHashSet()
{
for ( String lookFor : keys )
{
boolean found = h.contains( lookFor );
}
}
/**
* find all the keys by linear search
*/
private static void findAllViaLinearSearch()
{
for ( String lookFor : keys )
{
boolean found = false;
for ( String candidate : keys )
{
if ( lookFor.equals( candidate ) )
{
found = true;
break;
}
}
}
}
/**
* generate N random keys and their lookup structure
*/
private static void generate( int n )
{
keys = new String[n];
for ( int i = 0; i < n; i++ )
{
keys[ i ] = Long.toString( wheel.nextLong() );
}
}
// --------------------------- main() method
---------------------------
/**
* main method
*
* @param args not used
*/
public static void main( String[] args )
{
for ( int n = 1; n <= 10000; n *= 10 )
{
generate( n );
build();
final long t1 = System.nanoTime();
findAllViaBinarySearch();
final long t2 = System.nanoTime();
findAllViaHashSet();
final long t3 = System.nanoTime();
findAllViaLinearSearch();
final long t4 = System.nanoTime();
System.out
.println( "n:" +
n +
" binary search: " +
( t2 - t1 ) +
" HashSet: " +
( t3 - t2 ) +
" Linear: " +
( t4 - t3 ) );
}
// Here in output on my machine With JDK 1.6.0_04
//Linear is best for 10 or fewer, then HashSet.
// Binary search is never optimal.
// n:1 binary search: 21600 HashSet: 9720 Linear: 6400*
// n:10 binary search: 58720 HashSet: 11360 Linear: 11120*
// n:100 binary search: 707320 HashSet: 97520* Linear: 695960
// n:1000 binary search: 1294360 HashSet: 1255480* Linear:
10911040
// n:10000 binary search: 9823600 HashSet: 3920200* Linear:
1513846600
// Here in output on my machine with Jet
// n:1 binary search: 5520 HashSet: 4760 Linear: 1240*
// n:10 binary search: 3560 HashSet: 2480 Linear: 2400*
// n:100 binary search: 28160 HashSet: 5480* Linear: 44840
// n:1000 binary search: 408840 HashSet: 43560* Linear:
7862960
// n:10000 binary search: 9142440 HashSet: 2295320* Linear:
1852690520
}
}
--
Roedy Green Canadian Mind Products
The Java Glossary
http://mindprod.com