Re: My prime counting function
Back when I thought that developing a really fast algorithm would
being attention to my work I got a program in Java that would count
prime up to 10^14 in about an hour, with todays computers it'd take
maybe half an hour. It was still way too slow though to compete with
the fastest algorithms known, and after a time I quit that effort.
The program is PrimeCountH.java and is out there in cyberspace
somewhere for those who wish to run it.
I dug around in cyberspace and found PrimeCountH.java at
http://hismath.blogspot.com/2005_03_01_archive.html.
After converting the htlm back into something javac would accept I got
it running. I have to admit I was impressed with the speed. The recent
discussion of primes was terminated with Arne as the winner ( IMHO )
with code that could find pi(Integer.MAX_VALUE) in approximately 30
seconds on my machine ( dual core 3 GHz XP). JSH's code found the same
value is about 0.05 sec. That can also be compared to a sieve of Atkin
code written in C at about 1.3 sec.
Not being able to leave well enough alone I embarked on a quest to
obtain even more speed for JSH's code. And I am finally done. Some
results:
--------------------------------------------------------
pi(1,000,000,000) = 50,847,534 0.031 sec
--------------------------------------------------------
pi(2,147,483,648) = 105,097,565 0.047 sec
--------------------------------------------------------
pi(1,000,000,000,000) = 37,607,912,018 3.188 sec
--------------------------------------------------------
pi(10,000,000,000,000) = 346,065,536,839 23.282 sec
--------------------------------------------------------
pi(100,000,000,000,000) = 3,204,941,750,802 190.125 sec
----------------------------------------------------------
pi(1,000,000,000,000,000) = 29,844,570,422,669 1784.734 sec
The next to last entry is 10^14 in just over 3 minutes. So computer
speeds have progressed far more that JSH suspected. The final entry is
10^15 in less than half an hour. This code automatically detects and
uses multiple cores, so using a quad core machine should yield even
better results.
Here is the code. I have used Andrew Thompson's Text Width Checker to
verify that all lines are 64 characters or less. I have squeezed it
down to about 400 lines, so don't expect it to be pretty.
--%<------- cut here
// from http://hismath.blogspot.com/2005_03_01_archive.html
//Written by James S. Harris. Copyright 2002-2003.
// All rights reserved.
//My suggestion is to write your own version modifying this one.
//Commercial use is not permitted without my written permission.
//Proper attribution might get me some credit for having found
// the prime counting function
//Play with this, share it, *please* mention where you got it.
/**
*
* @author S.J. Marsh PhD ( physics ), based on JSH code
* @version 1.0
* @date August 2008
*/
import java.text.DecimalFormat;
public class PrimeCountH {
private int numThreads;
private int[] primeList;
private int primeCount;
private int maxPrime;
private final long startTime;
private long[] dS;
private int[] smallPI;
private int subLoopSize = 64;
/**
*
* @param args number [true],
* calculates pi(number),
* optional argument "true" prints a couple of extra lines
* If number is missing, pi(2^31-1) is calculated.
* extra speed can be obtained by using -Xmx declaration
* java -cp . -Xmx1560m PrimesCountH 100,000,000,000,000
* yields
* pi(100,000,000,000,000) = 3,204,941,750,802 in 193.031 sec
* on a E6850 3 gHz dual core processor running XP
*/
public static void main(String[] args) {
long N = Integer.MAX_VALUE; // sjm add default
boolean printFlag = false;
if (args.length>0) {
// sjm: you may optionally enter first argument as
// 1,000,000,000,000,000 or 1000000000000000
try {
args[0] = args[0].replaceAll(",", "");
N = Long.parseLong(args[0]);
} catch (NumberFormatException e) {
if (args[0].equals("true")) {
printFlag = true;
} else {
System.out
.println("usage: java PrimeCountH number [true]");
System.exit(1);
}
}
}
// for jokers who want to enter negatives
N = Math.abs(N);
// The program only uses even N so make it even
if (N>1) { N += N&1; }
// Put "true" in as your second argument to see more output
if (args.length>1&&args[1].equals("true")) {
printFlag = true;
}
Runtime rt = Runtime.getRuntime();
int processors = rt.availableProcessors();
int numThreads = 2*processors;
boolean multiCore = processors > 1;
PrimeCountH p = new PrimeCountH(N, numThreads, printFlag);
DecimalFormat df = new DecimalFormat("###,###");
System.out.print("pi("+df.format(N)+") = "
+df.format(p.pi(N, multiCore)));
System.out.println(" in "+p.elapsedTime()+" sec");
}
public PrimeCountH(long N, int numThreads, boolean printFlag){
this.numThreads = numThreads;
dS = new long[numThreads];
startTime = System.currentTimeMillis();
if (N>120) {
int m_max = (int) Math.sqrt(N)+1;
// Note: build a bigger prime list than necessary
// It will work with m_max,
// but a bigger prime list speeds things up
int multiplier = doSieve(m_max);
if (printFlag) {
System.out.println("Sieve Time: "+elapsedTime()+" sec");
System.out.println("m_max="+multiplier+"*"+m_max);
}
}
}
private double elapsedTime() {
return (System.currentTimeMillis()-startTime)/1000.;
}
public int doSieve(int m_max) {
int multiplier = memoryForSpeedMultiplier(m_max);
m_max = multiplier*m_max;
Primes p = new Primes(m_max);
byte[] b = p.findPrimes();
primeCount = p.countPrimes(b);
primeList = p.getPrimes(primeCount+1);
primeList[primeCount] = m_max;
maxPrime = primeList[primeCount-1];
// pi(odd) == pi(odd+1), so only store even numbers
smallPI = new int[(maxPrime+1)/2]; // memory hog
int s=0;
for (int i = 0; i<primeCount; i++) {
for ( ; s<primeList[i]+1; s += 2) { smallPI[s/2] = i; }
}
return multiplier;
}
int memoryForSpeedMultiplier(int max) {
Runtime rt = Runtime.getRuntime();
long maxMemory = rt.maxMemory();
long possibleInts = (maxMemory/4)*2; // only evens
long mult = 4*(int) (possibleInts/max)/5;// don't hog it all
mult = Math.min(mult, max); // don't burn time for small max
return (int) Math.max(1, Math.min(mult, 32));
}
public int pi(int N) {
N += ((N&1)==0 ? 0 : 1);
if (N<121) {
if (N< 9) { return N/2; }
if (N<25) { return N/2-(N-4)/6; }
if (N<49) { return N/2-(N-4)/6-(N-16)/10+(N-16)/30; }
return
N/2-(N-4)/6-(N-16)/10+(N-16)/30-(N-36)/14+(N-22)/42;
}
if (N<maxPrime) { return smallPI[N>>1]; }
int S = 0;
int dS = 0;
int p = 11;
boolean preTransition = true;
int m_max = (int) Math.sqrt(N)+1;
for (int c = 4; p<m_max; c++) {
int x = N/p;
x += x&1;
if (preTransition) {
if (p<(int) Math.sqrt(x)+1) {
dS = pi(x, p)-c;
} else {
dS = pi(x)-c;
preTransition = false;
}
} else
dS = pi(x)-c;
S += dS;
p = primeList[c+1];
}
return N/2-(N-4)/6-(N-16)/10+(N-16)/30-(N-8)/14+(N-22)/42
+(N-106)/70-(N-106)/210+2-S;
}
public int pi(int N, int m_max) {
int S = 0;
int dS = 0;
int p = 11;
boolean preTransition = true;
for (int c = 4; p<m_max; c++) {
int x = N/p;
x += x&1;
if (preTransition) {
if (p<(int) Math.sqrt(x)+1) {
dS = pi(x, p)-c;
} else {
dS = pi(x)-c;
preTransition = false;
}
} else {
dS = pi(x)-c;
}
S += dS;
p = primeList[c+1];
}
return N/2-(N-4)/6-(N-16)/10+(N-16)/30-(N-8)/14+(N-22)/42
+(N-106)/70-(N-106)/210+2-S;
}
public int transition(long N) {
int p = 11;
int m_max = (int) Math.sqrt(N)+1;
for (int c = 4; p<m_max; c++) {
long y = ((N+p-1)/(2*p))<<1;
if (p>=(int) Math.sqrt(y)+1) {
return c;
}
p = primeList[c+1];
}
return m_max; // this should never be reached
}
class OneCore implements Runnable {
long N;
int p, c, core, loopMax;
boolean preTransition;
OneCore(long N, int c, boolean preTransition, int core,
int loopMax) {
this.N = N;
this.c = c+core*loopMax;
this.preTransition = preTransition;
this.core = core;
this.loopMax = loopMax;
}
public void run() {
dS[core] = 0;
if (preTransition) {
for (int i = 0; i<loopMax; i++) {
int p = primeList[c+i];
long y = ((N+p-1)/(2*p))<<1;
dS[core] += pi(y, p)-(c+i);
}
} else {
for (int i = 0; i<loopMax; i++) {
int p = primeList[c+i];
long y = ((N+p-1)/(2*p))<<1;
dS[core] += pi(y, false)-(c+i);
}
}
}
}
/**
*
* @param N count primes up to N (should be even)
* @param multiCore if true use mutiple threads
* @return pi(N) the number of primes less than N
*/
public long pi(long N, boolean multiCore) {
if (N<Integer.MAX_VALUE) { return pi((int) N); }
long S = 0;
int p = 11;
int m_max = (int) Math.sqrt(N)+1;
int trans = transition(N);
Thread[] t = new Thread[numThreads];
// preTransition
for (int c = 4; c<trans;) {
if (multiCore&&c+(numThreads*subLoopSize)<=trans) {
for (int i = 0; i<numThreads; i++) {
t[i] = new Thread(new OneCore(N, c, true, i,
subLoopSize));
t[i].start();
}
c += numThreads*subLoopSize;
try {
for (int i = 0; i<numThreads; i++) {
t[i].join();
S += dS[i];
}
} catch (InterruptedException e) {
System.out.println("program interupted");
System.exit(1);
}
} else {
// original or tidy up end of loop
p = primeList[c];
long y = ((N+p-1)/(2*p))<<1;
S += pi(y, p)-c;
c++;
}
}
// postTransition
for (int c = trans; p<m_max;) {
if (multiCore&&c+numThreads*subLoopSize<primeList.length-1
&&primeList[c+numThreads*subLoopSize]<m_max) {
for (int i = 0; i<numThreads; i++) {
t[i] = new Thread(new OneCore(N, c, false, i,
subLoopSize));
t[i].start();
}
c += numThreads*subLoopSize;
try {
for (int i = 0; i<numThreads; i++) {
t[i].join();
S += dS[i];
}
} catch (InterruptedException e) {
System.out.println("program interupted");
System.exit(1);
}
} else {
p = primeList[c];
long y = ((N+p-1)/(2*p))<<1;
long dSx = pi(y, false)-c;
S += dSx;
c++;
}
p = primeList[c];
}
return N/2-(N-4)/6-(N-16)/10+(N-16)/30-(N-8)/14+(N-22)/42
+(N-106)/70-(N-106)/210+2-S;
}
public long pi(long N, int m_max) {
if (N<Integer.MAX_VALUE) { return pi((int) N, m_max); }
long S = 0;
long dS = 0;
int p = 11;
boolean preTransition = true;
for (int c = 4; p<m_max; c++) {
long y = ((N+p-1)/(2*p))<<1;
if (preTransition) {
if (p<(int) Math.sqrt(y)+1) {
dS = pi(y, p)-c;
} else {
dS = pi(y, false)-c;
preTransition = false;
}
} else
dS = pi(y, false)-c;
S += dS;
p = primeList[c+1];
}
return N/2-(N-4)/6-(N-16)/10+(N-16)/30-(N-8)/14+(N-22)/42
+(N-106)/70-(N-106)/210+2-S;
}
}
/**
* class to find small ( up to Integer.MAX_VALUE ) primes.
*/
// based on code from Arne Vajh?j in c.l.j.p
class Primes {
private int size;
private byte[] b;
private final static int[] MASK = {
0x01, 0x02, 0x04, 0x08,0x10, 0x20, 0x40, 0x80 };
public boolean isPrime(byte[] b, int ix) {
return (b[ix>>4]&MASK[(ix&0x0F)>>1])==0;
}
private void setBit(byte[] b, int ix) {
b[ix>>4] |= MASK[(ix&0x0F)>>1];
}
private static final int[] countMask = new int[256];
static {
for (int b = 0; b<256; b++) {
for (int i = 0; i<8; i++) {
countMask[b] += ((b>>i)&1)!=0 ? 1 : 0;
}
}
}
public int countPrimes(byte[] b) {
int bitCount = 0;
for (int i = 0; i<b.length; i++)
{ bitCount += countMask[0xff&~b[i]]; }
return bitCount;
}
public byte[] findPrimes() {
b = new byte[size/16+1];
// tidy of last few bits beyond size
for (int i = size; i<16*b.length; i+=2) {setBit(b,i);}
int sqrtSize = (int) Math.sqrt(size);
for (int p = 3; p<=sqrtSize; p += 2) {
if (isPrime(b, p)) {
for (int j = p*p; j<=size&&j>0; j += (2*p)) {
setBit(b, j);
}
}
}
return b;
}
public Primes(int size) { this.size = size; }
public int [] getPrimes(int num) {
int [] primes = new int[num];
int ndx = 0;
primes[ndx++]=2;
for(int i=3; i<size && ndx<num; i+=2) {
if( isPrime(b,i) ) {
primes[ndx++] =i ;
}
}
return primes;
}
}