Re: C++ programming challenge
Peter Jansson wrote:
Dear news group,
I have created a small programming challenge for those of you who are
interested in challenging your Standard C++ programming skills. The
challenge is about counting character frequency in large texts,
perhaps useful for spam filtering or classical crypto analysis. You
can read more about it here:
http://blog.p-jansson.com/2009/06/programming-challenge-letter-frequency.html
With kind regards,
Peter Jansson
I don't give major importants with the contest, but your timings don't look OK to me.
For example the code that you marked as fastest:
/*
CStopWatch s;
s.startTimer();
The_Stuff_You_Want_To_Time_Executes_Here();
s.stopTimer();
You may then get the elapsed time in seconds
via the method getElapsedTime.
*/
#ifdef WIN32
#include <windows.h>
class CStopWatch
{
private:
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER stop;
} stopWatch;
stopWatch timer;
LARGE_INTEGER frequency;
double LIToSecs( LARGE_INTEGER & L)
{
return ((double)L.QuadPart/(double)frequency.QuadPart);
}
public:
CStopWatch()
{
timer.start.QuadPart=0;
timer.stop.QuadPart=0;
QueryPerformanceFrequency( &frequency );
}
void startTimer() { QueryPerformanceCounter(&timer.start); }
void stopTimer() { QueryPerformanceCounter(&timer.stop); }
double getElapsedTime()
{
LARGE_INTEGER time;
time.QuadPart=timer.stop.QuadPart-timer.start.QuadPart;
return LIToSecs( time );
}
};
#else
#include <sys/time.h>
class CStopWatch
{
private:
typedef struct {
timeval start;
timeval stop;
} stopWatch;
stopWatch timer;
public:
void startTimer( ) { gettimeofday(&(timer.start),0); }
void stopTimer( ) { gettimeofday(&(timer.stop),0); }
double getElapsedTime()
{
timeval res;
timersub(&(timer.stop),&(timer.start),&res);
return res.tv_sec + res.tv_usec/1000000.0;
}
};
#endif
#include <limits.h>
#include <stdio.h>
#include <ctype.h>
#include <iostream>
int main()
{
size_t f [UCHAR_MAX+1] = { 0 };
size_t size = 0;
int c;
CStopWatch timer;
timer.startTimer();
while ( (c = getchar()) != EOF )
{
f [c]++;
size++;
}
for ( c = 0; c <= UCHAR_MAX; ++c )
if ( islower( c ) )
f [toupper( c )] += f [c];
for ( c = 0; c <= UCHAR_MAX; ++c )
if ( f [c] && !islower( c ) && isprint( c ) )
printf( "%c %.3f%%\n", c, f[c]*100.0/size );
timer.stopTimer();
std::cout<< std::fixed<< "\nIt took "<< timer.getElapsedTime()<< "seconds.\n\n";
return 0;
}
at the first run it produces:
john@john-laptop:~/Projects/anjuta/cpp/src$ g++ -ansi -pedantic-errors -Wall -O3 main.cc -o foobar
john@john-laptop:~/Projects/anjuta/cpp/src$ ./foobar <LetterFrequencyInput.txt
16.602%
" 0.233%
' 0.068%
( 0.128%
) 0.171%
, 0.891%
- 0.068%
.. 0.620%
/ 0.057%
0 0.040%
1 0.080%
2 0.037%
3 0.026%
4 0.014%
5 0.014%
6 0.023%
7 0.023%
8 0.006%
9 0.011%
: 0.031%
; 0.048%
< 0.028%
> 0.028%
A 5.454%
B 0.916%
C 3.315%
D 2.615%
E 9.179%
F 2.017%
G 1.494%
H 3.013%
I 6.163%
J 0.080%
K 0.504%
L 2.677%
M 1.866%
N 5.412%
O 7.395%
P 2.208%
Q 0.100%
R 6.200%
S 4.780%
T 6.954%
U 2.344%
V 0.930%
W 1.181%
X 0.159%
Y 1.838%
Z 0.031%
` 0.011%
It took 3.024299seconds.
john@john-laptop:~/Projects/anjuta/cpp/src$
and after some runs it produces:
john@john-laptop:~/Projects/anjuta/cpp/src$ ./foobar <LetterFrequencyInput.txt
16.602%
" 0.233%
' 0.068%
( 0.128%
) 0.171%
, 0.891%
- 0.068%
.. 0.620%
/ 0.057%
0 0.040%
1 0.080%
2 0.037%
3 0.026%
4 0.014%
5 0.014%
6 0.023%
7 0.023%
8 0.006%
9 0.011%
: 0.031%
; 0.048%
< 0.028%
> 0.028%
A 5.454%
B 0.916%
C 3.315%
D 2.615%
E 9.179%
F 2.017%
G 1.494%
H 3.013%
I 6.163%
J 0.080%
K 0.504%
L 2.677%
M 1.866%
N 5.412%
O 7.395%
P 2.208%
Q 0.100%
R 6.200%
S 4.780%
T 6.954%
U 2.344%
V 0.930%
W 1.181%
X 0.159%
Y 1.838%
Z 0.031%
` 0.011%
It took 2.846462seconds.
john@john-laptop:~/Projects/anjuta/cpp/src$
while mine with your timer:
/*
CStopWatch s;
s.startTimer();
The_Stuff_You_Want_To_Time_Executes_Here();
s.stopTimer();
You may then get the elapsed time in seconds
via the method getElapsedTime.
*/
#ifdef WIN32
#include <windows.h>
class CStopWatch
{
private:
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER stop;
} stopWatch;
stopWatch timer;
LARGE_INTEGER frequency;
double LIToSecs( LARGE_INTEGER & L)
{
return ((double)L.QuadPart/(double)frequency.QuadPart);
}
public:
CStopWatch()
{
timer.start.QuadPart=0;
timer.stop.QuadPart=0;
QueryPerformanceFrequency( &frequency );
}
void startTimer() { QueryPerformanceCounter(&timer.start); }
void stopTimer() { QueryPerformanceCounter(&timer.stop); }
double getElapsedTime()
{
LARGE_INTEGER time;
time.QuadPart=timer.stop.QuadPart-timer.start.QuadPart;
return LIToSecs( time );
}
};
#else
#include <sys/time.h>
class CStopWatch
{
private:
typedef struct {
timeval start;
timeval stop;
} stopWatch;
stopWatch timer;
public:
void startTimer( ) { gettimeofday(&(timer.start),0); }
void stopTimer( ) { gettimeofday(&(timer.stop),0); }
double getElapsedTime()
{
timeval res;
timersub(&(timer.stop),&(timer.start),&res);
return res.tv_sec + res.tv_usec/1000000.0;
}
};
#endif
#include <valarray>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <string>
#include <cctype>
#include <ctime>
int main(int argc, char **argv)
{
using namespace std;
// We disable synchronisation with stdio, to speed up C++ standard I/O.
ios_base::sync_with_stdio(false);
cout<< fixed;
// If argc!= 2, then either the number of arguments is not correct, or the platform does not
// support arguments.
if(argc!= 2)
{
cerr<< "\nUsage: "<< argv[0]<< " fileNameToRead\n\n";
return EXIT_FAILURE;
}
// The C++ basic character set is using the value range [0, 127].
// If we used vector<unsigned long>, it would not have any run-time difference in any modern compiler.
valarray<unsigned long> characterFrequencies(128);
// The array where the read characters will be stored.
char buffer[4* BUFSIZ]= {};
string characters= "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
CStopWatch timer;
timer.startTimer();
// We open the file
ifstream inputFile(argv[argc -1]);
// An error happened
if(not inputFile)
{
cerr<< "\nCould not open file for reading, exiting...\n\n";
return EXIT_FAILURE;
}
do
{
inputFile.read(buffer, sizeof(buffer));
for(streamsize i= 0; i< inputFile.gcount(); ++i)
++characterFrequencies[ buffer[i] ];
}while(inputFile);
// Since rule 1 is: "Your program should be case insensitive when it counts letters",
// we add the results of lowercase characters and their equivallent uppercase letters together.
cout<< "\n\n\nThe letter frequencies are:\n";
unsigned long totalcharacterFrequencies= 0LU;
for(string::size_type i= 0; i< characters.size(); ++i)
totalcharacterFrequencies+= characterFrequencies[ characters[i] ]+ characterFrequencies[
tolower(characters[i]) ];
for(string::size_type i= 0; i< characters.size(); ++i)
{
double percentage= static_cast<double>(characterFrequencies[ characters[i] ]+ characterFrequencies[
tolower(characters[i]) ])/ totalcharacterFrequencies* 100;
cout<< characters[i]<< ": "<< percentage<< "%\n";
}
timer.stopTimer();
// We convert the timing to seconds.
double totalTimeInSeconds= timer.getElapsedTime();
cout<<"\n\nThe whole process took "<< totalTimeInSeconds<< " seconds.\n";
cout<<"\n\nHave a nice day!\n";
}
at the first run produces:
hn@john-laptop:~/Projects/anjuta/cpp/src$ ./foobar LetterFrequencyInput.txt
The letter frequencies are:
A: 6.919578%
B: 1.162287%
C: 4.205169%
D: 3.317211%
E: 11.644528%
F: 2.559197%
G: 1.895033%
H: 3.822553%
I: 7.818366%
J: 0.101068%
K: 0.638897%
L: 3.396621%
M: 2.367889%
N: 6.865435%
O: 9.381317%
P: 2.801040%
Q: 0.126336%
R: 7.865290%
S: 6.064106%
T: 8.821831%
U: 2.974300%
V: 1.180335%
W: 1.497979%
X: 0.202137%
Y: 2.331793%
Z: 0.039705%
The whole process took 0.456543 seconds.
Have a nice day!
john@john-laptop:~/Projects/anjuta/cpp/src$
and after some runs it produces:
ohn@john-laptop:~/Projects/anjuta/cpp/src$ ./foobar LetterFrequencyInput.txt
The letter frequencies are:
A: 6.919578%
B: 1.162287%
C: 4.205169%
D: 3.317211%
E: 11.644528%
F: 2.559197%
G: 1.895033%
H: 3.822553%
I: 7.818366%
J: 0.101068%
K: 0.638897%
L: 3.396621%
M: 2.367889%
N: 6.865435%
O: 9.381317%
P: 2.801040%
Q: 0.126336%
R: 7.865290%
S: 6.064106%
T: 8.821831%
U: 2.974300%
V: 1.180335%
W: 1.497979%
X: 0.202137%
Y: 2.331793%
Z: 0.039705%
The whole process took 0.482213 seconds.
Have a nice day!
john@john-laptop:~/Projects/anjuta/cpp/src$
I think this challenge is bogus.
--
Ioannis A. Vranos
C95 / C++03 Developer
http://www.cpp-software.net