Re: C++ programming challenge
 
Peter Jansson wrote:
Dear news group,
I have created a small programming challenge for those of you who are
interested in challenging your Standard C++ programming skills. The
challenge is about counting character frequency in large texts,
perhaps useful for spam filtering or classical crypto analysis. You
can read more about it here:
http://blog.p-jansson.com/2009/06/programming-challenge-letter-frequency.html
With kind regards,
Peter Jansson
High level C++ solution:
Code 1:
#include <map>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <string>
#include <cctype>
#include <ctime>
int main(int argc, char **argv)
{
     using namespace std;
     // Warning: long double has problems with MINGW compiler for Windows.
     map<char, long double> characterFrequencies;
     // The array where the read characters will be stored.
     char buffer[BUFSIZ];
     // If argc!= 2, then either the number of arguments is not correct, or the platform does not
     // support arguments.
     if(argc!= 2)
     {
         cerr<< "\nUsage: "<< argv[0]<< " fileNameToRead\n\n";
         return EXIT_FAILURE;
     }
     // We disable synchronisation with stdio, to speed up C++ I/O.
     ios_base::sync_with_stdio(false);
     string characters= "ABDEFGHIJKLMNOPQRSTUVWXYZ";
     clock_t time1, time2;
     // We start timing.
     time1= clock();
     // We open the file
     ifstream inputFile(argv[argc -1]);
     // An error happened
     if(not inputFile.good())
     {
         cerr<< "\nCould not open file for reading, exiting...\n\n";
         return EXIT_FAILURE;
     }
     do
     {
         inputFile.read(buffer, sizeof(buffer));
         for(streamsize i= 0; i< inputFile.gcount(); ++i)
             ++characterFrequencies[ buffer[i] ];
     }while(not inputFile.eof());
     // Since rule 1 is: "Your program should be case insensitive when it counts letters",
     // we add the results of lowercase characters and their equivallent uppercase letters together.
     cout<<fixed<< "\n\n\nThe letter frequencies are:\n";
     long double totalcharacterFrequencies= 0;
     for(string::size_type i= 0; i< characters.size(); ++i)
         totalcharacterFrequencies+= characterFrequencies[ characters[i] ]+ characterFrequencies[ 
tolower(characters[i]) ];
     for(string::size_type i= 0; i< characters.size(); ++i)
         cout<< characters[i]<< ": "<< (characterFrequencies[ characters[i] ]+ characterFrequencies[ 
tolower(characters[i]) ])/ totalcharacterFrequencies* 100<< "%\n";
     // We "stop" timing.
     time2= clock();
     // We convert the timing to seconds.
     double totalTimeInSeconds=  (time2- time1)/ CLOCKS_PER_SEC;
     cout<<"\n\nThe whole process took "<< fixed<< totalTimeInSeconds<< " seconds.\n";
     cout<<"\n\nHave a nice day!\n";
}
Code 2:
The above code more "inlined" (denoted with "==>").
This code is not any faster with my compiler (as it shouldn't), than the original code above.
#include <map>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <string>
#include <cctype>
#include <ctime>
int main(int argc, char **argv)
{
     using namespace std;
     // Warning: long double has problems with MINGW compiler for Windows.
     map<char, long double> characterFrequencies;
     // The array where the read characters will be stored.
     char buffer[BUFSIZ];
     // If argc!= 2, then either the number of arguments is not correct, or the platform does not
     // support arguments.
     if(argc!= 2)
     {
         cerr<< "\nUsage: "<< argv[0]<< " fileNameToRead\n\n";
         return EXIT_FAILURE;
     }
     // We disable synchronisation with stdio, to speed up C++ I/O.
     ios_base::sync_with_stdio(false);
     string characters= "ABDEFGHIJKLMNOPQRSTUVWXYZ";
     clock_t time1, time2;
     // We start timing.
     time1= clock();
     // We open the file
     ifstream inputFile(argv[argc -1]);
     // An error happened
     if(not inputFile.good())
     {
         cerr<< "\nCould not open file for reading, exiting...\n\n";
         return EXIT_FAILURE;
     }
     do
     {
         inputFile.read(buffer, sizeof(buffer));
         ==> ADDED: const streamsize SIZE= inputFile.gcount();
         ==> CHANGED: for(streamsize i= 0; i< SIZE; ++i)
             ++characterFrequencies[ buffer[i] ];
     }while(not inputFile.eof());
     // Since rule 1 is: "Your program should be case insensitive when it counts letters",
     // we add the results of lowercase characters and their equivallent uppercase letters together.
     cout<<fixed<< "\n\n\nThe letter frequencies are:\n";
     long double totalcharacterFrequencies= 0;
     for(string::size_type i= 0; i< characters.size(); ++i)
         totalcharacterFrequencies+= characterFrequencies[ characters[i] ]+ characterFrequencies[ 
tolower(characters[i]) ];
     for(string::size_type i= 0; i< characters.size(); ++i)
         cout<< characters[i]<< ": "<< (characterFrequencies[ characters[i] ]+ characterFrequencies[ 
tolower(characters[i]) ])/ totalcharacterFrequencies* 100<< "%\n";
     // We "stop" timing.
     time2= clock();
     // We convert the timing to seconds.
     double totalTimeInSeconds=  (time2- time1)/ CLOCKS_PER_SEC;
     cout<<"\n\nThe whole process took "<< fixed<< totalTimeInSeconds<< " seconds.\n";
     cout<<"\n\nHave a nice day!\n";
}
-- 
Ioannis A. Vranos
C95 / C++03 Developer
http://www.cpp-software.net