Re: No. of 'ab' or specific word in a text file
This is a multi-part message in MIME format.
--------------080302060000010300080408
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Umesh wrote:
/*Calculate the no. of occurance of 'ab'. But is this one OK/
EFFICIENT?*/
#include<stdio.h>
int main(void)
{
FILE *f;
long int c,c1,ab=1;
f=fopen("c:/1.txt","r");
while ((c=getc(f))!=EOF && (c1=getc(f))!=EOF) {
if(c=='a' && c1=='b') ab++;}
fclose(f);
printf("No. of 'ab' = %ld\n",ab);
return 0;
}
That will not work, it will fail for sequences like 'xab'.
The code I attached below shows how you can create a state machine to
search for any sequence of characters. You'll need to adapt it to read
from a file (should be trivial). It's not designed for speed.
--------------080302060000010300080408
Content-Type: text/plain;
name="simple_scanner.cpp"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="simple_scanner.cpp"
#include <map>
#include <vector>
#include <memory>
#include <cassert>
// ======== IteratorTranverser =======================================
/**
* IteratorTranverser is a template class that iterates through
* a pointer or iterator. The pointers passed to it must be valid
* for the life-time of this object.
*/
template <typename Itr, typename t_key_char>
struct IteratorTranverser
{
Itr m_from;
const Itr m_end;
IteratorTranverser( const Itr & i_from, const Itr & i_end )
: m_from( i_from ),
m_end( i_end )
{
}
bool GetChar( t_key_char & l_char )
{
if ( m_from != m_end )
{
l_char = * ( m_from ++ );
return true;
}
return false;
}
bool HasInput( bool i_wait )
{
return m_from != m_end;
}
};
// ======== CombiningTraverser ========================================
/**
*
*
*/
template <typename TraverserTypeFirst, typename TraverserTypeSecond, typename t_key_char>
struct CombiningTraverser
{
TraverserTypeFirst & m_first;
TraverserTypeSecond & m_second;
bool m_use_second;
CombiningTraverser(
TraverserTypeFirst & io_first,
TraverserTypeSecond & io_second
)
: m_first( io_first ),
m_second( io_second ),
m_use_second( false )
{
}
bool GetChar( t_key_char & l_char )
{
if ( ! m_use_second )
{
if ( m_first.GetChar( l_char ) )
{
return true;
}
m_use_second = true;
}
return m_second.GetChar( l_char );
}
bool HasInput( bool i_wait )
{
if ( ! m_use_second )
{
if ( m_first.HasInput( i_wait ) )
{
return true;
}
m_use_second = true;
}
return m_second.HasInput( i_wait );
}
};
/**
* SimpleScanner is a simple scanner generator
*/
template <typename t_key_char, typename t_result>
class SimpleScanner
{
/**
* DFA_State contains a list of transitionstransitions
*/
struct DFA_State
{
typedef std::map<t_key_char, DFA_State *> t_map_type;
typedef typename t_map_type::iterator t_iterator;
t_map_type m_transitions;
t_result m_terminal;
bool m_has_val;
DFA_State()
: m_terminal(),
m_has_val( false )
{
}
/**
* FindOrInsertTransition is used to construct the scanner
*/
DFA_State * FindOrInsertTransition( t_key_char i_char )
{
std::pair<t_iterator, bool> l_insert_result =
m_transitions.insert( typename t_map_type::value_type( i_char, 0 ) );
if ( ! l_insert_result.second )
{
return l_insert_result.first->second;
}
return l_insert_result.first->second = new DFA_State;
}
/**
* FindTransition is used to traverse the scanner
*/
DFA_State * FindTransition( t_key_char i_char )
{
t_iterator l_insert_result =
m_transitions.find( i_char );
if ( l_insert_result != m_transitions.end() )
{
return l_insert_result->second;
}
return 0;
}
};
struct DFA_Machine
{
DFA_State * m_initial_state;
DFA_State * m_current_state;
DFA_State * m_last_accept_state;
std::vector<t_key_char> m_str;
DFA_Machine( DFA_State * i_initial_state )
: m_initial_state( i_initial_state ),
m_current_state( i_initial_state ),
m_last_accept_state( 0 )
{
}
/**
* NextChar will traverse the state machine with the next
* character and return the terminal t_result if one exists.
* If i_char does not make a valid transition, o_valid
* is set to false.
*/
bool NextChar( t_key_char i_char )
{
m_str.push_back( i_char );
DFA_State * l_next_state = m_current_state->FindTransition( i_char );
if ( l_next_state )
{
m_current_state = l_next_state;
// If there is an accepting state then we
// can roll back the push-back buffer.
if ( l_next_state->m_has_val )
{
m_last_accept_state = l_next_state;
m_str.clear();
}
return true;
}
m_current_state = m_initial_state;
return false;
}
template <typename Traverser>
bool ScanStream( Traverser & io_traverser, t_result & o_result )
{
t_key_char l_char;
while ( io_traverser.GetChar( l_char ) )
{
bool i_valid;
i_valid = NextChar( l_char );
DFA_State * l_last_accept_state = m_last_accept_state;
// If there are no more transitions or the last
if ( ( ! i_valid ) || ( m_current_state->m_transitions.size() == 0 ) )
{
if ( l_last_accept_state )
{
m_last_accept_state = 0;
m_current_state = m_initial_state;
if ( l_last_accept_state->m_has_val )
{
o_result = l_last_accept_state->m_terminal;
return true;
}
}
return false;
}
// There are transitions ...
assert( m_current_state->m_transitions.size() != 0 );
// If there are transitions (true here) and this is an interactive
// scan (waiting for user input) then wait a little longer, if there
// are no accept states - wait forever (which means calling GetChar).
if ( l_last_accept_state )
{
if ( ! io_traverser.HasInput( true ) )
{
// there is no longer any pending input. We're done.
m_last_accept_state = 0;
m_current_state = m_initial_state;
o_result = l_last_accept_state->m_terminal;
return true;
}
}
}
return false;
}
template <typename TraverserType>
bool DoScan( TraverserType & io_traverser, t_result & o_result )
{
std::vector<t_key_char> l_str = std::vector<t_key_char>();
l_str.swap( m_str );
if ( l_str.size() != 0 )
{
IteratorTranverser< typename std::vector<t_key_char>::iterator, t_key_char > l_tvsr(
l_str.begin(),
l_str.end()
);
CombiningTraverser<
IteratorTranverser< typename std::vector<t_key_char>::iterator, t_key_char >,
TraverserType,
t_key_char
> l_combined( l_tvsr, io_traverser );
bool l_scanned = ScanStream( l_combined, o_result );
// may still have content locally - push that back into the
m_str.insert( m_str.end(), l_tvsr.m_from, l_tvsr.m_end );
return l_scanned;
}
else
{
return ScanStream( io_traverser, o_result );
}
return false;
}
bool HasInput( bool )
{
return m_str.size() != 0;
}
bool GetChar( t_key_char & l_char )
{
if ( m_str.size() != 0 )
{
l_char = m_str.front();
m_str.erase( m_str.begin() );
return true;
}
return false;
}
};
struct Scanner
{
DFA_State * m_initial_state;
Scanner()
: m_initial_state( new DFA_State )
{
}
DFA_Machine * NewMachine()
{
return new DFA_Machine( m_initial_state );
}
/**
* AddTerminal will add a terminal and will return the colliding
* terminal (if there is one)
*/
template <typename t_iterator>
bool AddTerminal(
int i_length,
t_iterator i_str,
const t_result & i_kd,
t_result & o_result
) {
DFA_State * l_curr_state = m_initial_state;
t_iterator l_str = i_str;
for ( int i = 0; i < i_length; ++ i )
{
DFA_State * l_next_state = l_curr_state->FindOrInsertTransition( * l_str );
++ l_str;
l_curr_state = l_next_state;
}
if ( l_curr_state->m_has_val )
{
// We have a collision !
o_result = l_curr_state->m_terminal;
return true;
}
l_curr_state->m_terminal = i_kd;
l_curr_state->m_has_val = true;
#if 0
// actually test the scanner to make sure that we decode what we expect
// to decode
std::auto_ptr<DFA_Machine> l_machine( NewMachine() );
IteratorTranverser< t_iterator > l_tvsr( i_str, i_str + i_length );
const t_result * l_kd2 = l_machine->ScanStream( l_tvsr );
// assert( l_kd2 == i_kd );
return 0;
#endif
return false;
}
};
Scanner m_scanner;
public:
struct Machine
{
DFA_Machine * m_machine;
Machine()
: m_machine( 0 )
{
}
~Machine()
{
if ( m_machine )
{
delete m_machine;
}
}
bool HasInput( bool )
{
if ( m_machine )
{
return m_machine->HasInput( false );
}
return false;
}
bool GetChar( t_key_char & l_char )
{
if ( m_machine )
{
return m_machine->GetChar( l_char );
}
return false;
}
private:
// no copies allowed
Machine( const Machine & );
Machine & operator=( const Machine & );
};
template <typename TraverserType>
bool Traverse( Machine & i_machine, TraverserType & io_traverser, t_result & o_kd )
{
DFA_Machine * l_machine = i_machine.m_machine;
if ( ! l_machine )
{
l_machine = i_machine.m_machine = m_scanner.NewMachine();
}
return l_machine->DoScan( io_traverser, o_kd );
}
bool AddTerminal(
int i_length,
const t_key_char * i_str,
const t_result & i_kd,
t_result & o_result
) {
return m_scanner.AddTerminal( i_length, i_str, i_kd, o_result );
}
bool AddTerminal(
const t_key_char * i_str,
const t_result & i_kd,
t_result & o_result
) {
return m_scanner.AddTerminal( std::strlen( i_str ), i_str, i_kd, o_result );
}
template < typename t_container >
bool AddTerminal(
const t_container i_str,
const t_result & i_kd,
t_result & o_result
) {
return m_scanner.AddTerminal( i_str.size(), i_str.begin(), i_kd, o_result );
}
}; // SimpleScanner
#include <string>
#include <iostream>
#include <ostream>
#include <istream>
class NoisyStr
{
public:
std::string m_value;
NoisyStr()
: m_value( "unassigned" )
{
}
NoisyStr( const std::string & i_value )
: m_value( i_value )
{
}
NoisyStr( const char * i_value )
: m_value( i_value )
{
}
NoisyStr( const NoisyStr & i_value )
: m_value( i_value.m_value )
{
std::cout << "Copied " << m_value << "\n";
}
NoisyStr & operator=( const NoisyStr & i_value )
{
std::cout << "Assigned " << m_value;
m_value = i_value.m_value;
std::cout << " to " << m_value << "\n";
return * this;
}
const char * c_str()
{
return m_value.c_str();
}
};
typedef std::string KeyType;
int main()
{
SimpleScanner< char, KeyType > l_scanner;
KeyType l_collision;
l_scanner.AddTerminal( "abcde", "ZZ", l_collision );
l_scanner.AddTerminal( "xyz", "YY", l_collision );
l_scanner.AddTerminal( "dx_", "DX", l_collision );
static const char l_test[] = "abcde_test_abcdx_xyz";
std::cout << "scanning " << l_test << std::endl;
IteratorTranverser< const char *, char > l_trav( l_test, l_test + sizeof( l_test ) -1 );
SimpleScanner< char, KeyType >::Machine machine;
KeyType l_result;
while (true )
{
if ( l_scanner.Traverse( machine, l_trav, l_result ) )
{
std::cout << l_result.c_str();
}
else
{
char l_ch;
if ( ! machine.GetChar( l_ch ) )
{
if ( ! l_trav.GetChar( l_ch ) )
{
break;
}
}
std::cout << l_ch;
}
}
std::cout << std::endl;
} // main
--------------080302060000010300080408--