--- On Wed, 3/20/13, S. Fisher <expandaf...@yahoo.com> wrote: > The program reads a text file and counts the number of > unique words, > and also displays the number of times the most common word > was found. >
// For comparison, here's a C++ program. Unlike the Pascal version, it shows
// the 20 most common words. Also, it doesn't use a regular expression engine,
// since I didn't have one. An associative array that maps strings to integers
// was simply obtained by:
//
//     unordered_map<string,int> w_table;
//
// c++ -std=c++11 -o map.exe map.cpp

#include <algorithm>      // for partial_sort, min
#include <cstddef>        // for size_t
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <unordered_map>
#include <utility>        // for pair
#include <vector>

using namespace std;

// Expand range shorthand in a character-set spec: "a-f" becomes "abcdef".
// A '-' that is the first or last character of the spec is kept literally.
// Returns the expanded set; an empty spec yields an empty string.
string gen_cset( string s )
{
    string acc;
    const size_t n = s.size();
    for ( size_t i = 0; i < n; ++i ) {
        const char ch = s[i];
        const bool is_range = ( '-' == ch ) && ( i != 0 ) && ( i + 1 != n );
        if ( !is_range ) {
            acc.push_back( ch );
            continue;
        }
        // The two endpoints are emitted by the neighbouring iterations, so
        // only the characters strictly between them are added here. Going
        // through unsigned char keeps ranges correct when plain char is
        // signed and an endpoint is above 0x7F.
        const int lo = static_cast<unsigned char>( s[i-1] );
        const int hi = static_cast<unsigned char>( s[i+1] );
        for ( int j = lo + 1; j < hi; ++j )
            acc.push_back( static_cast<char>( j ) );
    }
    return acc;
}

// Characters that make up a word. Built once instead of once per input line.
const string letters = gen_cset( "a-zA-Z" );

// Split `s` into the maximal runs of characters that belong to `chars`.
// Returns the runs in the order they occur; characters outside `chars` act
// as separators and are dropped.
list<string> scan_string( const string& s, const string& chars )
{
    list<string> lst;
    size_t p = s.find_first_of( chars );
    while ( p != string::npos ) {
        const size_t q = s.find_first_not_of( chars, p );
        if ( q == string::npos ) {
            // The run extends to the end of the string.
            lst.push_back( s.substr( p ) );
            break;
        }
        lst.push_back( s.substr( p, q - p ) );
        p = s.find_first_of( chars, q );
    }
    return lst;
}

typedef pair<string,int> word_and_count;

// Ordering for the report: higher counts first. Equal counts are ordered by
// word so the output does not depend on the hash table's iteration order.
bool compare_pair( const word_and_count& a, const word_and_count& b )
{
    if ( a.second != b.second )
        return a.second > b.second;
    return a.first < b.first;
}

// Reads the text file, counts every distinct word, and prints the number of
// distinct words followed by the most common ones (at most 20).
// Returns 1 if the input file cannot be opened, 0 otherwise.
int main()
{
    const char* const file_name = "Bartlett--Quotations.txt";
    const size_t top_n = 20;

    ifstream f_stream( file_name );
    if ( !f_stream.is_open() ) {
        cerr << "Cannot open " << file_name << '\n';
        return 1;
    }

    unordered_map<string,int> w_table;
    string line;
    // Test the result of getline itself so a failed read is never processed.
    while ( getline( f_stream, line ) )
        for ( const string& word : scan_string( line, letters ) )
            w_table[ word ] += 1;

    cout << "Number of unique words: " << w_table.size() << '\n';
    cout << "Most common words:" << '\n';

    vector<word_and_count> pair_vec( w_table.begin(), w_table.end() );
    // Never report more entries than exist; the text may hold fewer than
    // top_n distinct words.
    const size_t shown = min( top_n, pair_vec.size() );
    // Only the leading `shown` entries need to be in order.
    partial_sort( pair_vec.begin(), pair_vec.begin() + shown, pair_vec.end(),
                  compare_pair );
    for ( size_t i = 0; i < shown; ++i )
        cout << pair_vec[i].first << " " << pair_vec[i].second << '\n';

    return 0;
}
//
// Number of unique words: 8911
// Most common words:
// the 2200
// of 1406
// a 1118
// to 1011
// and 975
// in 740
// is 581
// s 510
// Act 484
// Sc 483
// that 454
// The 403
// I 369
// Line 345
// not 333
// his 323
// with 306
// And 303
// be 277
// i 268

_______________________________________________
fpc-pascal maillist - fpc-pascal@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-pascal