--- On Wed, 3/20/13, S. Fisher <expandaf...@yahoo.com> wrote: > The program reads a text file and counts the number of > unique words, > and also displays the number of times the most common word > was found. >
Added an iterator of sorts for regular expressions. This allows if re.exec( line ) then begin si_table_inc( table, re.match[0] ); while re.execNext do si_table_inc( table, re.match[0] ); end; to be replaced by re_each_match( re, line, @count_the_word ); with no loss of speed. {$mode objfpc} uses AvgLvlTree, regexpr, classes, dateutils, Sysutils; const file_to_process = 'Bartlett--Quotations.txt'; type // string-integer table t_si_table = TStringToPointerTree; t_si_item = PStringToPointerItem; procedure si_table_put( table: t_si_table; k: string; v: longint ); begin table[ k ] := pointer( v ) end; function si_table_get( table : t_si_table; key : string ): longint; begin result := longint( table[ key ] ) end; procedure si_table_inc( table: t_si_table; k: string ); begin table[ k ] := pointer( si_table_get( table, k ) + 1 ) ; end; function si_table_key( item : t_si_item ): string; begin result := item^.name end; function si_table_val( item : t_si_item ): longint; begin result := longint( item^.value ) end; function si_table_new( sensitive : boolean ): t_si_table; begin result := t_si_table.create( sensitive ) end; type t_regex_proc = procedure( const text : string ); procedure re_each_match( re : TRegExpr; const text: string; proc: t_regex_proc ); overload; begin if re.exec( text ) then begin proc( re.match[0] ); while re.execNext do proc( re.match[0] ); end; end; procedure re_each_match( const re_str, text: string; proc: t_regex_proc ); overload; var re : TRegExpr; begin re := TRegExpr.Create; re.Expression := re_str; re_each_match( re, text, proc ); re.free end; var table : t_si_table ; table_item : t_si_item ; line, word : string; re : TRegExpr; max_count, count : int32; lines : TStringList; time_1 : TDateTime; procedure count_the_word( const s : string ); begin si_table_inc( table, s ) end; begin time_1 := time; re := TRegExpr.Create; re.Expression := '(?i)[a-z]+' ; // case-insensitive table := si_table_new( true ) ; // true to make it case-sensitive. lines := TStringList.Create; lines.LoadFromFile( file_to_process ); for line in lines do re_each_match( re, line, @count_the_word ); lines.free; writeln( 'Unique word count: ', table.count ); max_count := 0; for table_item in table do begin count := si_table_val( table_item ); if count > max_count then begin word := si_table_key( table_item ); max_count := count end; end; writeln( 'Commonest word: ', word, ' (', max_count, ')' ); writeln( 'Elapsed time: ', milliSecondsBetween( time_1, time ), ' milliseconds.' ); table.free; re.free end. _______________________________________________ fpc-pascal maillist - fpc-pascal@lists.freepascal.org http://lists.freepascal.org/mailman/listinfo/fpc-pascal