--- L <[EMAIL PROTECTED]> wrote: > > > No more strlen: > > > http://www.hu.freepascal.org/fpcircbot/cgipastebin?msgid=1432 > > > > This doesn't work if you have spaces in front of the < tags > > > <sometag> > <sometag> > > I'm not sure if the Perl one fails too though. > I don't have perl installed and can't test it ;-) > > A real parser doesn't care about whitespace in front. > And will be a bit slower.. because of that check.
{$MODE OBJFPC} {$H+} uses sysutils, strings, contnrs; const chars : set of char = ['a'..'z','0'..'9']; var f: text; line : ansistring; p, pword : pchar; saved: char; wc : longint; counting, good : boolean; unique: TFPStringHashTable; textbuf: array[1..4096] of byte; when : tDateTime; function do_tag( var s: ansistring; var p: pchar):boolean; var pword: pchar; begin pword := p; while p^ <> '>' do inc(p); p^ := #0; result := ('<title'=pword) or ('<text'=pword); end; begin when := time; assign(f, 'Koleksi.dat'); reset(f); SetTextBuf(f, textbuf, sizeof(textbuf)); wc := 0; counting := false; unique := TFPStringHashTable.Create; while not eof(f) do begin readln(f, line ); if '' = line then continue; line := lowercase( line ); p := pchar( line ); repeat // Skip junk. while (p^ <> #0) and (not (p^ in chars)) do begin if '<' = p^ then counting := do_tag( line, p ); inc(p); end; // Build word. pword := p; good := true; while p^ in chars do begin if not (p^ in ['a'..'z']) then good := false; inc(p); end; if counting and good then if pword <> p then begin saved := p^; p^ := #0; inc( wc ); if unique.Find( pword) = nil then unique.Add( pword,''); p^ := saved; end until #0 = p^; end; close(f); writeln( ((time-when)*secsPerDay):0:3 ); WriteLn('Word count:',wc, #10'Unique word count:', unique.Count); end. { Word count: 126944 Unique word count: 11793 } __________________________________________________ Do You Yahoo!? Tired of spam? Yahoo! Mail has the best spam protection around http://mail.yahoo.com _______________________________________________ fpc-pascal maillist - fpc-pascal@lists.freepascal.org http://lists.freepascal.org/mailman/listinfo/fpc-pascal