Here is a patch that will apply cleanly. It does work for indexing local files.
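For reference, a dry run along these lines should confirm that it applies to the current 3.1.6 tree before committing to it (the patch file name below is only an example):

  cd htdig-3.1.6
  # check first that every hunk applies
  patch -p1 --dry-run < ../htdig-file-urls.patch
  # then apply for real
  patch -p1 < ../htdig-file-urls.patch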
Thanks.

On Sun, Mar 16, 2003 at 11:26:22PM +0100, Robert Ribnitz wrote:
> Quoting Norman Jordan <[EMAIL PROTECTED]>:
>
> > I need a couple of patches applied to htdig for it to be able to index
> > the KDE and QT documentation. If you apply the patches, then I will
> > sponsor your htdig packages. If not, then I will produce my own htdig
> > packages.
> >
> > The patches are in bugs #113858 and #113857.
> >
> > Thanks.
>
> Hello Norman,
>
> the bugs cited above contain patches, alright. My only problem is that with
> those patches some fail, since they were done against an old version (and as
> the changelog suggests, the package has undergone quite some development
> since you submitted those patches). Inspection of the source shows that most
> of it changed, from the logic point of view.
>
> As you can see below, patch to 113858 completely fails, and the patch against
> htString.h kinda runs only half through.
>
> It would therefore be helpful if you could send me patches against the
> current version of the package (my version is 3.1.6-3, last comment by Stijn
> dates feb. 25, 2003)
>
> Then applying the patches shouldn't be too much of a problem, and I'll do it
> as soon as I have the rights to the package.
>
> At the moment, Stijn is still officially responsible for the package, but it
> looks like I might be able to take over, so I dont send this through the bts.
>
> looking forward to hearing from you
>
> Robert
>
> Output of the patching:
>
> patch -p1 -u --dry-run --verbose < ../patch_113858.txt
> Hmm... Looks like a unified diff to me...
> The text leading up to this was:
> --------------------------
> |diff -rNu htdig3.1.x/htdig/HTML.cc htdig3.1.x.patched/htdig/HTML.cc
> |--- htdig3.1.x/htdig/HTML.cc    Fri Dec  3 20:15:28 1999
> |+++ htdig3.1.x.patched/htdig/HTML.cc    Mon Jan 10 23:26:40 2000
> --------------------------
> Patching file htdig/HTML.cc using Plan A...
> Hunk #1 FAILED at 652.
> Hunk #2 FAILED at 910.
> 2 out of 2 hunks FAILED -- saving rejects to file htdig/HTML.cc.rej
> Hmm... Ignoring the trailing garbage.
> done
>
> patch -p1 -u --dry-run --verbose < ../patch_113857.txt
> Hmm... Looks like a unified diff to me...
> The text leading up to this was:
> --------------------------
> |diff -rNu htdig3.1.6/htcommon/DocumentDB.cc htdig3.1.6.patched/htcommon/DocumentDB.cc
> |--- htdig3.1.6/htcommon/DocumentDB.cc   Wed Jan 27 03:52:08 1999
> |+++ htdig3.1.6.patched/htcommon/DocumentDB.cc   Mon Jan 10 23:24:33 2000
> --------------------------
> Patching file htcommon/DocumentDB.cc using Plan A...
> Hunk #1 succeeded at 231 (offset 14 lines).
> Hunk #2 succeeded at 420 (offset 135 lines).
> Hmm... The next patch looks like a unified diff to me...
> The text leading up to this was:
> --------------------------
> |diff -rNu htdig3.1.6/htdig/Retriever.cc htdig3.1.6.patched/htdig/Retriever.cc
> |--- htdig3.1.6/htdig/Retriever.cc       Fri Dec  3 21:11:02 1999
> |+++ htdig3.1.6.patched/htdig/Retriever.cc       Mon Jan 10 23:24:33 2000
> --------------------------
> Patching file htdig/Retriever.cc using Plan A...
> Hunk #1 succeeded at 679 (offset 11 lines).
> Hmm... The next patch looks like a unified diff to me...
> The text leading up to this was:
> --------------------------
> |diff -rNu htdig3.1.6/htdig/htdig.cc htdig3.1.6.patched/htdig/htdig.cc
> |--- htdig3.1.6/htdig/htdig.cc   Tue Dec  7 01:26:46 1999
> |+++ htdig3.1.6.patched/htdig/htdig.cc   Mon Jan 10 23:31:12 2000
> --------------------------
> Patching file htdig/htdig.cc using Plan A...
> Hunk #1 succeeded at 255 with fuzz 2 (offset 7 lines).
> Hmm... The next patch looks like a unified diff to me...
> The text leading up to this was:
> --------------------------
> |diff -rNu htdig3.1.6/htlib/String.cc htdig3.1.6.patched/htlib/String.cc
> |--- htdig3.1.6/htlib/String.cc  Sat Nov 27 00:59:26 1999
> |+++ htdig3.1.6.patched/htlib/String.cc  Mon Jan 10 23:24:33 2000
> --------------------------
> Patching file htlib/String.cc using Plan A...
> Hunk #1 succeeded at 634 with fuzz 2 (offset 13 lines).
> Hmm... The next patch looks like a unified diff to me...
> The text leading up to this was:
> --------------------------
> |diff -rNu htdig3.1.6/htlib/htString.h htdig3.1.6.patched/htlib/htString.h
> |--- htdig3.1.6/htlib/htString.h Mon Feb  1 07:02:26 1999
> |+++ htdig3.1.6.patched/htlib/htString.h Mon Jan 10 23:24:33 2000
> --------------------------
> Patching file htlib/htString.h using Plan A...
> Hunk #1 FAILED at 10.
> Hunk #2 succeeded at 151 with fuzz 2 (offset 12 lines).
> 1 out of 2 hunks FAILED -- saving rejects to file htlib/htString.h.rej
> Hmm... Ignoring the trailing garbage.
> done

-- 
Norman Jordan <[EMAIL PROTECTED]>
diff -rNu htdig-3.1.6.old/htcommon/DocumentDB.cc htdig-3.1.6/htcommon/DocumentDB.cc
--- htdig-3.1.6.old/htcommon/DocumentDB.cc    2002-01-31 15:47:17.000000000 -0800
+++ htdig-3.1.6/htcommon/DocumentDB.cc        2003-03-18 09:03:02.000000000 -0800
@@ -231,7 +231,8 @@
     while ((key = dbf->Get_Next()))
     {
         dbf->Get(key, data);
-        if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0)
+        if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0 ||
+            strncmp(HtURLCodec::instance()->decode(key), "file:", 5) == 0)
         {
             ref = new DocumentRef;
             ref->Deserialize(data);
@@ -419,7 +420,8 @@
     while ((coded_key = dbf->Get_Next()))
     {
         String key = HtURLCodec::instance()->decode(coded_key);
-        if (mystrncasecmp(key, "http:", 5) == 0)
+        if (mystrncasecmp(key, "http:", 5) == 0 ||
+            mystrncasecmp(key, "file:", 5) == 0)
         {
             DocumentRef *ref = (*this)[key];
             if (ref)
diff -rNu htdig-3.1.6.old/htdig/Retriever.cc htdig-3.1.6/htdig/Retriever.cc
--- htdig-3.1.6.old/htdig/Retriever.cc        2002-01-31 15:47:17.000000000 -0800
+++ htdig-3.1.6/htdig/Retriever.cc            2003-03-18 09:03:02.000000000 -0800
@@ -679,7 +679,7 @@
     // Currently, we only deal with HTTP URLs.  Gopher and ftp will
     // come later...  ***FIX***
     //
-    if (strstr(u, "/../") || strncmp(u, "http://", 7) != 0)
+    if (strstr(u, "/../") || (strncmp(u, "http://", 7) != 0 && strncmp(u, "file://", 7) != 0))
     {
         if (debug > 2)
             cout << endl <<"   Rejected: Not an http or relative link!";
diff -rNu htdig-3.1.6.old/htdig/htdig.cc htdig-3.1.6/htdig/htdig.cc
--- htdig-3.1.6.old/htdig/htdig.cc            2002-01-31 15:47:17.000000000 -0800
+++ htdig-3.1.6/htdig/htdig.cc                2003-03-18 09:03:02.000000000 -0800
@@ -291,7 +291,20 @@
     if (minimalFile.length() == 0)
     {
         List    *list = docs.URLs();
+
+        if (optind < ac && !strcmp(av[optind], "-"))
+        {
+            String tmp;
+            while (cin >> tmp)
+            {
+                if (tmp.length() != 0)
+                    retriever.Initial(tmp, 1);
+            }
+        } else
+        {
         retriever.Initial(*list);
+        }
+
         delete list;
 
         // Add start_url to the initial list of the retriever.
diff -rNu htdig-3.1.6.old/htlib/String.cc htdig-3.1.6/htlib/String.cc
--- htdig-3.1.6.old/htlib/String.cc           2002-01-31 15:47:17.000000000 -0800
+++ htdig-3.1.6/htlib/String.cc               2003-03-18 09:03:02.000000000 -0800
@@ -634,6 +634,43 @@
         "   Data: " << ((void*) Data) << " '" << *this << "'\n";
 }
 
+istream &
+operator >> (istream &in, String &line)
+{
+    line.Length = 0;
+    line.allocate_fix_space(2048);
+
+    while (in.get(line.Data + line.Length, line.Allocated - line.Length))
+    {
+        line.Length += strlen(line.Data + line.Length);
+        int c = in.get();
+        if (c == '\n')
+        {
+            //
+            // A full line has been read.  Return it.
+            //
+            break;
+        }
+        if (line.Allocated > line.Length + 2)
+        {
+            //
+            // Not all available space filled.  Probably EOF?
+            //
+
+            line.Data[line.Length++] = char(c);
+            continue;
+        }
+        //
+        // Only a partial line was read.  Increase available space in
+        // string and read some more.
+        //
+
+        line.reallocate_space(line.Allocated << 1);
+        line.Data[line.Length++] = char(c);
+    }
+
+    return in;
+}
 
 int String::readLine(FILE *in)
 {
diff -rNu htdig-3.1.6.old/htlib/htString.h htdig-3.1.6/htlib/htString.h
--- htdig-3.1.6.old/htlib/htString.h          2002-01-31 15:47:17.000000000 -0800
+++ htdig-3.1.6/htlib/htString.h              2003-03-18 09:03:02.000000000 -0800
@@ -150,6 +150,7 @@
     friend int          operator >= (String &a, String &b);
 
     friend ostream      &operator << (ostream &o, String &s);
+    friend istream      &operator >> (istream &in, String &line);
 
     int                 readLine(FILE *in);
 
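For what it's worth, here is roughly how the patched htdig can be pointed at local documentation: the htdig.cc hunk makes a lone "-" argument read start URLs from stdin, and the other hunks let file: URLs through the scheme checks. The doc path below is only an example; any list of file:// URLs will do:

  # example: index the locally installed Qt HTML docs
  find /usr/share/doc/qt-doc/html -name '*.html' -printf 'file://%p\n' | htdig -i -
  # -i starts an initial dig; the trailing "-" selects the new stdin branch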