And there are other problems.
Some doc-base control files list _directories_ in their Files: lines.
While index++ is smart (or dumb?) enough to recurse into them, this
defeats any uniqueness algorithm (either name or inode based) for the
actual files indexed. It seems best to do our own recursion into
subdirectories, and this is what the index++ manpage recommends when
using "-" on the command line to pass the list of files via a pipe.
Lastly, as a side effect of having directories in the list, some files
which clearly should not be indexed (e.g. *.gif) get in. Again this
could be handled by index++ (modifying /usr/share/dwww/swish++.conf)
but we get more control watching for these patterns right in dwww-index++.
The following cumulative patch addresses these issues (together with
the ones listed earlier in the thread).
--- dwww-index++.distrib 2007-12-07 13:04:29.000000000 -0500
+++ dwww-index++ 2007-12-07 14:54:58.000000000 -0500
@@ -45,6 +45,8 @@
# 'ps',
);
+my $stopfiles = qr(\.(css|dsl|gif|jpg|lfig|mp4|sasl)(\.gz)?$);
+
my $dwww_url = "/cgi-bin/dwww";
my $dwww_swish_conf = "/usr/share/dwww/swish++.conf";
my $dwww_swish_index = "/var/cache/dwww/dwww.swish++.index";
@@ -52,12 +54,12 @@
my $dwww_swish_index_tmp = "/var/cache/dwww/dwww.swish++.tmp.index";
my $dwww_swish_index_res = $dwww_swish_index_tmp;
my @files = (); # list of files to index;
-our($opt_v, $opt_f); # set by getopt
+our($opt_v, $opt_f, $opt_l); # set by getopt
my $dwwwconf = &DwwwInitialize("/etc/dwww/dwww.conf");
&DwwwSetupDirs($dwwwconf);
-my @index_command = ('/usr/bin/index++', '--config-file',
"$dwww_swish_conf",
+my @index_command = ('/usr/bin/index++', '--no-recurse', '--config-file',
"$dwww_swish_conf",
'--index-file', "$dwww_swish_index_tmp");
if (! -x $index_command[0]) {
print STDERR "Can't find index++ command.\n";
@@ -65,7 +67,7 @@
exit(1);
}
$Getopt::Std::STANDARD_HELP_VERSION=1;
-&getopts('vf');
+&getopts('vfl');
my $do_index = $dwwwconf->{'DWWW_INDEX_DOCUMENTATION'};
if (!$opt_f and defined $do_index and lc($do_index) eq "no") {
@@ -77,7 +79,7 @@
my $m2h_merge = $dwwwconf->{'DWWW_MERGE_MAN2HTML_INDEX'};
my $m2h_idx_file = '/var/cache/man2html/man2html.swish++.index';
-if (defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
+if (!$opt_l and defined $m2h_merge and lc($m2h_merge) eq "yes" and -r
$m2h_idx_file) {
if (copy($m2h_idx_file, $dwww_swish_index_tmp)) {
$dwww_swish_index_res = $dwww_swish_index_tmp . '.new';
push(@index_command, '--incremental');
@@ -96,18 +98,33 @@
&FilesFromDocBaseDir("/usr/share/doc-base");
&FilesFromDocBaseDir("/var/lib/dwww/menu-method");
-print STDERR "Sorting list of files\n" if $opt_v;
-@files = sort @files;
+my %filenames_hash = ( );
+$filenames_hash{$_} = [ stat ] foreach (@files);
+
+my ($last_d, $last_ino) = (-1, -1);
+if ($opt_l) {
+ LISTFILE:
+ foreach my $f (sort { $filenames_hash{$a}->[0] <=>
$filenames_hash{$b}->[0]
+ or $filenames_hash{$a}->[1] <=>
$filenames_hash{$b}->[1] } (keys %filenames_hash)) {
+ next LISTFILE if $filenames_hash{$f}->[0] == $last_d and
$filenames_hash{$f}->[1] == $last_ino;
+ syswrite STDOUT, "$f\n";
+ ($last_d, $last_ino) = ($filenames_hash{$f}->[0],
$filenames_hash{$f}->[1])
+ }
+ exit 0;
+}
print STDERR "Executing: @index_command\n" if $opt_v;
open (INDEX, '|-')
|| exec { $index_command[0] } @index_command;
-# try to avoid indexing the same file twice
-for (my $i = 0; $i <= $#files; $i++) {
- syswrite INDEX, "$files[$i]\n" unless ($i > 0 and $files[$i] eq
$files[$i - 1]);
+INDEXFILE:
+foreach my $f (sort { $filenames_hash{$a}->[0] <=> $filenames_hash{$b}->[0]
+ or $filenames_hash{$a}->[1] <=> $filenames_hash{$b}->[1] }
(keys %filenames_hash)) {
+ next INDEXFILE if $filenames_hash{$f}->[0] == $last_d and
$filenames_hash{$f}->[1] == $last_ino;
+ syswrite INDEX, "$f\n";
# sleep 150 ms
select(undef, undef, undef, 0.15);
+ ($last_d, $last_ino) = ($filenames_hash{$f}->[0],
$filenames_hash{$f}->[1])
}
close INDEX;
@@ -186,9 +203,21 @@
}
}
- if ($#globbed >= 0) {
- push(@files, @globbed);
- return;
+ while ($#globbed >= 0) {
+ my $d = shift @globbed;
+ if (not -d $d) {
+ next if $d =~ $stopfiles;
+ push(@files, $d);
+
+ }
+ if (not opendir DOCSUBDIR, $d) {
+ print STDERR "Can't open directory $d: $!\n"
if $opt_v, next;
+ }
+ while (my $f = readdir (DOCSUBDIR)) {
+ next if $f =~ /^\./;
+ push(@globbed, "$d/$f");
+ }
+ closedir DOCSUBDIR;
}
}
}
@@ -209,6 +238,7 @@
print STDOUT "Usage: $prog [-v] [-f] [-- swish_option [...]]\n";
print STDOUT " -v be more verbose\n";
print STDOUT " -f build the index even if it's disabled in the
configuration file\n";
+ print STDOUT " -l do not really index, only output the list of
files to index\n";
print STDOUT " -- opt option passed to swish's index++ program\n";
}
--
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]