commit 7d8c1fcf644bd921736f61f8f4461255e4325cbb Author: Kornel Benko <kor...@lyx.org> Date: Wed Oct 30 11:08:31 2024 +0100
Cmake url tests: try to check also some modified urls If the url contains 'ctan', try to determine the correct url depending on the components to decide whether to use 'https://www.ctan.org' or rather https://mirrors.ctan.org. Without 'ctan', try to check https:// instead of ftp:// or http:// --- development/checkurls/CheckURL.pm | 58 ++++++++++++++++++++++++++++++- development/checkurls/knownToRegisterURLS | 1 - development/checkurls/search_url.pl | 45 +++++++++++++++--------- 3 files changed, 85 insertions(+), 19 deletions(-) diff --git a/development/checkurls/CheckURL.pm b/development/checkurls/CheckURL.pm index 15839410f3..b2eec3adfa 100755 --- a/development/checkurls/CheckURL.pm +++ b/development/checkurls/CheckURL.pm @@ -19,7 +19,7 @@ our (@EXPORT, @ISA); BEGIN { use Exporter (); @ISA = qw(Exporter); - @EXPORT = qw(check_url); + @EXPORT = qw(check_url constructExtraTestUrl); } # Prototypes @@ -260,6 +260,7 @@ sub check_unknown_url($$$$) { # Main entry sub check_url($$$$) { my ($url, $use_curl, $fex, $fsx) = @_; + $url =~ s/%20/ /g; $fe = $fex; $fs = $fsx; my $file = undef; @@ -308,4 +309,59 @@ sub check_url($$$$) { } } +sub constructExtraTestUrl($) { + my ($url) = @_; + + my $urlok = $url; + my $protokol; + if ($urlok =~ s/^(ftp|https?):\/\///) { + $protokol = $1; + if ($protokol eq 'http') { + $protokol = 'https'; + } + if (($protokol eq 'ftp') && ($urlok =~ /ctan/)) { + $protokol = 'https'; + } + } + $urlok =~ s/^([^\/]+)//; + my $server = $1; + $urlok =~ s/^\///; + if ($server =~ /ctan/) { + $urlok =~ s/\/\/+/\//g; + $urlok =~ s/^ctan\///; + if ($urlok =~ /[\w][.](pdf|html|dvi)$/) { + if ($urlok =~ s/^(tex-archive|CTAN)\///) { + $server = 'mirrors.ctan.org'; + } + elsif ($urlok =~ /(pgf)\//) { + $server = 'www.ctan.org'; + } + } + else { + if ($urlok =~ s/\/$//) { + $server = 'www.cpan.org'; + if ($urlok ne '') { + if ("$urlok/" =~ + 
/^(biblio|bibliography|digest|documentation|dviware|fonts|graphics|help|indexing|info|install|languages?|macros|obsolete|support|systems|tds|usergrps|web)\// + ) + { + $urlok = 'tex-archive/' . $urlok; + } + if ("$urlok/" !~ /^(pkg|topic|tex-archive|author)\//) { + die(""); + } + } + } + } + } + my $url2; + if ($urlok eq '') { + $url2 = "$protokol://$server"; + } + else { + $url2 = "$protokol://$server/$urlok"; + } + return($url2); +} + 1; diff --git a/development/checkurls/knownToRegisterURLS b/development/checkurls/knownToRegisterURLS index 6d75c27446..80e2e9d4c2 100644 --- a/development/checkurls/knownToRegisterURLS +++ b/development/checkurls/knownToRegisterURLS @@ -11,7 +11,6 @@ https://texample.net/media/tikz/examples/TEX/free-body-diagrams.tex # Urls probably exist, but to check # we need to register and login first -http://www.issn.org/en/node/344 http://www.springer.de/author/tex/help-journals.html http://www.wkap.nl/jrnllist.htm/JRNLHOME http://www.wkap.nl/kaphtml.htm/STYLEFILES diff --git a/development/checkurls/search_url.pl b/development/checkurls/search_url.pl index 24bc275a7f..a1a7497fb3 100755 --- a/development/checkurls/search_url.pl +++ b/development/checkurls/search_url.pl @@ -30,6 +30,7 @@ # (c) 2013 Scott Kostyshak <skot...@lyx.org> use strict; +use warnings; BEGIN { use File::Spec; @@ -38,7 +39,6 @@ BEGIN { unshift(@INC, "$p"); } -use warnings; use Cwd qw(abs_path); use CheckURL; use Try::Tiny; @@ -46,6 +46,8 @@ use locale; use POSIX qw(locale_h); use Readonly; +binmode(STDOUT, ":encoding(UTF-8)"); + Readonly::Scalar my $NR_JOBS => 10; setlocale(LC_CTYPE, ""); @@ -71,6 +73,7 @@ my %revertedURLS = (); my %extraURLS = (); my %selectedURLS = (); my %knownToRegisterURLS = (); +my %extraTestURLS = (); my $summaryFile = undef; my $checkSelectedOnly = 0; @@ -80,7 +83,7 @@ for my $arg (@ARGV) { if ($type eq "filesToScan") { #The file should be a list of files to search in - if (open(FLIST, $val)) { + if (open(FLIST, '<', $val)) { while (my $l = 
<FLIST>) { chomp($l); parse_file($l); @@ -105,7 +108,7 @@ for my $arg (@ARGV) { readUrls($val, %knownToRegisterURLS); } elsif ($type eq "summaryFile") { - if (open(SFO, '>', "$val")) { + if (open(SFO, '>:encoding(UTF8)', "$val")) { $summaryFile = $val; } } @@ -143,10 +146,15 @@ for my $u (@urls) { next if ($checkSelectedOnly && !defined($selectedURLS{$u})); $URLScount++; push(@testvals, {u => $u, use_curl => $use_curl,}); - if ($u =~ s/^http:/https:/) { - if (!defined($selectedURLS{$u})) { # check also the corresponging 'https:' url - push(@testvals, {u => $u, use_curl => $use_curl, extra => 1,}); - $URLScount++; + my $uorig = $u; + $u = constructExtraTestUrl($uorig); + if ($u ne $uorig) { + if (!defined($selectedURLS{$u})) { + if (!defined($extraTestURLS{$u})) { + $extraTestURLS{$u} = 1; # omit multiple tests + push(@testvals, {u => $u, use_curl => $use_curl, extra => 1}); + $URLScount++; + } } } } @@ -206,7 +214,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) { # Number of subprocesses my $use_curl = $rentry->{use_curl}; my $extra = defined($rentry->{extra}); - print $fe "Checking($entryidx-$subprocess) '$u': "; + print $fe "Checking($entryidx-$subprocess) '$u': time=" . time() . ' '; my ($res, $prnt, $outSum); try { $res = check_url($u, $use_curl, $fe, $fs); @@ -248,6 +256,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) { # Number of subprocesses else { my $succes; if ($extra) { + # This url is created $succes = "Extra_OK url:"; } else { @@ -274,7 +283,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) { # Number of subprocesses sub readsublog($) { my ($i) = @_; - open(my $fe, '<', "$tempdir/xxxError$i"); + open(my $fe, '<:encoding(UTF-8)', "$tempdir/xxxError$i"); while (my $l = <$fe>) { if ($l =~ /^NumberOfErrors\s(\d+)/) { $errorcount += $1; @@ -345,29 +354,32 @@ sub printNotUsedURLS($\%) { } } if (@msg) { - print "\n$txt URLs not found in sources: " . join(' ', @msg) . "\n"; + print "\n$txt URLs: " . join(' ', @msg) . 
"\n"; } } sub replaceSpecialChar($) { my ($l) = @_; $l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)[\s]?/$2/; + $l =~ s/ /%20/g; return ($l); } sub readUrls($\%) { my ($file, $rUrls) = @_; - die("Could not read file $file") if (!open(ULIST, $file)); + die("Could not read file $file") if (!open(ULIST, '<:encoding(UTF-8)', $file)); + print "Read urls from $file\n"; my $line = 0; while (my $l = <ULIST>) { $line++; - $l =~ s/[\r\n]+$//; # remove eol - $l =~ s/\s*\#.*$//; # remove comment - $l = &replaceSpecialChar($l); + chomp($l); # remove eol + $l =~ s/^\s+//; + next if ($l =~ /^\#/); # discard comment lines next if ($l eq ""); + $l = &replaceSpecialChar($l); my $use_curl = 0; - if ($l =~ s/^\s*UseCurl\s*//) { + if ($l =~ s/^UseCurl\s*//) { $use_curl = 1; } if (!defined($rUrls->{$l})) { @@ -382,13 +394,12 @@ sub parse_file($) { my $status = "out"; # outside of URL/href #return if ($f =~ /\/attic\//); - if (open(FI, $f)) { + if (open(FI, '<:encoding(UTF-8)', $f)) { my $line = 0; while (my $l = <FI>) { $line++; chomp($l); - # $l =~ s/[\r\n]+$//; # Simulate chomp if ($status eq "out") { # searching for "\begin_inset Flex URL" -- lyx-cvs mailing list lyx-cvs@lists.lyx.org https://lists.lyx.org/mailman/listinfo/lyx-cvs