commit 7d8c1fcf644bd921736f61f8f4461255e4325cbb
Author: Kornel Benko
Date: Wed Oct 30 11:08:31 2024 +0100
Cmake url tests: try to check also some modified urls
If the url contains 'ctan', try to determine the correct url depending
on its components, to decide whether to use 'https://www.ctan.org' or
rather 'https://mirrors.ctan.org'.
Without 'ctan' try to check https:// instead of ftp:// or http://
---
development/checkurls/CheckURL.pm | 58 ++-
development/checkurls/knownToRegisterURLS | 1 -
development/checkurls/search_url.pl | 45 +++-
3 files changed, 85 insertions(+), 19 deletions(-)
diff --git a/development/checkurls/CheckURL.pm
b/development/checkurls/CheckURL.pm
index 15839410f3..b2eec3adfa 100755
--- a/development/checkurls/CheckURL.pm
+++ b/development/checkurls/CheckURL.pm
@@ -19,7 +19,7 @@ our (@EXPORT, @ISA);
BEGIN {
use Exporter ();
@ISA= qw(Exporter);
- @EXPORT = qw(check_url);
+ @EXPORT = qw(check_url constructExtraTestUrl);
}
# Prototypes
@@ -260,6 +260,7 @@ sub check_unknown_url() {
# Main entry
sub check_url() {
my ($url, $use_curl, $fex, $fsx) = @_;
+ $url =~ s/%20/ /g;
$fe = $fex;
$fs = $fsx;
my $file = undef;
@@ -308,4 +309,59 @@ sub check_url() {
}
}
+# Construct an alternative url that is worth testing in addition to $url.
+#
+# * 'http' is always replaced by 'https'; 'ftp' is replaced by 'https'
+#   only if the remainder of the url mentions 'ctan'.
+# * If the server name contains 'ctan', the path is normalized and the
+#   server is replaced by 'mirrors.ctan.org' (documents below
+#   tex-archive/ or CTAN/) or 'www.ctan.org' (pgf documents, directories).
+#
+# Parameter: $url  the url as found in the scanned file
+# Returns:   the constructed url; input which does not look like
+#            '(ftp|http|https)://server[/path]' is returned unchanged
+# Dies:      if a ctan directory url does not start with a known
+#            top-level section
+sub constructExtraTestUrl($) {
+  my ($url) = @_;
+
+  my $urlok = $url;
+  # Without a known protocol there is nothing sensible to construct
+  # (and the result would contain an undefined protocol).
+  return($url) unless (defined($urlok) && ($urlok =~ s{^(ftp|https?)://}{}));
+  my $protokol = $1;
+  if ($protokol eq 'http') {
+    $protokol = 'https';
+  }
+  elsif (($protokol eq 'ftp') && ($urlok =~ /ctan/)) {
+    $protokol = 'https';
+  }
+
+  # Use the capture only after a successful match, otherwise $1 would
+  # still hold the protocol matched above.
+  return($url) unless ($urlok =~ s{^([^/]+)}{});
+  my $server = $1;
+  $urlok =~ s{^/}{};
+
+  if ($server =~ /ctan/) {
+    $urlok =~ s{//+}{/}g;	# collapse repeated slashes
+    $urlok =~ s{^ctan/}{};
+    if ($urlok =~ /[\w][.](pdf|html|dvi)$/) {
+      # A document
+      if ($urlok =~ s{^(?:tex-archive|CTAN)/}{}) {
+        $server = 'mirrors.ctan.org';
+      }
+      elsif ($urlok =~ m{pgf/}) {
+        $server = 'www.ctan.org';
+      }
+    }
+    elsif ($urlok =~ s{/$}{}) {
+      # A directory, served by the ctan web site (not cpan)
+      $server = 'www.ctan.org';
+      if ($urlok ne '') {
+        # Top-level directories of the archive live below tex-archive/
+        if ("$urlok/" =~ m{^(?: biblio | bibliography | digest | documentation
+                             | dviware | fonts | graphics | help | indexing
+                             | info | install | languages? | macros | obsolete
+                             | support | systems | tds | usergrps | web )/}x)
+        {
+          $urlok = 'tex-archive/' . $urlok;
+        }
+        if ("$urlok/" !~ m{^(?:pkg|topic|tex-archive|author)/}) {
+          die("Unexpected ctan directory '$urlok' in url '$url'");
+        }
+      }
+    }
+  }
+
+  return("$protokol://$server") if ($urlok eq '');
+  return("$protokol://$server/$urlok");
+}
+
1;
diff --git a/development/checkurls/knownToRegisterURLS
b/development/checkurls/knownToRegisterURLS
index 6d75c27446..80e2e9d4c2 100644
--- a/development/checkurls/knownToRegisterURLS
+++ b/development/checkurls/knownToRegisterURLS
@@ -11,7 +11,6 @@
https://texample.net/media/tikz/examples/TEX/free-body-diagrams.tex
# Urls probably exist, but to check
# we need to register and login first
-http://www.issn.org/en/node/344
http://www.springer.de/author/tex/help-journals.html
http://www.wkap.nl/jrnllist.htm/JRNLHOME
http://www.wkap.nl/kaphtml.htm/STYLEFILES
diff --git a/development/checkurls/search_url.pl
b/development/checkurls/search_url.pl
index 24bc275a7f..a1a7497fb3 100755
--- a/development/checkurls/search_url.pl
+++ b/development/checkurls/search_url.pl
@@ -30,6 +30,7 @@
# (c) 2013 Scott Kostyshak
use strict;
+use warnings;
BEGIN {
use File::Spec;
@@ -38,7 +39,6 @@ BEGIN {
unshift(@INC, "$p");
}
-use warnings;
use Cwd qw(abs_path);
use CheckURL;
use Try::Tiny;
@@ -46,6 +46,8 @@ use locale;
use POSIX qw(locale_h);
use Readonly;
+binmode(STDOUT, ":encoding(UTF-8)");
+
Readonly::Scalar my $NR_JOBS => 10;
setlocale(LC_CTYPE,"");
@@ -71,6 +73,7 @@ my %revertedURLS= ();
my %extraURLS = ();
my %selectedURLS= ();
my %knownToRegisterURLS = ();
+my %extraTestURLS = ();
my $summaryFile = undef;
my $checkSelectedOnly = 0;
@@ -80,7 +83,7 @@ for my $arg (@ARGV) {
if ($type eq "filesToScan") {
#The file should be a list of files to search in
-if (open(FLIST, $val)) {
+if (open(FLIST, '<', $val)) {
while (my $l = ) {
chomp($l);
parse_file($l);
@@ -105,7 +108,7 @@ for my $arg (@ARGV) {
readUrls($val, %knownToRegisterURLS);
}
elsif ($type eq "summaryFile") {
-if (open(SFO, '>', "$val")) {
+if (open(SFO, '>:encoding(UTF8)', "$val")) {
$summaryFile = $val;
}
}
@@ -143,10 +146,15 @@ for my $u (@urls) {
next if ($checkSelectedOnly && !defined($selectedURLS{$u}));
$URLScount++;
push(@testvals, {u => $u, use_curl => $use_curl,});
- if ($u =~ s/^http:/https:/) {
-if (!defined($selectedURLS{$u})) {# check also the corresponging
'https:' url
- push(@testvals, {