commit 7d8c1fcf644bd921736f61f8f4461255e4325cbb
Author: Kornel Benko <kor...@lyx.org>
Date:   Wed Oct 30 11:08:31 2024 +0100
    Cmake url tests: try to check also some modified urls
    
    If the url contains 'ctan', try to determine the correct url depending
    of the components to deside if using 'https://www.ctan.org' or rather
    https://mirrors.ctan.org.
    Without 'ctan' try to check https:// instead of ftp:// or http://
---
 development/checkurls/CheckURL.pm         | 58 ++++++++++++++++++++++++++++++-
 development/checkurls/knownToRegisterURLS |  1 -
 development/checkurls/search_url.pl       | 45 +++++++++++++++---------
 3 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/development/checkurls/CheckURL.pm 
b/development/checkurls/CheckURL.pm
index 15839410f3..b2eec3adfa 100755
--- a/development/checkurls/CheckURL.pm
+++ b/development/checkurls/CheckURL.pm
@@ -19,7 +19,7 @@ our (@EXPORT, @ISA);
 BEGIN {
   use Exporter ();
   @ISA    = qw(Exporter);
-  @EXPORT = qw(check_url);
+  @EXPORT = qw(check_url constructExtraTestUrl);
 }
 
 # Prototypes
@@ -260,6 +260,7 @@ sub check_unknown_url($$$$) {
 # Main entry
 sub check_url($$$$) {
   my ($url, $use_curl, $fex, $fsx) = @_;
+  $url =~ s/%20/ /g;
   $fe = $fex;
   $fs = $fsx;
   my $file = undef;
@@ -308,4 +309,59 @@ sub check_url($$$$) {
   }
 }
 
+sub constructExtraTestUrl($) {
+  my ($url) = @_;
+
+  my $urlok = $url;
+  my $protokol;
+  if ($urlok =~ s/^(ftp|https?):\/\///) {
+    $protokol = $1;
+    if ($protokol eq 'http') {
+      $protokol = 'https';
+    }
+    if (($protokol eq 'ftp') && ($urlok =~ /ctan/)) {
+      $protokol = 'https';
+    }
+  }
+  $urlok =~ s/^([^\/]+)//;
+  my $server = $1;
+  $urlok =~ s/^\///;
+  if ($server =~ /ctan/) {
+    $urlok =~ s/\/\/+/\//g;
+    $urlok =~ s/^ctan\///;
+    if ($urlok =~ /[\w][.](pdf|html|dvi)$/) {
+      if ($urlok =~ s/^(tex-archive|CTAN)\///) {
+        $server = 'mirrors.ctan.org';
+      }
+      elsif ($urlok =~ /(pgf)\//) {
+        $server = 'www.ctan.org';
+      }
+    }
+    else {
+      if ($urlok =~ s/\/$//) {
+        $server = 'www.cpan.org';
+        if ($urlok ne '') {
+          if ("$urlok/" =~
+              
/^(biblio|bibliography|digest|documentation|dviware|fonts|graphics|help|indexing|info|install|languages?|macros|obsolete|support|systems|tds|usergrps|web)\//
+          )
+          {
+            $urlok = 'tex-archive/' . $urlok;
+          }
+          if ("$urlok/" !~ /^(pkg|topic|tex-archive|author)\//) {
+            die("");
+          }
+        }
+      }
+    }
+  }
+  my $url2;
+  if ($urlok eq '') {
+    $url2 = "$protokol://$server";
+  }
+  else {
+    $url2 = "$protokol://$server/$urlok";
+  }
+  return($url2);
+}
+
 1;
diff --git a/development/checkurls/knownToRegisterURLS 
b/development/checkurls/knownToRegisterURLS
index 6d75c27446..80e2e9d4c2 100644
--- a/development/checkurls/knownToRegisterURLS
+++ b/development/checkurls/knownToRegisterURLS
@@ -11,7 +11,6 @@ 
https://texample.net/media/tikz/examples/TEX/free-body-diagrams.tex
 
 # Urls probably exist, but to check
 # we need to register and login first
-http://www.issn.org/en/node/344
 http://www.springer.de/author/tex/help-journals.html
 http://www.wkap.nl/jrnllist.htm/JRNLHOME
 http://www.wkap.nl/kaphtml.htm/STYLEFILES
diff --git a/development/checkurls/search_url.pl 
b/development/checkurls/search_url.pl
index 24bc275a7f..a1a7497fb3 100755
--- a/development/checkurls/search_url.pl
+++ b/development/checkurls/search_url.pl
@@ -30,6 +30,7 @@
 #           (c) 2013 Scott Kostyshak <skot...@lyx.org>
 
 use strict;
+use warnings;
 
 BEGIN {
   use File::Spec;
@@ -38,7 +39,6 @@ BEGIN {
   unshift(@INC, "$p");
 }
 
-use warnings;
 use Cwd qw(abs_path);
 use CheckURL;
 use Try::Tiny;
@@ -46,6 +46,8 @@ use locale;
 use POSIX qw(locale_h);
 use Readonly;
 
+binmode(STDOUT, ":encoding(UTF-8)");
+
 Readonly::Scalar my $NR_JOBS => 10;
 
 setlocale(LC_CTYPE,    "");
@@ -71,6 +73,7 @@ my %revertedURLS        = ();
 my %extraURLS           = ();
 my %selectedURLS        = ();
 my %knownToRegisterURLS = ();
+my %extraTestURLS       = ();
 my $summaryFile         = undef;
 
 my $checkSelectedOnly = 0;
@@ -80,7 +83,7 @@ for my $arg (@ARGV) {
   if ($type eq "filesToScan") {
 
     #The file should be a list of files to search in
-    if (open(FLIST, $val)) {
+    if (open(FLIST, '<', $val)) {
       while (my $l = <FLIST>) {
         chomp($l);
         parse_file($l);
@@ -105,7 +108,7 @@ for my $arg (@ARGV) {
     readUrls($val, %knownToRegisterURLS);
   }
   elsif ($type eq "summaryFile") {
-    if (open(SFO, '>', "$val")) {
+    if (open(SFO, '>:encoding(UTF8)', "$val")) {
       $summaryFile = $val;
     }
   }
@@ -143,10 +146,15 @@ for my $u (@urls) {
   next if ($checkSelectedOnly && !defined($selectedURLS{$u}));
   $URLScount++;
   push(@testvals, {u => $u, use_curl => $use_curl,});
-  if ($u =~ s/^http:/https:/) {
-    if (!defined($selectedURLS{$u})) {    # check also the corresponging 
'https:' url
-      push(@testvals, {u => $u, use_curl => $use_curl, extra => 1,});
-      $URLScount++;
+  my $uorig = $u;
+  $u = constructExtraTestUrl($uorig);
+  if ($u ne $uorig) {
+    if (!defined($selectedURLS{$u})) {
+      if (!defined($extraTestURLS{$u})) {
+        $extraTestURLS{$u} = 1; # omit multiple tests
+        push(@testvals, {u => $u, use_curl => $use_curl, extra => 1});
+        $URLScount++;
+      }
     }
   }
 }
@@ -206,7 +214,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) {    # Number of 
subprocesses
         my $use_curl = $rentry->{use_curl};
         my $extra    = defined($rentry->{extra});
 
-        print $fe "Checking($entryidx-$subprocess) '$u': ";
+        print $fe "Checking($entryidx-$subprocess) '$u': time=" . time() . ' ';
         my ($res, $prnt, $outSum);
         try {
           $res = check_url($u, $use_curl, $fe, $fs);
@@ -248,6 +256,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) {    # Number of 
subprocesses
         else {
           my $succes;
           if ($extra) {
+            # This url is created
             $succes = "Extra_OK url:";
           }
           else {
@@ -274,7 +283,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) {    # Number of 
subprocesses
 
 sub readsublog($) {
   my ($i) = @_;
-  open(my $fe, '<', "$tempdir/xxxError$i");
+  open(my $fe, '<:encoding(UTF-8)', "$tempdir/xxxError$i");
   while (my $l = <$fe>) {
     if ($l =~ /^NumberOfErrors\s(\d+)/) {
       $errorcount += $1;
@@ -345,29 +354,32 @@ sub printNotUsedURLS($\%) {
     }
   }
   if (@msg) {
-    print "\n$txt URLs not found in sources: " . join(' ', @msg) . "\n";
+    print "\n$txt URLs: " . join(' ', @msg) . "\n";
   }
 }
 
 sub replaceSpecialChar($) {
   my ($l) = @_;
   $l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)[\s]?/$2/;
+  $l =~ s/ /%20/g;
   return ($l);
 }
 
 sub readUrls($\%) {
   my ($file, $rUrls) = @_;
 
-  die("Could not read file $file") if (!open(ULIST, $file));
+  die("Could not read file $file") if (!open(ULIST, '<:encoding(UTF-8)', 
$file));
+  print "Read urls from $file\n";
   my $line = 0;
   while (my $l = <ULIST>) {
     $line++;
-    $l =~ s/[\r\n]+$//;    # remove eol
-    $l =~ s/\s*\#.*$//;    # remove comment
-    $l = &replaceSpecialChar($l);
+    chomp($l);                  # remove eol
+    $l =~ s/^\s+//;
+    next if ($l =~ /^\#/);      # discard comment lines
     next if ($l eq "");
+    $l = &replaceSpecialChar($l);
     my $use_curl = 0;
-    if ($l =~ s/^\s*UseCurl\s*//) {
+    if ($l =~ s/^UseCurl\s*//) {
       $use_curl = 1;
     }
     if (!defined($rUrls->{$l})) {
@@ -382,13 +394,12 @@ sub parse_file($) {
   my $status = "out";    # outside of URL/href
 
   #return if ($f =~ /\/attic\//);
-  if (open(FI, $f)) {
+  if (open(FI, '<:encoding(UTF-8)', $f)) {
     my $line = 0;
     while (my $l = <FI>) {
       $line++;
       chomp($l);
 
-      # $l =~ s/[\r\n]+$//;    #  Simulate chomp
       if ($status eq "out") {
 
         # searching for "\begin_inset Flex URL"
-- 
lyx-cvs mailing list
lyx-cvs@lists.lyx.org
https://lists.lyx.org/mailman/listinfo/lyx-cvs

Reply via email to