Package: perl-modules Version: 5.10.0-11.1 Severity: normal Tags: patch
Note: this bug has already been reported upstream and I've submitted my patch there as well: http://rt.cpan.org/Public/Bug/Display.html?id=17441 There are a number of issues with the way CGI.pm constructs script_name() to work around an alleged bug in Apache. If /path/to/script.cgi?x=// is requested, script_name() returns /path/to/script.cgi?x=// instead of /path/to/script.cgi. That is known to break gnatsweb.pl at least. If /path/to/script.cgi/script.cgi is requested, script_name() returns /path/to instead of /path/to/script.cgi. The bug was introduced in CGI.pm 3.11. I'm attaching a patch. In my opinion, the patch still isn't the right thing to do, as I don't think the Apache behavior is a bug, but because we may not want to break scripts that wrongly relied on duplicated "/" being preserved, it tries to accommodate them. The patch includes a comment that tries to clarify why we do that, which I reproduce here: # This function returns a potentially modified version of SCRIPT_NAME # and PATH_INFO. Some HTTP servers do sanitise the paths in those # variables. It is the case of at least Apache 2. If for instance the # user requests: /path/./to/script.cgi/x//y/z/../x?y, Apache will set: # REQUEST_URI=/path/./to/script.cgi/x//y/z/../x?y # SCRIPT_NAME=/path/to/script.cgi # PATH_INFO=/x/y/x # # This is all fine except that some bogus CGI scripts expect # PATH_INFO=/http://foo when the user requests # http://xxx/script.cgi/http://foo # # Old versions of this module used to accommodate those scripts, so # this is why we do this here to keep those scripts backward compatible. # Basically, we accommodate those scripts but within limits, that is # we only try to preserve the number of / that were provided by the user # if $REQUEST_URI and "$SCRIPT_NAME$PATH_INFO" only differ by the number # of consecutive /. 
# # So for instance, in: http://foo/x//y/script.cgi/a//b, we'll return a # script_name of /x//y/script.cgi and a path_info of /a//b, but in: # http://foo/./x//z/script.cgi/a/../b//c, we'll return the versions # possibly sanitised by the HTTP server, so in the case of Apache 2: # script_name == /x/z/script.cgi and path_info == /b/c. # # Future versions of this module may no longer do that, so one should # avoid relying on the browser, proxy, server, and CGI.pm preserving the # number of consecutive slashes as no guarantee can be made there. -- System Information: Debian Release: lenny/sid APT prefers unstable APT policy: (500, 'unstable') Architecture: i386 (i686) Kernel: Linux 2.6.26 (PREEMPT) Locale: LANG=en_GB.ISO-8859-15, LC_CTYPE=en_US.ISO-8859-15 (charmap=ISO-8859-15) Shell: /bin/sh linked to /bin/bash Versions of packages perl-modules depends on: ii perl 5.10.0-11.1 Larry Wall's Practical Extraction perl-modules recommends no packages. perl-modules suggests no packages. -- debconf-show failed
--- CGI.pm.orig 2008-07-29 16:00:05.000000000 +0100 +++ CGI.pm 2008-08-06 08:31:04.000000000 +0100 @@ -2849,30 +2849,58 @@ } END_OF_FUNC -# WE USE THIS TO COMPENSATE FOR A BUG IN APACHE 2 PRESENT AT LEAST UP THROUGH 2.0.54 +# This function returns a potentially modified version of SCRIPT_NAME +# and PATH_INFO. Some HTTP servers do sanitise the paths in those +# variables. It is the case of at least Apache 2. If for instance the +# user requests: /path/./to/script.cgi/x//y/z/../x?y, Apache will set: +# REQUEST_URI=/path/./to/script.cgi/x//y/z/../x?y +# SCRIPT_NAME=/path/to/script.cgi +# PATH_INFO=/x/y/x +# +# This is all fine except that some bogus CGI scripts expect +# PATH_INFO=/http://foo when the user requests +# http://xxx/script.cgi/http://foo +# +# Old versions of this module used to accommodate those scripts, so +# this is why we do this here to keep those scripts backward compatible. +# Basically, we accommodate those scripts but within limits, that is +# we only try to preserve the number of / that were provided by the user +# if $REQUEST_URI and "$SCRIPT_NAME$PATH_INFO" only differ by the number +# of consecutive /. +# +# So for instance, in: http://foo/x//y/script.cgi/a//b, we'll return a +# script_name of /x//y/script.cgi and a path_info of /a//b, but in: +# http://foo/./x//z/script.cgi/a/../b//c, we'll return the versions +# possibly sanitised by the HTTP server, so in the case of Apache 2: +# script_name == /x/z/script.cgi and path_info == /b/c. +# +# Future versions of this module may no longer do that, so one should +# avoid relying on the browser, proxy, server, and CGI.pm preserving the +# number of consecutive slashes as no guarantee can be made there. 
'_name_and_path_from_env' => <<'END_OF_FUNC', sub _name_and_path_from_env { - my $self = shift; - my $raw_script_name = $ENV{SCRIPT_NAME} || ''; - my $raw_path_info = $ENV{PATH_INFO} || ''; - my $uri = unescape($self->request_uri) || ''; - - my $protected = quotemeta($raw_path_info); - $raw_script_name =~ s/$protected$//; - - my @uri_double_slashes = $uri =~ m^(/{2,}?)^g; - my @path_double_slashes = "$raw_script_name $raw_path_info" =~ m^(/{2,}?)^g; - - my $apache_bug = @uri_double_slashes != @path_double_slashes; - return ($raw_script_name,$raw_path_info) unless $apache_bug; - - my $path_info_search = quotemeta($raw_path_info); - $path_info_search =~ s!/!/+!g; - if ($uri =~ m/^(.+)($path_info_search)/) { - return ($1,$2); - } else { - return ($raw_script_name,$raw_path_info); - } + my $self = shift; + my $script_name = $ENV{SCRIPT_NAME} || ''; + my $path_info = $ENV{PATH_INFO} || ''; + my $uri = $self->request_uri || ''; + + $uri =~ s/\?.*//s; + $uri = unescape($uri); + + if ($uri ne "$script_name$path_info") { + my $script_name_pattern = quotemeta($script_name); + my $path_info_pattern = quotemeta($path_info); + $script_name_pattern =~ s{(?:\\/)+}{/+}g; + $path_info_pattern =~ s{(?:\\/)+}{/+}g; + + if ($uri =~ /^($script_name_pattern)($path_info_pattern)$/s) { + # REQUEST_URI and SCRIPT_NAME . PATH_INFO only differ by the + # numer of consecutive slashes, so we can extract the info from + # REQUEST_URI: + ($script_name, $path_info) = ($1, $2); + } + } + return ($script_name,$path_info); } END_OF_FUNC