Remove the restriction that a tar archive contain a single top-level
directory.  Archives with multiple top-level directories, or with no
top-level directory at all, are now processed correctly.
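In minimal form the new approach is (a sketch only; 'archive.tar' and
the variable name are illustrative, not the script's):

    dir=$(mktemp -d x-tar.XXXXXX) || exit 1   # fresh, empty extraction root
    tar -xf archive.tar -C "$dir"             # any top-level layout is fine
    echo "$dir"                               # evaluate this directory itself

Extracting into a dedicated empty directory and evaluating that
directory as a whole makes the archive's internal layout irrelevant.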
Remove the URL regexp restriction.  The script now processes any URL,
as expected by a local requester; security concerns raised by remote
requests should be handled elsewhere.  Define wget as the standard
fallback method for fetching a URL when no other method is suitable
(see the usage sketch after the patch).

* gsv-eval-remote.sh (fetch_package): New function.
* gsv-eval-remote.sh (process_package): New function.
* gsv-eval-remote.sh (TARBALL_*): Replaced by DOWNLOAD_*.
---
 gsv-eval-remote.sh | 163 ++++++++++++++++++++++++++++------------------------
 1 file changed, 85 insertions(+), 78 deletions(-)

diff --git a/gsv-eval-remote.sh b/gsv-eval-remote.sh
index 14f3534..3298dc6 100755
--- a/gsv-eval-remote.sh
+++ b/gsv-eval-remote.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 # Copyright (C) 2014 Assaf Gordon ([email protected])
+# Copyright (C) 2015 Bruno Félix Rezende Ribeiro <[email protected]>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,18 +22,8 @@
 ## on a given directory, then generates an HTML report for it.
 ##
 
-# Size limit of downloadable tarballs (in bytes)
-TARBALL_SIZE_LIMIT=10000000
-
-# Ugly hack:
-# When given a tarball to download, limit the accepted URLs to this
-# (very partial) character set.
-# Since this script will be used from a website, and users can post
-# which even URLs they want, this regex will hopefully avoid some potential
-# problems (such as URLs doing 'GET' requests with CGI parameters).
-# The downside is that some legitimate URLs will not work (e.g.
-# some SourceForge URLs with extra CGI parameters).
-TARBALL_REGEX='^(https?|ftp)://[A-Za-z0-9\_\.\/-]*\.tar\.(gz|bz2|xz)$'
+# Size limit of downloadable files (in bytes)
+DOWNLOAD_SIZE_LIMIT=10000000
 
 OUTPUT_FILE=
 
@@ -49,6 +40,7 @@ usage()
   BASE=$(basename "$0")
   echo "GNU-Savannah Evaluation - helper script
 Copyright (C) 2014 A. Gordon ([email protected])
+Copyright (C) 2015 Bruno Félix Rezende Ribeiro <[email protected]>
 License: GPLv3-or-later
 
 Usage: $BASE [OPTIONS] OUTPUT-HTML PROJECT-NAME SOURCE-URL
@@ -56,15 +48,6 @@ Usage: $BASE [OPTIONS] OUTPUT-HTML PROJECT-NAME SOURCE-URL
 Will download SOURCE-URL, run the gnu-savannal evaluation perl script
 on the download files, and produce an HTML file named OUTPUT-HTML.
 
-SOURCE-URL can be:
-   http://
-   https://
-   ftp://
-   git://
-   tar.gz
-   tar.bz2
-   tar.xz
-
 Options:
   -h = show this help screen.
 
@@ -86,9 +69,80 @@ and generate '/tmp/out.html' report:
 exit 0
 }
 
+fetch_package() {
+
+  ## Find size before download
+  DOWNLOAD_HEAD=$(curl -f --silent -L --insecure --head "$1") \
+      || die "Failed to get size of '$1' (using HTTP HEAD)"
+  DOWNLOAD_SIZE=$(echo "$DOWNLOAD_HEAD" |
+      tr -d '\r' |
+      grep Content-Length |
+      tail -n 1 |
+      awk '{print $2}' ) \
+      || die "failed to get size (content-length) of '$1'"
+  test -z "$DOWNLOAD_SIZE" \
+      && die "failed to get size (content-length) of '$1'"
+  test "$DOWNLOAD_SIZE" -le "$DOWNLOAD_SIZE_LIMIT" \
+      || die "file '$1' size too big ($DOWNLOAD_SIZE)," \
+             "current limit is $DOWNLOAD_SIZE_LIMIT bytes."
+
+  ## a remote wget-fetchable source
+  TMP1=$(basename "$1") \
+      || die "failed to get basename of '$1'"
+  wget -q --no-check-certificate -O "$TMP1" "$1" \
+      || die "failed to download '$1'"
+
+  echo "$TMP1"
+}
+
+process_package() {
+  local DIRECTORY
+
+  case "$(file -b --mime-type "$1") $(file -b "$1")" in
+    *application/gzip*)
+      gunzip "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *application/x-bzip2*)
+      bunzip2 "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *application/x-lzip*)
+      lzip -d "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *application/x-lzma*)
+      unlzma "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *'lzop compressed data'*)
+      lzop -d "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *application/x-xz*)
+      unxz "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *application/x-compress*)
+      compress -d "$1" || die "failed to decompress '$1'"
+      process_package "${1%.*}"
+      ;;
+    *application/x-tar*)
+      DIRECTORY=$(mktemp -d x-tar.XXXXXX) \
+          || die "failed to create temporary directory"
+      tar -xf "$1" -C "$DIRECTORY" || die "failed to extract files from '$1'"
+      cd "$DIRECTORY" || die "failed to CD into '$DIRECTORY'"
+      pwd
+      ;;
+    *)
+      die "there is no known method to process '$1'"
+      ;;
+  esac
+}
+
 test "x$1" = "x-h" && usage
 
-OUTPUT_HTML=$1
+OUTPUT_HTML=$(realpath "$1")
 PROJECT_NAME=$2
 SOURCE=$3
 
@@ -101,6 +155,7 @@ test -z "$SOURCE" \
 touch "$OUTPUT_HTML" \
     || die "failed to create output file '$OUTPUT_HTML'"
 
+
 ## From here on, we can at least log the errors into the output HTML file
 OUTPUT_FILE="$OUTPUT_HTML"
 
@@ -116,19 +171,8 @@ CSS_FILE="$SCRIPTPATH/gsv-eval.css"
 test -e "$CSS_FILE" \
     || die "CSS file ($CSS_FILE) not found"
 
-# Ugly Hack:
-# If given a URL, but one that doesn't match the stricter REGEX, exit
-# with a detailed explanation
-if echo "$SOURCE" | grep -E -q '^(https?|ftp)://' ; then
-  if ! echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ; then
-    die "the given URL ($SOURCE) does not match the stricter URL " \
-        " limitations of this script (which are '$TARBALL_REGEX'). " \
-        "Consider running this script locally."
-  fi
-fi
-
 ##
-## Create temporary directroy to process the file
+## Create temporary directory to process the file
 ##
 DIRECTORY=$(mktemp -d /tmp/gnu_eval.XXXXXX) \
     || die "failed to create temporary directory"
@@ -151,59 +195,22 @@ if echo "$SOURCE" | grep -E -q '^git://|\.git$' ; then
     cd "$SOURCEDIR" \
         || die "failed to CD into source directory '$SOURCEDIR' " \
            "(based on 'git clone $SOURCE')"
-
-elif echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ;
-  then
-    ##
-    ## a Tarball source
-    ##
-
-    ## Find size before download
-    TARBALL_HEAD=$(curl -f --silent -L --insecure --head "$SOURCE") \
-        || die "Failed to get size of '$SOURCE' (using HTTP HEAD)"
-    TARBALL_SIZE=$(echo "$TARBALL_HEAD" |
-            tr -d '\r' |
-            grep Content-Length |
-            tail -n 1 |
-            awk '{print $2}' ) \
-        || die "failed to get size (content-length) of '$SOURCE'"
-    test -z "$TARBALL_SIZE" \
-        && die "failed to get size (content-length) of '$SOURCE'"
-    test "$TARBALL_SIZE" -le "$TARBALL_SIZE_LIMIT" \
-        || die "tarball '$SOURCE' size too big ($TARBALL_SIZE)," \
-               "current limit is $TARBALL_SIZE_LIMIT bytes."
-
-    ## a remote tarball source
-    TMP1=$(basename "$SOURCE") \
-        || die "failed to get basename of '$SOURCE'"
-    wget -q --no-check-certificate -O "$TMP1" "$SOURCE" \
-        || die "failed to download '$SOURCE'"
-
-    ## GNU Tar should automatically detect and uncompress the tarball.
-    tar -xf "$TMP1" \
-        || die "failed to extract files from '$TMP1' (from '$SOURCE')"
-
+else
     ##
-    ## Some tarballs contain directories that are named differently than
-    ## the tarball. Annoying, but common enough.
-    ## So search for one sub-directory.
+    ## a wget-fetchable package
     ##
-    COUNT=$(find . -maxdepth 1 -type d | sed 1d | wc -l)
-    test "$COUNT" -eq 1 \
-        || die "tarball '$SOURCE' contains more than one sub-directory."
-    SOURCEDIR=$(find . -maxdepth 1 -type d | sed 1d)
-    cd "$SOURCEDIR" \
-        || die "failed to CD into '$SOURCEDIR' (extracted from '$SOURCE')"
-else
-    die "Unknown source type (SOURCE) - expecting GIT or TARBALL on HTTP/FTP"
+    PACKAGE_FILE=$(fetch_package "$SOURCE") \
+        || die "failed to fetch '$SOURCE'"
+    PACKAGE_DIRECTORY=$(process_package "$PACKAGE_FILE") \
+        || die "failed to process '$PACKAGE_FILE'"
 fi
 
 ##
 ## Analize the project
 ##
 "$EVAL_SCRIPT" --project "$PROJECT_NAME" \
-    "$DIRECTORY/$SOURCEDIR" > "$DIRECTORY/eval.md" \
+    "${PACKAGE_DIRECTORY:-$DIRECTORY/$SOURCEDIR}" > "$DIRECTORY/eval.md" \
     || die "evaluation script failed (on '$SOURCE')"
 
 pandoc --from markdown \
-- 
2.1.4
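Usage sketch referenced above (the URL is hypothetical; a lzip
tarball is an example of a source the old TARBALL_REGEX rejected but
process_package now handles):

    ./gsv-eval-remote.sh /tmp/out.html hello \
        https://example.org/dist/hello-1.0.tar.lz

Any http/https/ftp URL is now fetched with wget and dispatched on the
output of file(1).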
