Package: release.debian.org Severity: normal Tags: jessie User: release.debian....@packages.debian.org Usertags: pu
Please find attached a proposed patch for #798727 in perl, which I would like to include in an upload to stable. The same issue has already been fixed for libencode-perl in s-p-u (see #807828). Thanks, Dominic.
diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm index 6b35cb7..87bd99c 100644 --- a/cpan/Encode/Unicode/Unicode.pm +++ b/cpan/Encode/Unicode/Unicode.pm @@ -176,7 +176,15 @@ simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE). When BE or LE is omitted during decode(), it checks if BOM is at the beginning of the string; if one is found, the endianness is set to -what the BOM says. If no BOM is found, the routine dies. +what the BOM says. + +=item Default Byte Order + +When no BOM is found, Encode 2.76 and blow croaked. Since Encode +2.77, it falls back to BE accordingly to RFC2781 and the Unicode +Standard version 8.0. This behaviour has also been backported to +Encode 2.60 and later as shipped in the Debian perl package since +version 5.20.2-3+deb8u2 (see L<https://bugs.debian.org/798727>). =item * diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs index cf42ab8..831708c 100644 --- a/cpan/Encode/Unicode/Unicode.xs +++ b/cpan/Encode/Unicode/Unicode.xs @@ -164,9 +164,19 @@ CODE: endian = 'V'; } else { - croak("%"SVf":Unrecognised BOM %"UVxf, - *hv_fetch((HV *)SvRV(obj),"Name",4,0), - bom); + /* No BOM found, use big-endian fallback as specified in + * RFC2781 and the Unicode Standard version 8.0: + * + * The UTF-16 encoding scheme may or may not begin with + * a BOM. However, when there is no BOM, and in the + * absence of a higher-level protocol, the byte order + * of the UTF-16 encoding scheme is big-endian. + * + * If the first two octets of the text is not 0xFE + * followed by 0xFF, and is not 0xFF followed by 0xFE, + * then the text SHOULD be interpreted as big-endian. 
+ */ + s -= size; } } #if 1 diff --git a/debian/control b/debian/control index aae4f45..ad2264c 100644 --- a/debian/control +++ b/debian/control @@ -316,7 +316,7 @@ Breaks: perl-doc (<< ${Upstream-Version}-1), libthreads-perl (<< 1.93), libthreads-shared-perl (<< 1.46), libtime-piece-perl (<< 1.27), - libencode-perl (<< 2.60), + libencode-perl (<< 2.63-1+deb8u1), mrtg (<< 2.16.3-3.1), libhtml-template-compiled-perl (<< 0.95-1), libperl-apireference-perl (<< 0.09-1), diff --git a/debian/patches/debian/encode-unicode-bom-doc.diff b/debian/patches/debian/encode-unicode-bom-doc.diff new file mode 100644 index 0000000..ccd0108 --- /dev/null +++ b/debian/patches/debian/encode-unicode-bom-doc.diff @@ -0,0 +1,26 @@ +From c6c7ea3c32d8de7bec4e4e155db9b54b9315ebf0 Mon Sep 17 00:00:00 2001 +From: Dominic Hargreaves <d...@earth.li> +Date: Wed, 30 Dec 2015 20:45:28 +0000 +Subject: Document Debian backport of Encode::Unicode fix + +Bug-Debian: https://bugs.debian.org/798727 +Patch-Name: debian/encode-unicode-bom-doc.diff +--- + cpan/Encode/Unicode/Unicode.pm | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm +index 9b197d9..87bd99c 100644 +--- a/cpan/Encode/Unicode/Unicode.pm ++++ b/cpan/Encode/Unicode/Unicode.pm +@@ -182,7 +182,9 @@ what the BOM says. + + When no BOM is found, Encode 2.76 and blow croaked. Since Encode + 2.77, it falls back to BE accordingly to RFC2781 and the Unicode +-Standard version 8.0 ++Standard version 8.0. This behaviour has also been backported to ++Encode 2.60 and later as shipped in the Debian perl package since ++version 5.20.2-3+deb8u2 (see L<https://bugs.debian.org/798727>). 
+ + =item * + diff --git a/debian/patches/fixes/encode-unicode-bom.diff b/debian/patches/fixes/encode-unicode-bom.diff new file mode 100644 index 0000000..7b8a512 --- /dev/null +++ b/debian/patches/fixes/encode-unicode-bom.diff @@ -0,0 +1,62 @@ +From 742bc306bb927745b3d832cdbae7075cd23536db Mon Sep 17 00:00:00 2001 +From: Dan Kogai <dankogai+git...@gmail.com> +Date: Tue, 15 Sep 2015 22:49:12 +0900 +Subject: Address https://rt.cpan.org/Public/Bug/Display.html?id=107043 + +Backported to Encode 2.72 by Niko Tyni <nt...@debian.org> + +Origin: backport, https://github.com/dankogai/p5-encode/commit/27682d02f7ac0669043faeb419dd5a104eecfb73 +Bug: https://rt.cpan.org/Public/Bug/Display.html?id=107043 +Bug-Debian: https://bugs.debian.org/798727 +Patch-Name: fixes/encode-unicode-bom.diff +--- + cpan/Encode/Unicode/Unicode.pm | 8 +++++++- + cpan/Encode/Unicode/Unicode.xs | 16 +++++++++++++--- + 2 files changed, 20 insertions(+), 4 deletions(-) + +diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm +index 6b35cb7..9b197d9 100644 +--- a/cpan/Encode/Unicode/Unicode.pm ++++ b/cpan/Encode/Unicode/Unicode.pm +@@ -176,7 +176,13 @@ simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE). + + When BE or LE is omitted during decode(), it checks if BOM is at the + beginning of the string; if one is found, the endianness is set to +-what the BOM says. If no BOM is found, the routine dies. ++what the BOM says. ++ ++=item Default Byte Order ++ ++When no BOM is found, Encode 2.76 and blow croaked. 
Since Encode ++2.77, it falls back to BE accordingly to RFC2781 and the Unicode ++Standard version 8.0 + + =item * + +diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs +index cf42ab8..831708c 100644 +--- a/cpan/Encode/Unicode/Unicode.xs ++++ b/cpan/Encode/Unicode/Unicode.xs +@@ -164,9 +164,19 @@ CODE: + endian = 'V'; + } + else { +- croak("%"SVf":Unrecognised BOM %"UVxf, +- *hv_fetch((HV *)SvRV(obj),"Name",4,0), +- bom); ++ /* No BOM found, use big-endian fallback as specified in ++ * RFC2781 and the Unicode Standard version 8.0: ++ * ++ * The UTF-16 encoding scheme may or may not begin with ++ * a BOM. However, when there is no BOM, and in the ++ * absence of a higher-level protocol, the byte order ++ * of the UTF-16 encoding scheme is big-endian. ++ * ++ * If the first two octets of the text is not 0xFE ++ * followed by 0xFF, and is not 0xFF followed by 0xFE, ++ * then the text SHOULD be interpreted as big-endian. ++ */ ++ s -= size; + } + } + #if 1 diff --git a/debian/patches/series b/debian/patches/series index 9e6e84a..4bd37be 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -41,3 +41,5 @@ fixes/failed_require_diagnostics.diff fixes/array-cloning.diff fixes/perldb-threads.diff +fixes/encode-unicode-bom.diff +debian/encode-unicode-bom-doc.diff