Hi
Am 2025-11-03 15:51, schrieb Gina P. Banyard:
> While the < > syntax to "force" the endianness of a sequence specifier
> is nice, if this requires rewriting the whole parser as this RFC implies,
> then you are asking someone to commit to a larger amount of work than
> they signed up for, which is considered bad RFC etiquette. [1]
I disagree with that claim in the RFC and, to put my money where my mouth
is, I have spent the 15 minutes it took to write the necessary patch for the
pack() function. It is attached to this email and also available as this
gist: https://gist.github.com/TimWolla/d8bca56a6507226e684827d2a7b44829.
Given the time spent, I've only given it light testing, but it passes
all existing `pack()` tests and returns the correct output for:
<?php
var_dump(bin2hex(pack('s<2s>2', 258, -2, 258, -2)));
var_dump(bin2hex(pack('a>', 258)));
I used `perl -e "print pack('s<2s>2', 258, -2, 258, -2)" |xxd` as a
comparison. I have not created the patch for `unpack()`, but I believe
this is already a sufficient demonstration that “rewriting the whole
parser” is not necessary at all.
Best regards
Tim Düsterhus
From 11aa9d37b0c06b678a27db8e37d4dc817dead4bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= <[email protected]>
Date: Mon, 3 Nov 2025 16:57:39 +0100
Subject: [PATCH] pack: Support endian specifier
---
ext/standard/pack.c | 59 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 51 insertions(+), 8 deletions(-)
diff --git a/ext/standard/pack.c b/ext/standard/pack.c
index 55da64897a2..5be2c0260bc 100644
--- a/ext/standard/pack.c
+++ b/ext/standard/pack.c
@@ -30,6 +30,7 @@
outputpos += (a)*(b);
typedef enum {
+ PHP_DEFAULT_ENDIAN,
PHP_LITTLE_ENDIAN,
PHP_BIG_ENDIAN,
} php_pack_endianness;
@@ -193,6 +194,11 @@ static double php_pack_parse_double(int is_little_endian, void * src)
}
/* }}} */
+struct formatarg {
+ int count;
+ php_pack_endianness endianness;
+};
+
/* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
* Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, q, Q, J, P, f, d, x, X, @.
* Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
@@ -207,7 +213,7 @@ PHP_FUNCTION(pack)
char *format;
size_t formatlen;
char *formatcodes;
- int *formatargs;
+ struct formatarg *formatargs;
size_t formatcount = 0;
int outputpos = 0, outputsize = 0;
zend_string *output;
@@ -226,9 +232,11 @@ PHP_FUNCTION(pack)
for (i = 0; i < formatlen; formatcount++) {
char code = format[i++];
int arg = 1;
+ php_pack_endianness endianness = PHP_DEFAULT_ENDIAN;
/* Handle format arguments if any */
if (i < formatlen) {
+ again:;
char c = format[i];
if (c == '*') {
@@ -242,6 +250,11 @@ PHP_FUNCTION(pack)
i++;
}
}
+ else if (c == '<' || c == '>') {
+ endianness = c == '<' ? PHP_LITTLE_ENDIAN : PHP_BIG_ENDIAN;
+ i++;
+ goto again;
+ }
}
/* Handle special arg '*' for all codes and check argv overflows */
@@ -341,8 +354,20 @@ PHP_FUNCTION(pack)
RETURN_THROWS();
}
+ if (endianness != PHP_DEFAULT_ENDIAN) {
+ if (strchr("sSiIlLqQf", code) == NULL) {
+ efree(formatcodes);
+ efree(formatargs);
+ zend_value_error("Type %c: Endianness may not be specified", code);
+ RETURN_THROWS();
+ }
+ }
+
formatcodes[formatcount] = code;
- formatargs[formatcount] = arg;
+ formatargs[formatcount] = (struct formatarg){
+ .count = arg,
+ .endianness = endianness,
+ };
}
if (currentarg < num_args) {
@@ -352,7 +377,7 @@ PHP_FUNCTION(pack)
/* Calculate output length and upper bound while processing*/
for (i = 0; i < formatcount; i++) {
char code = formatcodes[i];
- int arg = formatargs[i];
+ int arg = formatargs[i].count;
switch (code) {
case 'h':
@@ -435,7 +460,7 @@ PHP_FUNCTION(pack)
/* Do actual packing */
for (i = 0; i < formatcount; i++) {
char code = formatcodes[i];
- int arg = formatargs[i];
+ int arg = formatargs[i].count;
switch (code) {
case 'a':
@@ -517,6 +542,10 @@ PHP_FUNCTION(pack)
endianness = PHP_LITTLE_ENDIAN;
}
+ if (formatargs[i].endianness != PHP_DEFAULT_ENDIAN) {
+ endianness = formatargs[i].endianness;
+ }
+
while (arg-- > 0) {
php_pack(&argv[currentarg++], 2, endianness, &ZSTR_VAL(output)[outputpos]);
outputpos += 2;
@@ -525,12 +554,18 @@ PHP_FUNCTION(pack)
}
case 'i':
- case 'I':
+ case 'I': {
+ php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
+ if (formatargs[i].endianness != PHP_DEFAULT_ENDIAN) {
+ endianness = formatargs[i].endianness;
+ }
+
while (arg-- > 0) {
- php_pack(&argv[currentarg++], sizeof(int), PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
+ php_pack(&argv[currentarg++], sizeof(int), endianness, &ZSTR_VAL(output)[outputpos]);
outputpos += sizeof(int);
}
break;
+ }
case 'l':
case 'L':
@@ -543,6 +578,9 @@ PHP_FUNCTION(pack)
} else if (code == 'V') {
endianness = PHP_LITTLE_ENDIAN;
}
+ if (formatargs[i].endianness != PHP_DEFAULT_ENDIAN) {
+ endianness = formatargs[i].endianness;
+ }
while (arg-- > 0) {
php_pack(&argv[currentarg++], 4, endianness, &ZSTR_VAL(output)[outputpos]);
@@ -573,6 +611,11 @@ PHP_FUNCTION(pack)
#endif
case 'f': {
+ if (formatargs[i].endianness == PHP_LITTLE_ENDIAN) {
+ goto g;
+ } else if (formatargs[i].endianness == PHP_BIG_ENDIAN) {
+ goto G;
+ }
while (arg-- > 0) {
float v = (float) zval_get_double(&argv[currentarg++]);
memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
@@ -582,7 +625,7 @@ PHP_FUNCTION(pack)
}
case 'g': {
- /* pack little endian float */
+ g: /* pack little endian float */
while (arg-- > 0) {
float v = (float) zval_get_double(&argv[currentarg++]);
php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
@@ -592,7 +635,7 @@ PHP_FUNCTION(pack)
break;
}
case 'G': {
- /* pack big endian float */
+ G: /* pack big endian float */
while (arg-- > 0) {
float v = (float) zval_get_double(&argv[currentarg++]);
php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
--
2.43.0