Swapping the order in which the arrays are defined, as in this updated patch,
fixes the stack smashing seen with the stack protector enabled; it may be a bug
in GCC [1]. That is the ideal outcome, so if you have not uploaded the previous
patch yet, prefer this one.
Hopefully this will also help on aarch64; otherwise, let's just try Clang.
See the run in [2]. I'll run it against aarch64 too once Launchpad finally
decides to give it a builder.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115965
[2]
https://autopkgtest.ubuntu.com/results/autopkgtest-oracular-nteodosio-rebuilds/oracular/amd64/h/highway/20240717_113010_6b611@/log.gz
--- highway-1.2.0/debian/tests/compile-and-execute 1970-01-01 01:00:00.000000000 +0100
+++ highway-1.2.0/debian/tests/compile-and-execute 2024-07-16 13:00:51.000000000 +0200
@@ -0,0 +1,60 @@
+#!/bin/sh
+# autopkgtest: compile a small program against the installed libhwy and run it.
+set -ex
+# -f: the binary does not exist if compilation failed.
+trap 'rm -f o' 0
+cflags=$(pkg-config --cflags --libs libhwy)
+# The delimiter is quoted so the shell does not expand anything in the C++ source.
+c++ -Wall -Wextra -Wpedantic $cflags -x c++ - -o o <<'EOF'
+#include <hwy/highway.h>
+#include <stdio.h>
+namespace hn = hwy::HWY_NAMESPACE;
+using T = int;
+// Computes x_array[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
+// The arrays need not be padded or vector-aligned: whole vectors use unaligned
+// accesses and the final partial vector goes through LoadN/StoreN, so nothing
+// beyond the first size elements is read or written. Whole-vector Load/Store
+// on every step would run past the end of arrays such as the 5-element ones in
+// main whenever size is not a multiple of Lanes(d).
+void MulAddLoop(const T* HWY_RESTRICT mul_array,
+                const T* HWY_RESTRICT add_array,
+                const size_t size, T* HWY_RESTRICT x_array) {
+  const hn::ScalableTag<T> d;
+  const size_t lanes = hn::Lanes(d);
+  size_t i = 0;
+  // Whole vectors.
+  for (; i + lanes <= size; i += lanes) {
+    const auto mul = hn::LoadU(d, mul_array + i);
+    const auto add = hn::LoadU(d, add_array + i);
+    const auto x = hn::LoadU(d, x_array + i);
+    hn::StoreU(hn::MulAdd(mul, x, add), d, x_array + i);
+  }
+  // Remaining size - i < lanes elements, if any.
+  if (i < size) {
+    const size_t remaining = size - i;
+    const auto mul = hn::LoadN(d, mul_array + i, remaining);
+    const auto add = hn::LoadN(d, add_array + i, remaining);
+    const auto x = hn::LoadN(d, x_array + i, remaining);
+    hn::StoreN(hn::MulAdd(mul, x, add), d, x_array + i, remaining);
+  }
+}
+// Runs MulAddLoop on 5-element arrays and compares the result with expect.
+// Returns 0 on success; on a mismatch prints every element and returns 52.
+int main() {
+  T c[] = {-1, -1, -1, -1, -1};
+  const T a[] = {1, 2, 3, 4, 5};
+  const T b[] = {6, 7, 9, 0, 0};
+  const T expect[] = {5, 5, 6, -4, -5};
+  const size_t size = sizeof(c) / sizeof(c[0]);
+  MulAddLoop(a, b, size, c);
+  for (size_t i = 0; i < size; i++) {
+    if (c[i] != expect[i]) {
+      for (size_t j = 0; j < size; j++) {
+        printf("%zuth element is %d, expected %d.\n", j, c[j], expect[j]);
+      }
+      return 52;
+    }
+  }
+  return 0;
+}
+EOF
+./o
--- highway-1.2.0/debian/tests/control 1970-01-01 01:00:00.000000000 +0100
+++ highway-1.2.0/debian/tests/control 2024-07-16 13:00:51.000000000 +0200
@@ -0,0 +1,3 @@
+Tests: compile-and-execute
+Depends: libhwy-dev, g++, pkgconf
+Restrictions: allow-stderr