ARM status

Nicholas Clark Mon, 05 May 2014 14:03:25 -0700

I managed to complete a build in the past 16 hours on a Raspberry Pi
using -fsigned-char (and a lot of swapping)


With the fixes in the previous 4 patches, the NQP build still crashes, on
about the third thing run, with a SIGBUS.

Fortunately, that turns out to be rather easy to diagnose, particularly as
dmesg confirms it - unaligned read.

It seems that the ARMv6 with hardfloat insists that doubles are 8 byte
aligned, and kills anything that ignores this. This is perfectly legitimate
behaviour, but we've not had to deal with this yet.

The attached hack gets the build to complete.
(NQP in about 75 minutes, and then Rakudo in maybe 12 hours - I didn't have
great timings)

Anyway, this is "for information". It needs cleaning up, to only use memmove()
on architectures that need it. I think that Configure.pl should be figuring
that out by probing, rather than having a hard-coded list which gets it wrong.

Although the counter argument is that anything catching these things with a
CPU alignment trap (as you *can* do on ARM, if you want to, and I think
Alphas did) will pass a probe test, without anything realising that it's
actually horribly inefficient. Whereas a hard-coded list would say "use
memmove", and be optimal.

Additionally, I think that we might need MVMint64 done with memmove() on
(at least) ARM. I've been building using clang with no optimisation yet,
and I think that ARM at some point introduced 64-bit load instructions.
But I'm not sure which architecture version (Pis are ARMv6, which is almost
prehistoric)

Also, I wouldn't be surprised if at least one of Power, SPARC or PA-RISC
will need it (not tested there yet).
Possibly also for 32 bit values, as the MoarVM stream is 2-byte aligned.

If anyone is curious, I'm building on an early Raspberry Pi model B, so
only 256MB of RAM, with the GPU minimised to 16MB, overclocked to 950MHz.
It's running with current Raspbian (jessie, so Debian testing), and the
real work is going on on an external USB drive, which also has 1GB of swap
on it ("I hear swapping, so much swapping"). Clang is:
Raspbian clang version 3.4-2 (tags/RELEASE_34/final) (based on LLVM 3.4)
Target: arm-unknown-linux-gnueabihf
Thread model: posix

It was kind of chance that I tried clang first, instead of gcc 4.8.2.
I think we max out at something like 480MB of swap used.
I'm not too worried about that currently - most folks will be using stuff
from packages. E.g., here's what apt gave me:

This is perl6 version 2014.03.01 built on parrot 6.0.0 revision 0

Nicholas Clark

From a1ff1a2d5dd49ef25ca0e962fd026e694cbb26fc Mon Sep 17 00:00:00 2001
From: Nicholas Clark <n...@ccl4.org>
Date: Mon, 5 May 2014 19:51:38 +0200
Subject: [PATCH 5/5] XXX Hacky removal of GET_N64

---
 src/core/bytecodedump.c | 3 +--
 src/core/interp.c       | 5 ++---
 src/core/interp.h       | 6 ++++++
 src/core/validation.c   | 1 -
 src/spesh/graph.c       | 3 +--
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/core/bytecodedump.c b/src/core/bytecodedump.c
index 80c93eb..1ff46f5 100644
--- a/src/core/bytecodedump.c
+++ b/src/core/bytecodedump.c
@@ -48,7 +48,6 @@ static const char * get_typename(MVMuint16 type) {
 #define GET_I64(pc, idx)    *((MVMint64 *)(pc + idx))
 #define GET_UI64(pc, idx)   *((MVMuint64 *)(pc + idx))
 #define GET_N32(pc, idx)    *((MVMnum32 *)(pc + idx))
-#define GET_N64(pc, idx)    *((MVMnum64 *)(pc + idx))
 
 enum {
     MVM_val_branch_target = 1,
@@ -238,7 +237,7 @@ char * MVM_bytecode_dump(MVMThreadContext *tc, MVMCompUnit *cu) {
                         break;
                     case MVM_operand_num64:
                         operand_size = 8;
-                        a("%f", GET_N64(cur_op, 0));
+                        a("%f", MVM_BC_get_N64(cur_op, 0));
                         break;
                     case MVM_operand_callsite:
                         operand_size = 2;
diff --git a/src/core/interp.c b/src/core/interp.c
index 341a97e..c365b19 100644
--- a/src/core/interp.c
+++ b/src/core/interp.c
@@ -12,7 +12,6 @@
 #define GET_I64(pc, idx)    *((MVMint64 *)(pc + idx))
 #define GET_UI64(pc, idx)   *((MVMuint64 *)(pc + idx))
 #define GET_N32(pc, idx)    *((MVMnum32 *)(pc + idx))
-#define GET_N64(pc, idx)    *((MVMnum64 *)(pc + idx))
 
 #define NEXT_OP (op = *(MVMuint16 *)(cur_op), cur_op += 2, op)
 
@@ -307,7 +306,7 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex
             OP(const_n32):
                 MVM_exception_throw_adhoc(tc, "const_n32 NYI");
             OP(const_n64):
-                GET_REG(cur_op, 0).n64 = GET_N64(cur_op, 2);
+                GET_REG(cur_op, 0).n64 = MVM_BC_get_N64(cur_op, 2);
                 cur_op += 10;
                 goto NEXT;
             OP(const_s):
@@ -564,7 +563,7 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex
                 cur_op += 10;
                 goto NEXT;
             OP(argconst_n):
-                tc->cur_frame->args[GET_UI16(cur_op, 0)].n64 = GET_N64(cur_op, 2);
+                tc->cur_frame->args[GET_UI16(cur_op, 0)].n64 = MVM_BC_get_N64(cur_op, 2);
                 cur_op += 10;
                 goto NEXT;
             OP(argconst_s):
diff --git a/src/core/interp.h b/src/core/interp.h
index cdaf384..d667c57 100644
--- a/src/core/interp.h
+++ b/src/core/interp.h
@@ -77,3 +77,9 @@ struct MVMOpInfo {
 /* Functions. */
 void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContext *, void *), void *invoke_data);
 MVM_PUBLIC void MVM_interp_enable_tracing();
+
+MVM_STATIC_INLINE MVMnum64 MVM_BC_get_N64(const MVMuint8 *cur_op, unsigned int offset) {
+    MVMnum64 temp;
+    memmove(&temp, cur_op + offset, sizeof(MVMnum64));
+    return temp;
+}
diff --git a/src/core/validation.c b/src/core/validation.c
index 5455ea0..df6446c 100644
--- a/src/core/validation.c
+++ b/src/core/validation.c
@@ -16,7 +16,6 @@
 #define GET_I64(pc, idx)    *((MVMint64 *)(pc + idx))
 #define GET_UI64(pc, idx)   *((MVMuint64 *)(pc + idx))
 #define GET_N32(pc, idx)    *((MVMnum32 *)(pc + idx))
-#define GET_N64(pc, idx)    *((MVMnum64 *)(pc + idx))
 
 #define MSG(val, msg) "Bytecode validation error at offset %" PRIu32 \
     ", instruction %" PRIu32 ":\n" msg, \
diff --git a/src/spesh/graph.c b/src/spesh/graph.c
index 7bb80bf..d1689fd 100644
--- a/src/spesh/graph.c
+++ b/src/spesh/graph.c
@@ -15,7 +15,6 @@
 #define GET_I64(pc, idx)    *((MVMint64 *)(pc + idx))
 #define GET_UI64(pc, idx)   *((MVMuint64 *)(pc + idx))
 #define GET_N32(pc, idx)    *((MVMnum32 *)(pc + idx))
-#define GET_N64(pc, idx)    *((MVMnum64 *)(pc + idx))
 
 /* Allocate a piece of memory from the spesh graph's buffer. Deallocated when
  * the spesh graph is. */
@@ -172,7 +171,7 @@ static void build_cfg(MVMThreadContext *tc, MVMSpeshGraph *g, MVMStaticFrame *sf
                     arg_size += 4;
                     break;
                 case MVM_operand_num64:
-                    ins_node->operands[i].lit_n64 = GET_N64(args, arg_size);
+                    ins_node->operands[i].lit_n64 = MVM_BC_get_N64(args, arg_size);
                     arg_size += 8;
                     break;
                 case MVM_operand_callsite:
-- 
1.8.4.2

ARM status

Reply via email to