This patch adds 5 new VM instructions (br-if-f64-<, br-if-f64-<=, br-if-f64-=, br-if-f64->, br-if-f64->=) and a compiler optimization to perform unboxed floating point number comparisons where possible.
Take this contrived example code: (lambda () (let ((foo (f64vector 1 2 3))) (< (f64vector-ref foo 0) (f64vector-ref foo 1)))) Here is the disassembly without the optimization: 0 (assert-nargs-ee/locals 1 6) ;; 7 slots (0 args) at (unknown file):131:3 1 (make-short-immediate 6 1028) ;; #t 2 (toplevel-box 5 104 88 102 #t) ;; `f64vector' 7 (box-ref 3 5) 8 (make-short-immediate 2 6) ;; 1 9 (make-short-immediate 1 10) ;; 2 10 (make-short-immediate 0 14) ;; 3 11 (handle-interrupts) at (unknown file):132:37 12 (call 3 4) 14 (receive 1 3 7) 16 (load-u64 4 0 0) at (unknown file):133:31 19 (bv-f64-ref 4 5 4) 20 (f64->scm 4 4) 21 (load-u64 3 0 8) at (unknown file):134:31 24 (bv-f64-ref 5 5 3) 25 (f64->scm 5 5) 26 (br-if-< 4 5 #f 4) ;; -> L1 at (unknown file):133:28 29 (make-short-immediate 6 4) ;; #f L1: 30 (handle-interrupts) 31 (mov 5 6) 32 (return-values 2) ;; 1 value And here is the disassembly with the optimization: 0 (assert-nargs-ee/locals 1 6) ;; 7 slots (0 args) at (unknown file):1:3 1 (make-short-immediate 6 1028) ;; #t 2 (toplevel-box 5 102 86 100 #t) ;; `f64vector' 7 (box-ref 3 5) 8 (make-short-immediate 2 6) ;; 1 9 (make-short-immediate 1 10) ;; 2 10 (make-short-immediate 0 14) ;; 3 11 (handle-interrupts) at (unknown file):2:37 12 (call 3 4) 14 (receive 1 3 7) 16 (load-u64 4 0 0) at (unknown file):3:31 19 (bv-f64-ref 4 5 4) 20 (load-u64 3 0 8) at (unknown file):4:31 23 (bv-f64-ref 5 5 3) 24 (br-if-f64-< 4 5 #f 4) ;; -> #f at (unknown file):3:28 27 (make-short-immediate 6 4) ;; #f 28 (handle-interrupts) 29 (mov 5 6) 30 (return-values 2) ;; 1 value Much better! The f64->scm instructions have been eliminated. This greatly improves performance for things like realtime simulations that do lots of floating point vector and matrix arithmetic. Many thanks to Andy for already implementing this optimization for u64s which I shamelessly copied from and for the additional guidance on IRC.
>From 5f97216c1d19e9302903235da6e89b164d10ba30 Mon Sep 17 00:00:00 2001 From: David Thompson <dthomps...@worcester.edu> Date: Mon, 12 Dec 2016 22:46:08 -0500 Subject: [PATCH] Add unboxed floating point comparison instructions. * libguile/vm-engine.c (BR_F64_ARITHMETIC): New preprocessor macro. (br_if_f64_ee, br_if_f64_lt, br_if_f64_le, br_if_f64_gt, br_if_f64_ge): New VM instructions. * module/language/cps/compile-bytecode.scm (compile-function): Emit f64 comparison instructions. * module/language/cps/effects-analysis.scm: Define effects for f64 primcalls. * module/language/cps/primitives.scm (*branching-primcall-arities*): Add arities for f64 primcalls. * module/language/cps/specialize-numbers.scm (specialize-f64-comparison): New procedure. (specialize-operations): Specialize f64 comparisons. * module/language/cps/type-fold.scm: Define branch folder aliases for f64 primcalls. * module/language/cps/types.scm: Define type checkers and comparison inferrers for f64 primcalls. (&max/f64, define-f64-comparison-inferrer): New syntax. (infer-f64-comparison-ranges): New procedure. * module/system/vm/assembler.scm (emit-br-if-f64-=, emit-br-if-f64-<) (emit-br-if-f64-<=, emit-br-if-f64->, emit-br-if-f64->=): Export. * module/system/vm/disassembler.scm (code-annotation): Add annotations for f64 comparison instructions. --- libguile/vm-engine.c | 68 +++++++++++++++++++++++++++--- module/language/cps/compile-bytecode.scm | 7 ++- module/language/cps/effects-analysis.scm | 5 +++ module/language/cps/primitives.scm | 7 ++- module/language/cps/specialize-numbers.scm | 49 +++++++++++++++------ module/language/cps/type-fold.scm | 5 +++ module/language/cps/types.scm | 30 +++++++++++++ module/system/vm/assembler.scm | 5 +++ module/system/vm/disassembler.scm | 2 + 9 files changed, 157 insertions(+), 21 deletions(-) diff --git a/libguile/vm-engine.c b/libguile/vm-engine.c index 4406845..6a7ba51 100644 --- a/libguile/vm-engine.c +++ b/libguile/vm-engine.c @@ -358,6 +358,24 @@ NEXT (3); \ } +#define BR_F64_ARITHMETIC(crel) \ + { \ + scm_t_uint32 a, b; \ + scm_t_uint64 x, y; \ + UNPACK_24 (op, a); \ + UNPACK_24 (ip[1], b); \ + x = SP_REF_F64 (a); \ + y = SP_REF_F64 (b); \ + if ((ip[2] & 0x1) ? !(x crel y) : (x crel y)) \ + { \ + scm_t_int32 offset = ip[2]; \ + offset >>= 8; /* Sign-extending shift. */ \ + NEXT (offset); \ + } \ + NEXT (3); \ + } + + #define ARGS1(a1) \ scm_t_uint16 dst, src; \ SCM a1; \ @@ -3950,11 +3968,51 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp, NEXT (1); } - VM_DEFINE_OP (187, unused_187, NULL, NOP) - VM_DEFINE_OP (188, unused_188, NULL, NOP) - VM_DEFINE_OP (189, unused_189, NULL, NOP) - VM_DEFINE_OP (190, unused_190, NULL, NOP) - VM_DEFINE_OP (191, unused_191, NULL, NOP) + /* br-if-f64= a:12 b:12 invert:1 _:7 offset:24 + * + * If the F64 value in A is = to the value in B, add OFFSET, a signed + * 24-bit number, to the current instruction pointer. + */ + VM_DEFINE_OP (187, br_if_f64_ee, "br-if-f64-=", OP3 (X8_S24, X8_S24, B1_X7_L24)) + { + BR_F64_ARITHMETIC (==); + } + + /* br-if-f64< a:12 b:12 invert:1 _:7 offset:24 + * + * If the F64 value in A is < to the value in B, add OFFSET, a signed + * 24-bit number, to the current instruction pointer. + */ + VM_DEFINE_OP (188, br_if_f64_lt, "br-if-f64-<", OP3 (X8_S24, X8_S24, B1_X7_L24)) + { + BR_F64_ARITHMETIC (<); + } + + VM_DEFINE_OP (189, br_if_f64_le, "br-if-f64-<=", OP3 (X8_S24, X8_S24, B1_X7_L24)) + { + BR_F64_ARITHMETIC (<=); + } + + /* br-if-f64-> a:24 _:8 b:24 invert:1 _:7 offset:24 + * + * If the F64 value in A is > than the SCM value in B, add OFFSET, a + * signed 24-bit number, to the current instruction pointer. + */ + VM_DEFINE_OP (190, br_if_f64_gt, "br-if-f64->", OP3 (X8_S24, X8_S24, B1_X7_L24)) + { + BR_F64_ARITHMETIC (>); + } + + /* br-if-uf4->= a:24 _:8 b:24 invert:1 _:7 offset:24 + * + * If the F64 value in A is >= than the SCM value in B, add OFFSET, a + * signed 24-bit number, to the current instruction pointer. + */ + VM_DEFINE_OP (191, br_if_f64_ge, "br-if-f64->=", OP3 (X8_S24, X8_S24, B1_X7_L24)) + { + BR_F64_ARITHMETIC (>=); + } + VM_DEFINE_OP (192, unused_192, NULL, NOP) VM_DEFINE_OP (193, unused_193, NULL, NOP) VM_DEFINE_OP (194, unused_194, NULL, NOP) diff --git a/module/language/cps/compile-bytecode.scm b/module/language/cps/compile-bytecode.scm index db5b8fa..a3f8ba4 100644 --- a/module/language/cps/compile-bytecode.scm +++ b/module/language/cps/compile-bytecode.scm @@ -446,7 +446,12 @@ (($ $primcall 'u64-=-scm (a b)) (binary emit-br-if-u64-=-scm a b)) (($ $primcall 'u64->=-scm (a b)) (binary emit-br-if-u64->=-scm a b)) (($ $primcall 'u64->-scm (a b)) (binary emit-br-if-u64->-scm a b)) - (($ $primcall 'logtest (a b)) (binary emit-br-if-logtest a b)))) + (($ $primcall 'logtest (a b)) (binary emit-br-if-logtest a b)) + (($ $primcall 'f64-< (a b)) (binary emit-br-if-f64-< a b)) + (($ $primcall 'f64-<= (a b)) (binary emit-br-if-f64-<= a b)) + (($ $primcall 'f64-= (a b)) (binary emit-br-if-f64-= a b)) + (($ $primcall 'f64->= (a b)) (binary emit-br-if-f64->= a b)) + (($ $primcall 'f64-> (a b)) (binary emit-br-if-f64-> a b)))) (define (compile-trunc label k exp nreq rest-var) (define (do-call proc args emit-call) diff --git a/module/language/cps/effects-analysis.scm b/module/language/cps/effects-analysis.scm index 9ce6585..f1833bb 100644 --- a/module/language/cps/effects-analysis.scm +++ b/module/language/cps/effects-analysis.scm @@ -439,6 +439,11 @@ is or might be a read or a write to the same location as A." ((u64-=-scm . _) &type-check) ((u64->=-scm . _) &type-check) ((u64->-scm . _) &type-check) + ((f64-= . _)) + ((f64-< . _)) + ((f64-> . _)) + ((f64-<= . _)) + ((f64->= . _)) ((zero? . _) &type-check) ((add . _) &type-check) ((add/immediate . _) &type-check) diff --git a/module/language/cps/primitives.scm b/module/language/cps/primitives.scm index bc03c98..a3e6e38 100644 --- a/module/language/cps/primitives.scm +++ b/module/language/cps/primitives.scm @@ -99,7 +99,12 @@ (u64-=-scm . (1 . 2)) (u64->=-scm . (1 . 2)) (u64->-scm . (1 . 2)) - (logtest . (1 . 2)))) + (logtest . (1 . 2)) + (f64-= . (1 . 2)) + (f64-< . (1 . 2)) + (f64-> . (1 . 2)) + (f64-<= . (1 . 2)) + (f64->= . (1 . 2)))) (define (compute-prim-instructions) (let ((table (make-hash-table))) diff --git a/module/language/cps/specialize-numbers.scm b/module/language/cps/specialize-numbers.scm index d9fe76c..6c8627a 100644 --- a/module/language/cps/specialize-numbers.scm +++ b/module/language/cps/specialize-numbers.scm @@ -144,6 +144,20 @@ ($continue kop src ($primcall 'scm->u64 (a-u64))))))) +(define (specialize-f64-comparison cps kf kt src op a b) + (let ((op (symbol-append 'f64- op))) + (with-cps cps + (letv f64-a f64-b) + (letk kop ($kargs ('f64-b) (f64-b) + ($continue kf src + ($branch kt ($primcall op (f64-a f64-b)))))) + (letk kunbox-b ($kargs ('f64-a) (f64-a) + ($continue kop src + ($primcall 'scm->f64 (b))))) + (build-term + ($continue kunbox-b src + ($primcall 'scm->f64 (a))))))) + (define (sigbits-union x y) (and x y (logior x y))) @@ -283,6 +297,8 @@ BITS indicating the significant bits needed for a variable. BITS may be (lambda (type min max) (and (eqv? type &exact-integer) (<= 0 min max #xffffffffffffffff)))))) + (define (f64-operand? var) + (operand-in-range? var &flonum -inf.0 +inf.0)) (match cont (($ $kfun) (let ((types (infer-types cps label))) @@ -387,20 +403,25 @@ BITS indicating the significant bits needed for a variable. BITS may be ($ $continue k src ($ $branch kt ($ $primcall (and op (or '< '<= '= '>= '>)) (a b))))) (values - (if (u64-operand? a) - (let ((specialize (if (u64-operand? b) - specialize-u64-comparison - specialize-u64-scm-comparison))) - (with-cps cps - (let$ body (specialize k kt src op a b)) - (setk label ($kargs names vars ,body)))) - (if (u64-operand? b) - (let ((op (match op - ('< '>) ('<= '>=) ('= '=) ('>= '<=) ('> '<)))) - (with-cps cps - (let$ body (specialize-u64-scm-comparison k kt src op b a)) - (setk label ($kargs names vars ,body)))) - cps)) + (cond + ((or (f64-operand? a) (f64-operand? b)) + (with-cps cps + (let$ body (specialize-f64-comparison k kt src op a b)) + (setk label ($kargs names vars ,body)))) + ((u64-operand? a) + (let ((specialize (if (u64-operand? b) + specialize-u64-comparison + specialize-u64-scm-comparison))) + (with-cps cps + (let$ body (specialize k kt src op a b)) + (setk label ($kargs names vars ,body))))) + ((u64-operand? b) + (let ((op (match op + ('< '>) ('<= '>=) ('= '=) ('>= '<=) ('> '<)))) + (with-cps cps + (let$ body (specialize-u64-scm-comparison k kt src op b a)) + (setk label ($kargs names vars ,body))))) + (else cps)) types sigbits)) (_ (values cps types sigbits)))) diff --git a/module/language/cps/type-fold.scm b/module/language/cps/type-fold.scm index 9459e31..a688292 100644 --- a/module/language/cps/type-fold.scm +++ b/module/language/cps/type-fold.scm @@ -110,6 +110,7 @@ (else (values #f #f)))) (define-branch-folder-alias u64-< <) (define-branch-folder-alias u64-<-scm <) +(define-branch-folder-alias f64-< <) (define-binary-branch-folder (<= type0 min0 max0 type1 min1 max1) (case (compare-ranges type0 min0 max0 type1 min1 max1) @@ -118,6 +119,7 @@ (else (values #f #f)))) (define-branch-folder-alias u64-<= <=) (define-branch-folder-alias u64-<=-scm <=) +(define-branch-folder-alias f64-<= <=) (define-binary-branch-folder (= type0 min0 max0 type1 min1 max1) (case (compare-ranges type0 min0 max0 type1 min1 max1) @@ -126,6 +128,7 @@ (else (values #f #f)))) (define-branch-folder-alias u64-= =) (define-branch-folder-alias u64-=-scm =) +(define-branch-folder-alias f64-= =) (define-binary-branch-folder (>= type0 min0 max0 type1 min1 max1) (case (compare-ranges type0 min0 max0 type1 min1 max1) @@ -134,6 +137,7 @@ (else (values #f #f)))) (define-branch-folder-alias u64->= >=) (define-branch-folder-alias u64->=-scm >=) +(define-branch-folder-alias f64->= >=) (define-binary-branch-folder (> type0 min0 max0 type1 min1 max1) (case (compare-ranges type0 min0 max0 type1 min1 max1) @@ -142,6 +146,7 @@ (else (values #f #f)))) (define-branch-folder-alias u64-> >) (define-branch-folder-alias u64->-scm >) +(define-branch-folder-alias f64-> >) (define-binary-branch-folder (logtest type0 min0 max0 type1 min1 max1) (define (logand-min a b) diff --git a/module/language/cps/types.scm b/module/language/cps/types.scm index c7e4211..b3d4b4a 100644 --- a/module/language/cps/types.scm +++ b/module/language/cps/types.scm @@ -378,6 +378,7 @@ minimum, and maximum." (define-syntax-rule (&max/u64 x) (min (&max x) &u64-max)) (define-syntax-rule (&min/s64 x) (max (&min x) &s64-min)) (define-syntax-rule (&max/s64 x) (min (&max x) &s64-max)) +(define-syntax-rule (&max/f64 x) (min (&max x) +inf.0)) (define-syntax-rule (&max/size x) (min (&max x) *max-size-t*)) (define-syntax-rule (define-type-checker (name arg ...) body ...) @@ -945,6 +946,35 @@ minimum, and maximum." (define-simple-type-checker (u64-> &u64 &u64)) (define-u64-comparison-inferrer (u64-> > <=)) +(define (infer-f64-comparison-ranges op min0 max0 min1 max1) + (match op + ('< (values min0 (min max0 (1- max1)) (max (1+ min0) min1) max1)) + ('<= (values min0 (min max0 max1) (max min0 min1) max1)) + ('>= (values (max min0 min1) max0 min1 (min max0 max1))) + ('> (values (max min0 (1+ min1)) max0 min1 (min (1- max0) max1))))) +(define-syntax-rule (define-f64-comparison-inferrer (f64-op op inverse)) + (define-predicate-inferrer (f64-op a b true?) + (call-with-values + (lambda () + (infer-f64-comparison-ranges (if true? 'op 'inverse) + (&min/0 a) (&max/f64 a) + (&min/0 b) (&max/f64 b))) + (lambda (min0 max0 min1 max1) + (restrict! a &f64 min0 max0) + (restrict! b &f64 min1 max1))))) + +(define-simple-type-checker (f64-< &f64 &f64)) +(define-f64-comparison-inferrer (f64-< < >=)) + +(define-simple-type-checker (f64-<= &f64 &f64)) +(define-f64-comparison-inferrer (f64-<= <= >)) + +(define-simple-type-checker (f64->= &f64 &f64)) +(define-f64-comparison-inferrer (f64-<= >= <)) + +(define-simple-type-checker (f64-> &f64 &f64)) +(define-f64-comparison-inferrer (f64-> > <=)) + ;; Arithmetic. (define-syntax-rule (define-unary-result! a result min max) (let ((min* min) diff --git a/module/system/vm/assembler.scm b/module/system/vm/assembler.scm index 2c6bf81..226a223 100644 --- a/module/system/vm/assembler.scm +++ b/module/system/vm/assembler.scm @@ -106,6 +106,11 @@ emit-br-if-u64-=-scm emit-br-if-u64->=-scm emit-br-if-u64->-scm + emit-br-if-f64-= + emit-br-if-f64-< + emit-br-if-f64-<= + emit-br-if-f64-> + emit-br-if-f64->= emit-box emit-box-ref emit-box-set! diff --git a/module/system/vm/disassembler.scm b/module/system/vm/disassembler.scm index b0867e6..b6f4f78 100644 --- a/module/system/vm/disassembler.scm +++ b/module/system/vm/disassembler.scm @@ -198,6 +198,8 @@ address of that offset." 'br-if-u64-= 'br-if-u64-< 'br-if-u64-<= 'br-if-u64-<-scm 'br-if-u64-<=-scm 'br-if-u64-=-scm 'br-if-u64->-scm 'br-if-u64->=-scm + 'br-if-f64-= 'br-if-f64-< 'br-if-f64-<= + 'br-if-f64-> 'br-if-f64->= 'br-if-logtest) _ ... target) (list "-> ~A" (vector-ref labels (- (+ offset target) start)))) (('br-if-tc7 slot invert? tc7 target) -- 2.10.0
-- David Thompson