This updates the libffi MIPS support up to commit 746dbe3a6a79, with the
exception of commit bd72848c7af9 which prefixes the ALIGN macro with
FFI_ for all ports.

These patches, with the exception of the softfloat one, have been used
in the Debian GCC packages for quite some time.

libffi/ChangeLog:

2019-08-05  Aurelien Jarno  <aurel...@aurel32.net>

        Import from upstream
        * src/mips/ffi.c (ffi_call_O32, ffi_call_N32,
        ffi_closure_mips_inner_O32, ffi_closure_mips_inner_N32): Adjust
        interface.
        (ffi_call_int): Renamed from ffi_call.
        (ffi_call, ffi_call_go, ffi_prep_go_closure): New functions.
        (ffi_prep_closure_loc): Define jr instruction for R6.
        * src/mips/ffitarget.h (FFI_GO_CLOSURES): Define.
        (FFI_TRAMPOLINE_SIZE): Define to 56 for N64.
        Test for __linux__ instead of linux.
        * src/mips/n32.S (ffi_go_closure_N32): New function.
        (ffi_call_N32): Adjust code for softfloat.
        (.set mips4): Guard with !defined(__mips_isa_rev) ||
        (__mips_isa_rev<6).
        * src/mips/o32.S (ffi_go_closure_O32): New function.
        (ffi_call_O32): Adjust code for softfloat.

diff --git a/libffi/src/mips/ffi.c b/libffi/src/mips/ffi.c
index 5d0dd70cb32..70a2081b498 100644
--- a/libffi/src/mips/ffi.c
+++ b/libffi/src/mips/ffi.c
@@ -581,14 +581,15 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 /* Low level routine for calling O32 functions */
 extern int ffi_call_O32(void (*)(char *, extended_cif *, int, int), 
                        extended_cif *, unsigned, 
-                       unsigned, unsigned *, void (*)(void));
+                       unsigned, unsigned *, void (*)(void), void *closure);
 
 /* Low level routine for calling N32 functions */
 extern int ffi_call_N32(void (*)(char *, extended_cif *, int, int), 
                        extended_cif *, unsigned, 
-                       unsigned, void *, void (*)(void));
+                       unsigned, void *, void (*)(void), void *closure);
 
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+void ffi_call_int(ffi_cif *cif, void (*fn)(void), void *rvalue, 
+             void **avalue, void *closure)
 {
   extended_cif ecif;
 
@@ -610,7 +611,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
     case FFI_O32:
     case FFI_O32_SOFT_FLOAT:
       ffi_call_O32(ffi_prep_args, &ecif, cif->bytes, 
-                  cif->flags, ecif.rvalue, fn);
+                  cif->flags, ecif.rvalue, fn, closure);
       break;
 #endif
 
@@ -642,7 +643,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 #endif
          }
         ffi_call_N32(ffi_prep_args, &ecif, cif->bytes,
-                     cif->flags, rvalue_copy, fn);
+                     cif->flags, rvalue_copy, fn, closure);
         if (copy_rvalue)
           memcpy(ecif.rvalue, rvalue_copy + copy_offset, cif->rtype->size);
       }
@@ -655,11 +656,27 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
     }
 }
 
+void
+ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+            void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
 #if FFI_CLOSURES
 #if defined(FFI_MIPS_O32)
 extern void ffi_closure_O32(void);
+extern void ffi_go_closure_O32(void);
 #else
 extern void ffi_closure_N32(void);
+extern void ffi_go_closure_N32(void);
 #endif /* FFI_MIPS_O32 */
 
 ffi_status
@@ -698,7 +715,11 @@ ffi_prep_closure_loc (ffi_closure *closure,
   /* lui  $12,high(codeloc) */
   tramp[2] = 0x3c0c0000 | ((unsigned)codeloc >> 16);
   /* jr   $25          */
+#if !defined(__mips_isa_rev) || (__mips_isa_rev<6)
   tramp[3] = 0x03200008;
+#else
+  tramp[3] = 0x03200009;
+#endif
   /* ori  $12,low(codeloc)  */
   tramp[4] = 0x358c0000 | ((unsigned)codeloc & 0xffff);
 #else
@@ -726,7 +747,11 @@ ffi_prep_closure_loc (ffi_closure *closure,
   /* ori  $25,low(fn)  */
   tramp[10] = 0x37390000 | ((unsigned long)fn  & 0xffff);
   /* jr   $25          */
+#if !defined(__mips_isa_rev) || (__mips_isa_rev<6)
   tramp[11] = 0x03200008;
+#else
+  tramp[11] = 0x03200009;
+#endif
   /* ori  $12,low(codeloc)  */
   tramp[12] = 0x358c0000 | ((unsigned long)codeloc & 0xffff);
 
@@ -762,17 +787,17 @@ ffi_prep_closure_loc (ffi_closure *closure,
  * Based on the similar routine for sparc.
  */
 int
-ffi_closure_mips_inner_O32 (ffi_closure *closure,
+ffi_closure_mips_inner_O32 (ffi_cif *cif,
+                            void (*fun)(ffi_cif*, void*, void**, void*),
+                           void *user_data,
                            void *rvalue, ffi_arg *ar,
                            double *fpr)
 {
-  ffi_cif *cif;
   void **avaluep;
   ffi_arg *avalue;
   ffi_type **arg_types;
   int i, avn, argn, seen_int;
 
-  cif = closure->cif;
   avalue = alloca (cif->nargs * sizeof (ffi_arg));
   avaluep = alloca (cif->nargs * sizeof (ffi_arg));
 
@@ -840,7 +865,7 @@ ffi_closure_mips_inner_O32 (ffi_closure *closure,
     }
 
   /* Invoke the closure. */
-  (closure->fun) (cif, rvalue, avaluep, closure->user_data);
+  fun(cif, rvalue, avaluep, user_data);
 
   if (cif->abi == FFI_O32_SOFT_FLOAT)
     {
@@ -916,11 +941,12 @@ copy_struct_N32(char *target, unsigned offset, ffi_abi abi, ffi_type *type,
  *
  */
 int
-ffi_closure_mips_inner_N32 (ffi_closure *closure,
+ffi_closure_mips_inner_N32 (ffi_cif *cif, 
+                           void (*fun)(ffi_cif*, void*, void**, void*),
+                            void *user_data,
                            void *rvalue, ffi_arg *ar,
                            ffi_arg *fpr)
 {
-  ffi_cif *cif;
   void **avaluep;
   ffi_arg *avalue;
   ffi_type **arg_types;
@@ -928,7 +954,6 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
   int soft_float;
   ffi_arg *argp;
 
-  cif = closure->cif;
   soft_float = cif->abi == FFI_N64_SOFT_FLOAT
     || cif->abi == FFI_N32_SOFT_FLOAT;
   avalue = alloca (cif->nargs * sizeof (ffi_arg));
@@ -1040,11 +1065,49 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
     }
 
   /* Invoke the closure. */
-  (closure->fun) (cif, rvalue, avaluep, closure->user_data);
+  fun (cif, rvalue, avaluep, user_data);
 
   return cif->flags >> (FFI_FLAG_BITS * 8);
 }
 
 #endif /* FFI_MIPS_N32 */
 
+#if defined(FFI_MIPS_O32)
+extern void ffi_closure_O32(void);
+extern void ffi_go_closure_O32(void);
+#else
+extern void ffi_closure_N32(void);
+extern void ffi_go_closure_N32(void);
+#endif /* FFI_MIPS_O32 */
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+                    void (*fun)(ffi_cif*,void*,void**,void*))
+{
+  void * fn;
+
+#if defined(FFI_MIPS_O32)
+  if (cif->abi != FFI_O32 && cif->abi != FFI_O32_SOFT_FLOAT)
+    return FFI_BAD_ABI;
+  fn = ffi_go_closure_O32;
+#else
+#if _MIPS_SIM ==_ABIN32
+  if (cif->abi != FFI_N32
+      && cif->abi != FFI_N32_SOFT_FLOAT)
+    return FFI_BAD_ABI;
+#else
+  if (cif->abi != FFI_N64
+      && cif->abi != FFI_N64_SOFT_FLOAT)
+    return FFI_BAD_ABI;
+#endif
+  fn = ffi_go_closure_N32;
+#endif /* FFI_MIPS_O32 */
+
+  closure->tramp = (void *)fn;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+
 #endif /* FFI_CLOSURES */
diff --git a/libffi/src/mips/ffitarget.h b/libffi/src/mips/ffitarget.h
index 717d65951c3..97158909c43 100644
--- a/libffi/src/mips/ffitarget.h
+++ b/libffi/src/mips/ffitarget.h
@@ -32,7 +32,7 @@
 #error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
 #endif
 
-#ifdef linux
+#ifdef __linux__
 # include <asm/sgidefs.h>
 #elif defined(__rtems__)
 /*
@@ -231,12 +231,14 @@ typedef enum ffi_abi {
 
 #if defined(FFI_MIPS_O32)
 #define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
 #define FFI_TRAMPOLINE_SIZE 20
 #else
 /* N32/N64. */
 # define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
 #if _MIPS_SIM==_ABI64
-#define FFI_TRAMPOLINE_SIZE 52
+#define FFI_TRAMPOLINE_SIZE 56
 #else
 #define FFI_TRAMPOLINE_SIZE 20
 #endif
diff --git a/libffi/src/mips/n32.S b/libffi/src/mips/n32.S
index c6985d30a6f..1a940b6d2a8 100644
--- a/libffi/src/mips/n32.S
+++ b/libffi/src/mips/n32.S
@@ -37,36 +37,43 @@
 #define flags   a3
 #define raddr    a4
 #define fn       a5
+#define closure  a6
 
-#define SIZEOF_FRAME   ( 8 * FFI_SIZEOF_ARG )
+/* Note: to keep the stack 16-byte aligned we need an even number of
+   slots; 9 slots are used here.
+*/
+#define SIZEOF_FRAME   ( 10 * FFI_SIZEOF_ARG )
 
 #ifdef __GNUC__
        .abicalls
 #endif
+#if !defined(__mips_isa_rev) || (__mips_isa_rev<6)
        .set mips4
+#endif
        .text
        .align  2
        .globl  ffi_call_N32
        .ent    ffi_call_N32
 ffi_call_N32:  
-.LFB3:
+.LFB0:
        .frame  $fp, SIZEOF_FRAME, ra
        .mask   0xc0000000,-FFI_SIZEOF_ARG
        .fmask  0x00000000,0
 
        # Prologue
        SUBU    $sp, SIZEOF_FRAME                       # Frame size
-.LCFI0:
+.LCFI00:
        REG_S   $fp, SIZEOF_FRAME - 2*FFI_SIZEOF_ARG($sp)       # Save frame pointer
        REG_S   ra, SIZEOF_FRAME - 1*FFI_SIZEOF_ARG($sp)        # Save return address
-.LCFI1:
+.LCFI01:
        move    $fp, $sp
-.LCFI3:
+.LCFI02:
        move    t9, callback    # callback function pointer
        REG_S   bytes, 2*FFI_SIZEOF_ARG($fp) # bytes
        REG_S   flags, 3*FFI_SIZEOF_ARG($fp) # flags
        REG_S   raddr, 4*FFI_SIZEOF_ARG($fp) # raddr
        REG_S   fn,    5*FFI_SIZEOF_ARG($fp) # fn
+       REG_S   closure, 6*FFI_SIZEOF_ARG($fp) # closure
 
        # Allocate at least 4 words in the argstack
        move    v0, bytes
@@ -107,6 +114,16 @@ loadregs:
 
        REG_L   t6, 3*FFI_SIZEOF_ARG($fp)  # load the flags word into t6.
 
+#ifdef __mips_soft_float
+       REG_L   a0, 0*FFI_SIZEOF_ARG(t9)
+       REG_L   a1, 1*FFI_SIZEOF_ARG(t9)
+       REG_L   a2, 2*FFI_SIZEOF_ARG(t9)
+       REG_L   a3, 3*FFI_SIZEOF_ARG(t9)
+       REG_L   a4, 4*FFI_SIZEOF_ARG(t9)
+       REG_L   a5, 5*FFI_SIZEOF_ARG(t9)
+       REG_L   a6, 6*FFI_SIZEOF_ARG(t9)
+       REG_L   a7, 7*FFI_SIZEOF_ARG(t9)
+#else
        and     t4, t6, ((1<<FFI_FLAG_BITS)-1)
        REG_L   a0, 0*FFI_SIZEOF_ARG(t9)
        beqz    t4, arg1_next
@@ -193,11 +210,15 @@ arg7_next:
 arg8_doublep:  
        l.d     $f19, 7*FFI_SIZEOF_ARG(t9)      
 arg8_next:     
+#endif
 
 callit:                
        # Load the function pointer
        REG_L   t9, 5*FFI_SIZEOF_ARG($fp)
 
+       # install the static chain(t7=$15)
+       REG_L   t7, 6*FFI_SIZEOF_ARG($fp)
+
        # If the return value pointer is NULL, assume no return value.
        REG_L   t5, 4*FFI_SIZEOF_ARG($fp)
        beqz    t5, noretval
@@ -214,6 +235,7 @@ retint:
        b       epilogue
 
 retfloat:
+#ifndef __mips_soft_float
        bne     t6, FFI_TYPE_FLOAT, retdouble
        jal     t9
        REG_L   t4, 4*FFI_SIZEOF_ARG($fp)
@@ -272,6 +294,7 @@ retstruct_f_d:
        s.s     $f0, 0(t4)
        s.d     $f2, 8(t4)
        b       epilogue
+#endif
 
 retstruct_d_soft:
        bne     t6, FFI_TYPE_STRUCT_D_SOFT, retstruct_f_soft
@@ -346,7 +369,7 @@ epilogue:
        ADDU    $sp, SIZEOF_FRAME                     # Fix stack pointer
        j       ra
 
-.LFE3:
+.LFE0:
        .end    ffi_call_N32
 
 /* ffi_closure_N32. Expects address of the passed-in ffi_closure in t0
@@ -405,6 +428,41 @@ epilogue:
 #define RA_OFF2                (1  * FFI_SIZEOF_ARG)
 #define GP_OFF2                (0  * FFI_SIZEOF_ARG)
 
+       .align  2
+       .globl  ffi_go_closure_N32
+       .ent    ffi_go_closure_N32
+ffi_go_closure_N32:
+.LFB1:
+       .frame  $sp, SIZEOF_FRAME2, ra
+       .mask   0x90000000,-(SIZEOF_FRAME2 - RA_OFF2)
+       .fmask  0x00000000,0
+       SUBU    $sp, SIZEOF_FRAME2
+.LCFI10:
+       .cpsetup t9, GP_OFF2, ffi_go_closure_N32
+       REG_S   ra, RA_OFF2($sp)        # Save return address
+.LCFI11:
+
+       REG_S   a0, A0_OFF2($sp)
+       REG_S   a1, A1_OFF2($sp)
+       REG_S   a2, A2_OFF2($sp)
+       REG_S   a3, A3_OFF2($sp)
+       REG_S   a4, A4_OFF2($sp)
+       REG_S   a5, A5_OFF2($sp)
+
+       # Call ffi_closure_mips_inner_N32 to do the real work.
+       LA      t9, ffi_closure_mips_inner_N32
+       REG_L   a0, 8($15)   # cif
+       REG_L   a1, 16($15) # fun
+       move    a2, t7                     # userdata=closure
+       ADDU    a3, $sp, V0_OFF2           # rvalue
+       ADDU    a4, $sp, A0_OFF2           # ar
+       ADDU    a5, $sp, F12_OFF2          # fpr
+
+       b       $do_closure
+
+.LFE1: 
+       .end    ffi_go_closure_N32
+
        .align  2
        .globl  ffi_closure_N32
        .ent    ffi_closure_N32
@@ -414,21 +472,33 @@ ffi_closure_N32:
        .mask   0x90000000,-(SIZEOF_FRAME2 - RA_OFF2)
        .fmask  0x00000000,0
        SUBU    $sp, SIZEOF_FRAME2
-.LCFI5:
+.LCFI20:
        .cpsetup t9, GP_OFF2, ffi_closure_N32
        REG_S   ra, RA_OFF2($sp)        # Save return address
-.LCFI6:
-       # Store all possible argument registers. If there are more than
-       # fit in registers, then they were stored on the stack.
+.LCFI21:
        REG_S   a0, A0_OFF2($sp)
        REG_S   a1, A1_OFF2($sp)
        REG_S   a2, A2_OFF2($sp)
        REG_S   a3, A3_OFF2($sp)
        REG_S   a4, A4_OFF2($sp)
        REG_S   a5, A5_OFF2($sp)
+
+       # Call ffi_closure_mips_inner_N32 to do the real work.
+       LA      t9, ffi_closure_mips_inner_N32
+       REG_L   a0, 56($12)   # cif
+       REG_L   a1, 64($12)   # fun
+       REG_L   a2, 72($12) # user_data
+       ADDU    a3, $sp, V0_OFF2
+       ADDU    a4, $sp, A0_OFF2
+       ADDU    a5, $sp, F12_OFF2
+
+$do_closure:
+       # Store all possible argument registers. If there are more than
+       # fit in registers, then they were stored on the stack.
        REG_S   a6, A6_OFF2($sp)
        REG_S   a7, A7_OFF2($sp)
 
+#ifndef __mips_soft_float
        # Store all possible float/double registers.
        s.d     $f12, F12_OFF2($sp)
        s.d     $f13, F13_OFF2($sp)
@@ -438,13 +508,8 @@ ffi_closure_N32:
        s.d     $f17, F17_OFF2($sp)
        s.d     $f18, F18_OFF2($sp)
        s.d     $f19, F19_OFF2($sp)
+#endif
 
-       # Call ffi_closure_mips_inner_N32 to do the real work.
-       LA      t9, ffi_closure_mips_inner_N32
-       move    a0, $12  # Pointer to the ffi_closure
-       ADDU    a1, $sp, V0_OFF2
-       ADDU    a2, $sp, A0_OFF2
-       ADDU    a3, $sp, F12_OFF2
        jalr    t9
 
        # Return flags are in v0
@@ -458,6 +523,7 @@ cls_retint:
        b       cls_epilogue
 
 cls_retfloat:
+#ifndef __mips_soft_float
        bne     v0, FFI_TYPE_FLOAT, cls_retdouble
        l.s     $f0, V0_OFF2($sp)
        b       cls_epilogue
@@ -500,6 +566,7 @@ cls_retstruct_f_d:
        l.s     $f0, V0_OFF2($sp)
        l.d     $f2, V1_OFF2($sp)
        b       cls_epilogue
+#endif
        
 cls_retstruct_small2:  
        REG_L   v0, V0_OFF2($sp)
@@ -531,46 +598,66 @@ cls_epilogue:
         .align  EH_FRAME_ALIGN
 .LECIE1:
 
-.LSFDE1:
-        .4byte  .LEFDE1-.LASFDE1       # length.
-.LASFDE1:
-        .4byte  .LASFDE1-.Lframe1      # CIE_pointer.
-        FDE_ADDR_BYTES  .LFB3          # initial_location.
-        FDE_ADDR_BYTES  .LFE3-.LFB3    # address_range.
+.LSFDE0:
+        .4byte  .LEFDE0-.LASFDE0       # length.
+.LASFDE0:
+        .4byte  .LASFDE0-.Lframe1      # CIE_pointer.
+        FDE_ADDR_BYTES  .LFB0          # initial_location.
+        FDE_ADDR_BYTES  .LFE0-.LFB0    # address_range.
         .byte   0x4                    # DW_CFA_advance_loc4
-        .4byte  .LCFI0-.LFB3           # to .LCFI0
+        .4byte  .LCFI00-.LFB0          # to .LCFI00
         .byte   0xe                    # DW_CFA_def_cfa_offset
         .uleb128 SIZEOF_FRAME          # adjust stack.by SIZEOF_FRAME
         .byte   0x4                    # DW_CFA_advance_loc4
-        .4byte  .LCFI1-.LCFI0          # to .LCFI1
+        .4byte  .LCFI01-.LCFI00                # to .LCFI01
         .byte   0x9e                   # DW_CFA_offset of $fp
         .uleb128 2*FFI_SIZEOF_ARG/4    # 
         .byte   0x9f                   # DW_CFA_offset of ra
         .uleb128 1*FFI_SIZEOF_ARG/4    # 
         .byte   0x4                    # DW_CFA_advance_loc4
-        .4byte  .LCFI3-.LCFI1          # to .LCFI3
+        .4byte  .LCFI02-.LCFI01                # to .LCFI02
         .byte   0xd                    # DW_CFA_def_cfa_register
         .uleb128 0x1e                  # in $fp
         .align  EH_FRAME_ALIGN
+.LEFDE0:
+
+.LSFDE1:
+       .4byte  .LEFDE1-.LASFDE1        # length
+.LASFDE1:
+       .4byte  .LASFDE1-.Lframe1       # CIE_pointer.
+       FDE_ADDR_BYTES  .LFB1           # initial_location.
+       FDE_ADDR_BYTES  .LFE1-.LFB1     # address_range.
+       .byte   0x4                     # DW_CFA_advance_loc4
+       .4byte  .LCFI10-.LFB1           # to .LCFI10
+       .byte   0xe                     # DW_CFA_def_cfa_offset
+       .uleb128 SIZEOF_FRAME2          # adjust stack.by SIZEOF_FRAME
+       .byte   0x4                     # DW_CFA_advance_loc4
+       .4byte  .LCFI11-.LCFI10         # to .LCFI11
+       .byte   0x9c                    # DW_CFA_offset of $gp ($28)
+       .uleb128 (SIZEOF_FRAME2 - GP_OFF2)/4
+       .byte   0x9f                    # DW_CFA_offset of ra ($31)
+       .uleb128 (SIZEOF_FRAME2 - RA_OFF2)/4
+       .align  EH_FRAME_ALIGN
 .LEFDE1:
-.LSFDE3:
-       .4byte  .LEFDE3-.LASFDE3        # length
-.LASFDE3:
-       .4byte  .LASFDE3-.Lframe1       # CIE_pointer.
+
+.LSFDE2:
+       .4byte  .LEFDE2-.LASFDE2        # length
+.LASFDE2:
+       .4byte  .LASFDE2-.Lframe1       # CIE_pointer.
        FDE_ADDR_BYTES  .LFB2           # initial_location.
        FDE_ADDR_BYTES  .LFE2-.LFB2     # address_range.
        .byte   0x4                     # DW_CFA_advance_loc4
-       .4byte  .LCFI5-.LFB2            # to .LCFI5
+       .4byte  .LCFI20-.LFB2           # to .LCFI20
        .byte   0xe                     # DW_CFA_def_cfa_offset
        .uleb128 SIZEOF_FRAME2          # adjust stack.by SIZEOF_FRAME
        .byte   0x4                     # DW_CFA_advance_loc4
-       .4byte  .LCFI6-.LCFI5           # to .LCFI6
+       .4byte  .LCFI21-.LCFI20         # to .LCFI21
        .byte   0x9c                    # DW_CFA_offset of $gp ($28)
        .uleb128 (SIZEOF_FRAME2 - GP_OFF2)/4
        .byte   0x9f                    # DW_CFA_offset of ra ($31)
        .uleb128 (SIZEOF_FRAME2 - RA_OFF2)/4
        .align  EH_FRAME_ALIGN
-.LEFDE3:
+.LEFDE2:
 #endif /* __GNUC__ */  
        
 #endif
diff --git a/libffi/src/mips/o32.S b/libffi/src/mips/o32.S
index eb279813a76..44e74cb91a2 100644
--- a/libffi/src/mips/o32.S
+++ b/libffi/src/mips/o32.S
@@ -50,14 +50,14 @@ ffi_call_O32:
 $LFB0:
        # Prologue
        SUBU    $sp, SIZEOF_FRAME       # Frame size
-$LCFI0:
+$LCFI00:
        REG_S   $fp, FP_OFF($sp)        # Save frame pointer
-$LCFI1:
+$LCFI01:
        REG_S   ra, RA_OFF($sp)         # Save return address
-$LCFI2:
+$LCFI02:
        move    $fp, $sp
 
-$LCFI3:
+$LCFI03:
        move    t9, callback            # callback function pointer
        REG_S   flags, A3_OFF($fp)      # flags
 
@@ -82,13 +82,16 @@ sixteen:
                
        ADDU    $sp, 4 * FFI_SIZEOF_ARG         # adjust $sp to new args
 
+#ifndef __mips_soft_float
        bnez    t0, pass_d                      # make it quick for int
+#endif
        REG_L   a0, 0*FFI_SIZEOF_ARG($sp)       # just go ahead and load the
        REG_L   a1, 1*FFI_SIZEOF_ARG($sp)       # four regs.
        REG_L   a2, 2*FFI_SIZEOF_ARG($sp)
        REG_L   a3, 3*FFI_SIZEOF_ARG($sp)
        b       call_it
 
+#ifndef __mips_soft_float
 pass_d:
        bne     t0, FFI_ARGS_D, pass_f
        l.d     $f12, 0*FFI_SIZEOF_ARG($sp)     # load $fp regs from args
@@ -130,8 +133,12 @@ pass_f_d:
  #     bne     t0, FFI_ARGS_F_D, call_it
        l.s     $f12, 0*FFI_SIZEOF_ARG($sp)     # load $fp regs from args
        l.d     $f14, 2*FFI_SIZEOF_ARG($sp)     # passing double and float
+#endif
 
 call_it:       
+       # Load the static chain pointer
+       REG_L   t7, SIZEOF_FRAME + 6*FFI_SIZEOF_ARG($fp)
+
        # Load the function pointer
        REG_L   t9, SIZEOF_FRAME + 5*FFI_SIZEOF_ARG($fp)
 
@@ -158,14 +165,23 @@ retfloat:
        bne     t2, FFI_TYPE_FLOAT, retdouble
        jalr    t9
        REG_L   t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
+#ifndef __mips_soft_float
        s.s     $f0, 0(t0)
+#else
+       REG_S   v0, 0(t0)
+#endif
        b       epilogue
 
 retdouble:     
        bne     t2, FFI_TYPE_DOUBLE, noretval
        jalr    t9
        REG_L   t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
+#ifndef __mips_soft_float
        s.d     $f0, 0(t0)
+#else
+       REG_S   v1, 4(t0)
+       REG_S   v0, 0(t0)
+#endif
        b       epilogue
        
 noretval:      
@@ -204,13 +220,15 @@ $LFE0:
        -8 - f14 (le low, be high)
        -9 - f12 (le high, be low)
        -10 - f12 (le low, be high)
-       -11 - Called function a3 save
-       -12 - Called function a2 save
-       -13 - Called function a1 save
-       -14 - Called function a0 save, our sp and fp point here
+       -11 - Called function a5 save
+       -12 - Called function a4 save
+       -13 - Called function a3 save
+       -14 - Called function a2 save
+       -15 - Called function a1 save
+       -16 - Called function a0 save, our sp and fp point here
         */
        
-#define SIZEOF_FRAME2  (14 * FFI_SIZEOF_ARG)
+#define SIZEOF_FRAME2  (16 * FFI_SIZEOF_ARG)
 #define A3_OFF2                (SIZEOF_FRAME2 + 3 * FFI_SIZEOF_ARG)
 #define A2_OFF2                (SIZEOF_FRAME2 + 2 * FFI_SIZEOF_ARG)
 #define A1_OFF2                (SIZEOF_FRAME2 + 1 * FFI_SIZEOF_ARG)
@@ -225,13 +243,71 @@ $LFE0:
 #define FA_1_0_OFF2    (SIZEOF_FRAME2 - 8 * FFI_SIZEOF_ARG)
 #define FA_0_1_OFF2    (SIZEOF_FRAME2 - 9 * FFI_SIZEOF_ARG)
 #define FA_0_0_OFF2    (SIZEOF_FRAME2 - 10 * FFI_SIZEOF_ARG)
+#define CALLED_A5_OFF2  (SIZEOF_FRAME2 - 11 * FFI_SIZEOF_ARG)
+#define CALLED_A4_OFF2  (SIZEOF_FRAME2 - 12 * FFI_SIZEOF_ARG)
 
        .text
+
+       .align  2
+       .globl  ffi_go_closure_O32
+       .ent    ffi_go_closure_O32
+ffi_go_closure_O32:
+$LFB1:
+       # Prologue
+       .frame  $fp, SIZEOF_FRAME2, ra
+       .set    noreorder
+       .cpload t9
+       .set    reorder
+       SUBU    $sp, SIZEOF_FRAME2
+       .cprestore GP_OFF2
+$LCFI10:
+
+       REG_S   $16, S0_OFF2($sp)        # Save s0
+       REG_S   $fp, FP_OFF2($sp)        # Save frame pointer
+       REG_S   ra, RA_OFF2($sp)         # Save return address
+$LCFI11:
+
+       move    $fp, $sp
+$LCFI12:
+
+       REG_S   a0, A0_OFF2($fp)
+       REG_S   a1, A1_OFF2($fp)
+       REG_S   a2, A2_OFF2($fp)
+       REG_S   a3, A3_OFF2($fp)
+
+       # Load ABI enum to s0
+       REG_L   $16, 4($15)     # cif 
+       REG_L   $16, 0($16)     # abi is first member.
+
+       li      $13, 1          # FFI_O32
+       bne     $16, $13, 1f    # Skip fp save if FFI_O32_SOFT_FLOAT
+       
+       # Store all possible float/double registers.
+       s.d     $f12, FA_0_0_OFF2($fp)
+       s.d     $f14, FA_1_0_OFF2($fp)
+1:
+       # prepare arguments for ffi_closure_mips_inner_O32
+       REG_L   a0, 4($15)       # cif 
+       REG_L   a1, 8($15)       # fun
+       move    a2, $15          # user_data = go closure
+       addu    a3, $fp, V0_OFF2 # rvalue
+
+       addu    t9, $fp, A0_OFF2 # ar
+       REG_S   t9, CALLED_A4_OFF2($fp)
+
+       addu    t9, $fp, FA_0_0_OFF2 #fpr
+       REG_S   t9, CALLED_A5_OFF2($fp)
+
+       b $do_closure
+
+$LFE1:
+       .end ffi_go_closure_O32
+
        .align  2
        .globl  ffi_closure_O32
        .ent    ffi_closure_O32
 ffi_closure_O32:
-$LFB1:
+$LFB2:
        # Prologue
        .frame  $fp, SIZEOF_FRAME2, ra
        .set    noreorder
@@ -239,14 +315,14 @@ $LFB1:
        .set    reorder
        SUBU    $sp, SIZEOF_FRAME2
        .cprestore GP_OFF2
-$LCFI4:
+$LCFI20:
        REG_S   $16, S0_OFF2($sp)        # Save s0
        REG_S   $fp, FP_OFF2($sp)        # Save frame pointer
        REG_S   ra, RA_OFF2($sp)         # Save return address
-$LCFI6:
+$LCFI21:
        move    $fp, $sp
 
-$LCFI7:
+$LCFI22:
        # Store all possible argument registers. If there are more than
        # four arguments, then they are stored above where we put a3.
        REG_S   a0, A0_OFF2($fp)
@@ -261,16 +337,27 @@ $LCFI7:
        li      $13, 1          # FFI_O32
        bne     $16, $13, 1f    # Skip fp save if FFI_O32_SOFT_FLOAT
        
+#ifndef __mips_soft_float
        # Store all possible float/double registers.
        s.d     $f12, FA_0_0_OFF2($fp)
        s.d     $f14, FA_1_0_OFF2($fp)
+#endif
 1:     
-       # Call ffi_closure_mips_inner_O32 to do the work.
+       # prepare arguments for ffi_closure_mips_inner_O32
+       REG_L   a0, 20($12)      # cif pointer follows tramp.
+       REG_L   a1, 24($12)      # fun
+       REG_L   a2, 28($12)      # user_data
+       addu    a3, $fp, V0_OFF2 # rvalue
+
+       addu    t9, $fp, A0_OFF2 # ar
+       REG_S   t9, CALLED_A4_OFF2($fp)
+
+       addu    t9, $fp, FA_0_0_OFF2 #fpr
+       REG_S   t9, CALLED_A5_OFF2($fp)
+
+$do_closure:
        la      t9, ffi_closure_mips_inner_O32
-       move    a0, $12  # Pointer to the ffi_closure
-       addu    a1, $fp, V0_OFF2
-       addu    a2, $fp, A0_OFF2
-       addu    a3, $fp, FA_0_0_OFF2
+       # Call ffi_closure_mips_inner_O32 to do the work.
        jalr    t9
 
        # Load the return value into the appropriate register.
@@ -281,6 +368,7 @@ $LCFI7:
        li      $13, 1          # FFI_O32
        bne     $16, $13, 1f    # Skip fp restore if FFI_O32_SOFT_FLOAT
 
+#ifndef __mips_soft_float
        li      $9, FFI_TYPE_FLOAT
        l.s     $f0, V0_OFF2($fp)
        beq     $8, $9, closure_done
@@ -288,6 +376,7 @@ $LCFI7:
        li      $9, FFI_TYPE_DOUBLE
        l.d     $f0, V0_OFF2($fp)
        beq     $8, $9, closure_done
+#endif
 1:     
        REG_L   $3, V1_OFF2($fp)
        REG_L   $2, V0_OFF2($fp)
@@ -300,7 +389,7 @@ closure_done:
        REG_L   ra,  RA_OFF2($sp)        # Restore return address
        ADDU    $sp, SIZEOF_FRAME2
        j       ra
-$LFE1:
+$LFE2:
        .end    ffi_closure_O32
 
 /* DWARF-2 unwind info. */
@@ -322,6 +411,7 @@ $LSCIE0:
        .uleb128 0x0
        .align  2
 $LECIE0:
+
 $LSFDE0:
        .4byte  $LEFDE0-$LASFDE0         # FDE Length
 $LASFDE0:
@@ -330,11 +420,11 @@ $LASFDE0:
        .4byte  $LFE0-$LFB0      # FDE address range
        .uleb128 0x0     # Augmentation size
        .byte   0x4      # DW_CFA_advance_loc4
-       .4byte  $LCFI0-$LFB0
+       .4byte  $LCFI00-$LFB0
        .byte   0xe      # DW_CFA_def_cfa_offset
        .uleb128 0x18
        .byte   0x4      # DW_CFA_advance_loc4
-       .4byte  $LCFI2-$LCFI0
+       .4byte  $LCFI01-$LCFI00
        .byte   0x11     # DW_CFA_offset_extended_sf
        .uleb128 0x1e    # $fp
        .sleb128 -2      # SIZEOF_FRAME2 - 2*FFI_SIZEOF_ARG($sp)
@@ -342,12 +432,13 @@ $LASFDE0:
        .uleb128 0x1f    # $ra
        .sleb128 -1      # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp)
        .byte   0x4      # DW_CFA_advance_loc4
-       .4byte  $LCFI3-$LCFI2
+       .4byte  $LCFI02-$LCFI01
        .byte   0xc      # DW_CFA_def_cfa
        .uleb128 0x1e
        .uleb128 0x18
        .align  2
 $LEFDE0:
+
 $LSFDE1:
        .4byte  $LEFDE1-$LASFDE1         # FDE Length
 $LASFDE1:
@@ -356,11 +447,11 @@ $LASFDE1:
        .4byte  $LFE1-$LFB1      # FDE address range
        .uleb128 0x0     # Augmentation size
        .byte   0x4      # DW_CFA_advance_loc4
-       .4byte  $LCFI4-$LFB1
+       .4byte  $LCFI10-$LFB1
        .byte   0xe      # DW_CFA_def_cfa_offset
-       .uleb128 0x38
+       .uleb128 SIZEOF_FRAME2
        .byte   0x4      # DW_CFA_advance_loc4
-       .4byte  $LCFI6-$LCFI4
+       .4byte  $LCFI11-$LCFI10
        .byte   0x11     # DW_CFA_offset_extended_sf
        .uleb128 0x10    # $16
        .sleb128 -3      # SIZEOF_FRAME2 - 3*FFI_SIZEOF_ARG($sp)
@@ -371,11 +462,41 @@ $LASFDE1:
        .uleb128 0x1f    # $ra
        .sleb128 -1      # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp)
        .byte   0x4      # DW_CFA_advance_loc4
-       .4byte  $LCFI7-$LCFI6
+       .4byte  $LCFI12-$LCFI11
        .byte   0xc      # DW_CFA_def_cfa
        .uleb128 0x1e
-       .uleb128 0x38
+       .uleb128 SIZEOF_FRAME2
        .align  2
 $LEFDE1:
 
+$LSFDE2:
+       .4byte  $LEFDE2-$LASFDE2         # FDE Length
+$LASFDE2:
+       .4byte  $LASFDE2-$Lframe0        # FDE CIE offset
+       .4byte  $LFB2    # FDE initial location
+       .4byte  $LFE2-$LFB2      # FDE address range
+       .uleb128 0x0     # Augmentation size
+       .byte   0x4      # DW_CFA_advance_loc4
+       .4byte  $LCFI20-$LFB2
+       .byte   0xe      # DW_CFA_def_cfa_offset
+       .uleb128 SIZEOF_FRAME2
+       .byte   0x4      # DW_CFA_advance_loc4
+       .4byte  $LCFI21-$LCFI20
+       .byte   0x11     # DW_CFA_offset_extended_sf
+       .uleb128 0x10    # $16
+       .sleb128 -3      # SIZEOF_FRAME2 - 3*FFI_SIZEOF_ARG($sp)
+       .byte   0x11     # DW_CFA_offset_extended_sf
+       .uleb128 0x1e    # $fp
+       .sleb128 -2      # SIZEOF_FRAME2 - 2*FFI_SIZEOF_ARG($sp)
+       .byte   0x11     # DW_CFA_offset_extended_sf
+       .uleb128 0x1f    # $ra
+       .sleb128 -1      # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp)
+       .byte   0x4      # DW_CFA_advance_loc4
+       .4byte  $LCFI22-$LCFI21
+       .byte   0xc      # DW_CFA_def_cfa
+       .uleb128 0x1e
+       .uleb128 SIZEOF_FRAME2
+       .align  2
+$LEFDE2:
+
 #endif
-- 
2.20.1


-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurel...@aurel32.net                 http://www.aurel32.net

Reply via email to