RFA: RL78: Add support for G13 and G14 multiply and divide

Nick Clifton Wed, 15 Apr 2015 06:20:15 -0700

Hi DJ,

  The attached patch adds support for the RL78 divide and modulus
  capabilities (both as an instruction in G14 cores and as a hardware
  peripheral in G13 cores).  It also updates the multiply patterns to
  add clobbers of the AX and BC registers, which helps fix some reload
  problems with those patterns.


  Tested with no regressions on an rl78-elf toolchain.

  OK to apply ?

Cheers
  Nick

gcc/ChangeLog
2015-04-15  Nick Clifton  <ni...@redhat.com>

        * config/rl78/rl78-opts.h (enum rl78_mul_types): Add MUL_G14 and
        MUL_UNINIT.
        (enum rl78_cpu_types): New.
        * config/rl78/rl78-virt.md (attr valloc): Add divhi and divsi.
        (umulhi3_shift_virt): Remove m constraint from operand 0.
        (umulqihi3_virt): Likewise.
        * config/rl78/rl78.c (rl78_option_override): Add code to process
        -mcpu and -mmul options.
        (rl78_alloc_physical_registers): Add code to handle divhi and
        divsi valloc attributes.
        (set_origin): Likewise.
        * config/rl78/rl78.h (RL78_MUL_G14): Define.
        (TARGET_G10, TARGET_G13, TARGET_G14): Define.
        (TARGET_CPU_CPP_BUILTINS): Define __RL78_MUL_xxx__ and
        __RL78_Gxx__.
        (ASM_SPEC): Pass -mcpu on to assembler.
        * config/rl78/rl78.md (mulqi3): Add a clobber of AX.
        (mulqi3_rl78): Likewise.
        (mulhi3_g13): Likewise.
        (mulhi3): Generate the G13 or G14 versions of the insn directly.
        (mulsi3): Likewise.
        (mulhi3_g14): Add clobbers of AX and BC.
        (mulsi3_g14): Likewise.
        (mulsi3_g13): Likewise.
        (udivmodhi4, udivmodhi4_g14, udivmodsi4): New patterns.
        (udivmodsi4_g14, udivmodsi4_g13): New patterns.
        * config/rl78/rl78.opt (mmul): Initialise value to
        RL78_MUL_UNINIT.
        (mcpu): New option.
        (mg13, mg14, mrl78): New option aliases.
        * config/rl78/t-rl78 (MULTILIB_OPTIONS): Add mg13 and mg14.
        (MULTILIB_DIRNAMES): Add g13 and g14.
        (MULTILIB_MATCHES): Define.
        * doc/invoke.texi: Document -mcpu and -mmul options.

libgcc/ChangeLog
2015-04-15  Nick Clifton  <ni...@redhat.com>

        * config/rl78/divmodhi.S: Add G14 and G13 versions of the __divhi3
        and __modhi3 functions.
        * config/rl78/divmodsi.S: Add G14 and G13 versions of the
        __divsi3, __udivsi3, __modsi3 and __umodsi3 functions.

Index: gcc/config/rl78/rl78-opts.h
===================================================================
--- gcc/config/rl78/rl78-opts.h	(revision 222124)
+++ gcc/config/rl78/rl78-opts.h	(working copy)
@@ -24,7 +24,17 @@
 {
   MUL_NONE,
   MUL_RL78,
-  MUL_G13
+  MUL_G13,
+  MUL_G14,
+  MUL_UNINIT
 };
 
+enum rl78_cpu_types	/* Values of rl78_cpu_type, selected by -mcpu=.  */
+{
+  CPU_G10,	/* -mcpu=g10.  */
+  CPU_G13,	/* -mcpu=g13.  */
+  CPU_G14,	/* -mcpu=g14 or -mcpu=rl78.  */
+  CPU_UNINIT	/* No -mcpu given; rl78_option_override turns this into CPU_G14.  */
+};
+
 #endif
Index: gcc/config/rl78/rl78-virt.md
===================================================================
--- gcc/config/rl78/rl78-virt.md	(revision 222124)
+++ gcc/config/rl78/rl78-virt.md	(working copy)
@@ -28,7 +28,7 @@
 ;; instruction - op1 is of the form "a = op(b)", op2 is "a = b op c"
 ;; etc.
 
-(define_attr "valloc" "op1,op2,ro1,cmp,umul,macax"
+(define_attr "valloc" "op1,op2,ro1,cmp,umul,macax,divhi,divsi"
   (const_string "op2"))
 
 ;;---------- Moving ------------------------
@@ -113,7 +113,7 @@
 )
 
 (define_insn "*umulhi3_shift_virt"
-  [(set (match_operand:HI 0 "register_operand" "=vm")
+  [(set (match_operand:HI          0 "register_operand" "=v")
         (mult:HI (match_operand:HI 1 "rl78_nonfar_operand" "%vim")
                  (match_operand:HI 2 "rl78_24_operand" "Ni")))]
   "rl78_virt_insns_ok () && !TARGET_G10"
@@ -122,7 +122,7 @@
 )
 
 (define_insn "*umulqihi3_virt"
-  [(set (match_operand:HI 0 "register_operand" "=vm")
+  [(set (match_operand:HI                          0 "register_operand" "=v")
         (mult:HI (zero_extend:HI (match_operand:QI 1 "rl78_nonfar_operand" "%vim"))
                  (zero_extend:HI (match_operand:QI 2 "general_operand" "vim"))))]
   "rl78_virt_insns_ok () && !TARGET_G10"
Index: gcc/config/rl78/rl78.c
===================================================================
--- gcc/config/rl78/rl78.c	(revision 222124)
+++ gcc/config/rl78/rl78.c	(working copy)
@@ -377,6 +377,48 @@
       && strcmp (lang_hooks.name, "GNU GIMPLE"))
     /* Address spaces are currently only supported by C.  */
     error ("-mes0 can only be used with C");
+
+  switch (rl78_cpu_type)
+    {
+    case CPU_UNINIT:
+      rl78_cpu_type = CPU_G14;
+      if (rl78_mul_type == MUL_UNINIT)
+	rl78_mul_type = MUL_NONE;
+      break;
+
+    case CPU_G10:
+      switch (rl78_mul_type)
+	{
+	case MUL_UNINIT: rl78_mul_type = MUL_NONE; break;
+	case MUL_NONE:   break;
+	case MUL_G13:  	 error ("-mmul=g13 cannot be used with -mcpu=g10"); break;
+	case MUL_G14:  	 error ("-mmul=g14 cannot be used with -mcpu=g10"); break;
+	}
+      break;
+
+    case CPU_G13:
+      switch (rl78_mul_type)
+	{
+	case MUL_UNINIT: rl78_mul_type = MUL_G13; break;
+	case MUL_NONE:   break;
+	case MUL_G13:  	break;
+	  /* The S2 core does not have mul/div instructions.  */
+	case MUL_G14: 	error ("-mmul=g14 cannot be used with -mcpu=g13"); break;
+	}
+      break;
+
+    case CPU_G14:
+      switch (rl78_mul_type)
+	{
+	case MUL_UNINIT: rl78_mul_type = MUL_G14; break;
+	case MUL_NONE:   break;
+	case MUL_G14:  	break;
+	/* The G14 core does not have the hardware multiply peripheral used by the
+	   G13 core, hence you cannot use G13 multiply routines on G14 hardware.  */
+	case MUL_G13: 	error ("-mmul=g13 cannot be used with -mcpu=g14"); break;
+	}
+      break;
+    }
 }
 
 /* Most registers are 8 bits.  Some are 16 bits because, for example,
@@ -3514,6 +3556,18 @@
 	  record_content (BC, NULL_RTX);
 	  record_content (DE, NULL_RTX);
 	}
+      else if (valloc_method == VALLOC_DIVHI)
+	{
+	  record_content (AX, NULL_RTX);
+	  record_content (BC, NULL_RTX);
+	}
+      else if (valloc_method == VALLOC_DIVSI)
+	{
+	  record_content (AX, NULL_RTX);
+	  record_content (BC, NULL_RTX);
+	  record_content (DE, NULL_RTX);
+	  record_content (HL, NULL_RTX);
+	}
 
       if (insn_ok_now (insn))
 	continue;
@@ -3541,6 +3595,7 @@
 	  break;
 	case VALLOC_UMUL:
 	  rl78_alloc_physical_registers_umul (insn);
+	  record_content (AX, NULL_RTX);
 	  break;
 	case VALLOC_MACAX:
 	  /* Macro that clobbers AX.  */
@@ -3549,6 +3604,18 @@
 	  record_content (BC, NULL_RTX);
 	  record_content (DE, NULL_RTX);
 	  break;
+	case VALLOC_DIVSI:
+	  rl78_alloc_address_registers_div (insn);
+	  record_content (AX, NULL_RTX);
+	  record_content (BC, NULL_RTX);
+	  record_content (DE, NULL_RTX);
+	  record_content (HL, NULL_RTX);
+	  break;
+	case VALLOC_DIVHI:
+	  rl78_alloc_address_registers_div (insn);
+	  record_content (AX, NULL_RTX);
+	  record_content (BC, NULL_RTX);
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -3863,7 +3930,38 @@
 	    age[i] = 0;
 	  }
     }
+  else if (get_attr_valloc (insn) == VALLOC_DIVHI)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Resetting origin of AX/DE for DIVHI pattern.\n");
 
+      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+	if (i == A_REG
+	    || i == X_REG
+	    || i == D_REG
+	    || i == E_REG
+	    || origins[i] == A_REG
+	    || origins[i] == X_REG
+	    || origins[i] == D_REG
+	    || origins[i] == E_REG)
+	  {
+	    origins[i] = i;
+	    age[i] = 0;
+	  }
+    }
+  else if (get_attr_valloc (insn) == VALLOC_DIVSI)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Resetting origin of AX/BC/DE/HL for DIVSI pattern.\n");
+
+      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+	if (i <= 7 || origins[i] <= 7)
+	  {
+	    origins[i] = i;
+	    age[i] = 0;
+	  }
+    }
+
   if (GET_CODE (src) == ASHIFT
       || GET_CODE (src) == ASHIFTRT
       || GET_CODE (src) == LSHIFTRT)
@@ -4087,7 +4185,7 @@
       switch (code)
 	{
 	case MULT:
-	  if (RL78_MUL_RL78)
+	  if (RL78_MUL_G14)
 	    *total = COSTS_N_INSNS (14);
 	  else if (RL78_MUL_G13)
 	    *total = COSTS_N_INSNS (29);
Index: gcc/config/rl78/rl78.h
===================================================================
--- gcc/config/rl78/rl78.h	(revision 222124)
+++ gcc/config/rl78/rl78.h	(working copy)
@@ -20,20 +20,32 @@
 
 
 #define RL78_MUL_NONE	(rl78_mul_type == MUL_NONE)
-#define RL78_MUL_RL78	(rl78_mul_type == MUL_RL78)
 #define RL78_MUL_G13	(rl78_mul_type == MUL_G13)
+#define RL78_MUL_G14	(rl78_mul_type == MUL_G14)
 
+#define TARGET_G10	(rl78_cpu_type == CPU_G10)
+#define TARGET_G13	(rl78_cpu_type == CPU_G13)
+#define TARGET_G14	(rl78_cpu_type == CPU_G14)
+
 #define TARGET_CPU_CPP_BUILTINS()               \
   do                                            \
     {                                           \
       builtin_define ("__RL78__"); 		\
       builtin_assert ("cpu=RL78"); 		\
-      if (RL78_MUL_RL78)			\
-	builtin_define ("__RL78_MUL_RL78__"); 	\
-      if (RL78_MUL_G13)				\
+      						\
+      if (RL78_MUL_NONE)			\
+	builtin_define ("__RL78_MUL_NONE__"); 	\
+      else if (RL78_MUL_G13)			\
 	builtin_define ("__RL78_MUL_G13__"); 	\
+      else if (RL78_MUL_G14)			\
+	builtin_define ("__RL78_MUL_G14__"); 	\
+      						\
       if (TARGET_G10)				\
 	builtin_define ("__RL78_G10__"); 	\
+      else if (TARGET_G13)			\
+	builtin_define ("__RL78_G13__"); 	\
+      else if (TARGET_G14)			\
+	builtin_define ("__RL78_G14__"); 	\
     }                                           \
   while (0)
 
@@ -46,7 +58,14 @@
 #undef  ASM_SPEC
 #define ASM_SPEC "\
 %{mrelax:-relax} \
-%{mg10} \
+%{mg10:--mg10} \
+%{mg13:--mg13} \
+%{mg14:--mg14} \
+%{mrl78:--mg14} \
+%{mcpu=g10:--mg10} \
+%{mcpu=g13:--mg13} \
+%{mcpu=g14:--mg14} \
+%{mcpu=rl78:--mg14} \
 "
 
 #undef  LINK_SPEC
Index: gcc/config/rl78/rl78.md
===================================================================
--- gcc/config/rl78/rl78.md	(revision 222124)
+++ gcc/config/rl78/rl78.md	(working copy)
@@ -288,10 +288,13 @@
 )
 
 (define_expand "mulqi3"
-  [(set (match_operand:QI          0 "register_operand")
-	(mult:QI  (match_operand:QI 1 "general_operand")
-		  (match_operand:QI 2 "nonmemory_operand")))
-   ]
+  [(parallel
+    [(set (match_operand:QI            0 "register_operand")
+	   (mult:QI  (match_operand:QI 1 "general_operand")
+		     (match_operand:QI 2 "nonmemory_operand")))
+      (clobber (reg:HI AX_REG))
+    ])
+  ]
   "" ; mulu supported by all targets
   ""
 )
@@ -302,7 +305,13 @@
 		 (match_operand:HI 2 "nonmemory_operand")))
    ]
   "! RL78_MUL_NONE"
-  ""
+  {
+    if (RL78_MUL_G14)
+      emit_insn (gen_mulhi3_g14 (operands[0], operands[1], operands[2]));
+    else /* RL78_MUL_G13 */
+      emit_insn (gen_mulhi3_g13 (operands[0], operands[1], operands[2]));
+    DONE;
+  }
 )
 
 (define_expand "mulsi3"
@@ -311,7 +320,13 @@
 		 (match_operand:SI 2 "nonmemory_operand")))
    ]
   "! RL78_MUL_NONE"
-  ""
+  {
+    if (RL78_MUL_G14)
+      emit_insn (gen_mulsi3_g14 (operands[0], operands[1], operands[2]));
+    else /* RL78_MUL_G13 */
+      emit_insn (gen_mulsi3_g13 (operands[0], operands[1], operands[2]));
+    DONE;
+  }
 )
 
 (define_insn "*mulqi3_rl78"
@@ -318,7 +333,8 @@
   [(set (match_operand:QI          0 "register_operand" "=&v")
 	(mult:QI (match_operand:QI 1 "general_operand" "viU")
 		 (match_operand:QI 2 "general_operand" "vi")))
-   ]
+   (clobber (reg:HI AX_REG))
+  ]
   "" ; mulu supported by all targets
   "; mulqi macro %0 = %1 * %2
 	mov    a, %h1
@@ -328,31 +344,34 @@
 	mov    a, x
 	mov    %h0, a
 	; end of mulqi macro"
-;;  [(set_attr "valloc" "macax")]
+  [(set_attr "valloc" "macax")]
 )
 
-(define_insn "*mulhi3_rl78"
+(define_insn "mulhi3_g14"
   [(set (match_operand:HI          0 "register_operand" "=&v")
 	(mult:HI (match_operand:HI 1 "general_operand" "viU")
 		 (match_operand:HI 2 "general_operand" "vi")))
-   ]
-  "RL78_MUL_RL78"
-  "; mulhi macro %0 = %1 * %2
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+  ]
+  "RL78_MUL_G14"
+  "; G14 mulhi macro %0 = %1 * %2
 	movw    ax, %h1
 	movw    bc, %h2
 	mulhu   ; bcax = bc * ax
 	movw    %h0, ax
 	; end of mulhi macro"
-;;  [(set_attr "valloc" "macax")]
+  [(set_attr "valloc" "macax")]
 )
 
-(define_insn "*mulhi3_g13"
+(define_insn "mulhi3_g13"
   [(set (match_operand:HI          0 "register_operand" "=&v")
 	(mult:HI (match_operand:HI 1 "general_operand" "viU")
 		 (match_operand:HI 2 "general_operand" "vi")))
-   ]
+   (clobber (reg:HI AX_REG))
+  ]
   "RL78_MUL_G13"
-  "; mulhi macro %0 = %1 * %2
+  "; G13 mulhi macro %0 = %1 * %2
 	mov     a, #0x00
 	mov     !0xf00e8, a     ; MDUC
 	movw    ax, %h1
@@ -363,19 +382,21 @@
 	movw    ax, 0xffff6     ; MDBL
 	movw    %h0, ax
         ; end of mulhi macro"
-;;  [(set_attr "valloc" "umul")]
+  [(set_attr "valloc" "macax")]
 )
 
 ;; 0xFFFF0 is MACR(L).  0xFFFF2 is MACR(H) but we don't care about it
 ;; because we're only using the lower 16 bits (which is the upper 16
 ;; bits of the result).
-(define_insn "mulsi3_rl78"
+(define_insn "mulsi3_g14"
   [(set (match_operand:SI          0 "register_operand" "=&v")
 	(mult:SI (match_operand:SI 1 "general_operand" "viU")
 		 (match_operand:SI 2 "general_operand" "vi")))
-   ]
-  "RL78_MUL_RL78"
-  "; mulsi macro %0 = %1 * %2
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+  ]
+  "RL78_MUL_G14"
+  "; G14 mulsi macro %0 = %1 * %2
 	movw	ax, %h1
 	movw	bc, %h2
 	MULHU	; bcax = bc * ax
@@ -403,9 +424,11 @@
   [(set (match_operand:SI          0 "register_operand" "=&v")
 	(mult:SI (match_operand:SI 1 "general_operand" "viU")
 		 (match_operand:SI 2 "general_operand" "viU")))
-   ]
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+  ]
   "RL78_MUL_G13"
-  "; mulsi macro %0 = %1 * %2
+  "; G13 mulsi macro %0 = %1 * %2
 	mov	a, #0x00
 	mov	!0xf00e8, a	; MDUC
 	movw	ax, %h1
@@ -441,3 +464,236 @@
 	; end of mulsi macro"
   [(set_attr "valloc" "macax")]
 )
+
+(define_expand "udivmodhi4"
+  [(parallel
+    [(set (match_operand:HI          0 "register_operand")
+          (udiv:HI (match_operand:HI 1 "register_operand")
+                   (match_operand:HI 2 "register_operand")))
+     (set (match_operand:HI          3 "register_operand")
+          (umod:HI (match_dup 1) (match_dup 2)))
+     (clobber (reg:HI AX_REG))
+     (clobber (reg:HI DE_REG))
+    ])
+   ]
+  "RL78_MUL_G14"
+  ""
+)
+
+(define_insn "*udivmodhi4_g14"
+  [(set (match_operand:HI          0 "register_operand" "=v")
+	(udiv:HI (match_operand:HI 1 "register_operand" "v")
+		 (match_operand:HI 2 "register_operand" "v")))
+   (set (match_operand:HI          3 "register_operand" "=v")
+	(umod:HI (match_dup 1) (match_dup 2)))
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI DE_REG))
+  ]
+  "RL78_MUL_G14"
+  {
+    if (find_reg_note (insn, REG_UNUSED, operands[3]))
+      return "; G14 udivhi macro %0 = %1 / %2 \n\
+	movw    ax, %h1 \n\
+	movw    de, %h2 \n\
+	push	psw	; Save the current interrupt status \n\
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E \n\
+	divhu   	; ax = ax / de \n\
+	pop	psw	; Restore saved interrupt status \n\
+	movw    %h0, ax \n\
+	; end of udivhi macro";
+    else if (find_reg_note (insn, REG_UNUSED, operands[0]))
+      return "; G14 umodhi macro %3 = %1 %% %2 \n\
+	movw    ax, %h1 \n\
+	movw    de, %h2 \n\
+	push	psw	; Save the current interrupt status \n\
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E \n\
+	divhu   	; de = ax %% de \n\
+	pop	psw	; Restore saved interrupt status \n\
+	movw	ax, de \n\
+	movw    %h3, ax \n\
+	; end of umodhi macro";
+    else
+      return "; G14 udivmodhi macro %0 = %1 / %2 and %3 = %1 %% %2 \n\
+	movw    ax, %h1 \n\
+	movw    de, %h2 \n\
+	push	psw	; Save the current interrupt status \n\
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E \n\
+	divhu   	; ax = ax / de, de = ax %% de \n\
+	pop	psw	; Restore saved interrupt status \n\
+	movw    %h0, ax \n\
+	movw	ax, de \n\
+	movw    %h3, ax \n\
+	; end of udivmodhi macro";
+  }
+  [(set_attr "valloc" "divhi")]
+)
+
+(define_expand "udivmodsi4"
+  [(parallel
+    [(set (match_operand:SI          0 "register_operand")
+          (udiv:SI (match_operand:SI 1 "register_operand")
+                   (match_operand:SI 2 "register_operand")))
+     (set (match_operand:SI          3 "register_operand")
+          (umod:SI (match_dup 1) (match_dup 2)))
+    ])
+   ]
+  "! RL78_MUL_NONE && ! optimize_size"
+  {
+    if (RL78_MUL_G14)
+      emit_insn (gen_udivmodsi4_g14 (operands[0], operands[1], operands[2], operands[3]));
+    else /* RL78_MUL_G13 */
+      emit_insn (gen_udivmodsi4_g13 (operands[0], operands[1], operands[2], operands[3]));
+    DONE;
+  }
+)
+
+(define_insn "udivmodsi4_g14"
+  [(set (match_operand:SI          0 "register_operand" "=v")
+	(udiv:SI (match_operand:SI 1 "register_operand" "v")
+		 (match_operand:SI 2 "register_operand" "v")))
+   (set (match_operand:SI          3 "register_operand" "=v")
+	(umod:SI (match_dup 1) (match_dup 2)))
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+   (clobber (reg:HI DE_REG))
+   (clobber (reg:HI HL_REG))
+  ]
+  "RL78_MUL_G14"
+  {
+    if (find_reg_note (insn, REG_UNUSED, operands[3]))
+      return "; G14 udivsi macro %0 = %1 / %2 \n\
+	movw    ax, %h1 \n\
+	movw    bc, %H1 \n\
+	movw    de, %h2 \n\
+	movw    hl, %H2 \n\
+	push	psw	; Save the current interrupt status \n\
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E \n\
+	divwu   	; bcax = bcax / hlde \n\
+	pop	psw	; Restore saved interrupt status \n\
+	movw    %h0, ax \n\
+	movw	ax, bc \n\
+	movw    %H0, ax \n\
+	; end of udivsi macro";
+    else if (find_reg_note (insn, REG_UNUSED, operands[0]))
+      return "; G14 umodsi macro %3 = %1 %% %2 \n\
+	movw    ax, %h1 \n\
+	movw    bc, %H1 \n\
+	movw    de, %h2 \n\
+	movw    hl, %H2 \n\
+	push	psw	; Save the current interrupt status \n\
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E \n\
+	divwu   	; hlde = bcax %% hlde \n\
+	pop	psw	; Restore saved interrupt status \n\
+	movw	ax, de \n\
+	movw    %h3, ax \n\
+	movw	ax, hl \n\
+	movw    %H3, ax \n\
+	; end of umodsi macro";
+    else
+      return "; G14 udivmodsi macro %0 = %1 / %2 and %3 = %1 %% %2 \n\
+	movw    ax, %h1 \n\
+	movw    bc, %H1 \n\
+	movw    de, %h2 \n\
+	movw    hl, %H2 \n\
+	push	psw	; Save the current interrupt status \n\
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E \n\
+	divwu   	; bcax = bcax / hlde, hlde = bcax %% hlde \n\
+	pop	psw	; Restore saved interrupt status \n\
+	movw    %h0, ax \n\
+	movw	ax, bc \n\
+	movw    %H0, ax \n\
+	movw	ax, de \n\
+	movw    %h3, ax \n\
+	movw	ax, hl \n\
+	movw    %H3, ax \n\
+	; end of udivmodsi macro";
+  }
+  [(set_attr "valloc" "divsi")]
+)
+
+;; Warning: these values match the silicon not the documentation.
+;; 0xFFFF0 is MDAL.  0xFFFF2 is MDAH.
+;; 0xFFFF6 is MDBL.  0xFFFF4 is MDBH.
+;; 0xF00E0 is MDCL.  0xF00E2 is MDCH.
+;; 0xF00E8 is MDUC.
+
+(define_insn "udivmodsi4_g13"
+  [(set (match_operand:SI          0 "register_operand" "=v")
+	(udiv:SI (match_operand:SI 1 "register_operand" "v")
+		 (match_operand:SI 2 "register_operand" "v")))
+   (set (match_operand:SI          3 "register_operand" "=v")
+	(umod:SI (match_dup 1) (match_dup 2)))
+   (clobber (reg:HI AX_REG))
+  ]
+  "RL78_MUL_G13"
+  {
+    if (find_reg_note (insn, REG_UNUSED, operands[3]))
+      return "; G13 udivsi macro %0 = %1 / %2 \n\
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1 \n\
+	mov	!0xf00e8, a	; This preps the peripheral for division without interrupt generation \n\
+	movw	ax, %H1		\n\
+	movw	0xffff2, ax	; MDAH \n\
+	movw	ax, %h1		\n\
+	movw	0xffff0, ax	; MDAL \n\
+	movw	ax, %H2		\n\
+	movw	0xffff4, ax	; MDBH \n\
+	movw	ax, %h2		\n\
+	movw	0xffff6, ax	; MDBL \n\
+	mov	a, #0xC1	; Set the DIVST bit in MDUC \n\
+	mov	!0xf00e8, a	; This starts the division op \n\
+1:	mov	a, !0xf00e8	; Wait 16 clocks or until DIVST is clear \n\
+	bt	a.0, $1b	\n\
+	movw    ax, 0xffff0	; Read the quotient \n\
+	movw	%h0, ax		\n\
+	movw    ax, 0xffff2	\n\
+	movw	%H0, ax		\n\
+	; end of udivsi macro";
+    else if (find_reg_note (insn, REG_UNUSED, operands[0]))
+      return "; G13 umodsi macro %3 = %1 %% %2 \n\
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1 \n\
+	mov	!0xf00e8, a	; This preps the peripheral for division without interrupt generation \n\
+	movw	ax, %H1		\n\
+	movw	0xffff2, ax	; MDAH \n\
+	movw	ax, %h1		\n\
+	movw	0xffff0, ax	; MDAL \n\
+	movw	ax, %H2		\n\
+	movw	0xffff4, ax	; MDBH \n\
+	movw	ax, %h2		\n\
+	movw	0xffff6, ax	; MDBL \n\
+	mov	a, #0xC1	; Set the DIVST bit in MDUC \n\
+	mov	!0xf00e8, a	; This starts the division op \n\
+1:	mov	a, !0xf00e8	; Wait 16 clocks or until DIVST is clear \n\
+	bt	a.0, $1b	\n\
+  	movw	ax, !0xf00e0	; Read the remainder \n\
+	movw	%h3, ax		\n\
+	movw	ax, !0xf00e2	\n\
+	movw	%H3, ax		\n\
+	; end of umodsi macro";
+    else
+      return "; G13 udivmodsi macro %0 = %1 / %2 and %3 = %1 %% %2 \n\
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1 \n\
+	mov	!0xf00e8, a	; This preps the peripheral for division without interrupt generation \n\
+	movw	ax, %H1		\n\
+	movw	0xffff2, ax	; MDAH \n\
+	movw	ax, %h1		\n\
+	movw	0xffff0, ax	; MDAL \n\
+	movw	ax, %H2		\n\
+	movw	0xffff4, ax	; MDBH \n\
+	movw	ax, %h2		\n\
+	movw	0xffff6, ax	; MDBL \n\
+	mov	a, #0xC1	; Set the DIVST bit in MDUC \n\
+	mov	!0xf00e8, a	; This starts the division op \n\
+1:	mov	a, !0xf00e8	; Wait 16 clocks or until DIVST is clear \n\
+	bt	a.0, $1b	\n\
+	movw    ax, 0xffff0	; Read the quotient \n\
+	movw	%h0, ax		\n\
+	movw    ax, 0xffff2	\n\
+	movw	%H0, ax		\n\
+  	movw	ax, !0xf00e0	; Read the remainder \n\
+	movw	%h3, ax		\n\
+	movw	ax, !0xf00e2	\n\
+	movw	%H3, ax		\n\
+	; end of udivmodsi macro";
+      }
+  [(set_attr "valloc" "macax")]
+)
Index: gcc/config/rl78/rl78.opt
===================================================================
--- gcc/config/rl78/rl78.opt	(revision 222124)
+++ gcc/config/rl78/rl78.opt	(working copy)
@@ -27,21 +27,28 @@
 Use the simulator runtime.
 
 mmul=
-Target RejectNegative Joined Var(rl78_mul_type) Report Tolower Enum(rl78_mul_types) Init(MUL_NONE)
-Select hardware or software multiplication support.
+Target RejectNegative Joined Var(rl78_mul_type) Report ToLower Enum(rl78_mul_types) Init(MUL_UNINIT)
+Selects the type of hardware multiplication and division to use (none/g13/g14).
 
 Enum
 Name(rl78_mul_types) Type(enum rl78_mul_types)
 
 EnumValue
 Enum(rl78_mul_types) String(none) Value(MUL_NONE)
 
+; "g10" is accepted as a synonym for "none".
 EnumValue
-Enum(rl78_mul_types) String(rl78) Value(MUL_RL78)
+Enum(rl78_mul_types) String(g10) Value(MUL_NONE)
 
 EnumValue
 Enum(rl78_mul_types) String(g13) Value(MUL_G13)
 
+EnumValue
+Enum(rl78_mul_types) String(g14) Value(MUL_G14)
+
+EnumValue
+Enum(rl78_mul_types) String(rl78) Value(MUL_G14)
+
 mallregs
 Target Mask(ALLREGS) Report Optimization
 Use all registers, reserving none for interrupt handlers.
@@ -50,10 +57,41 @@
 Target Report Optimization
 Enable assembler and linker relaxation.  Enabled by default at -Os.
 
+mcpu=
+Target RejectNegative Joined Var(rl78_cpu_type) Report ToLower Enum(rl78_cpu_types) Init(CPU_UNINIT)
+Selects the type of RL78 core being targeted (g10/g13/g14).  The default is the G14.  If set, also selects the hardware multiply support to be used.
+
+Enum
+Name(rl78_cpu_types) Type(enum rl78_cpu_types)
+
+EnumValue
+Enum(rl78_cpu_types) String(g10) Value(CPU_G10)
+
+EnumValue
+Enum(rl78_cpu_types) String(g13) Value(CPU_G13)
+
+EnumValue
+Enum(rl78_cpu_types) String(g14) Value(CPU_G14)
+
+EnumValue
+Enum(rl78_cpu_types) String(rl78) Value(CPU_G14)
+
 mg10
-Target Mask(G10) Report
-Target the RL78/G10 series
+Target RejectNegative Report Alias(mcpu=, g10)
+Alias for -mcpu=g10
 
+mg13
+Target RejectNegative Report Alias(mcpu=, g13)
+Alias for -mcpu=g13
+
+mg14
+Target RejectNegative Report Alias(mcpu=, g14)
+Alias for -mcpu=g14
+
+mrl78
+Target RejectNegative Report Alias(mcpu=, g14)
+Alias for -mcpu=g14
+
 mes0
 Target Mask(ES0)
 Assume ES is zero throughout program execution, use ES: for read-only data.
Index: gcc/config/rl78/t-rl78
===================================================================
--- gcc/config/rl78/t-rl78	(revision 222124)
+++ gcc/config/rl78/t-rl78	(working copy)
@@ -23,5 +23,7 @@
 
 # Enable multilibs:
 
-MULTILIB_OPTIONS    = mg10
-MULTILIB_DIRNAMES   = g10
+MULTILIB_OPTIONS    = mg10/mg13/mg14
+MULTILIB_DIRNAMES   = g10 g13 g14
+
+MULTILIB_MATCHES    = mg10=mcpu?g10 mg13=mcpu?g13 mg14=mcpu?g14 mg14=mcpu?rl78
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 222124)
+++ gcc/doc/invoke.texi	(working copy)
@@ -870,7 +870,8 @@
 See RS/6000 and PowerPC Options.
 
 @emph{RL78 Options}
-@gccoptlist{-msim -mmul=none -mmul=g13 -mmul=rl78 @gol
+@gccoptlist{-msim -mmul=none -mmul=g13 -mmul=g14 -mallregs @gol
+-mcpu=g10 -mcpu=g13 -mcpu=g14 -mg10 -mg13 -mg14 @gol
 -m64bit-doubles -m32bit-doubles}
 
 @emph{RS/6000 and PowerPC Options}
@@ -18730,15 +18730,73 @@
 simulator.
 
 @item -mmul=none
+@itemx -mmul=g10
 @itemx -mmul=g13
+@itemx -mmul=g14
 @itemx -mmul=rl78
 @opindex mmul
-Specifies the type of hardware multiplication support to be used.  The
-default is @samp{none}, which uses software multiplication functions.
-The @samp{g13} option is for the hardware multiply/divide peripheral
-only on the RL78/G13 targets.  The @samp{rl78} option is for the
-standard hardware multiplication defined in the RL78 software manual.
+Specifies the type of hardware multiplication and division support to
+be used.  The simplest is @code{none}, which uses software for both
+multiplication and division.  This is the default.  The @code{g13}
+value is for the hardware multiply/divide peripheral found on the
+RL78/G13 (S2 core) targets.  The @code{g14} value selects the use of
+the multiplication and division instructions supported by the RL78/G14
+(S3 core) parts.  The value @code{rl78} is an alias for @code{g14} and
+the value @code{g10} is an alias for @code{none}.
 
+In addition a C preprocessor macro is defined, based upon the setting
+of this option.  Possible values are: @code{__RL78_MUL_NONE__},
+@code{__RL78_MUL_G13__} or @code{__RL78_MUL_G14__}.
+
+@item -mcpu=g10
+@itemx -mcpu=g13
+@itemx -mcpu=g14
+@itemx -mcpu=rl78
+@opindex mcpu
+Specifies the RL78 core to target.  The default is the G14 core, also
+known as an S3 core or just RL78.  The G13 or S2 core does not have
+multiply or divide instructions, instead it uses a hardware peripheral
+for these operations.  The G10 or S1 core does not have register
+banks, so it uses a different calling convention.
+
+If this option is set it also selects the type of hardware multiply
+support to use, unless this is overridden by an explicit
+@option{-mmul=none} option on the command line.  Thus specifying
+@option{-mcpu=g13} enables the use of the G13 hardware multiply
+peripheral and specifying @option{-mcpu=g10} disables the use of
+hardware multiplications altogether.
+
+Note, although the RL78/G14 core is the default target, specifying
+@option{-mcpu=g14} or @option{-mcpu=rl78} on the command line does
+change the behaviour of the toolchain since it also enables G14
+hardware multiply support.  If these options are not specified on the
+command line then software multiplication routines will be used even
+though the code targets the RL78 core.  This is for backwards
+compatibility with older toolchains which did not have hardware
+multiply and divide support.
+
+In addition a C preprocessor macro is defined, based upon the setting
+of this option.  Possible values are: @code{__RL78_G10__},
+@code{__RL78_G13__} or @code{__RL78_G14__}.
+
+@item -mg10
+@itemx -mg13
+@itemx -mg14
+@itemx -mrl78
+@opindex mg10
+@opindex mg13
+@opindex mg14
+@opindex mrl78
+These are aliases for the corresponding @option{-mcpu=} option.  They
+are provided for backwards compatibility.
+
+@item -mallregs
+@opindex mallregs
+Allow the compiler to use all of the available registers.  By default
+registers @code{r24..r31} are reserved for use in interrupt handlers.
+With this option enabled these registers can be used in ordinary
+functions as well.
+
 @item -m64bit-doubles
 @itemx -m32bit-doubles
 @opindex m64bit-doubles
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 222124)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -25,6 +25,360 @@
 
 #include "vregs.h"
 
+#if defined __RL78_MUL_G14__
+
+START_FUNC ___divhi3
+	;; r8 = 4[sp] / 6[sp] (signed division)
+
+	;; Test for a negative denominator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	movw	de, ax
+	bc	$__div_neg_den
+
+	;; Test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	bc	$__div_neg_num
+
+	;; Neither are negative - we can use the unsigned divide instruction.
+__div_no_convert:	
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divhu			; ax = ax / de (unsigned)
+	pop	psw		; Restore saved interrupt status
+	
+	movw	r8, ax
+	ret
+
+__div_neg_den:
+	;; Negate the denominator (which is in DE)
+	clrw	ax
+	subw	ax, de
+	movw	de, ax
+	
+	;; Test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7
+	;; If it is not negative then we perform the division and then negate the result.
+	bnc	$__div_then_convert
+
+	;; Otherwise we negate the numerator and then go with an unsigned division.
+	movw	bc, ax
+	clrw	ax
+	subw	ax, bc
+	br	$__div_no_convert
+
+__div_neg_num:
+	;; Negate the numerator (which is in AX)
+	;; We know that the denominator is positive.
+	movw	bc, ax
+	clrw	ax
+	subw	ax, bc
+	
+__div_then_convert:
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divhu			; ax = ax / de (unsigned)
+	pop	psw		; Restore saved interrupt status
+	
+	;; Negate result and transfer into r8
+	movw	bc, ax
+	clrw	ax
+	subw	ax, bc
+	movw	r8, ax
+	ret
+
+END_FUNC ___divhi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___modhi3
+	;; r8 = 4[sp] % 6[sp] (signed modulus; the result takes the sign of the numerator)
+
+	;; Test for a negative denominator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	movw	de, ax
+	bc	$__mod_neg_den
+
+	;; Test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	bc	$__mod_neg_num
+
+	;; Neither are negative - we can use the unsigned divide instruction.
+__mod_no_convert:	
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divhu			; The remainder is read back from DE below
+	pop	psw		; Restore saved interrupt status
+
+	movw	ax, de
+	movw	r8, ax
+	ret
+
+__mod_neg_den:	
+	;; Negate the denominator (which is in DE)
+	clrw	ax
+	subw	ax, de
+	movw	de, ax
+	
+	;; Test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7
+	;; If it is not negative then we perform the modulo operation without conversion.
+	bnc	$__mod_no_convert
+
+	;; Otherwise we negate the numerator and then go with an unsigned modulo operation.
+	movw	bc, ax
+	clrw	ax
+	subw	ax, bc
+	br	$__mod_then_convert
+
+__mod_neg_num:
+	;; Negate the numerator (which is in AX)
+	;; We know that the denominator is positive.
+	movw	bc, ax
+	clrw	ax
+	subw	ax, bc
+	
+__mod_then_convert:
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divhu			; The remainder is read back from DE below
+	pop	psw		; Restore saved interrupt status
+
+	;; Negate the remainder (in DE) and transfer into r8
+	clrw	  ax
+	subw	  ax, de
+	movw	  r8, ax
+	ret
+
+END_FUNC ___modhi3
+
+;----------------------------------------------------------------------
+
+#elif defined __RL78_MUL_G13__
+
+	;; The G13 S2 core does not have a 16 bit divide peripheral.
+	;; So instead we perform a 32-bit divide and twiddle the inputs
+	;; as necessary.
+
+	;; Hardware registers.  Note - these values match the silicon, not the documentation.
+	MDAL = 0xffff0
+	MDAH = 0xffff2
+	MDBL = 0xffff6
+	MDBH = 0xffff4
+	MDCL = 0xf00e0
+	MDCH = 0xf00e2
+	MDUC = 0xf00e8
+
+.macro _Negate src, dest
+	movw	ax, !\src	; Fetch the 16-bit source value
+	movw	bc, ax		; BC is used as a scratch register
+	clrw	ax
+	subw	ax, bc		; ax = 0 - source
+	movw	\dest, ax	; Store the negated value; AX and BC are clobbered
+.endm
+	
+;----------------------------------------------------------------------
+	
+START_FUNC ___divhi3
+	;; r8 = 4[sp] / 6[sp] (signed division)
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	clrw	ax     		; Clear the top 16-bits of the divisor and dividend
+	movw	MDBH, ax
+	movw	MDAH, ax
+	
+	;; Load and test for a negative denominator.
+	movw	ax, [sp+6]
+	movw	MDBL, ax
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	bc	$__div_neg_den
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	movw	MDAL, ax
+	bc	$__div_neg_num
+
+	;; Neither are negative - we can use the unsigned divide hardware.
+__div_no_convert:	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, MDAL	; Read the result
+	movw	r8, ax
+	ret
+
+__div_neg_den:
+	;; Negate the denominator (which is in MDBL)
+	_Negate MDBL MDBL
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7
+	movw	MDAL, ax
+	;; If it is not negative then we perform the division and then negate the result.
+	bnc	$__div_then_convert
+
+	;; Otherwise we negate the numerator and then go with a straightforward unsigned division.
+	_Negate MDAL MDAL
+	br	$!__div_no_convert
+
+__div_neg_num:
+	;; Negate the numerator (which is in MDAL)
+	;; We know that the denominator is positive.
+	_Negate MDAL MDAL
+	
+__div_then_convert:
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+	;; Negate the quotient (in MDAL) and transfer into r8
+	_Negate MDAL r8
+	ret
+
+END_FUNC ___divhi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___modhi3
+	;; r8 = 4[sp] % 6[sp] (signed modulus; the result takes the sign of the numerator)
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	clrw	ax     		; Clear the top 16-bits of the divisor and dividend
+	movw	MDBH, ax
+	movw	MDAH, ax
+	
+	;; Load and test for a negative denominator.
+	movw	ax, [sp+6]
+	movw	MDBL, ax
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	bc	$__mod_neg_den
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	movw	MDAL, ax
+	bc	$__mod_neg_num
+
+	;; Neither are negative - we can use the unsigned divide hardware
+__mod_no_convert:	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, !MDCL	; Read the remainder
+	movw	r8, ax
+	ret
+
+__mod_neg_den:
+	;; Negate the denominator (which is in MDBL)
+	_Negate MDBL MDBL
+	
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+4]
+	mov1	cy, a.7
+	movw	MDAL, ax
+	;; If it is not negative then we perform the modulo operation without conversion.
+	bnc	$__mod_no_convert
+
+	;; Otherwise we negate the numerator and then go with a modulo followed by negation.
+	_Negate MDAL MDAL
+	br	$!__mod_then_convert
+
+__mod_neg_num:
+	;; Negate the numerator (which is in MDAL)
+	;; We know that the denominator is positive.
+	_Negate MDAL MDAL
+	
+__mod_then_convert:
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+	_Negate	MDCL r8		; Negate the remainder (in MDCL) and transfer into r8
+	ret
+
+END_FUNC ___modhi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___udivhi3
+	;; r8 = 4[sp] / 6[sp] (unsigned division)
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	movw	ax, [sp+4]	; Load the dividend (numerator)
+	movw	MDAL, ax
+	movw	ax, [sp+6]	; Load the divisor (denominator)
+	movw	MDBL, ax
+	clrw	ax		; Clear the top 16-bits of the dividend and divisor
+	movw	MDAH, ax
+	movw	MDBH, ax
+	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, !MDAL	; Read the quotient
+	movw	r8, ax
+	ret
+
+END_FUNC   ___udivhi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___umodhi3
+	;; r8 = 4[sp] % 6[sp] (unsigned modulus)
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	movw	ax, [sp+4]	; Load the dividend (numerator)
+	movw	MDAL, ax
+	movw	ax, [sp+6]	; Load the divisor (denominator)
+	movw	MDBL, ax
+	clrw	ax		; Clear the top 16-bits of the dividend and divisor
+	movw	MDAH, ax
+	movw	MDBH, ax
+	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, !MDCL	; Read the remainder
+	movw	r8, ax
+	ret
+	
+END_FUNC   ___umodhi3
+
+;----------------------------------------------------------------------
+	
+#elif defined __RL78_MUL_NONE__
+	
 .macro MAKE_GENERIC  which,need_result
 
 	.if \need_result
@@ -328,3 +682,11 @@
 mod_skip_restore_den:	
 	ret
 END_FUNC ___modhi3
+
+;----------------------------------------------------------------------
+
+#else
+
+#error "Unknown RL78 hardware multiply/divide support"
+
+#endif
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 222124)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -25,6 +25,537 @@
 
 #include "vregs.h"
 
+#if defined __RL78_MUL_G14__
+
+START_FUNC ___divsi3
+	;; r8,r10 = 4[sp],6[sp] / 8[sp],10[sp] (signed division)
+
+	;; Load and test for a negative denominator.
+	movw	ax, [sp+8]
+	movw	de, ax
+	movw	ax, [sp+10]
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	movw	hl, ax
+	bc	$__div_neg_den
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	movw	bc, ax
+	movw	ax, [sp+4]
+	bc	$__div_neg_num
+
+	;; Neither are negative - we can use the unsigned divide instruction.
+__div_no_convert:	
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divwu			; bcax = bcax / hlde
+	pop	psw		; Restore saved interrupt status
+	
+	movw	r8, ax
+	movw	ax, bc
+	movw	r10, ax
+	ret
+
+__div_neg_den:
+	;; Negate the denominator (which is in HLDE)
+	clrw	ax
+	subw	ax, de
+	movw	de, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, hl
+	movw	hl, ax
+	
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7
+	movw	bc, ax
+	movw	ax, [sp+4]
+	;; If it is not negative then we perform the division and then negate the result.
+	bnc	$__div_then_convert
+
+	;; Otherwise we negate the numerator and then go with a straightforward unsigned division.
+	;; The negation is complicated because AX, BC, DE and HL are already in use.
+	;;              ax: numL  bc: numH  r8:       r10:
+	xchw	ax, bc			    
+	;;              ax: numH  bc: numL  r8:       r10:
+	movw	r8, ax			    
+	;;              ax:       bc: numL  r8: numH  r10:
+	clrw	ax			    
+	;;              ax:    0  bc: numL  r8: numH  r10:
+	subw	ax, bc			    
+	;;              ax: -numL bc:       r8: numH  r10:
+	movw	r10, ax			    
+	;;              ax:       bc:       r8: numH  r10: -numL
+	movw	ax, r8			    
+	;;              ax: numH  bc:       r8:       r10: -numL
+	movw	bc, ax			    
+	;;              ax:       bc: numH  r8:       r10: -numL
+	clrw	ax			    
+	;;              ax:    0  bc: numH  r8:       r10: -numL
+	sknc				    
+	decw	ax			    
+	;;              ax: 0/-1  bc: numH  r8:       r10: -numL  (-1 only if the low word borrowed)
+	subw	ax, bc			    
+	;;              ax: -numH bc:       r8:       r10: -numL
+	movw	bc, ax			    
+	;;              ax:       bc: -numH r8:       r10: -numL
+	movw	ax, r10			    
+	;;              ax: -numL bc: -numH r8:       r10:
+	br	$!__div_no_convert
+
+__div_neg_num:
+	;; Negate the numerator (which is in BCAX)
+	;; We know that the denominator is positive.
+	;; Note - we temporarily overwrite DE.  We know that we can safely reload it from the stack.
+	movw	de, ax
+	clrw	ax
+	subw	ax, de
+	movw	de, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, bc
+	movw	bc, ax
+
+	movw	ax, [sp+8]	; Reload the low word of the denominator
+	xchw	ax, de		; de = denominator low word, ax = negated numerator low word
+	
+__div_then_convert:
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divwu			; bcax = bcax / hlde
+	pop	psw		; Restore saved interrupt status
+
+	;; Negate result (in BCAX) and transfer into r8,r10
+	movw	de, ax
+	clrw	ax
+	subw	ax, de
+	movw	r8, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, bc
+	movw	r10, ax
+	ret
+
+END_FUNC ___divsi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___udivsi3
+	;; r8,r10 = 4[sp],6[sp] / 8[sp],10[sp] (unsigned division)
+	;; Used when compiling with -Os specified.
+
+	movw	ax, [sp+10]	; hlde = denominator
+	movw	hl, ax
+	movw	ax, [sp+8]
+	movw	de, ax
+	movw	ax, [sp+6]	; bcax = numerator
+	movw	bc, ax
+	movw    ax, [sp+4]
+	push	psw	; Save the current interrupt status
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E
+	divwu   	; bcax = bcax / hlde
+	pop	psw	; Restore saved interrupt status
+	movw    r8, ax
+	movw	ax, bc
+	movw    r10, ax
+	ret
+
+END_FUNC ___udivsi3
+
+;----------------------------------------------------------------------
+	
+START_FUNC ___modsi3
+	;; r8,r10 = 4[sp],6[sp] % 8[sp],10[sp] (signed modulus; the result takes the sign of the numerator)
+
+	;; Load and test for a negative denominator.
+	movw	ax, [sp+8]
+	movw	de, ax
+	movw	ax, [sp+10]
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	movw	hl, ax
+	bc	$__mod_neg_den
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	movw	bc, ax
+	movw	ax, [sp+4]
+	bc	$__mod_neg_num
+
+	;; Neither are negative - we can use the unsigned divide instruction.
+__mod_no_convert:	
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divwu			; The remainder is read back from HLDE below
+	pop	psw		; Restore saved interrupt status
+
+	movw	ax, de
+	movw	r8, ax
+	movw	ax, hl
+	movw	r10, ax
+	ret
+
+__mod_neg_den:
+	;; Negate the denominator (which is in HLDE)
+	clrw	ax
+	subw	ax, de
+	movw	de, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, hl
+	movw	hl, ax
+	
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7
+	movw	bc, ax
+	movw	ax, [sp+4]
+	;; If it is not negative then we perform the modulo operation without conversion
+	bnc	$__mod_no_convert
+
+	;; Otherwise we negate the numerator and then go with a modulo followed by negation.
+	;; The negation is complicated because AX, BC, DE and HL are already in use.
+	xchw	ax, bc			; ax: numH  bc: numL
+	movw	r8, ax			; r8: numH (r8 is used as scratch)
+	clrw	ax			; ax: 0
+	subw	ax, bc			; ax: -numL
+	movw	r10, ax			; r10: -numL (r10 is used as scratch)
+	movw	ax, r8			; ax: numH
+	movw	bc, ax			; bc: numH
+	clrw	ax			; ax: 0
+	sknc				; Skip the decrement when the low word did not borrow
+	decw	ax			; ax: -1
+	subw	ax, bc			; ax: high word of the negated numerator
+	movw	bc, ax			; bc: high word of the negated numerator
+	movw	ax, r10			; ax: low word of the negated numerator
+	br	$!__mod_then_convert
+
+__mod_neg_num:
+	;; Negate the numerator (which is in BCAX)
+	;; We know that the denominator is positive.
+	;; Note - we temporarily overwrite DE.  We know that we can safely reload it from the stack.
+	movw	de, ax
+	clrw	ax
+	subw	ax, de
+	movw	de, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, bc
+	movw	bc, ax
+
+	movw	ax, [sp+8]	; Reload the low word of the denominator
+	xchw	ax, de		; de = denominator low word, ax = negated numerator low word
+	
+__mod_then_convert:
+	push	psw		; Save the current interrupt status
+	di			; Disable interrupts around the divide
+	divwu			; The remainder is read back from HLDE below
+	pop	psw		; Restore saved interrupt status
+
+	;; Negate result (in HLDE) and transfer into r8,r10
+	clrw	ax
+	subw	ax, de
+	movw	r8, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, hl
+	movw	r10, ax
+	ret
+
+END_FUNC ___modsi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___umodsi3
+	;; r8,r10 = 4[sp],6[sp] % 8[sp],10[sp] (unsigned modulus)
+	;; Used when compiling with -Os specified.
+
+	movw	ax, [sp+10]	; hlde = denominator
+	movw	hl, ax
+	movw	ax, [sp+8]
+	movw	de, ax
+	movw	ax, [sp+6]	; bcax = numerator
+	movw	bc, ax
+	movw    ax, [sp+4]
+	push	psw	; Save the current interrupt status
+	di		; Disable interrupts. See Renesas Technical update TN-RL*-A025B/E
+	divwu   	; hlde = bcax % hlde
+	pop	psw	; Restore saved interrupt status
+	movw	ax, de
+	movw    r8, ax
+	movw	ax, hl
+	movw    r10, ax
+	ret
+
+END_FUNC   ___umodsi3
+
+;----------------------------------------------------------------------
+
+#elif defined __RL78_MUL_G13__
+
+;----------------------------------------------------------------------
+
+	;; Hardware registers.  Note - these values match the silicon, not the documentation.
+	MDAL = 0xffff0
+	MDAH = 0xffff2
+	MDBL = 0xffff6
+	MDBH = 0xffff4
+	MDCL = 0xf00e0
+	MDCH = 0xf00e2
+	MDUC = 0xf00e8
+
+.macro _Negate low, high
+	movw	ax, \low	; Negate the low word first
+	movw	bc, ax		; BC is used as a scratch register
+	clrw	ax
+	subw	ax, bc		; ax = 0 - low word
+	movw	\low, ax	; Write the negated low word back in place
+	movw	ax, \high
+	movw	bc, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, bc		; ax = 0 - borrow - high word
+	movw	\high, ax	; AX still holds the negated high word on exit; BC is clobbered
+.endm
+	
+;----------------------------------------------------------------------
+
+START_FUNC ___divsi3
+	;; r8,r10 = 4[sp],6[sp] / 8[sp],10[sp] (signed division)
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	;; Load and test for a negative denominator.
+	movw	ax, [sp+8]
+	movw	MDBL, ax
+	movw	ax, [sp+10]
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	movw	MDBH, ax
+	bc	$__div_neg_den
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	movw	MDAH, ax
+	movw	ax, [sp+4]
+	movw	MDAL, ax
+	bc	$__div_neg_num
+
+	;; Neither are negative - we can use the unsigned divide hardware.
+__div_no_convert:	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, MDAL	; Read the result
+	movw	r8, ax
+	movw	ax, MDAH
+	movw	r10, ax	
+	ret
+
+__div_neg_den:
+	;; Negate the denominator (which is in MDBL/MDBH)
+	_Negate MDBL MDBH
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7
+	movw	MDAH, ax
+	movw	ax, [sp+4]
+	movw	MDAL, ax
+	;; If it is not negative then we perform the division and then negate the result.
+	bnc	$__div_then_convert
+
+	;; Otherwise we negate the numerator and then go with a straightforward unsigned division.
+	_Negate MDAL MDAH
+	br	$!__div_no_convert
+
+__div_neg_num:
+	;; Negate the numerator (which is in MDAL/MDAH)
+	;; We know that the denominator is positive.
+	_Negate MDAL MDAH
+	
+__div_then_convert:
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+	;; Negate result and transfer into r8,r10
+	_Negate MDAL MDAH    	; FIXME: This could be coded more efficiently.
+	movw	r10, ax		; AX still holds the negated high word left by _Negate
+	movw	ax, MDAL	; Read back the negated low word
+	movw	r8, ax
+
+	ret
+
+END_FUNC ___divsi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___modsi3
+	;; r8,r10 = 4[sp],6[sp] % 8[sp],10[sp] (signed modulus; the result takes the sign of the numerator)
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	;; Load and test for a negative denominator.
+	movw	ax, [sp+8]
+	movw	MDBL, ax
+	movw	ax, [sp+10]
+	mov1	cy, a.7		; CY = sign bit of the denominator
+	movw	MDBH, ax
+	bc	$__mod_neg_den
+
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7		; CY = sign bit of the numerator
+	movw	MDAH, ax
+	movw	ax, [sp+4]
+	movw	MDAL, ax
+	bc	$__mod_neg_num
+
+	;; Neither are negative - we can use the unsigned divide hardware
+__mod_no_convert:	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, !MDCL	; Read the remainder
+	movw	r8, ax
+	movw	ax, !MDCH
+	movw	r10, ax	
+	ret
+
+__mod_neg_den:
+	;; Negate the denominator (which is in MDBL/MDBH)
+	_Negate MDBL MDBH
+	
+	;; Load and test for a negative numerator.
+	movw	ax, [sp+6]
+	mov1	cy, a.7
+	movw	MDAH, ax
+	movw	ax, [sp+4]
+	movw	MDAL, ax
+	;; If it is not negative then we perform the modulo operation without conversion
+	bnc	$__mod_no_convert
+
+	;; Otherwise we negate the numerator and then go with a modulo followed by negation.
+	_Negate MDAL MDAH
+	br	$!__mod_then_convert
+
+__mod_neg_num:
+	;; Negate the numerator (which is in MDAL/MDAH)
+	;; We know that the denominator is positive.
+	_Negate MDAL MDAH
+	
+__mod_then_convert:
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+	movw	ax, !MDCL	; Negate the remainder (in MDCL/MDCH) and transfer into r8,r10
+	movw	bc, ax
+	clrw	ax
+	subw	ax, bc
+	movw	r8, ax
+	movw	ax, !MDCH
+	movw	bc, ax
+	clrw	ax
+	sknc			; Skip the decrement when the low word did not borrow
+	decw	ax
+	subw	ax, bc
+	movw	r10, ax
+	ret
+
+END_FUNC ___modsi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___udivsi3
+	;; r8,r10 = 4[sp],6[sp] / 8[sp],10[sp] (unsigned division)
+	;; Used when compiling with -Os specified.
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	movw	ax, [sp+4]	; Load the dividend (numerator)
+	movw	MDAL, ax
+	movw	ax, [sp+6]
+	movw	MDAH, ax
+	movw	ax, [sp+8]	; Load the divisor (denominator)
+	movw	MDBL, ax
+	movw    ax, [sp+10]
+	movw	MDBH, ax
+	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, !MDAL	; Read the result
+	movw	r8, ax
+	movw	ax, !MDAH
+	movw	r10, ax	
+	ret
+	
+END_FUNC   ___udivsi3
+
+;----------------------------------------------------------------------
+
+START_FUNC ___umodsi3
+	;; r8,r10 = 4[sp],6[sp] % 8[sp],10[sp] (unsigned modulus)
+	;; Used when compiling with -Os specified.
+	;; Note - hardware addresses match the silicon, not the documentation
+
+	mov	a, #0xC0	; Set DIVMODE=1 and MACMODE=1
+	mov	!MDUC, a	; This preps the peripheral for division without interrupt generation
+
+	movw	ax, [sp+4]	; Load the dividend (numerator)
+	movw	MDAL, ax
+	movw	ax, [sp+6]
+	movw	MDAH, ax
+	movw	ax, [sp+8]	; Load the divisor (denominator)
+	movw	MDBL, ax
+	movw    ax, [sp+10]
+	movw	MDBH, ax
+	
+	mov	a, #0xC1	; Set the DIVST bit in MDUC
+	mov	!MDUC, a	; This starts the division op
+
+1:	mov	a, !MDUC	; Wait 16 clocks or until DIVST is clear
+	bt	a.0, $1b
+
+  	movw	ax, !MDCL	; Read the remainder
+	movw	r8, ax
+	movw	ax, !MDCH
+	movw	r10, ax	
+	ret
+	
+END_FUNC   ___umodsi3
+
+;----------------------------------------------------------------------
+
+#elif defined __RL78_MUL_NONE__
+
 .macro MAKE_GENERIC  which,need_result
 
 	.if \need_result
@@ -67,6 +598,8 @@
 	bitB2 = bit+2
 	bitB3 = bit+3
 
+;----------------------------------------------------------------------
+
 START_FUNC __generic_sidivmod\which
 
 num_lt_den\which:
@@ -533,3 +1066,11 @@
  .endif	
 	ret
 END_FUNC ___modsi3
+
+;----------------------------------------------------------------------
+
+#else
+
+#error "Unknown RL78 hardware multiply/divide support"
+
+#endif

RFA: RL78: Add support for G13 and G14 multiply and divide

Reply via email to