pratlucas created this revision.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.
pratlucas added reviewers: chill, rjmccall, ostannard.
pratlucas added a parent revision: D75169: [ARM] Enforcing calling convention 
for half-precision FP arguments and returns for big-endian AArch32.

Half-precision floating-point arguments and returns were previously coerced
to either float or i32 by clang's codegen, so the CMSE handling of these
values was also performed on clang's side, by zeroing the unused MSBs of the
coerced values.

This patch moves that handling to the backend's calling-convention lowering,
ensuring that the high bits of the registers used by half-precision
arguments and returns are zeroed.
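
For context, a minimal sketch of the source-level patterns affected, adapted
from the clang test this patch deletes (clang/test/CodeGen/cmse-clear-fp16.c);
built with -mcmse, the half value travels in the low 16 bits of s0 (hard-float
ABI) or r0 (soft-float ABI), and the unused upper bits must not leak secure
state across the security boundary:

  __fp16 g0(void);

  // Nonsecure-callable entry point: the upper bits of the register holding
  // the returned half are now cleared by the backend before returning to
  // nonsecure state.
  __attribute__((cmse_nonsecure_entry)) __fp16 f0(void) {
    return g0();
  }

  // Call into nonsecure code: the upper bits of the register holding the
  // half argument are likewise cleared before the transition.
  void __attribute__((cmse_nonsecure_call)) (*g1)(__fp16);
  __fp16 x;
  void f1(void) {
    g1(x);
  }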


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81428

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/cmse-clear-fp16.c
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll

Index: llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
===================================================================
--- llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
+++ llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
@@ -809,3 +809,379 @@
   ret void
 }
 
+define arm_aapcs_vfpcc half @h1(half (half)* nocapture %hptr) #10 {
+; CHECK-8M-LABEL: h1:
+; CHECK-8M:       @ %bb.0:
+; CHECK-8M-NEXT:    push {r7, lr}
+; CHECK-8M-NEXT:    vldr s0, .LCPI11_0
+; CHECK-8M-NEXT:    blx r0
+; CHECK-8M-NEXT:    vmov r0, s0
+; CHECK-8M-NEXT:    uxth r0, r0
+; CHECK-8M-NEXT:    vmov s0, r0
+; CHECK-8M-NEXT:    pop.w {r7, lr}
+; CHECK-8M-NEXT:    mrs r12, control
+; CHECK-8M-NEXT:    tst.w r12, #8
+; CHECK-8M-NEXT:    beq .LBB11_2
+; CHECK-8M-NEXT:  @ %bb.1:
+; CHECK-8M-NEXT:    vmrs r12, fpscr
+; CHECK-8M-NEXT:    vmov s1, lr
+; CHECK-8M-NEXT:    vmov d1, lr, lr
+; CHECK-8M-NEXT:    vmov d2, lr, lr
+; CHECK-8M-NEXT:    vmov d3, lr, lr
+; CHECK-8M-NEXT:    vmov d4, lr, lr
+; CHECK-8M-NEXT:    vmov d5, lr, lr
+; CHECK-8M-NEXT:    vmov d6, lr, lr
+; CHECK-8M-NEXT:    vmov d7, lr, lr
+; CHECK-8M-NEXT:    bic r12, r12, #159
+; CHECK-8M-NEXT:    bic r12, r12, #4026531840
+; CHECK-8M-NEXT:    vmsr fpscr, r12
+; CHECK-8M-NEXT:  .LBB11_2:
+; CHECK-8M-NEXT:    mov r0, lr
+; CHECK-8M-NEXT:    mov r1, lr
+; CHECK-8M-NEXT:    mov r2, lr
+; CHECK-8M-NEXT:    mov r3, lr
+; CHECK-8M-NEXT:    mov r12, lr
+; CHECK-8M-NEXT:    msr apsr_nzcvqg, lr
+; CHECK-8M-NEXT:    bxns lr
+; CHECK-8M-NEXT:    .p2align 2
+; CHECK-8M-NEXT:  @ %bb.3:
+; CHECK-8M-NEXT:  .LCPI11_0:
+; CHECK-8M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+;
+; CHECK-81M-LABEL: h1:
+; CHECK-81M:       @ %bb.0:
+; CHECK-81M-NEXT:    vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT:    push {r7, lr}
+; CHECK-81M-NEXT:    sub sp, #4
+; CHECK-81M-NEXT:    vldr s0, .LCPI11_0
+; CHECK-81M-NEXT:    blx r0
+; CHECK-81M-NEXT:    vmov r0, s0
+; CHECK-81M-NEXT:    uxth r0, r0
+; CHECK-81M-NEXT:    vmov s0, r0
+; CHECK-81M-NEXT:    add sp, #4
+; CHECK-81M-NEXT:    pop.w {r7, lr}
+; CHECK-81M-NEXT:    vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT:    vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT:    clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT:    bxns lr
+; CHECK-81M-NEXT:    .p2align 2
+; CHECK-81M-NEXT:  @ %bb.1:
+; CHECK-81M-NEXT:  .LCPI11_0:
+; CHECK-81M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+  %call = call arm_aapcs_vfpcc half %hptr(half 10.0) #11
+  ret half %call
+}
+
+attributes #10 = { "cmse_nonsecure_entry" nounwind }
+attributes #11 = { nounwind }
+
+define arm_aapcs_vfpcc half @h2(half (half)* nocapture %hptr) #12 {
+; CHECK-8M-LABEL: h2:
+; CHECK-8M:       @ %bb.0: @ %entry
+; CHECK-8M-NEXT:    push {r7, lr}
+; CHECK-8M-NEXT:    vldr s0, .LCPI12_0
+; CHECK-8M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    bic r0, r0, #1
+; CHECK-8M-NEXT:    sub sp, #136
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlstm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    ldr r1, [sp, #64]
+; CHECK-8M-NEXT:    bic r1, r1, #159
+; CHECK-8M-NEXT:    bic r1, r1, #4026531840
+; CHECK-8M-NEXT:    vmsr fpscr, r1
+; CHECK-8M-NEXT:    mov r1, r0
+; CHECK-8M-NEXT:    mov r2, r0
+; CHECK-8M-NEXT:    mov r3, r0
+; CHECK-8M-NEXT:    mov r4, r0
+; CHECK-8M-NEXT:    mov r5, r0
+; CHECK-8M-NEXT:    mov r6, r0
+; CHECK-8M-NEXT:    mov r7, r0
+; CHECK-8M-NEXT:    mov r8, r0
+; CHECK-8M-NEXT:    mov r9, r0
+; CHECK-8M-NEXT:    mov r10, r0
+; CHECK-8M-NEXT:    mov r11, r0
+; CHECK-8M-NEXT:    msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT:    blxns r0
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlldm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    add sp, #136
+; CHECK-8M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    pop {r7, pc}
+; CHECK-8M-NEXT:    .p2align 2
+; CHECK-8M-NEXT:  @ %bb.1:
+; CHECK-8M-NEXT:  .LCPI12_0:
+; CHECK-8M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+;
+; CHECK-81M-LABEL: h2:
+; CHECK-81M:       @ %bb.0: @ %entry
+; CHECK-81M-NEXT:    push {r7, lr}
+; CHECK-81M-NEXT:    vldr s0, .LCPI12_0
+; CHECK-81M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    bic r0, r0, #1
+; CHECK-81M-NEXT:    vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT:    vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT:    clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT:    blxns r0
+; CHECK-81M-NEXT:    vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT:    vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    pop {r7, pc}
+; CHECK-81M-NEXT:    .p2align 2
+; CHECK-81M-NEXT:  @ %bb.1:
+; CHECK-81M-NEXT:  .LCPI12_0:
+; CHECK-81M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+entry:
+  %call = call arm_aapcs_vfpcc half %hptr(half 10.0) #13
+  ret half %call
+}
+
+attributes #12 = { nounwind }
+attributes #13 = { "cmse_nonsecure_call" nounwind }
+
+define arm_aapcs_vfpcc half @h3(half (half)* nocapture %hptr) #14 {
+; CHECK-8M-LABEL: h3:
+; CHECK-8M:       @ %bb.0: @ %entry
+; CHECK-8M-NEXT:    push {r7, lr}
+; CHECK-8M-NEXT:    vldr s0, .LCPI13_0
+; CHECK-8M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    bic r0, r0, #1
+; CHECK-8M-NEXT:    sub sp, #136
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlstm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    ldr r1, [sp, #64]
+; CHECK-8M-NEXT:    bic r1, r1, #159
+; CHECK-8M-NEXT:    bic r1, r1, #4026531840
+; CHECK-8M-NEXT:    vmsr fpscr, r1
+; CHECK-8M-NEXT:    mov r1, r0
+; CHECK-8M-NEXT:    mov r2, r0
+; CHECK-8M-NEXT:    mov r3, r0
+; CHECK-8M-NEXT:    mov r4, r0
+; CHECK-8M-NEXT:    mov r5, r0
+; CHECK-8M-NEXT:    mov r6, r0
+; CHECK-8M-NEXT:    mov r7, r0
+; CHECK-8M-NEXT:    mov r8, r0
+; CHECK-8M-NEXT:    mov r9, r0
+; CHECK-8M-NEXT:    mov r10, r0
+; CHECK-8M-NEXT:    mov r11, r0
+; CHECK-8M-NEXT:    msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT:    blxns r0
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlldm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    add sp, #136
+; CHECK-8M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    pop {r7, pc}
+; CHECK-8M-NEXT:    .p2align 2
+; CHECK-8M-NEXT:  @ %bb.1:
+; CHECK-8M-NEXT:  .LCPI13_0:
+; CHECK-8M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+;
+; CHECK-81M-LABEL: h3:
+; CHECK-81M:       @ %bb.0: @ %entry
+; CHECK-81M-NEXT:    push {r7, lr}
+; CHECK-81M-NEXT:    vldr s0, .LCPI13_0
+; CHECK-81M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    bic r0, r0, #1
+; CHECK-81M-NEXT:    vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT:    vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT:    clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT:    blxns r0
+; CHECK-81M-NEXT:    vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT:    vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    pop {r7, pc}
+; CHECK-81M-NEXT:    .p2align 2
+; CHECK-81M-NEXT:  @ %bb.1:
+; CHECK-81M-NEXT:  .LCPI13_0:
+; CHECK-81M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+entry:
+  %call = tail call arm_aapcs_vfpcc half %hptr(half 10.0) #15
+  ret half %call
+}
+
+attributes #14 = { nounwind }
+attributes #15 = { "cmse_nonsecure_call" nounwind }
+
+define arm_aapcs_vfpcc half @h4(half ()* nocapture %hptr) #16 {
+; CHECK-8M-LABEL: h4:
+; CHECK-8M:       @ %bb.0: @ %entry
+; CHECK-8M-NEXT:    push {r7, lr}
+; CHECK-8M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    bic r0, r0, #1
+; CHECK-8M-NEXT:    sub sp, #136
+; CHECK-8M-NEXT:    vlstm sp
+; CHECK-8M-NEXT:    mov r1, r0
+; CHECK-8M-NEXT:    mov r2, r0
+; CHECK-8M-NEXT:    mov r3, r0
+; CHECK-8M-NEXT:    mov r4, r0
+; CHECK-8M-NEXT:    mov r5, r0
+; CHECK-8M-NEXT:    mov r6, r0
+; CHECK-8M-NEXT:    mov r7, r0
+; CHECK-8M-NEXT:    mov r8, r0
+; CHECK-8M-NEXT:    mov r9, r0
+; CHECK-8M-NEXT:    mov r10, r0
+; CHECK-8M-NEXT:    mov r11, r0
+; CHECK-8M-NEXT:    mov r12, r0
+; CHECK-8M-NEXT:    msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT:    blxns r0
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlldm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    add sp, #136
+; CHECK-8M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    pop {r7, pc}
+;
+; CHECK-81M-LABEL: h4:
+; CHECK-81M:       @ %bb.0: @ %entry
+; CHECK-81M-NEXT:    push {r7, lr}
+; CHECK-81M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    bic r0, r0, #1
+; CHECK-81M-NEXT:    vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT:    vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT:    clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT:    blxns r0
+; CHECK-81M-NEXT:    vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT:    vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    pop {r7, pc}
+entry:
+  %call = call arm_aapcs_vfpcc half %hptr() #17
+  ret half %call
+}
+
+attributes #16 = { nounwind }
+attributes #17 = { "cmse_nonsecure_call" nounwind }
+
+define arm_aapcs_vfpcc half @h1_minsize(half (half)* nocapture %hptr) #18 {
+; CHECK-8M-LABEL: h1_minsize:
+; CHECK-8M:       @ %bb.0: @ %entry
+; CHECK-8M-NEXT:    push {r7, lr}
+; CHECK-8M-NEXT:    vldr s0, .LCPI15_0
+; CHECK-8M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    bic r0, r0, #1
+; CHECK-8M-NEXT:    sub sp, #136
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlstm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    ldr r1, [sp, #64]
+; CHECK-8M-NEXT:    bic r1, r1, #159
+; CHECK-8M-NEXT:    bic r1, r1, #4026531840
+; CHECK-8M-NEXT:    vmsr fpscr, r1
+; CHECK-8M-NEXT:    mov r1, r0
+; CHECK-8M-NEXT:    mov r2, r0
+; CHECK-8M-NEXT:    mov r3, r0
+; CHECK-8M-NEXT:    mov r4, r0
+; CHECK-8M-NEXT:    mov r5, r0
+; CHECK-8M-NEXT:    mov r6, r0
+; CHECK-8M-NEXT:    mov r7, r0
+; CHECK-8M-NEXT:    mov r8, r0
+; CHECK-8M-NEXT:    mov r9, r0
+; CHECK-8M-NEXT:    mov r10, r0
+; CHECK-8M-NEXT:    mov r11, r0
+; CHECK-8M-NEXT:    msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT:    blxns r0
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlldm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    add sp, #136
+; CHECK-8M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    pop {r7, pc}
+; CHECK-8M-NEXT:    .p2align 2
+; CHECK-8M-NEXT:  @ %bb.1:
+; CHECK-8M-NEXT:  .LCPI15_0:
+; CHECK-8M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+;
+; CHECK-81M-LABEL: h1_minsize:
+; CHECK-81M:       @ %bb.0: @ %entry
+; CHECK-81M-NEXT:    push {r7, lr}
+; CHECK-81M-NEXT:    vldr s0, .LCPI15_0
+; CHECK-81M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    bic r0, r0, #1
+; CHECK-81M-NEXT:    vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT:    vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT:    clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT:    blxns r0
+; CHECK-81M-NEXT:    vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT:    vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    pop {r7, pc}
+; CHECK-81M-NEXT:    .p2align 2
+; CHECK-81M-NEXT:  @ %bb.1:
+; CHECK-81M-NEXT:  .LCPI15_0:
+; CHECK-81M-NEXT:    .long 0x00004900 @ float 2.61874657E-41
+entry:
+  %call = call arm_aapcs_vfpcc half %hptr(half 10.0) #19
+  ret half %call
+}
+
+attributes #18 = { "cmse_nonsecure_entry" minsize nounwind }
+attributes #19 = { nounwind }
+
+define arm_aapcs_vfpcc half @h1_arg(half (half)* nocapture %hptr, half %harg) #20 {
+; CHECK-8M-LABEL: h1_arg:
+; CHECK-8M:       @ %bb.0: @ %entry
+; CHECK-8M-NEXT:    push {r7, lr}
+; CHECK-8M-NEXT:    vmov r1, s0
+; CHECK-8M-NEXT:    uxth r1, r1
+; CHECK-8M-NEXT:    vmov s0, r1
+; CHECK-8M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    bic r0, r0, #1
+; CHECK-8M-NEXT:    sub sp, #136
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlstm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    ldr r1, [sp, #64]
+; CHECK-8M-NEXT:    bic r1, r1, #159
+; CHECK-8M-NEXT:    bic r1, r1, #4026531840
+; CHECK-8M-NEXT:    vmsr fpscr, r1
+; CHECK-8M-NEXT:    mov r1, r0
+; CHECK-8M-NEXT:    mov r2, r0
+; CHECK-8M-NEXT:    mov r3, r0
+; CHECK-8M-NEXT:    mov r4, r0
+; CHECK-8M-NEXT:    mov r5, r0
+; CHECK-8M-NEXT:    mov r6, r0
+; CHECK-8M-NEXT:    mov r7, r0
+; CHECK-8M-NEXT:    mov r8, r0
+; CHECK-8M-NEXT:    mov r9, r0
+; CHECK-8M-NEXT:    mov r10, r0
+; CHECK-8M-NEXT:    mov r11, r0
+; CHECK-8M-NEXT:    msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT:    blxns r0
+; CHECK-8M-NEXT:    vmov r12, s0
+; CHECK-8M-NEXT:    vlldm sp
+; CHECK-8M-NEXT:    vmov s0, r12
+; CHECK-8M-NEXT:    add sp, #136
+; CHECK-8M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT:    pop {r7, pc}
+;
+; CHECK-81M-LABEL: h1_arg:
+; CHECK-81M:       @ %bb.0: @ %entry
+; CHECK-81M-NEXT:    push {r7, lr}
+; CHECK-81M-NEXT:    vmov r1, s0
+; CHECK-81M-NEXT:    uxth r1, r1
+; CHECK-81M-NEXT:    vmov s0, r1
+; CHECK-81M-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    bic r0, r0, #1
+; CHECK-81M-NEXT:    vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT:    vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT:    clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT:    blxns r0
+; CHECK-81M-NEXT:    vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT:    vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT:    pop {r7, pc}
+entry:
+  %call = call arm_aapcs_vfpcc half %hptr(half %harg) #21
+  ret half %call
+}
+
+attributes #20 = { nounwind }
+attributes #21 = { "cmse_nonsecure_call" nounwind }
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2264,6 +2264,17 @@
       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
     }
 
+    // Mask f16 arguments if this is a CMSE nonsecure call
+    auto ArgVT = Outs[realArgIdx].ArgVT;
+    if (isCmseNSCall && (ArgVT == MVT::f16)) {
+      auto LocBits = VA.getLocVT().getSizeInBits();
+      auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
+      SDValue Mask = DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
+      Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
+      Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+    }
+
     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
     if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -2922,7 +2933,6 @@
 
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
-    case CCValAssign::FPExt: // Avoid bitcasts as described above
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
       if (!ReturnF16)
@@ -2930,6 +2940,29 @@
       break;
     }
 
+    // Avoid bitcasts as described above, unless it's a CMSE nonsecure entry
+    // f16 arguments have their size extended to 4 bytes and passed as if they
+    // had been copied to the LSBs of a 32-bit register.
+    // For that, we extend it to f32 (hard ABI) or i32 (soft ABI)
+    if (VA.needsCustom() && VA.getValVT() == MVT::f16 && AFI->isCmseNSEntryFunction()) {
+      Arg = DAG.getNode(ISD::BITCAST, dl,
+                        MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl,
+                        MVT::getIntegerVT(VA.getLocVT().getSizeInBits()), Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+    }
+
+    // Mask f16 arguments if this is a CMSE nonsecure entry
+    auto RetVT = Outs[realRVLocIdx].ArgVT;
+    if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
+      auto LocBits = VA.getLocVT().getSizeInBits();
+      auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
+      SDValue Mask = DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
+      Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
+      Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+    }
+
     if (VA.needsCustom() &&
         (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
       if (VA.getLocVT() == MVT::v2f64) {
Index: clang/test/CodeGen/cmse-clear-fp16.c
===================================================================
--- clang/test/CodeGen/cmse-clear-fp16.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// TODO: Move cmse-clear tests of half-precision floating points to backend.
-// XFAIL: *
-// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse  -S -emit-llvm \
-// RUN:            -fallow-half-arguments-and-returns %s -o - | \
-// RUN:    FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-SOFT
-// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse  -S -emit-llvm \
-// RUN:            -fallow-half-arguments-and-returns %s -o - | \
-// RUN:    FileCheck %s --check-prefixes=CHECK,CHECK-OPT-SOFT
-// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse  -S -emit-llvm \
-// RUN:            -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \
-// RUN:    FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-HARD
-// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse  -S -emit-llvm \
-// RUN:            -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \
-// RUN:    FileCheck %s --check-prefixes=CHECK,CHECK-OPT-HARD
-
-__fp16 g0();
-__attribute__((cmse_nonsecure_entry)) __fp16 f0() {
-  return g0();
-}
-// CHECK:           define {{.*}}@f0()
-
-// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32
-// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-SOFT: ret i32 %[[V1]]
-
-// CHECK-OPT-SOFT: %[[V0:.*]] = tail call {{.*}} @g0
-// CHECK-OPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-OPT-SOFT: ret i32 %[[V1]]
-
-// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
-// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
-// CHECK-NOPT-HARD: ret float %[[V2]]
-
-// CHECK-OPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
-// CHECK-OPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-OPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
-// CHECK-OPT-HARD: ret float %[[V2]]
-
-void __attribute__((cmse_nonsecure_call)) (*g1)(__fp16);
-__fp16 x;
-void f1() {
-  g1(x);
-}
-// CHECK: define {{.*}}@f1()
-
-// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32
-// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]])
-
-// CHECK-OPT-SOFT: %[[V1:.*]] = zext i16 {{.*}} to i32
-// CHECK-OPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]])
-
-// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
-// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
-// CHECK-NOPT-HARD: call {{.*}}(float %[[V2]])
-
-// CHECK-OPT-HARD: %[[V0:.*]] = zext i16 {{.*}} to i32
-// CHECK-OPT-HARD: %[[V1:.*]] = bitcast i32 %[[V0]] to float
-// CHECK-OPT-HARD: call {{.*}}(float %[[V1]])
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -3878,7 +3878,6 @@
                                    QualType RTy);
   llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy,
                                    QualType RTy);
-  llvm::Value *EmitCMSEClearFP16(llvm::Value *V);
 
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
                                          unsigned LLVMIntrinsic,
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -3125,20 +3125,6 @@
   return R;
 }
 
-// Emit code to clear the padding bits when returning or passing as an argument
-// a 16-bit floating-point value.
-llvm::Value *CodeGenFunction::EmitCMSEClearFP16(llvm::Value *Src) {
-  llvm::Type *RetTy = Src->getType();
-  assert(RetTy->isFloatTy() ||
-         (RetTy->isIntegerTy() && RetTy->getIntegerBitWidth() == 32));
-  if (RetTy->isFloatTy()) {
-    llvm::Value *T0 = Builder.CreateBitCast(Src, Builder.getIntNTy(32));
-    llvm::Value *T1 = Builder.CreateAnd(T0, 0xffff, "cmse.clear");
-    return Builder.CreateBitCast(T1, RetTy);
-  }
-  return Builder.CreateAnd(Src, 0xffff, "cmse.clear");
-}
-
 void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
                                          bool EmitRetDbgLoc,
                                          SourceLocation EndLoc) {
@@ -3308,18 +3294,10 @@
     if (CurFuncDecl && CurFuncDecl->hasAttr<CmseNSEntryAttr>()) {
       // For certain return types, clear padding bits, as they may reveal
       // sensitive information.
-      const Type *RTy = RetTy.getCanonicalType().getTypePtr();
-      // TODO: Move CMSE handling of half-precision floating points to backend
-      if ((RTy->isFloat16Type() || RTy->isHalfType()) && !RV->getType()->isHalfTy()) {
-        // 16-bit floating-point types are passed in a 32-bit integer or float,
-        // with unspecified upper bits.
-        RV = EmitCMSEClearFP16(RV);
-      } else {
-        // Small struct/union types are passed as integers.
-        auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
-        if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
-          RV = EmitCMSEClearRecord(RV, ITy, RetTy);
-      }
+      // Small struct/union types are passed as integers.
+      auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
+      if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
+        RV = EmitCMSEClearRecord(RV, ITy, RetTy);
     }
     EmitReturnValueCheck(RV);
     Ret = Builder.CreateRet(RV);
@@ -4622,18 +4600,10 @@
         if (CallInfo.isCmseNSCall()) {
           // For certain parameter types, clear padding bits, as they may reveal
           // sensitive information.
-          const Type *PTy = I->Ty.getCanonicalType().getTypePtr();
-          // 16-bit floating-point types are passed in a 32-bit integer or
-          // float, with unspecified upper bits.
-          // TODO: Move CMSE handling of half-precision floating points to backend
-          if ((PTy->isFloat16Type() || PTy->isHalfType()) && !Load->getType()->isHalfTy()) {
-            Load = EmitCMSEClearFP16(Load);
-          } else {
-            // Small struct/union types are passed as integer arrays.
-            auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
-            if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
-              Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
-          }
+          // Small struct/union types are passed as integer arrays.
+          auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
+          if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
+            Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
         }
         IRCallArgs[FirstIRArg] = Load;
       }