http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50281
/*
 * --- Comment #5 from NickParker at Eaton dot com 2011-09-03 01:32:20 UTC ---
 * Sorry, I pasted a broken version before. The code below works.
 */

/**
 * Multiply two unsigned 24-bit values and return the upper 24 bits of the
 * 48-bit product, i.e. (a * b) >> 24 computed over the low three bytes of
 * each argument.
 *
 * @param a_u4  first factor; only bits 0..23 take part in the product
 * @param b_u4  second factor; only bits 0..23 take part in the product
 * @return bits 24..47 of the product in bits 0..23; bits 24..31 are zero
 */
uint32_t MulU3U3S3(uint32_t a_u4, uint32_t b_u4)
{
    uint32_t answer;

#if defined(__AVR__)
    /*
     * Schoolbook byte-by-byte multiply. The 48-bit product is accumulated
     * in r2 (least significant) .. r7 (most significant); r10 holds zero so
     * that "adc rN,r10" just folds the carry into the next byte up.
     * __asm__ is used instead of asm so this also builds with -std=c99/c11.
     */
    __asm__ __volatile__ (
        "push r0"           "\n\t"  /* mul writes its result to r1:r0 */
        "push r1"           "\n\t"
        "push r10"          "\n\t"
        "clr  r10"          "\n\t"  /* zero register */
        /* The upper accumulator bytes are only ever added to, so they
         * must start at zero. */
        "clr  r4"           "\n\t"
        "clr  r5"           "\n\t"
        "clr  r6"           "\n\t"
        "clr  r7"           "\n\t"
        /* 0 byte shifts */
        "mul  %A1,%A2"      "\n\t"
        "mov  r2,r0"        "\n\t"
        "mov  r3,r1"        "\n\t"
        /* 1 byte shifts */
        "mul  %A1,%B2"      "\n\t"
        "add  r3,r0"        "\n\t"
        "adc  r4,r1"        "\n\t"
        "adc  r5,r10"       "\n\t"
        "mul  %A2,%B1"      "\n\t"
        "add  r3,r0"        "\n\t"
        "adc  r4,r1"        "\n\t"
        "adc  r5,r10"       "\n\t"
        /* 2 byte shifts */
        "mul  %A1,%C2"      "\n\t"
        "add  r4,r0"        "\n\t"
        "adc  r5,r1"        "\n\t"
        "adc  r6,r10"       "\n\t"
        "mul  %A2,%C1"      "\n\t"
        "add  r4,r0"        "\n\t"
        "adc  r5,r1"        "\n\t"
        "adc  r6,r10"       "\n\t"
        "mul  %B2,%B1"      "\n\t"
        "add  r4,r0"        "\n\t"
        "adc  r5,r1"        "\n\t"
        "adc  r6,r10"       "\n\t"
        /* 3 byte shifts */
        "mul  %B1,%C2"      "\n\t"
        "add  r5,r0"        "\n\t"
        "adc  r6,r1"        "\n\t"
        "adc  r7,r10"       "\n\t"
        "mul  %B2,%C1"      "\n\t"
        "add  r5,r0"        "\n\t"
        "adc  r6,r1"        "\n\t"
        "adc  r7,r10"       "\n\t"
        /* 4 byte shifts */
        "mul  %C2,%C1"      "\n\t"
        "add  r6,r0"        "\n\t"
        "adc  r7,r1"        "\n\t"
        /* Keep product bytes 3..5; the top result byte is always zero. */
        "mov  %A0,r5"       "\n\t"
        "mov  %B0,r6"       "\n\t"
        "mov  %C0,r7"       "\n\t"
        "clr  %D0"          "\n\t"
        "pop  r10"          "\n\t"
        "pop  r1"           "\n\t"
        "pop  r0"           "\n\t"
        : "=&r" (answer)
        : "r" (a_u4), "r" (b_u4)
        : "r2", "r3", "r4", "r5", "r6", "r7", "r10"
    );
#else
    /* Portable reference implementation of the same arithmetic. */
    const uint64_t product =
        (uint64_t)(a_u4 & 0xFFFFFFu) * (uint64_t)(b_u4 & 0xFFFFFFu);

    answer = (uint32_t)(product >> 24);
#endif

    return answer;
}