Il 11/08/23 10:34, Damien Zammit ha scritto:
> diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index c00896e8..df086370 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -39,12 +39,30 @@
#define CX(addr, reg) addr(,reg,8)
#endif
-#define CPU_NUMBER(reg) \
+#define CPU_NUMBER_NO_STACK(reg) \
movl %cs:lapic, reg ;\
movl %cs:APIC_ID(reg), reg ;\
shrl $24, reg ;\
movl %cs:CX(cpu_id_lut, reg), reg ;\
+/* Never call CPU_NUMBER(%esi) */
+#define CPU_NUMBER(reg) \
+ pushl %esi ;\
+ pushl %eax ;\
+ pushl %ebx ;\
+ pushl %ecx ;\
+ pushl %edx ;\
+ movl $1, %eax ;\
+ cpuid ;\
+ shrl $24, %ebx ;\
+ movl %cs:CX(cpu_id_lut, %ebx), %esi ;\
+ popl %edx ;\
+ popl %ecx ;\
+ popl %ebx ;\
+ popl %eax ;\
+ movl %esi, reg ;\
+ popl %esi ;\
+
How much faster is this? Did you also measure it on real hardware? I would
use RDTSC to measure it; I don't know if there are better ways.
Interestingly, Linux uses either LSL or RDPID to obtain the same value;
I wonder whether either of them could be an easy alternative.
Also, could it be that there is a particular use of it that is much more
frequent than the others, and that might benefit from further optimizations?
Luca