Hi OpenBSD/riscv64'ers! After a week of debugging a different issue, I noticed this issue with the L2 page table setup in locore.S:
The physical address of the base of boot memory is held in register s9, and the L2 page table code shifts it right by 21 bits in order to make 2 MiB offsets. However, I have found in my research that the algorithm is a little flawed: it expects s9 to hold a page number, not an address. I wrote this program to understand the algorithm better. I wrote it in C, and it should be an exact duplication of the asm code; please point out if it isn't. Here is the output. I'm attaching the program after it; it's colour coded so you can see it better. As you can see, in the first output there are bits in the PTE beyond PPN[1], in PPN[2], in the L2 page table. In the second output, which ends at the same address, the bits are perfectly aligned in PPN[1].

pjp@polarstern$ ./l2shit | tail
sd 1FB80003(000000000000000000000000011111101110000000000000000011) to 1014FB0
sd 1FC00003(000000000000000000000000011111110000000000000000000011) to 1014FB8
sd 1FC80003(000000000000000000000000011111110010000000000000000011) to 1014FC0
sd 1FD00003(000000000000000000000000011111110100000000000000000011) to 1014FC8
sd 1FD80003(000000000000000000000000011111110110000000000000000011) to 1014FD0
sd 1FE00003(000000000000000000000000011111111000000000000000000011) to 1014FD8
sd 1FE80003(000000000000000000000000011111111010000000000000000011) to 1014FE0
sd 1FF00003(000000000000000000000000011111111100000000000000000011) to 1014FE8
sd 1FF80003(000000000000000000000000011111111110000000000000000011) to 1014FF0
sd 20000003(000000000000000000000000100000000000000000000000000011) to 1014FF8
pjp@polarstern$ ./l2shit pages | tail
sd 0FB00003(000000000000000000000000001111101100000000000000000011) to 1014FB0
sd 0FB80003(000000000000000000000000001111101110000000000000000011) to 1014FB8
sd 0FC00003(000000000000000000000000001111110000000000000000000011) to 1014FC0
sd 0FC80003(000000000000000000000000001111110010000000000000000011) to 1014FC8
sd 0FD00003(000000000000000000000000001111110100000000000000000011) to 1014FD0
sd 
0FD80003(000000000000000000000000001111110110000000000000000011) to 1014FD8 sd 0FE00003(000000000000000000000000001111111000000000000000000011) to 1014FE0 sd 0FE80003(000000000000000000000000001111111010000000000000000011) to 1014FE8 sd 0FF00003(000000000000000000000000001111111100000000000000000011) to 1014FF0 sd 0FF80003(000000000000000000000000001111111110000000000000000011) to 1014FF8 /* 94 lla s1, pagetable_l2 95 srli t4, s9, L2_SHIFT 96 li t2, 512 97 add t3, t4, t2 98 li t0, (PTE_KERN | PTE_X) 99 1: 100 slli t2, t4, PTE_PPN1_S 101 or t5, t0, t2 102 sd t5, (s1) 103 addi s1, s1, PTE_SIZE 104 105 addi t4, t4, 1 106 bltu t4, t3, 1b 107 */ #include <stdio.h> #include <stdlib.h> #include <string.h> #define P_KERN 0x1 /* not real */ #define P_X 0x2 /* not real */ char * binary(ulong t5) { static char ret[1280]; int i = 0; ret[0] = '\0'; for (i = 53; i >= 0; i--) { switch (i) { case (53 - 26): strlcat(ret,"[32m", sizeof(ret)); break; case (53 - 26 - 9): strlcat(ret,"[34m", sizeof(ret)); break; case (53 - 26 - 9 - 9): strlcat(ret,"[35m", sizeof(ret)); break; default: //strlcat(ret,"[0m", sizeof(ret)); break; } if (t5 & (1UL << i)) { strlcat(ret, "1", sizeof(ret)); } else { strlcat(ret, "0", sizeof(ret)); } } return (&ret[0]); } int main(int argc, char *argv[]) { u_long s1 = 0x1014000; /* pagetable l2 */ u_long s9 = 0x40200000 >> ((argc > 1) ? 12 : 0); /* physmem s9 (pages?) */ u_long t4 = s9 >> 21; u_long t2 = 512; u_long t3 = t4 + t2; u_long t0 = (P_KERN | P_X); u_long t5; repeat: t2 = t4 << 19; t5 = t0 | t2; printf("sd %08lX(%s[0m) to %lX\n", t5, binary(t5), s1); s1 += 8; t4 += 1; if (t4 < t3) goto repeat; return 0; } Please look at this document section 4.4.1 figure 4.21 to see the structure of the PTE. https://mainrechner.de/riscv-privileged-20211203.pdf Best Regards, -peter -- Over thirty years experience on Unix-like Operating Systems starting with QNX.