Issue |
96595
|
Summary |
[RISC-V] Add some orc.b optimizations
|
Labels |
new issue
|
Assignees |
|
Reporter |
Validark
|
[Godbolt link](https://godbolt.org/z/oh87najaG)
The following functions can all use, or should be directly translated to, the orc.b instruction:
```zig
// If the lowest bit of each byte is 1, set that byte to 255, else 0.
// Should emit constant-load+and+orc.b
export fn orc_b_opportunity1(x: u64) u64 {
return (x & 0x0101010101010101) *% 0xFF;
}
// If the lowest bit of each byte is 1, set that byte to 255, else 0.
// Should emit constant-load+and+orc.b
export fn orc_b_opportunity1_fixed(x: u64) u64 {
return orc_b(x & 0x0101010101010101);
}
// If the highest bit of each byte is 1, set that byte to 255, else 0.
// Should emit constant-load+and+orc.b
export fn orc_b_opportunity2(x: u64) u64 {
return ((x & 0x8080808080808080) >> 7) *% 0xFF;
}
// If the highest bit of each byte is 1, set that byte to 255, else 0.
// Should emit constant-load+and+orc.b
export fn orc_b_opportunity3(x: u64) u64 {
const y = x & 0x8080808080808080;
return (y << 1) -% (y >> 7);
}
// If the highest bit of each byte is 1, set that byte to 255, else 0.
// Should emit constant-load+and+orc.b
export fn orc_b_opportunity4(x: u64) u64 {
return ((x >> 7) & 0x0101010101010101) *% 0xFF;
}
// If the highest bit of each byte is 1, set that byte to 255, else 0.
export fn orc_b_opportunity2_and_3_and_4_fixed(x: u64) u64 {
return orc_b(x & 0x8080808080808080);
}
// If any byte is non-zero, set that byte to 255 (all ones), else 0.
// This should just emit the `orc.b` instruction.
export fn orc_b_opportunity5(x: u64) u64 {
const c1: u64 = @bitCast([_]u8{0x7F} ** 8);
return orc_b_opportunity2(((x & c1) +% c1) | x);
}
// If any byte is non-zero, set that byte to 255 (all ones), else 0.
// This should just emit the `orc.b` instruction.
export fn orc_b_opportunity6(x: u64) u64 {
const c1: u64 = @bitCast([_]u8{0x7F} ** 8);
return orc_b_opportunity3(((x & c1) +% c1) | x);
}
// If any byte is non-zero, set that byte to 255 (all ones), else 0.
// This should just emit the `orc.b` instruction.
export fn orc_b_opportunity7(x: u64) u64 {
const c1: u64 = @bitCast([_]u8{0x7F} ** 8);
return orc_b_opportunity4(((x & c1) +% c1) | x);
}
export fn orc_b_opportunity5_and_6_and_7_fixed(x: u64) u64 {
return orc_b(x);
}
fn orc_b(x: u64) u64 {
return struct {
extern fn @"llvm.riscv.orc.b.i64"(u64) u64;
}.@"llvm.riscv.orc.b.i64"(x);
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs