On 11/5/22 06:26, Taylor Simpson wrote:
+static void gen_set_usr_field(int field, TCGv val)
+{
+ tcg_gen_deposit_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR],
+ val,
+ reg_field_info[field].offset,
+ reg_field_info[field].width);
+}
If you plan to use this for something else, fine, but since you're never clearing USR_OVF,
it would be better to use tcg_gen_ori_tl.
+static void gen_sat_i64(TCGv_i64 dst, TCGv_i64 src, uint32_t bits)
+{
+ TCGLabel *label = gen_new_label();
+
+ tcg_gen_sextract_i64(dst, src, 0, bits);
+ tcg_gen_brcond_i64(TCG_COND_EQ, dst, src, label);
+ {
+ TCGv_i64 min = tcg_constant_i64(-(1LL << (bits - 1)));
+ TCGv_i64 max = tcg_constant_i64((1LL << (bits - 1)) - 1);
+ tcg_gen_movcond_i64(TCG_COND_LT, dst, src, tcg_constant_i64(0),
+ min, max);
+ gen_set_usr_fieldi(USR_OVF, 1);
+ }
+ gen_set_label(label);
+}
This could be done branchless:
tcg_gen_smax_i64(dst, src, min);
tcg_gen_smin_i64(dst, dst, max);
tcg_gen_setcond_i64(TCG_COND_NE, o_64, dst, src);
tcg_gen_trunc_i64_tl(o_tl, o_64);
tcg_gen_shli_tl(o_tl, o_tl, reg_field_info[USR_OVF].offset);
tcg_gen_or_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR], o_tl);
+static void gen_satval(TCGv_i64 dest, TCGv_i64 source, uint32_t bits)
+{
+ TCGv_i64 min = tcg_constant_i64(-(1LL << (bits - 1)));
+ TCGv_i64 max = tcg_constant_i64((1LL << (bits - 1)) - 1);
+
+ gen_set_usr_fieldi(USR_OVF, 1);
+ tcg_gen_movcond_i64(TCG_COND_LT, dest, source, tcg_constant_i64(0),
+ min, max);
+}
Likewise.
+/* Shift left with saturation */
+static void gen_shl_sat(TCGv RdV, TCGv RsV, TCGv shift_amt)
+{
+ /*
+ * int64_t A = (fCAST4_8s(RsV) << shift_amt;
+ * if (((int32_t)((fSAT(A)) ^ ((int32_t)(RsV)))) < 0) {
+ * RdV = fSATVALN(32, ((int32_t)(RsV)))
+ * } else if (((RsV) > 0) && ((A) == 0)) {
+ * RdV = fSATVALN(32, (RsV));
+ * } else {
+ * RdV = fSAT(A);
+ * }
+ */
A little bit tricky to follow, but is this overflow condition meant to be the same as
x_32 = rsv << shift_amt;
y_32 = x_32 >> shift_amt;
overflow = x_32 != y_32;
i.e. overflow if we've lost bits that are not copies of the sign bit (and thus restored by
the arithmetic right shift)?
+ tcg_gen_ext_i32_i64(RsV_i64, RsV);
+ tcg_gen_ext_i32_i64(shift_amt_i64, shift_amt);
+ tcg_gen_shl_i64(A, RsV_i64, shift_amt_i64);
+
+ /* Check for saturation */
+ gen_sat_i64(A_sat_i64, A, 32);
Setting USR_OVF here...
+ tcg_gen_extrl_i64_i32(A_sat, A_sat_i64);
+ tcg_gen_xor_tl(tmp, A_sat, RsV);
+ tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, label1);
+ gen_satval(RdV_i64, RsV_i64, 32);
... and also here. Is this also computing the same saturation value that you
did above?
+ tcg_gen_extrl_i64_i32(RdV, RdV_i64);
+ tcg_gen_br(done);
+
+ gen_set_label(label1);
+ tcg_gen_brcondi_tl(TCG_COND_LE, RsV, 0, label2);
+ tcg_gen_brcondi_i64(TCG_COND_NE, A, 0, label2);
+ gen_satval(RdV_i64, RsV_i64, 32);
and again?
+ tcg_gen_extrl_i64_i32(RdV, RdV_i64);
+ tcg_gen_br(done);
+
+ gen_set_label(label2);
+ tcg_gen_mov_tl(RdV, A_sat);
+
+ gen_set_label(done);
+
+ tcg_temp_free_i64(RsV_i64);
+ tcg_temp_free_i64(shift_amt_i64);
+ tcg_temp_free_i64(A);
+ tcg_temp_free_i64(A_sat_i64);
+ tcg_temp_free(A_sat);
+ tcg_temp_free_i64(RdV_i64);
+ tcg_temp_free(tmp);
+}
+
+static void gen_sar(TCGv RdV, TCGv RsV, TCGv shift_amt)
+{
+ /*
+ * if (shift_amt < 32) {
+ * RdV = sar(RsV, shift_amt);
+ * } else {
+ * if (RsV > 0) {
+ * RdV = 0;
+ * } else {
+ * RdV = ~0;
+ * }
+ * }
+ */
+ TCGLabel *shift_ge_32 = gen_new_label();
+ TCGLabel *done = gen_new_label();
+
+ tcg_gen_brcondi_tl(TCG_COND_GE, shift_amt, 32, shift_ge_32);
+ tcg_gen_sar_tl(RdV, RsV, shift_amt);
+ tcg_gen_br(done);
+
+ gen_set_label(shift_ge_32);
+ tcg_gen_movcond_tl(TCG_COND_LT, RdV, RsV, tcg_constant_tl(0),
+ tcg_constant_tl(~0), tcg_constant_tl(0));
This is better done as
shift = min(shift, 31);
rdv = rsv >> shift;
without branches.
r~