On Thu, Jun 1, 2017 at 3:22 PM, Francisco Jerez <curroje...@riseup.net> wrote: > Anuj Phogat <anuj.pho...@gmail.com> writes: > >> This new field helps simplify l3 way size computations >> in next patch. >> >> Suggested-by: Francisco Jerez <curroje...@riseup.net> >> Signed-off-by: Anuj Phogat <anuj.pho...@gmail.com> >> Cc: Francisco Jerez <curroje...@riseup.net> >> --- >> src/intel/common/gen_device_info.c | 23 ++++++++++++++++++++++- >> src/intel/common/gen_device_info.h | 1 + >> 2 files changed, 23 insertions(+), 1 deletion(-) >> >> diff --git a/src/intel/common/gen_device_info.c >> b/src/intel/common/gen_device_info.c >> index 209b293..7c1f9b4 100644 >> --- a/src/intel/common/gen_device_info.c >> +++ b/src/intel/common/gen_device_info.c >> @@ -132,6 +132,7 @@ static const struct gen_device_info >> gen_device_info_snb_gt2 = { >> static const struct gen_device_info gen_device_info_ivb_gt1 = { >> GEN7_FEATURES, .is_ivybridge = true, .gt = 1, >> .num_slices = 1, >> + .l3_banks = 2, >> .max_vs_threads = 36, >> .max_tcs_threads = 36, >> .max_tes_threads = 36, >> @@ -156,6 +157,7 @@ static const struct gen_device_info >> gen_device_info_ivb_gt1 = { >> static const struct gen_device_info gen_device_info_ivb_gt2 = { >> GEN7_FEATURES, .is_ivybridge = true, .gt = 2, >> .num_slices = 1, >> + .l3_banks = 4, >> .max_vs_threads = 128, >> .max_tcs_threads = 128, >> .max_tes_threads = 128, >> @@ -180,6 +182,7 @@ static const struct gen_device_info >> gen_device_info_ivb_gt2 = { >> static const struct gen_device_info gen_device_info_byt = { >> GEN7_FEATURES, .is_baytrail = true, .gt = 1, >> .num_slices = 1, >> + .l3_banks = 1, >> .has_llc = false, >> .max_vs_threads = 36, >> .max_tcs_threads = 36, >> @@ -211,6 +214,7 @@ static const struct gen_device_info gen_device_info_byt >> = { >> static const struct gen_device_info gen_device_info_hsw_gt1 = { >> HSW_FEATURES, .gt = 1, >> .num_slices = 1, >> + .l3_banks = 2, >> .max_vs_threads = 70, >> .max_tcs_threads = 70, >> 
.max_tes_threads = 70, >> @@ -235,6 +239,7 @@ static const struct gen_device_info >> gen_device_info_hsw_gt1 = { >> static const struct gen_device_info gen_device_info_hsw_gt2 = { >> HSW_FEATURES, .gt = 2, >> .num_slices = 1, >> + .l3_banks = 4, >> .max_vs_threads = 280, >> .max_tcs_threads = 256, >> .max_tes_threads = 280, >> @@ -259,6 +264,7 @@ static const struct gen_device_info >> gen_device_info_hsw_gt2 = { >> static const struct gen_device_info gen_device_info_hsw_gt3 = { >> HSW_FEATURES, .gt = 3, >> .num_slices = 2, >> + .l3_banks = 8, >> .max_vs_threads = 280, >> .max_tcs_threads = 256, >> .max_tes_threads = 280, >> @@ -299,6 +305,7 @@ static const struct gen_device_info >> gen_device_info_hsw_gt3 = { >> static const struct gen_device_info gen_device_info_bdw_gt1 = { >> GEN8_FEATURES, .gt = 1, >> .num_slices = 1, >> + .l3_banks = 2, >> .max_cs_threads = 42, >> .urb = { >> .size = 192, >> @@ -318,6 +325,7 @@ static const struct gen_device_info >> gen_device_info_bdw_gt1 = { >> static const struct gen_device_info gen_device_info_bdw_gt2 = { >> GEN8_FEATURES, .gt = 2, >> .num_slices = 1, >> + .l3_banks = 4, >> .max_cs_threads = 56, >> .urb = { >> .size = 384, >> @@ -337,6 +345,7 @@ static const struct gen_device_info >> gen_device_info_bdw_gt2 = { >> static const struct gen_device_info gen_device_info_bdw_gt3 = { >> GEN8_FEATURES, .gt = 3, >> .num_slices = 2, >> + .l3_banks = 8, >> .max_cs_threads = 56, >> .urb = { >> .size = 384, >> @@ -357,6 +366,7 @@ static const struct gen_device_info gen_device_info_chv >> = { >> GEN8_FEATURES, .is_cherryview = 1, .gt = 1, >> .has_llc = false, >> .num_slices = 1, >> + .l3_banks = 2, >> .max_vs_threads = 80, >> .max_tcs_threads = 80, >> .max_tes_threads = 80, >> @@ -413,6 +423,7 @@ static const struct gen_device_info gen_device_info_chv >> = { >> .gt = 1, \ >> .has_llc = false, \ >> .num_slices = 1, \ >> + .l3_banks = 2, \ > > I don't think it makes sense to put an L3 bank count default into this > macro that's almost 
guaranteed to be inaccurate since every GT > configuration is different, the default of zero should make it more > obvious that something is missing. > I added it here because the macro is used only for Broxton. I'll move the initialization to gen_device_info_bxt.
>> .max_vs_threads = 112, \ >> .max_tcs_threads = 112, \ >> .max_tes_threads = 112, \ >> @@ -457,22 +468,26 @@ static const struct gen_device_info >> gen_device_info_chv = { >> static const struct gen_device_info gen_device_info_skl_gt1 = { >> GEN9_FEATURES, .gt = 1, >> .num_slices = 1, >> + .l3_banks = 2, >> .urb.size = 192, >> }; >> >> static const struct gen_device_info gen_device_info_skl_gt2 = { >> GEN9_FEATURES, .gt = 2, >> .num_slices = 1, >> + .l3_banks = 4, >> }; >> >> static const struct gen_device_info gen_device_info_skl_gt3 = { >> GEN9_FEATURES, .gt = 3, >> .num_slices = 2, >> + .l3_banks = 8, >> }; >> >> static const struct gen_device_info gen_device_info_skl_gt4 = { >> GEN9_FEATURES, .gt = 4, >> .num_slices = 3, >> + .l3_banks = 12, >> /* From the "L3 Allocation and Programming" documentation: >> * >> * "URB is limited to 1008KB due to programming restrictions. This is >> not a >> @@ -489,7 +504,8 @@ static const struct gen_device_info gen_device_info_bxt >> = { >> }; >> > > Aren't you missing an update to the gen_device_info_bxt struct? > No. It's initialized to 2 by default in GEN9_LP_FEATURES. I'm now moving the initialization to gen_device_info_bxt struct. >> static const struct gen_device_info gen_device_info_bxt_2x6 = { >> - GEN9_LP_FEATURES_2X6 >> + GEN9_LP_FEATURES_2X6, >> + .l3_banks = 1, >> }; >> /* >> * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. 
>> @@ -504,6 +520,7 @@ static const struct gen_device_info >> gen_device_info_kbl_gt1 = { >> .max_cs_threads = 7 * 6, >> .urb.size = 192, >> .num_slices = 1, >> + .l3_banks = 2, >> }; >> >> static const struct gen_device_info gen_device_info_kbl_gt1_5 = { >> @@ -513,6 +530,7 @@ static const struct gen_device_info >> gen_device_info_kbl_gt1_5 = { >> >> .max_cs_threads = 7 * 6, >> .num_slices = 1, >> + .l3_banks = 4, >> }; >> >> static const struct gen_device_info gen_device_info_kbl_gt2 = { >> @@ -521,6 +539,7 @@ static const struct gen_device_info >> gen_device_info_kbl_gt2 = { >> .gt = 2, >> >> .num_slices = 1, >> + .l3_banks = 4, >> }; >> >> static const struct gen_device_info gen_device_info_kbl_gt3 = { >> @@ -529,6 +548,7 @@ static const struct gen_device_info >> gen_device_info_kbl_gt3 = { >> .gt = 3, >> >> .num_slices = 2, >> + .l3_banks = 8, >> }; >> >> static const struct gen_device_info gen_device_info_kbl_gt4 = { >> @@ -548,6 +568,7 @@ static const struct gen_device_info >> gen_device_info_kbl_gt4 = { >> */ >> .urb.size = 1008 / 3, >> .num_slices = 3, >> + .l3_banks = 12, >> }; >> >> static const struct gen_device_info gen_device_info_glk = { >> diff --git a/src/intel/common/gen_device_info.h >> b/src/intel/common/gen_device_info.h >> index 80676d0..6207630 100644 >> --- a/src/intel/common/gen_device_info.h >> +++ b/src/intel/common/gen_device_info.h >> @@ -96,6 +96,7 @@ struct gen_device_info >> * to change, so we program @max_cs_threads as the lower maximum. >> */ >> unsigned num_slices; >> + unsigned l3_banks; >> unsigned max_vs_threads; /**< Maximum Vertex Shader threads */ >> unsigned max_tcs_threads; /**< Maximum Hull Shader threads */ >> unsigned max_tes_threads; /**< Maximum Domain Shader threads */ >> -- >> 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev