Hello Srikar,

Thanks for taking a look at the patch.
On Mon, Dec 07, 2020 at 05:40:42PM +0530, Srikar Dronamraju wrote:
> * Gautham R. Shenoy <e...@linux.vnet.ibm.com> [2020-12-04 10:18:45]:
> 
> > From: "Gautham R. Shenoy" <e...@linux.vnet.ibm.com>
> 
> <snipped>
> 
> >  static int parse_thread_groups(struct device_node *dn,
> > -			       struct thread_groups *tg,
> > -			       unsigned int property)
> > +			       struct thread_groups_list *tglp)
> >  {
> > -	int i;
> > -	u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> > +	int i = 0;
> > +	u32 *thread_group_array;
> >  	u32 *thread_list;
> >  	size_t total_threads;
> > -	int ret;
> > +	int ret = 0, count;
> > +	unsigned int property_idx = 0;
> 
> NIT:
> tglx mentions in one of his recent comments to try keep a reverse fir tree
> ordering of variables where possible.

I suppose you mean moving the longer local variable declarations to the
top and the shorter ones to the bottom. Thanks, will fix this.

> > 
> > +	count = of_property_count_u32_elems(dn, "ibm,thread-groups");
> > +	thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
> >  	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > -					 thread_group_array, 3);
> > +					 thread_group_array, count);
> >  	if (ret)
> > -		return ret;
> > -
> > -	tg->property = thread_group_array[0];
> > -	tg->nr_groups = thread_group_array[1];
> > -	tg->threads_per_group = thread_group_array[2];
> > -	if (tg->property != property ||
> > -	    tg->nr_groups < 1 ||
> > -	    tg->threads_per_group < 1)
> > -		return -ENODATA;
> > +		goto out_free;
> > 
> > -	total_threads = tg->nr_groups * tg->threads_per_group;
> > +	while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
> > +		int j;
> > +		struct thread_groups *tg = &tglp->property_tgs[property_idx++];
> 
> NIT: same as above.

Ok.

> > 
> > -	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > -					 thread_group_array,
> > -					 3 + total_threads);
> > -	if (ret)
> > -		return ret;
> > +		tg->property = thread_group_array[i];
> > +		tg->nr_groups = thread_group_array[i + 1];
> > +		tg->threads_per_group = thread_group_array[i + 2];
> > +		total_threads = tg->nr_groups * tg->threads_per_group;
> > +
> > +		thread_list = &thread_group_array[i + 3];
> > 
> > -	thread_list = &thread_group_array[3];
> > +		for (j = 0; j < total_threads; j++)
> > +			tg->thread_list[j] = thread_list[j];
> > +		i = i + 3 + total_threads;
> 
> Can't we simply use memcpy instead?

We could. But this one makes it more explicit.

> > +	}
> > 
> > -	for (i = 0 ; i < total_threads; i++)
> > -		tg->thread_list[i] = thread_list[i];
> > +	tglp->nr_properties = property_idx;
> > 
> > -	return 0;
> > +out_free:
> > +	kfree(thread_group_array);
> > +	return ret;
> >  }
> > 
> >  /*
> > @@ -805,24 +827,39 @@ static int get_cpu_thread_group_start(int cpu, struct
> > thread_groups *tg)
> >  	return -1;
> >  }
> > 
> > -static int init_cpu_l1_cache_map(int cpu)
> > +static int init_cpu_cache_map(int cpu, unsigned int cache_property)
> > 
> >  {
> >  	struct device_node *dn = of_get_cpu_node(cpu, NULL);
> > -	struct thread_groups tg = {.property = 0,
> > -				   .nr_groups = 0,
> > -				   .threads_per_group = 0};
> > +	struct thread_groups *tg = NULL;
> >  	int first_thread = cpu_first_thread_sibling(cpu);
> >  	int i, cpu_group_start = -1, err = 0;
> > +	cpumask_var_t *mask;
> > +	struct thread_groups_list *cpu_tgl = &tgl[cpu];
> 
> NIT: same as 1st comment.

Sure, will fix this.
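Just to confirm that I have understood the convention correctly, the
declarations in init_cpu_cache_map() would then be reordered with the
longest lines first, roughly as follows (only the ordering of the
existing declarations changes, nothing else):

	struct device_node *dn = of_get_cpu_node(cpu, NULL);
	int first_thread = cpu_first_thread_sibling(cpu);
	struct thread_groups_list *cpu_tgl = &tgl[cpu];
	int i, cpu_group_start = -1, err = 0;
	struct thread_groups *tg = NULL;
	cpumask_var_t *mask;

I will do the same for the locals in parse_thread_groups().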
> 
> > 
> >  	if (!dn)
> >  		return -ENODATA;
> > 
> > -	err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
> > -	if (err)
> > -		goto out;
> > +	if (!(cache_property == THREAD_GROUP_SHARE_L1))
> > +		return -EINVAL;
> > 
> > -	cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> > +	if (!cpu_tgl->nr_properties) {
> > +		err = parse_thread_groups(dn, cpu_tgl);
> > +		if (err)
> > +			goto out;
> > +	}
> > +
> > +	for (i = 0; i < cpu_tgl->nr_properties; i++) {
> > +		if (cpu_tgl->property_tgs[i].property == cache_property) {
> > +			tg = &cpu_tgl->property_tgs[i];
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (!tg)
> > +		return -EINVAL;
> > +
> > +	cpu_group_start = get_cpu_thread_group_start(cpu, tg);
> 
> This whole hunk should be moved to a new function and called before
> init_cpu_cache_map. It will simplify the logic to great extent.

I suppose you are referring to the part where we select the correct tg.
Yeah, that can move to a separate helper (a rough sketch of what I have
in mind is at the end of this mail).

> > 
> >  	if (unlikely(cpu_group_start == -1)) {
> >  		WARN_ON_ONCE(1);
> > @@ -830,11 +867,12 @@ static int init_cpu_l1_cache_map(int cpu)
> >  		goto out;
> >  	}
> > 
> > -	zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
> > -				GFP_KERNEL, cpu_to_node(cpu));
> > +	mask = &per_cpu(cpu_l1_cache_map, cpu);
> > +
> > +	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> > 
> 
> This hunk (and the next hunk) should be moved to next patch.

The next patch is only about introducing THREAD_GROUP_SHARE_L2. Hence I
kept the rest of the generalization in this patch, since it seemed to be
a logical place to collate whatever we have into a generic form.

> >  	for (i = first_thread; i < first_thread + threads_per_core; i++) {
> > -		int i_group_start = get_cpu_thread_group_start(i, &tg);
> > +		int i_group_start = get_cpu_thread_group_start(i, tg);
> > 
> >  		if (unlikely(i_group_start == -1)) {
> >  			WARN_ON_ONCE(1);
> > @@ -843,7 +881,7 @@ static int init_cpu_l1_cache_map(int cpu)
> >  		}
> > 
> >  		if (i_group_start == cpu_group_start)
> > -			cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
> > +			cpumask_set_cpu(i, *mask);
> >  	}
> > 
> >  out:
> > @@ -924,7 +962,7 @@ static int init_big_cores(void)
> >  	int cpu;
> > 
> >  	for_each_possible_cpu(cpu) {
> > -		int err = init_cpu_l1_cache_map(cpu);
> > +		int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L1);
> > 
> >  		if (err)
> >  			return err;
> > --
> > 1.9.4
> > 
> 
> -- 
> Thanks and Regards
> Srikar Dronamraju
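As mentioned above, here is a rough, untested sketch of the helper I have
in mind for picking the thread_groups entry that matches the requested
property. The name get_thread_groups() is just a placeholder; it simply
lifts the parse-and-select logic out of init_cpu_cache_map():

static struct thread_groups *get_thread_groups(int cpu,
					       unsigned int group_property,
					       int *err)
{
	struct device_node *dn = of_get_cpu_node(cpu, NULL);
	struct thread_groups_list *cpu_tgl = &tgl[cpu];
	struct thread_groups *tg = NULL;
	int i;

	*err = 0;
	if (!dn) {
		*err = -ENODATA;
		return NULL;
	}

	/* Parse "ibm,thread-groups" once per CPU and cache the result. */
	if (!cpu_tgl->nr_properties) {
		*err = parse_thread_groups(dn, cpu_tgl);
		if (*err)
			goto out;
	}

	/* Select the thread-groups entry describing the requested property. */
	for (i = 0; i < cpu_tgl->nr_properties; i++) {
		if (cpu_tgl->property_tgs[i].property == group_property) {
			tg = &cpu_tgl->property_tgs[i];
			break;
		}
	}

	if (!tg)
		*err = -EINVAL;
out:
	of_node_put(dn);
	return tg;
}

init_cpu_cache_map() would then reduce to something like

	tg = get_thread_groups(cpu, cache_property, &err);
	if (!tg)
		return err;

and would only be left with populating the cpumask.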