Hi Simon,

On Mon, Jun 15, 2020 at 1:00 AM Simon Glass <s...@chromium.org> wrote:
>
> At present the APs (non-boot CPUs) are inited once and then parked ready
> for the OS to use them. However in some cases we want to send new requests
> through, such as to change MTRRs and keep them consistent across CPUs.
>
> Change the last state of the flight plan to go into a wait loop, accepting
> instructions from the main CPU.
>
> Signed-off-by: Simon Glass <s...@chromium.org>
> ---
>
> Changes in v2:
> - Add more comments
>
>  arch/x86/cpu/mp_init.c    | 126 +++++++++++++++++++++++++++++++++++---
>  arch/x86/include/asm/mp.h |  11 ++++
>  2 files changed, 128 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> index cd4cae559d..673fdc9628 100644
> --- a/arch/x86/cpu/mp_init.c
> +++ b/arch/x86/cpu/mp_init.c
> @@ -43,13 +43,38 @@ struct mp_flight_plan {
>          struct mp_flight_record *records;
>  };
>
> +/**
> + * struct mp_callback - Callback information for APs
> + *
> + * @func: Function to run
> + * @arg: Argument to pass to the function
> + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a special
> + *      value like MP_SELECT_BSP. It tells the AP whether it should process this
> + *      callback
> + */
> +struct mp_callback {
> +        /**
> +         * func() - Function to call on the AP
> +         *
> +         * @arg: Argument to pass
> +         */
> +        void (*func)(void *arg);
> +        void *arg;
> +        int logical_cpu_number;
> +};
> +
>  static struct mp_flight_plan mp_info;
>
> -struct cpu_map {
> -        struct udevice *dev;
> -        int apic_id;
> -        int err_code;
> -};
> +/*
> + * ap_callbacks - Callback mailbox array
> + *
> + * Array of callbacks, one entry for each available CPU, indexed by the CPU
> + * number, which is dev->req_seq. The entry for the main CPU is never used.
> + * When this is NULL, there is no pending work for the CPU to run. When
> + * non-NULL it points to the mp_callback structure. This is shared between all
> + * CPUs, so should only be written by the main CPU.
> + */
> +static struct mp_callback **ap_callbacks;
>
>  static inline void barrier_wait(atomic_t *b)
>  {
> @@ -147,11 +172,9 @@ static void ap_init(unsigned int cpu_index)
>          debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
>                dev ? dev->name : "(apic_id not found)");
>
> -        /* Walk the flight plan */
> +        /* Walk the flight plan, which never returns */
>          ap_do_flight_plan(dev);
> -
> -        /* Park the AP */
> -        debug("parking\n");
> +        debug("Unexpected return\n");
>  done:
>          stop_this_cpu();
>  }
> @@ -455,6 +478,86 @@ static int get_bsp(struct udevice **devp, int *cpu_countp)
>          return dev->req_seq;
>  }
>
> +/**
> + * read_callback() - Read the pointer in a callback slot
> + *
> + * This is called by APs to read their callback slot to see if there is a
> + * pointer to new instructions
> + *
> + * @slot: Pointer to the AP's callback slot
> + * @return value of that pointer
> + */
> +static struct mp_callback *read_callback(struct mp_callback **slot)
> +{
> +        struct mp_callback *ret;
> +
> +        asm volatile ("mov %1, %0\n"
> +                : "=r" (ret)
> +                : "m" (*slot)
> +                : "memory"
> +        );
Why do we need inline assembly here? Is it only to prevent the compiler from
reordering or caching the access (hence the "memory" clobber)? Could we use
plain C plus an explicit memory barrier instead? I have put a rough sketch of
what I mean at the end of this mail.

> +        return ret;
> +}
> +
> +/**
> + * store_callback() - Store a pointer to the callback slot
> + *
> + * This is called by APs to write NULL into the callback slot when they have
> + * finished the work requested by the BSP.
> + *
> + * @slot: Pointer to the AP's callback slot
> + * @val: Value to write (e.g. NULL)
> + */
> +static void store_callback(struct mp_callback **slot, struct mp_callback *val)
> +{
> +        asm volatile ("mov %1, %0\n"
> +                : "=m" (*slot)
> +                : "r" (val)
> +                : "memory"
> +        );
> +}
> +
> +/**
> + * ap_wait_for_instruction() - Wait for and process requests from the main CPU
> + *
> + * This is called by APs (here, everything other than the main boot CPU) to
> + * await instructions. They arrive in the form of a function call and argument,
> + * which is then called. This uses a simple mailbox with atomic read/set
> + *
> + * @cpu: CPU that is waiting
> + * @unused: Optional argument provided by struct mp_flight_record, not used here
> + * @return Does not return
> + */
> +static int ap_wait_for_instruction(struct udevice *cpu, void *unused)
> +{
> +        struct mp_callback lcb;
> +        struct mp_callback **per_cpu_slot;
> +
> +        per_cpu_slot = &ap_callbacks[cpu->req_seq];
> +
> +        while (1) {
> +                struct mp_callback *cb = read_callback(per_cpu_slot);
> +
> +                if (!cb) {
> +                        asm ("pause");
> +                        continue;
> +                }
> +
> +                /* Copy to local variable before using the value */
> +                memcpy(&lcb, cb, sizeof(lcb));
> +                mfence();
> +                if (lcb.logical_cpu_number == MP_SELECT_ALL ||
> +                    lcb.logical_cpu_number == MP_SELECT_APS ||
> +                    cpu->req_seq == lcb.logical_cpu_number)
> +                        lcb.func(lcb.arg);
> +
> +                /* Indicate we are finished */
> +                store_callback(per_cpu_slot, NULL);

What happens if the BSP puts another job into the callback slot at the same
time? There is no protection here. See the second sketch at the end of this
mail for what I think the BSP side needs to do.

> +        }
> +
> +        return 0;
> +}
> +
>  static int mp_init_cpu(struct udevice *cpu, void *unused)
>  {
>          struct cpu_platdata *plat = dev_get_parent_platdata(cpu);
> @@ -467,6 +570,7 @@ static int mp_init_cpu(struct udevice *cpu, void *unused)
>
>  static struct mp_flight_record mp_steps[] = {
>          MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL),
> +        MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL),
>  };
>
>  int mp_init(void)
> @@ -504,6 +608,10 @@ int mp_init(void)
>          if (ret)
>                  log_warning("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");
>
> +        ap_callbacks = calloc(num_cpus, sizeof(struct mp_callback *));
> +        if (!ap_callbacks)
> +                return -ENOMEM;
> +
>          /* Copy needed parameters so that APs have a reference to the plan */
>          mp_info.num_records = ARRAY_SIZE(mp_steps);
>          mp_info.records = mp_steps;
> diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h
> index 94af819ad9..41b1575f4b 100644
> --- a/arch/x86/include/asm/mp.h
> +++ b/arch/x86/include/asm/mp.h
> @@ -11,6 +11,17 @@
>  #include <asm/atomic.h>
>  #include <asm/cache.h>
>
> +enum {
> +        /* Indicates that the function should run on all CPUs */
> +        MP_SELECT_ALL = -1,
> +
> +        /* Run on the boot CPU */
> +        MP_SELECT_BSP = -2,
> +
> +        /* Run on non-boot CPUs */
> +        MP_SELECT_APS = -3,
> +};
> +
>  typedef int (*mp_callback_t)(struct udevice *cpu, void *arg);
>
>  /*
> --

Regards,
Bin
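
P.S. Here is the first sketch I mentioned. It is untested and only meant to
illustrate the question: replace the inline assembly with volatile accesses
(the READ_ONCE/WRITE_ONCE idea from Linux), which already stop the compiler
from caching or reordering these loads/stores. It reuses struct mp_callback
from your patch and assumes an aligned pointer-sized access is atomic on x86:

/* Untested sketch: plain-C versions of the two helpers */
static struct mp_callback *read_callback(struct mp_callback **slot)
{
        /* volatile forces a fresh load from memory on every call */
        return *(struct mp_callback * volatile *)slot;
}

static void store_callback(struct mp_callback **slot, struct mp_callback *val)
{
        /* volatile forces a real store that the compiler cannot defer */
        *(struct mp_callback * volatile *)slot = val;
}

If the point of the "memory" clobber is also ordering against other memory
traffic, an mfence() (the helper the patch already uses) next to the access
would make that explicit.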
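
And the second sketch, on the race: as far as I can see the scheme is only
safe if the BSP serialises on the slot, i.e. waits for the AP to store NULL
back before it reuses or re-posts it. Something like the below; the name
run_ap_job() and its exact shape are my invention, and only mp_callback,
ap_callbacks, read_callback(), store_callback() and mfence() come from your
patch:

/*
 * Sketch of the BSP-side poster this design seems to require. The wait
 * before the store prevents the BSP from overwriting a job the AP has not
 * yet picked up; the wait after it also keeps @cb alive until the AP is
 * done with it.
 */
static int run_ap_job(int cpu_seq, struct mp_callback *cb)
{
        struct mp_callback **slot = &ap_callbacks[cpu_seq];

        /* Wait for the AP to clear any previous job (it stores NULL) */
        while (read_callback(slot))
                asm ("pause");

        /* Make sure @cb's contents are visible before the slot is */
        mfence();
        store_callback(slot, cb);

        /* Wait for the AP to acknowledge completion */
        while (read_callback(slot))
                asm ("pause");

        return 0;
}

A broadcast for MP_SELECT_APS would presumably post to every slot first and
then wait on each one, but it is this per-slot handshake that I do not see
enforced anywhere in this patch.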