Hi Bin, On Sun, 28 Jun 2020 at 00:25, Bin Meng <bmeng...@gmail.com> wrote: > > Hi Simon, > > On Mon, Jun 15, 2020 at 1:00 AM Simon Glass <s...@chromium.org> wrote: > > > > At present the APs (non-boot CPUs) are inited once and then parked ready > > for the OS to use them. However in some cases we want to send new requests > > through, such as to change MTRRs and keep them consistent across CPUs. > > > > Change the last state of the flight plan to go into a wait loop, accepting > > instructions from the main CPU. > > > > Signed-off-by: Simon Glass <s...@chromium.org> > > --- > > > > Changes in v2: > > - Add more comments > > > > arch/x86/cpu/mp_init.c | 126 +++++++++++++++++++++++++++++++++++--- > > arch/x86/include/asm/mp.h | 11 ++++ > > 2 files changed, 128 insertions(+), 9 deletions(-) > > > > diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c > > index cd4cae559d..673fdc9628 100644 > > --- a/arch/x86/cpu/mp_init.c > > +++ b/arch/x86/cpu/mp_init.c > > @@ -43,13 +43,38 @@ struct mp_flight_plan { > > struct mp_flight_record *records; > > }; > > > > +/** > > + * struct mp_callback - Callback information for APs > > + * > > + * @func: Function to run > > + * @arg: Argument to pass to the function > > + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a special > > + * value like MP_SELECT_BSP. It tells the AP whether it should process this > > + * callback > > + */ > > +struct mp_callback { > > + /** > > + * func() - Function to call on the AP > > + * > > + * @arg: Argument to pass > > + */ > > + void (*func)(void *arg); > > + void *arg; > > + int logical_cpu_number; > > +}; > > + > > static struct mp_flight_plan mp_info; > > > > -struct cpu_map { > > - struct udevice *dev; > > - int apic_id; > > - int err_code; > > -}; > > +/* > > + * ap_callbacks - Callback mailbox array > > + * > > + * Array of callback, one entry for each available CPU, indexed by the CPU > > + * number, which is dev->req_seq. The entry for the main CPU is never used. > > + * When this is NULL, there is no pending work for the CPU to run. When > > + * non-NULL it points to the mp_callback structure. This is shared between all > > + * CPUs, so should only be written by the main CPU. > > + */ > > +static struct mp_callback **ap_callbacks; > > > > static inline void barrier_wait(atomic_t *b) > > { > > @@ -147,11 +172,9 @@ static void ap_init(unsigned int cpu_index) > > debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id, > > dev ? dev->name : "(apic_id not found)"); > > > > - /* Walk the flight plan */ > > + /* Walk the flight plan, which never returns */ > > ap_do_flight_plan(dev); > > - > > - /* Park the AP */ > > - debug("parking\n"); > > + debug("Unexpected return\n"); > > done: > > stop_this_cpu(); > > } > > @@ -455,6 +478,86 @@ static int get_bsp(struct udevice **devp, int *cpu_countp) > > return dev->req_seq; > > } > > > > +/** > > + * read_callback() - Read the pointer in a callback slot > > + * > > + * This is called by APs to read their callback slow to see if there is a > > + * pointer to new instructions > > + * > > + * @slot: Pointer to the AP's callback slot > > + * @return value of that pointer > > + */ > > +static struct mp_callback *read_callback(struct mp_callback **slot) > > +{ > > + struct mp_callback *ret; > > + > > + asm volatile ("mov %1, %0\n" > > + : "=r" (ret) > > + : "m" (*slot) > > + : "memory" > > + ); > > Why do we need inline assembly here? Prevent compiler reordering the > codes ("memory")? Can we use C and additional memory barrier to do > this?
Yes that should work. Will fix. > > > + return ret; > > +} > > + > > +/** > > + * store_callback() - Store a pointer to the callback slot > > + * > > + * This is called by APs to write NULL into the callback slot when they have > > + * finished the work requested by the BSP. > > + * > > + * @slot: Pointer to the AP's callback slot > > + * @val: Value to write (e.g. NULL) > > + */ > > +static void store_callback(struct mp_callback **slot, struct mp_callback *val) > > +{ > > + asm volatile ("mov %1, %0\n" > > + : "=m" (*slot) > > + : "r" (val) > > + : "memory" > > + ); > > +} > > + > > +/** > > + * ap_wait_for_instruction() - Wait for and process requests from the main CPU > > + * > > + * This is called by APs (here, everything other than the main boot CPU) to > > + * await instructions. They arrive in the form of a function call and argument, > > + * which is then called. This uses a simple mailbox with atomic read/set > > + * > > + * @cpu: CPU that is waiting > > + * @unused: Optional argument provided by struct mp_flight_record, not used here > > + * @return Does not return > > + */ > > +static int ap_wait_for_instruction(struct udevice *cpu, void *unused) > > +{ > > + struct mp_callback lcb; > > + struct mp_callback **per_cpu_slot; > > + > > + per_cpu_slot = &ap_callbacks[cpu->req_seq]; > > + > > + while (1) { > > + struct mp_callback *cb = read_callback(per_cpu_slot); > > + > > + if (!cb) { > > + asm ("pause"); > > + continue; > > + } > > + > > + /* Copy to local variable before using the value */ > > + memcpy(&lcb, cb, sizeof(lcb)); > > + mfence(); > > + if (lcb.logical_cpu_number == MP_SELECT_ALL || > > + lcb.logical_cpu_number == MP_SELECT_APS || > > + cpu->req_seq == lcb.logical_cpu_number) > > + lcb.func(lcb.arg); > > + > > + /* Indicate we are finished */ > > + store_callback(per_cpu_slot, NULL); > > What happens if at the same time BSP put another job to the callback > slot? There is no protection here. The BSP is in control of sending jobs to the APs and does not return from that function (in a later patch) until all APs have finished. So at present it is not possible for the BSP to start another request on the same AP. Regards, Simon