On Tue, Jan 28, 2025 at 11:19:15AM +0100, Jerome Forissier wrote:
> Adds the COROUTINES Kconfig symbol which introduces a new internal API
> for coroutines support. As explained in the Kconfig file, this is meant
> to provide some kind of cooperative multi-tasking with the goal to
> improve performance by overlapping lengthy operations.
> 
> The API as well as the implementation are very much inspired by libaco
> [1]. The reference implementation is simplified to remove all things
> not needed in U-Boot, the coding style is updated, and the aco_ prefix
> is replaced by co_.
> 
> I believe the stack handling could be simplified: the stack of the main
> coroutine could probably be used by the secondary coroutines
> instead of allocating a new stack dynamically.
> 
> Only i386, x86_64 and aarch64 are supported at the moment. Other
> architectures need to provide a _co_switch() function in assembly.
> 
> Only aarch64 has been tested.
> 
> [1] https://github.com/hnes/libaco/
> 
> Signed-off-by: Jerome Forissier <jerome.foriss...@linaro.org>
> ---
>  arch/arm/cpu/armv8/Makefile    |   1 +
>  arch/arm/cpu/armv8/co_switch.S |  36 +++++++
>  include/coroutines.h           | 130 ++++++++++++++++++++++++++
>  lib/Kconfig                    |  10 ++
>  lib/Makefile                   |   2 +
>  lib/coroutines.c               | 165 +++++++++++++++++++++++++++++++++
>  6 files changed, 344 insertions(+)
>  create mode 100644 arch/arm/cpu/armv8/co_switch.S
>  create mode 100644 include/coroutines.h
>  create mode 100644 lib/coroutines.c
> 
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index 2e71ff2dc97..6d07b6aa9f9 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
>  obj-$(CONFIG_XEN) += xen/
>  obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
>  obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o
> +obj-$(CONFIG_COROUTINES) += co_switch.o
> diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S
> new file mode 100644
> index 00000000000..4405e89ec56
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/co_switch.S
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/* void _co_switch(struct co *from_co, struct co *to_co); */
> +.text
> +.globl _co_switch
> +.type  _co_switch, @function
> +_co_switch:
> +    // x0: from_co
> +    // x1: to_co
> +    // from_co and to_co layout: { pc, sp, x19-x29, x30 (lr) }
> +
> +    // Save context to from_co (x0)
> +    // AAPCS64 says "A subroutine invocation must preserve the contents of the
> +    // registers r19-r29 and SP"
> +    adr x2, 1f  // pc we should use to resume after this function
> +    mov x3, sp
> +    stp x2, x3, [x0, #0]  // pc, sp
> +    stp x19, x20, [x0, #16]
> +    stp x21, x22, [x0, #32]
> +    stp x23, x24, [x0, #48]
> +    stp x25, x26, [x0, #64]
> +    stp x27, x28, [x0, #80]
> +    stp x29, x30, [x0, #96]
> +
> +    // Load new context from to_co (x1)
> +    ldp x2, x3, [x1, #0]  // pc, sp
> +    ldp x19, x20, [x1, #16]
> +    ldp x21, x22, [x1, #32]
> +    ldp x23, x24, [x1, #48]
> +    ldp x25, x26, [x1, #64]
> +    ldp x27, x28, [x1, #80]
> +    ldp x29, x30, [x1, #96]
> +    mov sp, x3
> +    br x2
> +
> +1:  // Return to the caller
> +    ret

We've done similar context switching in setjmp/longjmp. Is it possible
to unify this part and get rid of the duplicated assembly for each
architecture?

The jmp_buf structure is actually non-opaque to the caller through
jmp_buf_data, so I believe this logic could be rewritten in C with
setjmp()/longjmp():

        if (!setjmp(from_co))
                longjmp(to_co, 1);
        else
                return;

and replace co.reg with jmp_buf_data.

btw, I guess the jmp_buf_data type is kept for historical usage in the EFI
implementation, but looking through our EFI code it seems the details of
jmp_buf aren't used anymore. So maybe it's time to clean up and
make it a better context-switching API as well.

> diff --git a/include/coroutines.h b/include/coroutines.h
> new file mode 100644
> index 00000000000..b85b656127c
> --- /dev/null
> +++ b/include/coroutines.h
> @@ -0,0 +1,130 @@
> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */
> +/*
> + * Copyright 2018 Sen Han <00h...@gmail.com>
> + * Copyright 2025 Linaro Limited
> + */
> +
> +#ifndef _COROUTINES_H_
> +#define _COROUTINES_H_
> +
> +#ifndef CONFIG_COROUTINES
> +
> +static inline void co_yield(void) {}
> +static inline void co_exit(void) {}
> +
> +#else
> +
> +#ifdef __UBOOT__
> +#include <log.h>
> +#else
> +#include <assert.h>
> +#endif
> +#include <limits.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <time.h>
> +
> +#ifdef __aarch64__
> +#define CO_REG_IDX_RETADDR 0
> +#define CO_REG_IDX_SP 1
> +#else
> +#error Architecture no supported
> +#endif
> +
> +struct co_save_stack {
> +    void*  ptr;
> +    size_t sz;
> +    size_t valid_sz;
> +    size_t max_cpsz; /* max copy size in bytes */
> +};
> +
> +struct co_stack {
> +    void *ptr;
> +    size_t sz;
> +    void *align_highptr;
> +    void *align_retptr;
> +    size_t align_validsz;
> +    size_t align_limit;
> +    struct co *owner;
> +    void *real_ptr;
> +    size_t real_sz;
> +};
> +
> +struct co {
> +     /* CPU state: callee-saved registers plus SP and PC */
> +     void *reg[14];  // pc, sp, x19-x29, x30 (lr)
> +
> +     struct co *main_co;
> +     void *arg;
> +     bool done;
> +
> +     void (*fp)(void);
> +
> +     struct co_save_stack save_stack;
> +     struct co_stack *stack;
> +};
> +
> +extern struct co *current_co;
> +
> +static inline struct co *co_get_co(void)
> +{
> +     return current_co;
> +}
> +
> +static inline void *co_get_arg(void)
> +{
> +     return co_get_co()->arg;
> +}
> +
> +struct co_stack *co_stack_new(size_t sz);
> +
> +void co_stack_destroy(struct co_stack *s);
> +
> +struct co *co_create(struct co *main_co,
> +                  struct co_stack *stack,
> +                  size_t save_stack_sz, void (*fp)(void),
> +                  void *arg);
> +
> +void co_resume(struct co *resume_co);
> +
> +void co_destroy(struct co *co);
> +
> +void *_co_switch(struct co *from_co, struct co *to_co);
> +
> +static inline void _co_yield_to_main_co(struct co *yield_co)
> +{
> +    assert(yield_co);
> +    assert(yield_co->main_co);
> +    _co_switch(yield_co, yield_co->main_co);
> +}
> +
> +static inline void co_yield(void)
> +{
> +     if (current_co)
> +             _co_yield_to_main_co(current_co);
> +}
> +
> +static inline bool co_is_main_co(struct co *co)
> +{
> +     return !co->main_co;
> +}
> +
> +static inline void co_exit(void)
> +{
> +     struct co *co = co_get_co();
> +
> +     if (!co)
> +             return;
> +     co->done = true;
> +     assert(co->stack->owner == co);
> +     co->stack->owner = NULL;
> +     co->stack->align_validsz = 0;
> +     _co_yield_to_main_co(co);
> +     assert(false);
> +}
> +
> +#endif /* CONFIG_COROUTINES */
> +#endif /* _COROUTINES_H_ */
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 8f1a96d98c4..b6c1380b927 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ
>         enable this config option to distinguish them using
>         phandles in fdtdec_get_alias_seq() function.
>  
> +config COROUTINES
> +     bool "Enable coroutine support"
> +     help
> +       Coroutines allow to implement a simple form of cooperative
> +       multi-tasking. The main thread of execution registers one or
> +       more functions as coroutine entry points, then it schedules one
> +       of them. At any point the scheduled coroutine may yield, that is,
> +       suspend its execution and return back to the main thread. At this
> +       point another coroutine may be scheduled and so on until all the
> +       registered coroutines are done.
>  endmenu
>  
>  source "lib/fwu_updates/Kconfig"
> diff --git a/lib/Makefile b/lib/Makefile
> index 5cb3278d2ef..7b809151f5a 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o
>  
>  obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o
>  
> +obj-$(CONFIG_COROUTINES) += coroutines.o
> +
>  #
>  # Build a fast OID lookup registry from include/linux/oid_registry.h
>  #
> diff --git a/lib/coroutines.c b/lib/coroutines.c
> new file mode 100644
> index 00000000000..20c5aba5510
> --- /dev/null
> +++ b/lib/coroutines.c
> @@ -0,0 +1,165 @@
> +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
> +
> +// Copyright 2018 Sen Han <00h...@gmail.com>
> +// Copyright 2025 Linaro Limited
> +
> +#include <coroutines.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +
> +
> +/* Current co-routine */
> +struct co *current_co;
> +
> +struct co_stack *co_stack_new(size_t sz)
> +{
> +     struct co_stack *p = calloc(1, sizeof(*p));
> +     uintptr_t u_p;
> +
> +     if (!p)
> +             return NULL;
> +
> +     if (sz < 4096)
> +             sz = 4096;
> +
> +     p->sz = sz;
> +     p->ptr = malloc(sz);
> +     if (!p->ptr) {
> +             free(p);
> +             return NULL;
> +     }
> +
> +     p->owner = NULL;
> +     u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr);
> +     u_p = (u_p >> 4) << 4;
> +     p->align_highptr = (void*)u_p;
> +     p->align_retptr  = (void*)(u_p - sizeof(void*));
> +     assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*)));
> +     p->align_limit = p->sz - 16 - (sizeof(void*) << 1);
> +
> +     return p;
> +}
> +
> +void co_stack_destroy(struct co_stack *s){
> +     if (!s)
> +             return;
> +     free(s->ptr);
> +     free(s);
> +}
> +
> +struct co *co_create(struct co *main_co,
> +                  struct co_stack *stack,
> +                  size_t save_stack_sz,
> +                  void (*fp)(void), void *arg)
> +{
> +     struct co *p = malloc(sizeof(*p));
> +     assert(p);
> +     memset(p, 0, sizeof(*p));
> +
> +     if (main_co) {
> +             assert(stack);
> +             p->stack = stack;
> +             p->reg[CO_REG_IDX_RETADDR] = (void *)fp;
> +             // FIXME original code uses align_retptr; causes a crash
> +             p->reg[CO_REG_IDX_SP] = p->stack->align_highptr;
> +             p->main_co = main_co;
> +             p->arg = arg;
> +             p->fp = fp;
> +             if (!save_stack_sz)
> +                     save_stack_sz = 64;
> +             p->save_stack.ptr = malloc(save_stack_sz);
> +             assert(p->save_stack.ptr);
> +             p->save_stack.sz = save_stack_sz;
> +             p->save_stack.valid_sz = 0;
> +     } else {
> +             p->main_co = NULL;
> +             p->arg = arg;
> +             p->fp = fp;
> +             p->stack = NULL;
> +             p->save_stack.ptr = NULL;
> +     }
> +     return p;
> +}
> +
> +static void grab_stack(struct co *resume_co)
> +{
> +     struct co *owner_co = resume_co->stack->owner;
> +
> +     if (owner_co) {
> +             assert(owner_co->stack == resume_co->stack);
> +             assert((uintptr_t)(owner_co->stack->align_retptr) >=
> +                    (uintptr_t)(owner_co->reg[CO_REG_IDX_SP]));
> +             assert((uintptr_t)owner_co->stack->align_highptr -
> +                             (uintptr_t)owner_co->stack->align_limit
> +                     <= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]);
> +             owner_co->save_stack.valid_sz =
> +                     (uintptr_t)owner_co->stack->align_retptr -
> +                     (uintptr_t)owner_co->reg[CO_REG_IDX_SP];
> +             if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) {
> +                     free(owner_co->save_stack.ptr);
> +                     owner_co->save_stack.ptr = NULL;
> +                     do {
> +                             owner_co->save_stack.sz <<= 1;
> +                             assert(owner_co->save_stack.sz > 0);
> +                     } while (owner_co->save_stack.sz <
> +                              owner_co->save_stack.valid_sz);
> +                     owner_co->save_stack.ptr =
> +                             malloc(owner_co->save_stack.sz);
> +                     assert(owner_co->save_stack.ptr);
> +             }
> +             if (owner_co->save_stack.valid_sz > 0)
> +                     memcpy(owner_co->save_stack.ptr,
> +                            owner_co->reg[CO_REG_IDX_SP],
> +                            owner_co->save_stack.valid_sz);
> +             if (owner_co->save_stack.valid_sz >
> +                 owner_co->save_stack.max_cpsz)
> +                     owner_co->save_stack.max_cpsz =
> +                             owner_co->save_stack.valid_sz;
> +             owner_co->stack->owner = NULL;
> +             owner_co->stack->align_validsz = 0;
> +     }
> +     assert(!resume_co->stack->owner);
> +     assert(resume_co->save_stack.valid_sz <=
> +            resume_co->stack->align_limit - sizeof(void *));
> +     if (resume_co->save_stack.valid_sz > 0)
> +             memcpy((void*)
> +                    (uintptr_t)(resume_co->stack->align_retptr) -
> +                             resume_co->save_stack.valid_sz,
> +                    resume_co->save_stack.ptr,
> +                    resume_co->save_stack.valid_sz);
> +     if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz)
> +             resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz;
> +     resume_co->stack->align_validsz =
> +             resume_co->save_stack.valid_sz + sizeof(void *);
> +     resume_co->stack->owner = resume_co;
> +}
> +
> +void co_resume(struct co *resume_co)
> +{
> +     assert(resume_co && resume_co->main_co && !resume_co->done);
> +
> +     if (resume_co->stack->owner != resume_co)
> +             grab_stack(resume_co);
> +
> +     current_co = resume_co;
> +     _co_switch(resume_co->main_co, resume_co);
> +     current_co = resume_co->main_co;
> +}
> +
> +void co_destroy(struct co *co){
> +     if (!co)
> +             return;
> +
> +     if(co_is_main_co(co)){
> +             free(co);
> +             current_co = NULL;
> +     } else {
> +             if(co->stack->owner == co){
> +                     co->stack->owner = NULL;
> +                     co->stack->align_validsz = 0;
> +             }
> +             free(co->save_stack.ptr);
> +             co->save_stack.ptr = NULL;
> +             free(co);
> +     }
> +}
> -- 
> 2.43.0
> 

Reply via email to