Re: [Xen-devel] [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model

Meng Xu Mon, 15 Feb 2016 19:57:38 -0800

Hi Tianyang,

Thanks for the patch! Great work and really quick action! :-)
I will just comment on something I quickly find out and would look
forwarding to Dario's comment.


On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <ti...@seas.upenn.edu> wrote:
> Changes since v4:
>     removed unnecessary replenishment queue checks in vcpu_wake()
>     extended replq_remove() to all cases in vcpu_sleep()
>     used _deadline_queue_insert() helper function for both queues
>     _replq_insert() and _replq_remove() program timer internally
>
> Changes since v3:
>     removed running queue.
>     added repl queue to keep track of repl events.
>     timer is now per scheduler.
>     timer is init on a valid cpu in a cpupool.
>
> Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
> Signed-off-by: Meng Xu <men...@cis.upenn.edu>
> Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
> ---
>  xen/common/sched_rt.c |  337
++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 251 insertions(+), 86 deletions(-)
>
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 2e5430f..1f0bb7b 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -16,6 +16,7 @@
>  #include <xen/delay.h>
>  #include <xen/event.h>
>  #include <xen/time.h>
> +#include <xen/timer.h>
>  #include <xen/perfc.h>
>  #include <xen/sched-if.h>
>  #include <xen/softirq.h>
> @@ -87,7 +88,7 @@
>  #define RTDS_DEFAULT_BUDGET     (MICROSECS(4000))
>
>  #define UPDATE_LIMIT_SHIFT      10
> -#define MAX_SCHEDULE            (MILLISECS(1))
> +
>  /*
>   * Flags
>   */
> @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
>   */
>  static unsigned int nr_rt_ops;
>
> +/* handler for the replenishment timer */
> +static void repl_handler(void *data);
> +
> +/* checks if a timer is active or not */
> +bool_t active_timer(struct timer* t);
> +
>  /*
>   * Systme-wide private data, include global RunQueue/DepletedQ
>   * Global lock is referenced by schedule_data.schedule_lock from all
> @@ -152,7 +159,9 @@ struct rt_private {
>      struct list_head sdom;      /* list of availalbe domains, used for
dump */
>      struct list_head runq;      /* ordered list of runnable vcpus */
>      struct list_head depletedq; /* unordered list of depleted vcpus */
> +    struct list_head replq;     /* ordered list of vcpus that need
replenishment */
>      cpumask_t tickled;          /* cpus been tickled */
> +    struct timer *repl_timer;   /* replenishment timer */
>  };
>
>  /*
> @@ -160,6 +169,7 @@ struct rt_private {
>   */
>  struct rt_vcpu {
>      struct list_head q_elem;    /* on the runq/depletedq list */
> +    struct list_head replq_elem;/* on the repl event list */
>
>      /* Up-pointers */
>      struct rt_dom *sdom;
> @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const
struct scheduler *ops)
>      return &rt_priv(ops)->depletedq;
>  }
>
> +static inline struct list_head *rt_replq(const struct scheduler *ops)
> +{
> +    return &rt_priv(ops)->replq;
> +}
> +
>  /*
> - * Queue helper functions for runq and depletedq
> + * Queue helper functions for runq, depletedq
> + * and replenishment event queue
>   */
>  static int
>  __vcpu_on_q(const struct rt_vcpu *svc)
> @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
>      return list_entry(elem, struct rt_vcpu, q_elem);
>  }
>
> +static struct rt_vcpu *
> +__replq_elem(struct list_head *elem)
> +{
> +    return list_entry(elem, struct rt_vcpu, replq_elem);
> +}
> +
> +static int
> +__vcpu_on_replq(const struct rt_vcpu *svc)
> +{
> +   return !list_empty(&svc->replq_elem);
> +}
> +
>  /*
>   * Debug related code, dump vcpu/cpu information
>   */
> @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
>  static void
>  rt_dump(const struct scheduler *ops)
>  {
> -    struct list_head *runq, *depletedq, *iter;
> +    struct list_head *runq, *depletedq, *replq, *iter;
>      struct rt_private *prv = rt_priv(ops);
>      struct rt_vcpu *svc;
>      struct rt_dom *sdom;
> @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
>
>      runq = rt_runq(ops);
>      depletedq = rt_depletedq(ops);
> +    replq = rt_replq(ops);
>
>      printk("Global RunQueue info:\n");
>      list_for_each( iter, runq )
> @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
>          rt_dump_vcpu(ops, svc);
>      }
>
> +    printk("Global Replenishment Event info:\n");
> +    list_for_each( iter, replq )
> +    {
> +        svc = __replq_elem(iter);
> +        rt_dump_vcpu(ops, svc);
> +    }
> +
>      printk("Domain info:\n");
>      list_for_each( iter, &prv->sdom )
>      {
> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
>  }
>
>  /*
> + * Removing a vcpu from the replenishment queue could
> + * re-program the timer for the next replenishment event
> + * if the timer is currently active
> + */
> +static inline void
> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer* repl_timer = prv->repl_timer;
> +
> +    if ( __vcpu_on_replq(svc) )
> +    {
> +        /*
> +         * disarm the timer if removing the first replenishment event
> +         * which is going to happen next
> +         */
> +        if( active_timer(repl_timer) )
> +        {
> +            struct rt_vcpu *next_repl = __replq_elem(replq->next);
> +
> +            if( next_repl->cur_deadline == svc->cur_deadline )
> +                repl_timer->expires = 0;
> +
> +            list_del_init(&svc->replq_elem);
> +
> +            /* re-arm the timer for the next replenishment event */
> +            if( !list_empty(replq) )
> +            {
> +                struct rt_vcpu *svc_next = __replq_elem(replq->next);
> +                set_timer(repl_timer, svc_next->cur_deadline);
> +            }
> +        }
> +

This blank line should go away.. It is quite misleading. At very first
glance, I thought it is the else for if ( __vcpu_on_replq(svc) );
But after second read, I found it is actually for the if(
active_timer(repl_timer) )

> +        else
> +            list_del_init(&svc->replq_elem);
> +    }
> +}
> +
> +/*
> + * An utility function that inserts a vcpu to a
> + * queue based on certain order (EDF)
> + */
> +static void
> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head
*elem),
> +    struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
> +{
> +    struct list_head *iter;
> +
> +    list_for_each(iter, queue)
> +    {
> +        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
> +        if ( svc->cur_deadline <= iter_svc->cur_deadline )
> +                break;
> +    }
> +
> +    list_add_tail(elem, iter);
> +}
> +
> +/*
>   * Insert svc with budget in RunQ according to EDF:
>   * vcpus with smaller deadlines go first.
>   * Insert svc without budget in DepletedQ unsorted;
> @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct
rt_vcpu *svc)
>  {
>      struct rt_private *prv = rt_priv(ops);
>      struct list_head *runq = rt_runq(ops);
> -    struct list_head *iter;
>
>      ASSERT( spin_is_locked(&prv->lock) );
>
> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct
rt_vcpu *svc)
>
>      /* add svc to runq if svc still has budget */
>      if ( svc->cur_budget > 0 )
> -    {
> -        list_for_each(iter, runq)
> -        {
> -            struct rt_vcpu * iter_svc = __q_elem(iter);
> -            if ( svc->cur_deadline <= iter_svc->cur_deadline )
> -                    break;
> -         }
> -        list_add_tail(&svc->q_elem, iter);
> -    }
> +        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
>      else
> -    {
>          list_add(&svc->q_elem, &prv->depletedq);
> -    }
>  }
>
>  /*
> + * Insert svc into the repl even list:
> + * vcpus that needs to be repl earlier go first.
> + * scheduler private lock serializes this operation
> + * it could re-program the timer if it fires later than
> + * this vcpu's cur_deadline. Also, this is used to program
> + * the timer for the first time.
> + */
> +static void
> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct list_head *replq = rt_replq(ops);
> +    struct rt_private *prv = rt_priv(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    ASSERT( !__vcpu_on_replq(svc) );
> +
> +    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
> +
> +    if( repl_timer->expires == 0 ||
> +        ( active_timer(repl_timer) && repl_timer->expires >
svc->cur_deadline ) )
> +        set_timer(repl_timer,svc->cur_deadline);
> +}
> +
> +
> +/*
>   * Init/Free related code
>   */
>  static int
> @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops)
>      INIT_LIST_HEAD(&prv->sdom);
>      INIT_LIST_HEAD(&prv->runq);
>      INIT_LIST_HEAD(&prv->depletedq);
> +    INIT_LIST_HEAD(&prv->replq);
>
>      cpumask_clear(&prv->tickled);
>
>      ops->sched_data = prv;
>
> +    /*
> +     * The timer initialization will happen later when
> +     * the first pcpu is added to this pool in alloc_pdata
> +     */
> +    prv->repl_timer = NULL;
> +
>      return 0;
>
>   no_mem:
> @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops)
>          xfree(_cpumask_scratch);
>          _cpumask_scratch = NULL;
>      }
> +
> +    kill_timer(prv->repl_timer);
> +    xfree(prv->repl_timer);
> +
>      xfree(prv);
>  }
>
> @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
>      if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
>          return NULL;
>
> +    if( prv->repl_timer == NULL )
> +    {
> +        /* allocate the timer on the first cpu of this pool */
> +        prv->repl_timer = xzalloc(struct timer);
> +
> +        if(prv->repl_timer == NULL )
> +            return NULL;
> +
> +        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
> +    }
> +
>      /* 1 indicates alloc. succeed in schedule.c */
>      return (void *)1;
>  }
> @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct
vcpu *vc, void *dd)
>          return NULL;
>
>      INIT_LIST_HEAD(&svc->q_elem);
> +    INIT_LIST_HEAD(&svc->replq_elem);
>      svc->flags = 0U;
>      svc->sdom = dd;
>      svc->vcpu = vc;
> @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
>  }
>
>  /*
> - * This function is called in sched_move_domain() in schedule.c
> + * It is called in sched_move_domain() and sched_init_vcpu
> + * in schedule.c
>   * When move a domain to a new cpupool.
>   * It inserts vcpus of moving domain to the scheduler's RunQ in
>   * dest. cpupool.
> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct
vcpu *vc)
>      lock = vcpu_schedule_lock_irq(vc);
>      if ( __vcpu_on_q(svc) )
>          __q_remove(svc);
> +
> +    if( __vcpu_on_replq(svc) )
> +        __replq_remove(ops,svc);
> +
>      vcpu_schedule_unlock_irq(lock, vc);
>  }
>
> @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const
cpumask_t *mask)
>  }
>
>  /*
> - * Update vcpu's budget and
> - * sort runq by insert the modifed vcpu back to runq
> - * lock is grabbed before calling this function
> - */
> -static void
> -__repl_update(const struct scheduler *ops, s_time_t now)
> -{
> -    struct list_head *runq = rt_runq(ops);
> -    struct list_head *depletedq = rt_depletedq(ops);
> -    struct list_head *iter;
> -    struct list_head *tmp;
> -    struct rt_vcpu *svc = NULL;
> -
> -    list_for_each_safe(iter, tmp, runq)
> -    {
> -        svc = __q_elem(iter);
> -        if ( now < svc->cur_deadline )
> -            break;
> -
> -        rt_update_deadline(now, svc);
> -        /* reinsert the vcpu if its deadline is updated */
> -        __q_remove(svc);
> -        __runq_insert(ops, svc);
> -    }
> -
> -    list_for_each_safe(iter, tmp, depletedq)
> -    {
> -        svc = __q_elem(iter);
> -        if ( now >= svc->cur_deadline )
> -        {
> -            rt_update_deadline(now, svc);
> -            __q_remove(svc); /* remove from depleted queue */
> -            __runq_insert(ops, svc); /* add to runq */
> -        }
> -    }
> -}
> -
> -/*
>   * schedule function for rt scheduler.
>   * The lock is already grabbed in schedule.c, no need to lock here
>   */
> @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t
now, bool_t tasklet_work_sched
>      /* burn_budget would return for IDLE VCPU */
>      burn_budget(ops, scurr, now);
>
> -    __repl_update(ops, now);
>
>      if ( tasklet_work_scheduled )
>      {
> @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t
now, bool_t tasklet_work_sched
>          set_bit(__RTDS_delayed_runq_add, &scurr->flags);
>
>      snext->last_start = now;
> +
> +    ret.time =  -1; /* if an idle vcpu is picked */
>      if ( !is_idle_vcpu(snext->vcpu) )
>      {
>          if ( snext != scurr )
> @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t
now, bool_t tasklet_work_sched
>              snext->vcpu->processor = cpu;
>              ret.migrated = 1;
>          }
> +
> +        ret.time = snext->budget; /* invoke the scheduler next time */
> +
>      }
>
> -    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
>      ret.task = snext->vcpu;
>
>      /* TRACE */
> @@ -914,7 +1017,7 @@ static void
>  rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu * const svc = rt_vcpu(vc);
> -
> +

Next patch should avoid this. You may have some trailing space in the line.
git has some command to mark the trailing whitespace.
You can have a look at this post:
http://stackoverflow.com/questions/5257553/coloring-white-space-in-git-diffs-output

>      BUG_ON( is_idle_vcpu(vc) );
>      SCHED_STAT_CRANK(vcpu_sleep);
>
> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct
vcpu *vc)
>          __q_remove(svc);
>      else if ( svc->flags & RTDS_delayed_runq_add )
>          clear_bit(__RTDS_delayed_runq_add, &svc->flags);
> +
> +    if( __vcpu_on_replq(svc) )
> +        __replq_remove(ops, svc);
>  }
>
>  /*
> @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct
vcpu *vc)
>  {
>      struct rt_vcpu * const svc = rt_vcpu(vc);
>      s_time_t now = NOW();
> -    struct rt_private *prv = rt_priv(ops);
> -    struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
> -    struct rt_dom *sdom = NULL;
> -    cpumask_t *online;
>
>      BUG_ON( is_idle_vcpu(vc) );
>
> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct
vcpu *vc)
>      else
>          SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>
> +    /* budget repl here is needed before inserting back to runq. If so,
> +     * it should be re-inserted back to the replenishment queue.
> +     */
> +    if ( now >= svc->cur_deadline)
> +    {
> +        rt_update_deadline(now, svc);
> +        __replq_remove(ops, svc);
> +    }
> +
> +    if( !__vcpu_on_replq(svc) )
> +        __replq_insert(ops, svc);
> +
>      /* If context hasn't been saved for this vcpu yet, we can't put it on
>       * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
>       * put on the Runqueue/DepletedQ after the context has been saved.
> @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct
vcpu *vc)
>          return;
>      }
>
> -    if ( now >= svc->cur_deadline)
> -        rt_update_deadline(now, svc);
> -
>      /* insert svc to runq/depletedq because svc is not in queue now */
>      __runq_insert(ops, svc);
>
> -    __repl_update(ops, now);
> -
> -    ASSERT(!list_empty(&prv->sdom));
> -    sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> -    online = cpupool_domain_cpumask(sdom->dom);
> -    snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus
*/
> -
> -    runq_tickle(ops, snext);
> -
> -    return;
> +    runq_tickle(ops, svc);
>  }
>
>  /*
> @@ -1087,10 +1189,6 @@ static void
>  rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu *svc = rt_vcpu(vc);
> -    struct rt_vcpu *snext = NULL;
> -    struct rt_dom *sdom = NULL;
> -    struct rt_private *prv = rt_priv(ops);
> -    cpumask_t *online;
>      spinlock_t *lock = vcpu_schedule_lock_irq(vc);
>
>      clear_bit(__RTDS_scheduled, &svc->flags);
> @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops,
struct vcpu *vc)
>           likely(vcpu_runnable(vc)) )
>      {
>          __runq_insert(ops, svc);
> -        __repl_update(ops, NOW());
> -
> -        ASSERT(!list_empty(&prv->sdom));
> -        sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> -        online = cpupool_domain_cpumask(sdom->dom);
> -        snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
> -
> -        runq_tickle(ops, snext);
> +        runq_tickle(ops, svc);
>      }
>  out:
>      vcpu_schedule_unlock_irq(lock, vc);
> @@ -1168,6 +1259,80 @@ rt_dom_cntl(
>      return rc;
>  }
>
> +/*
> + * The replenishment timer handler picks vcpus
> + * from the replq and does the actual replenishment
> + */
> +static void repl_handler(void *data){
> +    unsigned long flags;
> +    s_time_t now = NOW();
> +    struct scheduler *ops = data;
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +    struct list_head *iter, *tmp;
> +    struct rt_vcpu *svc = NULL;
> +
> +    spin_lock_irqsave(&prv->lock, flags);
> +
> +    stop_timer(repl_timer);
> +
> +    list_for_each_safe(iter, tmp, replq)
> +    {
> +        svc = __replq_elem(iter);
> +
> +        if ( now >= svc->cur_deadline )
> +        {
> +            rt_update_deadline(now, svc);
> +
> +            /*
> +             * when the replenishment happens
> +             * svc is either on a pcpu or on
> +             * runq/depletedq
> +             */
> +            if( __vcpu_on_q(svc) )
> +            {
> +                /* put back to runq */
> +                __q_remove(svc);
> +                __runq_insert(ops, svc);
> +            }
> +
> +            /*
> +             * tickle regardless where it's at
> +             * because a running vcpu could have
> +             * a later deadline than others after
> +             * replenishment
> +             */
> +            runq_tickle(ops, svc);
> +
> +            /* update replenishment event queue */
> +            __replq_remove(ops, svc);
> +            __replq_insert(ops, svc);
> +        }
> +

This white line should be removed...

> +        else
> +            break;
> +    }
> +
> +    /*
> +     * use the vcpu that's on the top
> +     * or else don't program the timer
> +     */
> +    if( !list_empty(replq) )
> +        set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
> +
> +    spin_unlock_irqrestore(&prv->lock, flags);
> +
> +}
> +
> +/* checks if a timer has been stopped or not */
> +bool_t active_timer(struct timer *timer)
> +{
> +    ASSERT(timer->status >= TIMER_STATUS_inactive);
> +    ASSERT(timer->status <= TIMER_STATUS_in_list);
> +    return (timer->status >= TIMER_STATUS_in_heap);
> +}
> +
>  static struct rt_private _rt_priv;
>
>  static const struct scheduler sched_rtds_def = {
> --
> 1.7.9.5
>

Thanks and best regards,

Meng

-- 


-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model

Reply via email to