Hi Tianyang,

Thanks for the patch! Great work and really quick action! :-) I will just comment on a few things I found on a quick pass, and look forward to Dario's comments.
On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <ti...@seas.upenn.edu> wrote: > Changes since v4: > removed unnecessary replenishment queue checks in vcpu_wake() > extended replq_remove() to all cases in vcpu_sleep() > used _deadline_queue_insert() helper function for both queues > _replq_insert() and _replq_remove() program timer internally > > Changes since v3: > removed running queue. > added repl queue to keep track of repl events. > timer is now per scheduler. > timer is init on a valid cpu in a cpupool. > > Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu> > Signed-off-by: Meng Xu <men...@cis.upenn.edu> > Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu> > --- > xen/common/sched_rt.c | 337 ++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 251 insertions(+), 86 deletions(-) > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > index 2e5430f..1f0bb7b 100644 > --- a/xen/common/sched_rt.c > +++ b/xen/common/sched_rt.c > @@ -16,6 +16,7 @@ > #include <xen/delay.h> > #include <xen/event.h> > #include <xen/time.h> > +#include <xen/timer.h> > #include <xen/perfc.h> > #include <xen/sched-if.h> > #include <xen/softirq.h> > @@ -87,7 +88,7 @@ > #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) > > #define UPDATE_LIMIT_SHIFT 10 > -#define MAX_SCHEDULE (MILLISECS(1)) > + > /* > * Flags > */ > @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch; > */ > static unsigned int nr_rt_ops; > > +/* handler for the replenishment timer */ > +static void repl_handler(void *data); > + > +/* checks if a timer is active or not */ > +bool_t active_timer(struct timer* t); > + > /* > * Systme-wide private data, include global RunQueue/DepletedQ > * Global lock is referenced by schedule_data.schedule_lock from all > @@ -152,7 +159,9 @@ struct rt_private { > struct list_head sdom; /* list of availalbe domains, used for dump */ > struct list_head runq; /* ordered list of runnable vcpus */ > struct list_head depletedq; /* unordered list of depleted vcpus */ > + struct list_head replq; /* ordered list of vcpus that need replenishment */ > cpumask_t tickled; /* cpus been tickled */ > + struct timer *repl_timer; /* replenishment timer */ > }; > > /* > @@ -160,6 +169,7 @@ struct rt_private { > */ > struct rt_vcpu { > struct list_head q_elem; /* on the runq/depletedq list */ > + struct list_head replq_elem;/* on the repl event list */ > > /* Up-pointers */ > struct rt_dom *sdom; > @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) > return &rt_priv(ops)->depletedq; > } > > +static inline struct list_head *rt_replq(const struct scheduler *ops) > +{ > + return &rt_priv(ops)->replq; > +} > + > /* > - * Queue helper functions for runq and depletedq > + * Queue helper functions for runq, depletedq > + * and replenishment event queue > */ > static int > __vcpu_on_q(const struct rt_vcpu *svc) > @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem) > return list_entry(elem, struct rt_vcpu, q_elem); > } > > +static struct rt_vcpu * > +__replq_elem(struct list_head *elem) > +{ > + return list_entry(elem, struct rt_vcpu, replq_elem); > +} > + > +static int > +__vcpu_on_replq(const struct rt_vcpu *svc) > +{ > + return !list_empty(&svc->replq_elem); > +} > + > /* > * Debug related code, dump vcpu/cpu information > */ > @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) > static void > rt_dump(const struct scheduler *ops) > { > - struct list_head *runq, *depletedq, *iter; > + struct list_head *runq, *depletedq, *replq, *iter; > struct 
rt_private *prv = rt_priv(ops); > struct rt_vcpu *svc; > struct rt_dom *sdom; > @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops) > > runq = rt_runq(ops); > depletedq = rt_depletedq(ops); > + replq = rt_replq(ops); > > printk("Global RunQueue info:\n"); > list_for_each( iter, runq ) > @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops) > rt_dump_vcpu(ops, svc); > } > > + printk("Global Replenishment Event info:\n"); > + list_for_each( iter, replq ) > + { > + svc = __replq_elem(iter); > + rt_dump_vcpu(ops, svc); > + } > + > printk("Domain info:\n"); > list_for_each( iter, &prv->sdom ) > { > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) > } > > /* > + * Removing a vcpu from the replenishment queue could > + * re-program the timer for the next replenishment event > + * if the timer is currently active > + */ > +static inline void > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer* repl_timer = prv->repl_timer; > + > + if ( __vcpu_on_replq(svc) ) > + { > + /* > + * disarm the timer if removing the first replenishment event > + * which is going to happen next > + */ > + if( active_timer(repl_timer) ) > + { > + struct rt_vcpu *next_repl = __replq_elem(replq->next); > + > + if( next_repl->cur_deadline == svc->cur_deadline ) > + repl_timer->expires = 0; > + > + list_del_init(&svc->replq_elem); > + > + /* re-arm the timer for the next replenishment event */ > + if( !list_empty(replq) ) > + { > + struct rt_vcpu *svc_next = __replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > + } > + This blank line should go away; it is quite misleading. At first glance, I thought the else was the else of if ( __vcpu_on_replq(svc) ); only on a second read did I see that it actually pairs with if( active_timer(repl_timer) ). (See the small restructuring sketch I have pasted further below.) > + else > + list_del_init(&svc->replq_elem); > + } > +} > + > +/* > + * An utility function that inserts a vcpu to a > + * queue based on certain order (EDF) > + */ > +static void > +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), > + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) > +{ > + struct list_head *iter; > + > + list_for_each(iter, queue) > + { > + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); > + if ( svc->cur_deadline <= iter_svc->cur_deadline ) > + break; > + } > + > + list_add_tail(elem, iter); > +} > + > +/* > * Insert svc with budget in RunQ according to EDF: > * vcpus with smaller deadlines go first. > * Insert svc without budget in DepletedQ unsorted; > @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > { > struct rt_private *prv = rt_priv(ops); > struct list_head *runq = rt_runq(ops); > - struct list_head *iter; > > ASSERT( spin_is_locked(&prv->lock) ); > > @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > > /* add svc to runq if svc still has budget */ > if ( svc->cur_budget > 0 ) > - { > - list_for_each(iter, runq) > - { > - struct rt_vcpu * iter_svc = __q_elem(iter); > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > - break; > - } > - list_add_tail(&svc->q_elem, iter); > - } > + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); > else > - { > list_add(&svc->q_elem, &prv->depletedq); > - } > } > > /* > + * Insert svc into the repl even list: > + * vcpus that needs to be repl earlier go first.
> + * scheduler private lock serializes this operation > + * it could re-program the timer if it fires later than > + * this vcpu's cur_deadline. Also, this is used to program > + * the timer for the first time. > + */ > +static void > +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( !__vcpu_on_replq(svc) ); > + > + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq); > + > + if( repl_timer->expires == 0 || > + ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) ) > + set_timer(repl_timer,svc->cur_deadline); > +} > + > + > +/* > * Init/Free related code > */ > static int > @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops) > INIT_LIST_HEAD(&prv->sdom); > INIT_LIST_HEAD(&prv->runq); > INIT_LIST_HEAD(&prv->depletedq); > + INIT_LIST_HEAD(&prv->replq); > > cpumask_clear(&prv->tickled); > > ops->sched_data = prv; > > + /* > + * The timer initialization will happen later when > + * the first pcpu is added to this pool in alloc_pdata > + */ > + prv->repl_timer = NULL; > + > return 0; > > no_mem: > @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops) > xfree(_cpumask_scratch); > _cpumask_scratch = NULL; > } > + > + kill_timer(prv->repl_timer); > + xfree(prv->repl_timer); > + > xfree(prv); > } > > @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) > if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) > return NULL; > > + if( prv->repl_timer == NULL ) > + { > + /* allocate the timer on the first cpu of this pool */ > + prv->repl_timer = xzalloc(struct timer); > + > + if(prv->repl_timer == NULL ) > + return NULL; > + > + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); > + } > + > /* 1 indicates alloc. succeed in schedule.c */ > return (void *)1; > } > @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) > return NULL; > > INIT_LIST_HEAD(&svc->q_elem); > + INIT_LIST_HEAD(&svc->replq_elem); > svc->flags = 0U; > svc->sdom = dd; > svc->vcpu = vc; > @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) > } > > /* > - * This function is called in sched_move_domain() in schedule.c > + * It is called in sched_move_domain() and sched_init_vcpu > + * in schedule.c > * When move a domain to a new cpupool. > * It inserts vcpus of moving domain to the scheduler's RunQ in > * dest. cpupool. 
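Coming back to my comment on __replq_remove() above: just to illustrate what I mean, here is one way the function could be restructured so that the else cannot be misread. This is only a sketch built from the patch's own helpers, untested and not a requirement:

    static inline void
    __replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
    {
        struct rt_private *prv = rt_priv(ops);
        struct list_head *replq = rt_replq(ops);
        struct timer *repl_timer = prv->repl_timer;

        if ( !__vcpu_on_replq(svc) )
            return;

        if ( !active_timer(repl_timer) )
        {
            /* timer not armed: nothing to re-program */
            list_del_init(&svc->replq_elem);
            return;
        }

        /* disarm the timer if removing the replenishment event at the head */
        if ( __replq_elem(replq->next)->cur_deadline == svc->cur_deadline )
            repl_timer->expires = 0;

        list_del_init(&svc->replq_elem);

        /* re-arm the timer for the next replenishment event, if any is left */
        if ( !list_empty(replq) )
            set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
    }

With the early returns, every branch stays flat and there is no blank line left between an if body and its else.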
> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) > lock = vcpu_schedule_lock_irq(vc); > if ( __vcpu_on_q(svc) ) > __q_remove(svc); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops,svc); > + > vcpu_schedule_unlock_irq(lock, vc); > } > > @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) > } > > /* > - * Update vcpu's budget and > - * sort runq by insert the modifed vcpu back to runq > - * lock is grabbed before calling this function > - */ > -static void > -__repl_update(const struct scheduler *ops, s_time_t now) > -{ > - struct list_head *runq = rt_runq(ops); > - struct list_head *depletedq = rt_depletedq(ops); > - struct list_head *iter; > - struct list_head *tmp; > - struct rt_vcpu *svc = NULL; > - > - list_for_each_safe(iter, tmp, runq) > - { > - svc = __q_elem(iter); > - if ( now < svc->cur_deadline ) > - break; > - > - rt_update_deadline(now, svc); > - /* reinsert the vcpu if its deadline is updated */ > - __q_remove(svc); > - __runq_insert(ops, svc); > - } > - > - list_for_each_safe(iter, tmp, depletedq) > - { > - svc = __q_elem(iter); > - if ( now >= svc->cur_deadline ) > - { > - rt_update_deadline(now, svc); > - __q_remove(svc); /* remove from depleted queue */ > - __runq_insert(ops, svc); /* add to runq */ > - } > - } > -} > - > -/* > * schedule function for rt scheduler. > * The lock is already grabbed in schedule.c, no need to lock here > */ > @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > /* burn_budget would return for IDLE VCPU */ > burn_budget(ops, scurr, now); > > - __repl_update(ops, now); > > if ( tasklet_work_scheduled ) > { > @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > set_bit(__RTDS_delayed_runq_add, &scurr->flags); > > snext->last_start = now; > + > + ret.time = -1; /* if an idle vcpu is picked */ > if ( !is_idle_vcpu(snext->vcpu) ) > { > if ( snext != scurr ) > @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > snext->vcpu->processor = cpu; > ret.migrated = 1; > } > + > + ret.time = snext->budget; /* invoke the scheduler next time */ > + > } > > - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ > ret.task = snext->vcpu; > > /* TRACE */ > @@ -914,7 +1017,7 @@ static void > rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > - > + The next patch should avoid this; you seem to have some trailing whitespace on this line. git has options for highlighting trailing whitespace.
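For example, something like the following should catch it (from memory, so please double-check the exact options against your git version):

    # warn about whitespace errors, including trailing spaces, before committing
    $ git diff --check

    # paint trailing whitespace in diff output
    $ git config color.diff.whitespace "red reverse"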
You can have a look at this post: http://stackoverflow.com/questions/5257553/coloring-white-space-in-git-diffs-output > BUG_ON( is_idle_vcpu(vc) ); > SCHED_STAT_CRANK(vcpu_sleep); > > @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > __q_remove(svc); > else if ( svc->flags & RTDS_delayed_runq_add ) > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops, svc); > } > > /* > @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > s_time_t now = NOW(); > - struct rt_private *prv = rt_priv(ops); > - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ > - struct rt_dom *sdom = NULL; > - cpumask_t *online; > > BUG_ON( is_idle_vcpu(vc) ); > > @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > else > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > + /* budget repl here is needed before inserting back to runq. If so, > + * it should be re-inserted back to the replenishment queue. > + */ > + if ( now >= svc->cur_deadline) > + { > + rt_update_deadline(now, svc); > + __replq_remove(ops, svc); > + } > + > + if( !__vcpu_on_replq(svc) ) > + __replq_insert(ops, svc); > + > /* If context hasn't been saved for this vcpu yet, we can't put it on > * the Runqueue/DepletedQ. Instead, we set a flag so that it will be > * put on the Runqueue/DepletedQ after the context has been saved. > @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > return; > } > > - if ( now >= svc->cur_deadline) > - rt_update_deadline(now, svc); > - > /* insert svc to runq/depletedq because svc is not in queue now */ > __runq_insert(ops, svc); > > - __repl_update(ops, now); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ > - > - runq_tickle(ops, snext); > - > - return; > + runq_tickle(ops, svc); > } > > /* > @@ -1087,10 +1189,6 @@ static void > rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu *svc = rt_vcpu(vc); > - struct rt_vcpu *snext = NULL; > - struct rt_dom *sdom = NULL; > - struct rt_private *prv = rt_priv(ops); > - cpumask_t *online; > spinlock_t *lock = vcpu_schedule_lock_irq(vc); > > clear_bit(__RTDS_scheduled, &svc->flags); > @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > likely(vcpu_runnable(vc)) ) > { > __runq_insert(ops, svc); > - __repl_update(ops, NOW()); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ > - > - runq_tickle(ops, snext); > + runq_tickle(ops, svc); > } > out: > vcpu_schedule_unlock_irq(lock, vc); > @@ -1168,6 +1259,80 @@ rt_dom_cntl( > return rc; > } > > +/* > + * The replenishment timer handler picks vcpus > + * from the replq and does the actual replenishment > + */ > +static void repl_handler(void *data){ > + unsigned long flags; > + s_time_t now = NOW(); > + struct scheduler *ops = data; > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer *repl_timer = prv->repl_timer; > + struct list_head *iter, *tmp; > + struct rt_vcpu *svc = NULL; > + > + spin_lock_irqsave(&prv->lock, flags); > + > + 
stop_timer(repl_timer); > + > + list_for_each_safe(iter, tmp, replq) > + { > + svc = __replq_elem(iter); > + > + if ( now >= svc->cur_deadline ) > + { > + rt_update_deadline(now, svc); > + > + /* > + * when the replenishment happens > + * svc is either on a pcpu or on > + * runq/depletedq > + */ > + if( __vcpu_on_q(svc) ) > + { > + /* put back to runq */ > + __q_remove(svc); > + __runq_insert(ops, svc); > + } > + > + /* > + * tickle regardless where it's at > + * because a running vcpu could have > + * a later deadline than others after > + * replenishment > + */ > + runq_tickle(ops, svc); > + > + /* update replenishment event queue */ > + __replq_remove(ops, svc); > + __replq_insert(ops, svc); > + } > + This blank line should be removed too, for the same reason as in __replq_remove(): it makes the else look detached from its if. > + else > + break; > + } > + > + /* > + * use the vcpu that's on the top > + * or else don't program the timer > + */ > + if( !list_empty(replq) ) > + set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline); > + > + spin_unlock_irqrestore(&prv->lock, flags); > + > +} > + > +/* checks if a timer has been stopped or not */ > +bool_t active_timer(struct timer *timer) > +{ > + ASSERT(timer->status >= TIMER_STATUS_inactive); > + ASSERT(timer->status <= TIMER_STATUS_in_list); > + return (timer->status >= TIMER_STATUS_in_heap); > +} > + > static struct rt_private _rt_priv; > > static const struct scheduler sched_rtds_def = { > -- > 1.7.9.5

Thanks and best regards,
Meng

--
-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/