On Tue, Apr 18, 2017 at 01:11:10PM +0200, Thomas Gleixner wrote:

> +static void __tmigr_handle_remote(struct tmigr_group *group, unsigned int cpu,
> +				   u64 now, unsigned long jif, bool walkup)
> +{
> +	struct timerqueue_node *tmr;
> +	struct tmigr_group *parent;
> +	struct tmigr_event *evt;
> +
> +again:
> +	raw_spin_lock_irq(&group->lock);
> +	/*
> +	 * Handle the group only if @cpu is the migrator or if the group
> +	 * has no migrator. Otherwise the group is active and is handled by
> +	 * its own migrator.
> +	 */
> +	if (group->migrator != cpu && group->migrator != TMIGR_NONE) {
> +		raw_spin_unlock_irq(&group->lock);
> +		return;
> +	}
> +
> +	tmr = timerqueue_getnext(&group->events);
> +	if (tmr && now >= tmr->expires) {
> +		/*
> +		 * Remove the expired entry from the queue and handle
> +		 * it. If this is a leaf group, call the timer poll
> +		 * function for the given cpu. Otherwise handle the group
> +		 * itself. Drop the group lock here in both cases to avoid
> +		 * lock ordering inversions.
> +		 */
> +		evt = container_of(tmr, struct tmigr_event, nextevt);
> +		tmigr_remove_evt(group, evt);
> +
> +		raw_spin_unlock_irq(&group->lock);
> +
> +		/*
> +		 * If the event is a group event walk down the hierarchy of
> +		 * that group to the CPU leafs. If not, handle the expired
> +		 * timer from the remote CPU.
> +		 */
> +		if (evt->group) {
> +			__tmigr_handle_remote(evt->group, cpu, now, jif, false);
Recursion..

> +		} else {
> +			timer_expire_remote(evt->cpu);
> +			tmigr_update_remote(evt->cpu, now, jif);
> +		}
> +		goto again;
> +	}
> +
> +	/*
> +	 * If @group is not active, queue the next event in the parent
> +	 * group. This is required, because the next event of @group
> +	 * could have been changed by tmigr_update_remote() above.
> +	 */
> +	parent = group->parent;
> +	if (parent && !group->active) {
> +		raw_spin_lock_nested(&parent->lock, parent->level);
> +		tmigr_add_evt(parent, &group->groupevt);
> +		raw_spin_unlock(&parent->lock);
> +	}
> +	raw_spin_unlock_irq(&group->lock);
> +
> +	/* Walk the hierarchy up? */
> +	if (!walkup || !parent)
> +		return;
> +
> +	/* Racy lockless check: See comment in tmigr_handle_remote() */
> +	if (parent->migrator == cpu)
> +		__tmigr_handle_remote(parent, cpu, now, jif, true);

(hopefully) Tail recursion..

> +}

> +static u64 tmigr_set_cpu_inactive(struct tmigr_group *group,
> +				  struct tmigr_group *child,
> +				  struct tmigr_event *evt,
> +				  unsigned int cpu)
> +{
> +	struct tmigr_group *parent;
> +	u64 nextevt = KTIME_MAX;
> +
> +	raw_spin_lock_nested(&group->lock, group->level);
> +
> +	DBG_BUG_ON(!group->active);
> +
> +	cpumask_clear_cpu(cpu, group->cpus);
> +	group->active--;
> +
> +	/*
> +	 * If @child is not NULL, then this is a recursive invocation to
> +	 * propagate the deactivation of @cpu. If @child has a new migrator
> +	 * set it active in @group.
> +	 */
> +	if (child && child->migrator != TMIGR_NONE) {
> +		cpumask_set_cpu(child->migrator, group->cpus);
> +		group->active++;
> +	}
> +
> +	/* Add @evt to @group */
> +	tmigr_add_evt(group, evt);
> +
> +	/* If @cpu is not the active migrator, everything is up to date */
> +	if (group->migrator != cpu)
> +		goto done;
> +
> +	/* Update the migrator. */
> +	if (!group->active)
> +		group->migrator = TMIGR_NONE;
> +	else
> +		group->migrator = cpumask_first(group->cpus);
> +
> +	parent = group->parent;
> +	if (parent) {
> +		/*
> +		 * @cpu was the migrator in @group, so it is marked as
> +		 * active in its parent group(s) as well. Propagate the
> +		 * migrator change.
> +		 */
> +		evt = group->active ? NULL : &group->groupevt;
> +		nextevt = tmigr_set_cpu_inactive(parent, group, evt, cpu);

Recursion..

> +	} else {
> +		/*
> +		 * This is the top level of the hierarchy. If @cpu is about
> +		 * to go offline wake up some random other cpu so it will
> +		 * take over the migrator duty and program its timer
> +		 * proper. Ideally wake the cpu with the closest expiry
> +		 * time, but that's overkill to figure out.
> +		 */
> +		if (!per_cpu(tmigr_cpu, cpu).online) {
> +			cpu = cpumask_any_but(cpu_online_mask, cpu);
> +			smp_send_reschedule(cpu);
> +		}
> +		/*
> +		 * Return the earliest event of the top level group to make
> +		 * sure that its handled.
> +		 *
> +		 * This could be optimized by keeping track of the last
> +		 * global scheduled event and only arming it on @cpu if the
> +		 * new event is earlier. Not sure if its worth the
> +		 * complexity.
> +		 */
> +		nextevt = group->groupevt.nextevt.expires;
> +	}
> +done:
> +	raw_spin_unlock(&group->lock);
> +	return nextevt;
> +}

Would it be very onerous to rewrite that into regular loops? That avoids
us having to think (and worry) about blowing our stack.
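
For the walkup side it looks like a mechanical transformation; something
like the below, maybe (completely untested, it is just the body of
__tmigr_handle_remote() above moved inside a for(;;), with the tail call
replaced by reassigning @group). The downward recursion through
evt->group is still there, but that one only ever goes towards the
leafs, so it is bounded by the depth of the hierarchy:

static void __tmigr_handle_remote(struct tmigr_group *group, unsigned int cpu,
				  u64 now, unsigned long jif, bool walkup)
{
	struct timerqueue_node *tmr;
	struct tmigr_group *parent;
	struct tmigr_event *evt;

	for (;;) {
		raw_spin_lock_irq(&group->lock);
		if (group->migrator != cpu && group->migrator != TMIGR_NONE) {
			raw_spin_unlock_irq(&group->lock);
			return;
		}

		tmr = timerqueue_getnext(&group->events);
		if (tmr && now >= tmr->expires) {
			evt = container_of(tmr, struct tmigr_event, nextevt);
			tmigr_remove_evt(group, evt);
			raw_spin_unlock_irq(&group->lock);

			if (evt->group) {
				/* Down recursion, bounded by group->level */
				__tmigr_handle_remote(evt->group, cpu, now,
						      jif, false);
			} else {
				timer_expire_remote(evt->cpu);
				tmigr_update_remote(evt->cpu, now, jif);
			}
			/* Re-evaluate @group's queue */
			continue;
		}

		parent = group->parent;
		if (parent && !group->active) {
			raw_spin_lock_nested(&parent->lock, parent->level);
			tmigr_add_evt(parent, &group->groupevt);
			raw_spin_unlock(&parent->lock);
		}
		raw_spin_unlock_irq(&group->lock);

		/* Walk the hierarchy up? */
		if (!walkup || !parent)
			return;

		/* Racy lockless check: see tmigr_handle_remote() */
		if (parent->migrator != cpu)
			return;

		group = parent;
	}
}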
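
tmigr_set_cpu_inactive() is less obvious because the group locks are
held across the recursion and only dropped on the way back out. One
way (again a completely untested sketch, and note it changes the
signature: @child and @evt become loop state, so the leaf caller would
only pass the CPU event) is to keep taking the locks on the way up,
exactly like the recursion does, and then drop the whole chain in a
second pass once the walk has stopped:

static u64 tmigr_set_cpu_inactive(struct tmigr_group *group,
				  struct tmigr_event *evt, unsigned int cpu)
{
	struct tmigr_group *start = group, *child = NULL;
	u64 nextevt = KTIME_MAX;

	for (;;) {
		raw_spin_lock_nested(&group->lock, group->level);

		DBG_BUG_ON(!group->active);

		cpumask_clear_cpu(cpu, group->cpus);
		group->active--;

		/* @child, if set, got a new migrator; mark it active */
		if (child && child->migrator != TMIGR_NONE) {
			cpumask_set_cpu(child->migrator, group->cpus);
			group->active++;
		}

		tmigr_add_evt(group, evt);

		/* If @cpu is not the active migrator, stop the walk */
		if (group->migrator != cpu)
			break;

		if (!group->active)
			group->migrator = TMIGR_NONE;
		else
			group->migrator = cpumask_first(group->cpus);

		if (!group->parent) {
			/* Top level: kick another CPU if @cpu goes away */
			if (!per_cpu(tmigr_cpu, cpu).online) {
				unsigned int ncpu;

				ncpu = cpumask_any_but(cpu_online_mask, cpu);
				smp_send_reschedule(ncpu);
			}
			nextevt = group->groupevt.nextevt.expires;
			break;
		}

		evt = group->active ? NULL : &group->groupevt;
		child = group;
		group = group->parent;
	}

	/* Drop the lock chain, from the leaf up to where we stopped */
	for (child = start; ; child = child->parent) {
		raw_spin_unlock(&child->lock);
		if (child == group)
			break;
	}

	return nextevt;
}

The peak lock hold is the same as with the recursion (all levels locked
at once), so lockdep-wise nothing changes; only the unlock order
differs, which doesn't matter.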