* Benjamin Herrenschmidt <b...@kernel.crashing.org> wrote (on 2017-07-25 20:07:28 +1000):
> On Tue, 2017-07-25 at 12:26 +0530, Santosh Sivaraj wrote:
> > +static notrace void kernel_get_tspec(struct timespec *tp,
> > +				       struct vdso_data *vdata, u32 *wtom_sec,
> > +				       u32 *wtom_nsec)
> > +{
> > +	u64 tb;
> > +	u32 update_count;
>
> This is broken:
>
> > +	do {
> > +		/* check for update count & load values */
> > +		update_count = vdata->tb_update_count;
> > +
> > +		/* Get TB, offset it and scale result */
> > +		tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
> > +			    vdata->tb_to_xs) + vdata->stamp_sec_fraction;
> > +		tp->tv_sec = vdata->stamp_xtime.tv_sec;
> > +		if (wtom_sec)
> > +			*wtom_sec = vdata->wtom_clock_sec;
> > +		if (wtom_nsec)
> > +			*wtom_nsec = vdata->wtom_clock_nsec;
> > +	} while (update_count != vdata->tb_update_count);
>
> The assembly code is carefully crafted to create a chain of data
> dependencies in order to enforce the ordering in this function;
> you completely broke it.
>
> IE. the pointer used to access tb_orig_stamp etc... depends on the
> initial update count, and the final read of the update count depends
> on all the previously read values (or should), thus ordering those
> loads. Without that you'll need more expensive lwsync's.
>
> Additionally, you broke another semantic of the seqlock, which is
> to spin on the first update count if it has an odd value.
>
> The same problem exists in all your other implementations.
>
> I am really NOT a fan of that attempt at converting to C. The code is
> hand-crafted assembly for a number of reasons, including the above ones
> and maximum performance.
>
> As it is, it's deeply broken.

I get the point. I looked at the generated assembly a bit closer; the
update count is optimized out. Will send the alternative asm-only patch.

Thanks,
Santosh

> > +
> > +	tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
> > +	tp->tv_sec += (tb >> 32);
> > +}
> > +
> > +static notrace int clock_get_realtime(struct timespec *tp,
> > +				      struct vdso_data *vdata)
> > +{
> > +	kernel_get_tspec(tp, vdata, NULL, NULL);
> > +
> > +	return 0;
> > +}
> > +
> > +static notrace int clock_get_monotonic(struct timespec *tp,
> > +				       struct vdso_data *vdata)
> > +{
> > +	__s32 wtom_sec, wtom_nsec;
> > +	u64 nsec;
> > +
> > +	kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
> > +
> > +	tp->tv_sec += wtom_sec;
> > +
> > +	nsec = tp->tv_nsec;
> > +	tp->tv_nsec = 0;
> > +	timespec_add_ns(tp, nsec + wtom_nsec);
> > +
> > +	return 0;
> > +}
> > +
> > +static notrace int clock_realtime_coarse(struct timespec *tp,
> > +					 struct vdso_data *vdata)
> > +{
> > +	u32 update_count;
> > +
> > +	do {
> > +		/* check for update count & load values */
> > +		update_count = vdata->tb_update_count;
> > +
> > +		tp->tv_sec = vdata->stamp_xtime.tv_sec;
> > +		tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> > +	} while (update_count != vdata->tb_update_count);
> > +
> > +	return 0;
> > +}
> > +
> > +static notrace int clock_monotonic_coarse(struct timespec *tp,
> > +					  struct vdso_data *vdata)
> > +{
> > +	__s32 wtom_sec, wtom_nsec;
> > +	u64 nsec;
> > +	u32 update_count;
> > +
> > +	do {
> > +		/* check for update count & load values */
> > +		update_count = vdata->tb_update_count;
> > +
> > +		tp->tv_sec = vdata->stamp_xtime.tv_sec;
> > +		tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> > +		wtom_sec = vdata->wtom_clock_sec;
> > +		wtom_nsec = vdata->wtom_clock_nsec;
> > +	} while (update_count != vdata->tb_update_count);
> > +
> > +	tp->tv_sec += wtom_sec;
> > +	nsec = tp->tv_nsec;
> > +	tp->tv_nsec = 0;
> > +	timespec_add_ns(tp, nsec + wtom_nsec);
> > +
> > +	return 0;
> > +}
> > +
> > +notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
> > +{
> > +	int ret;
> > +	struct vdso_data *vdata = __get_datapage();
> > +
> > +	if (!tp || !vdata)
> > +		return -EBADR;
> > +
> > +	switch (clk_id) {
> > +	case CLOCK_REALTIME:
> > +		ret = clock_get_realtime(tp, vdata);
> > +		break;
> > +	case CLOCK_MONOTONIC:
> > +		ret = clock_get_monotonic(tp, vdata);
> > +		break;
> > +	case CLOCK_REALTIME_COARSE:
> > +		ret = clock_realtime_coarse(tp, vdata);
> > +		break;
> > +	case CLOCK_MONOTONIC_COARSE:
> > +		ret = clock_monotonic_coarse(tp, vdata);
> > +		break;
> > +	default:
> > +		/* fallback to syscall */
> > +		ret = -1;
> > +		break;
> > +	}
> > +
> > +	return ret;
> > +}
> > diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > index 3820213..c3f6b24 100644
> > --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> > +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > @@ -16,6 +16,8 @@
> >  #include <asm/asm-offsets.h>
> >  #include <asm/unistd.h>
> >  
> > +.global kernel_clock_gettime
> > +
> >  	.text
> >  /*
> >   * Exact prototype of gettimeofday
> > @@ -60,71 +62,23 @@ V_FUNCTION_END(__kernel_gettimeofday)
> >   */
> >  V_FUNCTION_BEGIN(__kernel_clock_gettime)
> >    .cfi_startproc
> > -	/* Check for supported clock IDs */
> > -	cmpwi	cr0,r3,CLOCK_REALTIME
> > -	cmpwi	cr1,r3,CLOCK_MONOTONIC
> > -	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
> > -	bne	cr0,99f
> > -
> > -	mflr	r12			/* r12 saves lr */
> > -	.cfi_register lr,r12
> > -	mr	r11,r4			/* r11 saves tp */
> > -	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
> > -	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
> > -	ori	r7,r7,NSEC_PER_SEC@l
> > -50:	bl	V_LOCAL_FUNC(__do_get_tspec)	/* get time from tb & kernel */
> > -	bne	cr1,80f			/* if not monotonic, all done */
> > -
> > -	/*
> > -	 * CLOCK_MONOTONIC
> > -	 */
> > -
> > -	/* now we must fixup using wall to monotonic. We need to snapshot
> > -	 * that value and do the counter trick again. Fortunately, we still
> > -	 * have the counter value in r8 that was returned by __do_get_tspec.
> > -	 * At this point, r4,r5 contain our sec/nsec values.
> > -	 */
> > -
> > -	lwa	r6,WTOM_CLOCK_SEC(r3)
> > -	lwa	r9,WTOM_CLOCK_NSEC(r3)
> > -
> > -	/* We now have our result in r6,r9. We create a fake dependency
> > -	 * on that result and re-check the counter
> > -	 */
> > -	or	r0,r6,r9
> > -	xor	r0,r0,r0
> > -	add	r3,r3,r0
> > -	ld	r0,CFG_TB_UPDATE_COUNT(r3)
> > -	cmpld	cr0,r0,r8		/* check if updated */
> > -	bne-	50b
> > -
> > -	/* Add wall->monotonic offset and check for overflow or underflow.
> > -	 */
> > -	add	r4,r4,r6
> > -	add	r5,r5,r9
> > -	cmpd	cr0,r5,r7
> > -	cmpdi	cr1,r5,0
> > -	blt	1f
> > -	subf	r5,r7,r5
> > -	addi	r4,r4,1
> > -1:	bge	cr1,80f
> > -	addi	r4,r4,-1
> > -	add	r5,r5,r7
> > -
> > -80:	std	r4,TSPC64_TV_SEC(r11)
> > -	std	r5,TSPC64_TV_NSEC(r11)
> > -
> > -	mtlr	r12
> > +	mflr	r6			/* r6 saves lr */
> > +	stwu	r1,-112(r1)
> > +	.cfi_register lr,r6
> > +	std	r6,24(r1)
> > +	std	r3,32(r1)
> > +	std	r4,40(r1)
> >  	crclr	cr0*4+so
> > -	li	r3,0
> > -	blr
> > -
> > -	/*
> > -	 * syscall fallback
> > -	 */
> > -99:
> > +	bl	V_LOCAL_FUNC(kernel_clock_gettime)
> > +	cmpwi	r3,0
> > +	beq	77f
> >  	li	r0,__NR_clock_gettime
> > +	ld	r3,32(r1)
> > +	ld	r4,40(r1)
> >  	sc
> > +77:	ld	r6,24(r1)
> > +	addi	r1,r1,112
> > +	mtlr	r6
> >  	blr
> >    .cfi_endproc
> >  V_FUNCTION_END(__kernel_clock_gettime)
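
For reference, the seqlock read protocol Ben describes would look roughly
like the sketch below in C: spin while the count is odd, read the data,
re-check the count, with explicit read barriers standing in for the asm's
data-dependency ordering. This is only an illustration, not something from
the thread; the field and helper names are taken from the patch above, and
whether READ_ONCE()/smp_rmb()/cpu_relax() are appropriate (and cheap enough,
compared with the hand-crafted dependency chain) in the VDSO environment is
an assumption here.

/*
 * Illustrative sketch only: seqcount-style read loop with an odd-count
 * spin and paired read barriers instead of the asm's dependency trick.
 * Field/helper names come from the patch above; the barrier and
 * READ_ONCE() choices are assumptions for illustration.
 */
static notrace void kernel_get_tspec(struct timespec *tp,
				     struct vdso_data *vdata, u32 *wtom_sec,
				     u32 *wtom_nsec)
{
	u64 tb;
	u32 update_count;

	do {
		/* An odd count means an update is in progress: spin. */
		while ((update_count = READ_ONCE(vdata->tb_update_count)) & 1)
			cpu_relax();
		smp_rmb();	/* order the count read before the data reads */

		/* Get TB, offset it and scale the result. */
		tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
			    vdata->tb_to_xs) + vdata->stamp_sec_fraction;
		tp->tv_sec = vdata->stamp_xtime.tv_sec;
		if (wtom_sec)
			*wtom_sec = vdata->wtom_clock_sec;
		if (wtom_nsec)
			*wtom_nsec = vdata->wtom_clock_nsec;

		smp_rmb();	/* order the data reads before the re-check */
	} while (update_count != READ_ONCE(vdata->tb_update_count));

	tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
	tp->tv_sec += (tb >> 32);
}

The even/odd spin plus the paired barriers is the standard seqcount read
pattern; the hand-written asm instead folds every load into the final
counter comparison, getting the ordering from the data dependency and
avoiding the barriers, which is exactly the behaviour the C version above
does not reproduce.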