On Mon, May 03, 2010 at 08:13:00PM +0000, Alexander Krizhanovsky wrote:
Kostik,
thank you very much for the review!
Kostik Belousov wrote:
On Mon, Apr 19, 2010 at 12:46:48AM +0000, Alexander Krizhanovsky wrote:
Hi all!
This patch implements per-thread rusage statistic (like RUSAGE_THREAD in
Linux and RUSAGE_LWP in OpenSolaris).
Unfortunately, we have to acquire a number of locks to read/update
system and user times for current thread rusage information because it's
also used for whole process statistic and needs to be zeroed.
Any comments are very appreciated.
It's tested against 8.0-RELEASE. Appropriate PR is submitted.
--
Alexander Krizhanovsky
NatSys Lab. (http://natsys-lab.com)
tel: +7 (916) 717-3899, +7 (499) 747-6304
e-mail: a...@natsys-lab.com
I think this should be somewhat modified before commit.
First, please separate patch into two pieces. One would introduce
ruxagg_tlock() helper and use it in existing locations instead of
three-liners. Possibly, add K&R->ANSI C kern_getrusage definition
change.
Second would actually add RUSAGE_THREAD. There, please follow
the style(9), in particular, do not initialize the local variables
at the declaration, instead, use explicit initialization in the code.
Done. I have also introduced third patch for casting microseconds to
timeval and replaced a few appropriate places in whole kernel code.
patch-rusage-thread-03052010.txt is incremental for
patch-ruxagg_tlock-03052010.txt, which is by turn incremental for
patch-usec2timeval-03052010.txt.
I have made following change for calcru1():
- if ((int64_t)tu < 0) {
- /* XXX: this should be an assert /phk */
- printf("calcru: negative runtime of %jd usec for pid %d
(%s)\n",
- (intmax_t)tu, p->p_pid, p->p_comm);
- tu = ruxp->rux_tu;
- }
+ KASSERT((int64_t)tu < 0, ("calcru: negative runtime of %jd usec "
+ "for pid %d (%s)\n",
+ (intmax_t)tu, p->p_pid, p->p_comm));
tu can become negative at about 300 thousand years of uptime, so this
condition seems quite unrealistic and indeed should be replaced by an
assertion. However I didn't understand why it isn't done so far and the
comment only was added. Did I miss something?
Should calctru() share the code with calcru1() ? I am esp. concerned
with sanity check like negative runtime etc. Possibly, this code
>from calcru1() should be separated into function used from both
calcru1() and calctru().
Man page for getrusage(2) should be updated.
It's added to patch-rusage-thread-03052010.txt.
In the end - shoud I raise a new PR (the original one is kern/145813)?
Thanks for the submission.
It was some time, I already committed ruxagg_tlock() part, that caused
some feedback, and I reworked the rest of the patch myself. See svn-src@
for some discussion, and the latest version that I intend to commit
shortly is below. Your comments are welcome.
Lets discuss third patch after this is landed.
diff --git a/lib/libc/sys/getrusage.2 b/lib/libc/sys/getrusage.2
index bdf5d45..423503f 100644
--- a/lib/libc/sys/getrusage.2
+++ b/lib/libc/sys/getrusage.2
@@ -28,7 +28,7 @@
.\" @(#)getrusage.2 8.1 (Berkeley) 6/4/93
.\" $FreeBSD$
.\"
-.Dd June 4, 1993
+.Dd May 1, 2010
.Dt GETRUSAGE 2
.Os
.Sh NAME
@@ -42,6 +42,7 @@
.In sys/resource.h
.Fd "#define RUSAGE_SELF 0"
.Fd "#define RUSAGE_CHILDREN -1"
+.Fd "#define RUSAGE_THREAD 1"
.Ft int
.Fn getrusage "int who" "struct rusage *rusage"
.Sh DESCRIPTION
@@ -49,11 +50,12 @@ The
.Fn getrusage
system call
returns information describing the resources utilized by the current
-process, or all its terminated child processes.
+thread, the current process, or all its terminated child processes.
The
.Fa who
argument is either
-.Dv RUSAGE_SELF
+.Dv RUSAGE_THREAD ,
+.Dv RUSAGE_SELF ,
or
.Dv RUSAGE_CHILDREN .
The buffer to which
@@ -175,6 +177,10 @@ The
.Fn getrusage
system call appeared in
.Bx 4.2 .
+The
+.Dv RUSAGE_THREAD
+facility first appeared in
+.Fx 8.1 .
.Sh BUGS
There is no way to obtain information about a child process
that has not yet terminated.
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 49a3097..6c50f1f 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -901,7 +901,7 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
int preferthread)
kp->ki_pri.pri_user = td->td_user_pri;
if (preferthread) {
- kp->ki_runtime = cputick2usec(td->td_runtime);
+ kp->ki_runtime = td->td_rux.rux_runtime;
kp->ki_pctcpu = sched_pctcpu(td);
kp->ki_estcpu = td->td_estcpu;
}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index a3ed75d..0bc78d0 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -76,7 +76,7 @@ static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
-static void ruxagg_tlock(struct proc *p, struct thread *td);
+static void ruxagg(struct proc *p, struct thread *td);
/*
* Resource controls and accounting.
@@ -630,7 +630,7 @@ lim_cb(void *arg)
return;
PROC_SLOCK(p);
FOREACH_THREAD_IN_PROC(p, td) {
- ruxagg_tlock(p, td);
+ ruxagg(p, td);
}
PROC_SUNLOCK(p);
if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
@@ -841,7 +841,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval
*sp)
FOREACH_THREAD_IN_PROC(p, td) {
if (td->td_incruntime == 0)
continue;
- ruxagg_tlock(p, td);
+ ruxagg(p, td);
}
calcru1(p, &p->p_rux, up, sp);
}
@@ -961,6 +961,16 @@ kern_getrusage(struct thread *td, int who, struct rusage
*rup)
calccru(p, &rup->ru_utime, &rup->ru_stime);
break;
+ case RUSAGE_THREAD:
+ PROC_SLOCK(p);
+ ruxagg(p, td);
+ PROC_SUNLOCK(p);
+ thread_lock(td);
+ *rup = td->td_ru;
+ calcru1(p, &td->td_rux, &rup->ru_utime, &rup->ru_stime);
+ thread_unlock(td);
+ break;
+
default:
error = EINVAL;
}
@@ -1001,7 +1011,7 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct
rusage *ru2,
* Aggregate tick counts into the proc's rusage_ext.
*/
void
-ruxagg(struct rusage_ext *rux, struct thread *td)
+ruxagg_locked(struct rusage_ext *rux, struct thread *td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -1010,18 +1020,19 @@ ruxagg(struct rusage_ext *rux, struct thread *td)
rux->rux_uticks += td->td_uticks;
rux->rux_sticks += td->td_sticks;
rux->rux_iticks += td->td_iticks;
- td->td_incruntime = 0;
- td->td_uticks = 0;
- td->td_iticks = 0;
- td->td_sticks = 0;
}
static void
-ruxagg_tlock(struct proc *p, struct thread *td)
+ruxagg(struct proc *p, struct thread *td)
{
thread_lock(td);
- ruxagg(&p->p_rux, td);
+ ruxagg_locked(&p->p_rux, td);
+ ruxagg_locked(&td->td_rux, td);
+ td->td_incruntime = 0;
+ td->td_uticks = 0;
+ td->td_iticks = 0;
+ td->td_sticks = 0;
thread_unlock(td);
}
@@ -1039,7 +1050,7 @@ rufetch(struct proc *p, struct rusage *ru)
*ru = p->p_ru;
if (p->p_numthreads > 0) {
FOREACH_THREAD_IN_PROC(p, td) {
- ruxagg_tlock(p, td);
+ ruxagg(p, td);
rucollect(ru, &td->td_ru);
}
}
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 9be4c2f..b32c584 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -432,7 +432,7 @@ thread_exit(void)
PROC_UNLOCK(p);
thread_lock(td);
/* Save our tick information with both the thread and proc locked */
- ruxagg(&p->p_rux, td);
+ ruxagg_locked(&p->p_rux, td);
PROC_SUNLOCK(p);
td->td_state = TDS_INACTIVE;
#ifdef WITNESS
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fb31cfc..e32e494 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -173,6 +173,27 @@ struct kdtrace_thread;
struct cpuset;
/*
+ * XXX: Does this belong in resource.h or resourcevar.h instead?
+ * Resource usage extension. The times in rusage structs in the kernel are
+ * never up to date. The actual times are kept as runtimes and tick counts
+ * (with control info in the "previous" times), and are converted when
+ * userland asks for rusage info. Backwards compatibility prevents putting
+ * this directly in the user-visible rusage struct.
+ *
+ * Locking for p_rux: (cj) means (j) for p_rux and (c) for p_crux.
+ * Locking for td_rux: (t) for all fields.
+ */
+struct rusage_ext {
+ u_int64_t rux_runtime; /* (cj) Real time. */
+ u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */
+ u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
+ u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
+ u_int64_t rux_uu; /* (c) Previous user time in usec. */
+ u_int64_t rux_su; /* (c) Previous sys time in usec. */
+ u_int64_t rux_tu; /* (c) Previous total time in usec. */
+};
+
+/*
* Kernel runnable context (thread).
* This is what is put to sleep and reactivated.
* Thread context. Processes may have multiple threads.
@@ -219,7 +240,8 @@ struct thread {
u_int td_estcpu; /* (t) estimated cpu utilization */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
- struct rusage td_ru; /* (t) rusage information */
+ struct rusage td_ru; /* (t) rusage information. */
+ struct rusage_ext td_rux; /* (t) Internal rusage information. */
uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */
u_int td_pticks; /* (t) Statclock hits for profiling */
@@ -426,26 +448,6 @@ do {
\
#define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN
/*
- * XXX: Does this belong in resource.h or resourcevar.h instead?
- * Resource usage extension. The times in rusage structs in the kernel are
- * never up to date. The actual times are kept as runtimes and tick counts
- * (with control info in the "previous" times), and are converted when
- * userland asks for rusage info. Backwards compatibility prevents putting
- * this directly in the user-visible rusage struct.
- *
- * Locking: (cj) means (j) for p_rux and (c) for p_crux.
- */
-struct rusage_ext {
- u_int64_t rux_runtime; /* (cj) Real time. */
- u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */
- u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
- u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
- u_int64_t rux_uu; /* (c) Previous user time in usec. */
- u_int64_t rux_su; /* (c) Previous sys time in usec. */
- u_int64_t rux_tu; /* (c) Previous total time in usec. */
-};
-
-/*
* Process structure.
*/
struct proc {
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
index 9af96af..e703744 100644
--- a/sys/sys/resource.h
+++ b/sys/sys/resource.h
@@ -56,6 +56,7 @@
#define RUSAGE_SELF 0
#define RUSAGE_CHILDREN -1
+#define RUSAGE_THREAD 1
struct rusage {
struct timeval ru_utime; /* user time used */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 21728aa..95a9b49 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -131,7 +131,7 @@ void rucollect(struct rusage *ru, struct rusage
*ru2);
void rufetch(struct proc *p, struct rusage *ru);
void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
struct timeval *sp);
-void ruxagg(struct rusage_ext *rux, struct thread *td);
+void ruxagg_locked(struct rusage_ext *rux, struct thread *td);
int suswintr(void *base, int word);
struct uidinfo
*uifind(uid_t uid);