On 13/11/2024 06:21, Mark Geisert wrote:
This program provides an up-to-the-moment load average measurement.  The
user can take 1 sample, or obtain the average of N samples by number or

Sorry about the inordinate time it's take for me to look at this.


So, this seems like two separate things shoved together

* A daemon which calls getloadavg() every 5 seconds
* A tool which exercises the loadavg estimation code

Does it really make sense to bundle them together?

time duration.  There is a daemon mode to have the global load average
stats updated such that 'uptime' and other tools provide a more reasonable
load average calculation over time.

I'm wondering what's "unreasonable" about the current behavior (estimate is calculated on demand, in a rate-limited fashion)

If it's that working correctly (after your fixes), is this actually needed (i.e. what difference does also having loadavg in daemon mode running make)?

A release note is provided for Cygwin 3.5.5.
Documentation for doc/utils.xml is pending.

A man page isn't optional :)

Or, I wonder if it really makes sense to ship this as useful to end-users (as opposed to marking it noinst_ in Makefile.am, so we just keep it around for developers)

Reported-by: Mark Liam Brown <brownmarkl...@gmail.com>
Addresses: https://cygwin.com/pipermail/cygwin/2024-August/256361.html
Signed-off-by: Mark Geisert <m...@maxrnd.com>
Fixes: N/A (new code)

---
  winsup/cygwin/release/3.5.5 |   8 +
  winsup/utils/Makefile.am    |   2 +
  winsup/utils/loadavg.c      | 380 ++++++++++++++++++++++++++++++++++++
  3 files changed, 390 insertions(+)
  create mode 100644 winsup/utils/loadavg.c

[...]
+
+/* estimate the current load based on certain counter values */
+double
+get_load (void)
+{
+  PDH_STATUS ret = PdhCollectQueryData (query);
+  if (ret != ERROR_SUCCESS) {
+    fprintf (stderr, "PdhCollectQueryData %s\n", pdh_status (ret));
+    return 0.0;
+  }
+
+  /* Estimate the number of running processes as
+     (NumberOfProcessors) * (% Processor Time) */
+  PDH_FMT_COUNTERVALUE fmtvalue1;
+  ret = PdhGetFormattedCounterValue (counter1, PDH_FMT_DOUBLE, NULL, 
&fmtvalue1);
+  if (ret != ERROR_SUCCESS) {
+    fprintf (stderr, "PdhGetFormattedCounterValue#1 %s\n", pdh_status (ret));
+    return 0.0;
+  }
+
+  double ncpus = (double) sysinfo.dwNumberOfProcessors;
+  if (verbose) {
+    fprintf (stdout, "%llu ", GetTickCount64 ());
+
+    fprintf (stdout, "ncpus: %d, %%ptime: %3.2f, ", (int) ncpus,
+                    fmtvalue1.doubleValue);
+  }
+  double running = fmtvalue1.doubleValue * ncpus / 100.0;
+
+  /* Estimate the number of runnable threads as
+     (ProcessorQueueLength) / (NumberOfProcessors) */
+  double rql = 0.0;
+  PDH_FMT_COUNTERVALUE fmtvalue2;
+  ret = PdhGetFormattedCounterValue (counter2, PDH_FMT_LONG, NULL, &fmtvalue2);
+  if (ret == ERROR_SUCCESS) {
+    rql = (double) fmtvalue2.longValue;
+    rql /= ncpus; /* make the measure a per-cpu queue length */
+  } else {
+    ++once; /* counter is missing; just print an error message once (below) */

I'd tend to make this static with function scope, rather than a global, but you are entitled to a different opinion.

+  }
+
+  double load = running + rql;
+  if (verbose ) {
+    fprintf (stdout, "running: %3.2f, effrql: %3.2f, load: %3.2f\n",
+                    running, rql, load);
+  }
+  if (once == 1)
+    fprintf (stderr, "PdhGetFormattedCounterValue#2 %s\n", pdh_status (ret));
+
+  ++samples;
+  return load;
+}
+
+int
+start_daemon (void)
+{
+  char  buf[132];
+  int   fd = -1;
+  pid_t pid = 0;
+
+top:
+  fd = open (PIDFILE, O_RDWR | O_CREAT | O_EXCL);
+  if (fd == -1) {
+    fd = open (PIDFILE, O_RDONLY);
+    if (fd == -1) {
+      fprintf (stderr, "unable to open pid file\n");
+      return 1;
+    }
+
+    memset (buf, 0, sizeof buf);
+    read (fd, buf, sizeof buf);
+    close (fd);
+
+    pid = atoi (buf);
+    /* does pid still exist? */
+    if (kill (pid, 0) == 0) {
+      fprintf (stderr, "daemon already running as pid %d\n", pid);
+      return 1;
+    }
+    unlink (PIDFILE);
+    goto top;
+  }
+  fchmod (fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+
+  (void) daemon (0, 0);
+
+  /* this code runs in the daemon; the parent process has exited via daemon() 
*/
+  snprintf (buf, sizeof buf, "%d", getpid ());
+  write (fd, buf, strlen (buf));
+  /* don't close(fd).. keep it open to protect against another daemon */
+
+  double loadavg[3];
+  while (1) {
+    (void) getloadavg (loadavg, 3);
+    sleep (5/*seconds*/);
+  }
+  return 0;
+}
+
+int
+stop_daemon (void)
+{
+  char     buf[132];
+  int      fd = -1;
+  ssize_t  len = 0;
+  pid_t    pid = 0;
+  char    *winpath = NULL;
+
+  fd = open (PIDFILE, O_RDONLY);
+  if (fd == -1) {
+    fprintf (stderr, "unable to open pid file\n");
+    return 1;
+  }
+  memset (buf, 0, sizeof buf);
+  read (fd, buf, sizeof buf);
+  close (fd);
+
+  pid = atoi (buf);
+  /* does pid still exist? */
+  if (kill (pid, 0) == -1) {
+    fprintf (stderr, "daemon pid %d already gone\n", pid);
+    unlink (PIDFILE);
+    return 1;
+  }
+
+  /* using kill() to terminate the daemon fails (why?); work around that */

Yeah, why! This is hugely suspicious.

I forget if getloadavg() is sigwrapper wrapped, but if not, that might explain it (or not, the daemon should spend nearly all of it's time in sleep(), right?)

+  winpath = getenv ("SYSTEMROOT");
+  len = cygwin_conv_path (CCP_WIN_A_TO_POSIX | CCP_PROC_CYGDRIVE,
+                          winpath, buf, sizeof buf);
+  len = strlen (buf);
+  snprintf (&buf[len], (sizeof buf) - len,
+            "/System32/taskkill /f /pid `cat /proc/%d/winpid`", pid);
+  fprintf (stdout, "Windows says: ");
+  fflush (stdout);
+  system (buf);
+
+  /* if pid is gone, delete pid file */
+  if (kill (pid, 0) == -1)
+    unlink (PIDFILE);
+  return 0;
+}
+

Reply via email to