date:20241219

[hackers] [PATCH 1/2] Add libutil/random.c for fast PCG-based PRNG

2024-12-19 Thread Elie Le Vaillant

---
 Makefile |  1 +
 libutil/random.c | 87 
 util.h   |  5 +++
 3 files changed, 93 insertions(+)
 create mode 100644 libutil/random.c

diff --git a/Makefile b/Makefile
index 4e20cb3..35c4c25 100644
--- a/Makefile
+++ b/Makefile
@@ -86,6 +86,7 @@ LIBUTILOBJ =\
libutil/strsep.o\
libutil/strnsubst.o\
libutil/strtonum.o\
+   libutil/random.o\
libutil/unescape.o\
libutil/writeall.o
 
diff --git a/libutil/random.c b/libutil/random.c
new file mode 100644
index 000..72385fb
--- /dev/null
+++ b/libutil/random.c
@@ -0,0 +1,87 @@
+/* This is a simple random API. It aims to provide high-quality random
+ * numbers. 2 use-cases are common:
+ *  - generating numbers in a range
+ *  - generating random bits
+ *
+ * On OpenBSD, random bits are generated with arc4random, a CSPRNG.
+ * Otherwise, it falls back to a PCG construction, a fast PRNG.
+ * In both cases, generating numbers in a range use a procedure
+ * based on Lemire's method.
+ */
+#include 
+#include 
+#include 
+
+#ifdef OpenBSD
+
+uint32_t
+rng32(void)
+{
+   return arc4random();
+}
+
+void
+rng32_randomseed()
+{
+   return;
+}
+
+#else /* standalone, PCG construction */
+
+static uint64_t globalstate;
+
+/*
+ * PCG construction
+ * seeding the RNG means merely setting the initial state.
+ * the increment could also be made part of the seed, just make sure it's odd.
+ */
+uint32_t
+rng32(void)
+{
+   uint64_t oldstate = globalstate;
+   uint32_t r, v;
+
+   *state *= 6364136223846793005ULL;
+   *state += globalid; /* we must have it as odd */
+
+   r = oldstate >> (64 - 5);
+   v = (oldstate ^ (oldstate >> 18)) >> (32 - 5);
+   v = (v >> (-r & 31)) | (v << r);
+   return v;
+}
+
+void
+rng32_randomseed(void)
+{
+   struct timespec ts;
+   clock_gettime(CLOCK_REALTIME, &ts);
+   globalstate = (intptr_t)&printf ^ ts.tv_sec ^ ((unsigned 
long)ts.tv_nsec * 0xAC5533CD);
+   globalid = 1442695040888963407ULL;
+}
+
+#endif /* standalone, PCG construction */
+
+/*
+ * Based on optimized Lemire's method
+ * https://pcg-random.org/posts/bounded-rands.html
+ */
+uint32_t
+rng32_bounded(uint32_t bound) {
+   uint32_t x = rng32();
+   uint64_t m = (uint64_t)x * (uint64_t)bound;
+   uint32_t l = (uint32_t)m;
+   if (l < range) {
+   uint32_t t = -range;
+   if (t >= range) {
+   t -= range;
+   if (t >= range) 
+   t %= range;
+   }
+   while (l < t) {
+   x = rng32(state, id);
+   m = (uint64_t)x * (uint64_t)bound;
+   l = (uint32_t)m;
+   }
+   }
+   return m >> 32;
+}
diff --git a/util.h b/util.h
index 346f6ca..730ba53 100644
--- a/util.h
+++ b/util.h
@@ -3,6 +3,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #include "arg.h"
@@ -91,3 +92,7 @@ int mkdirp(const char *, mode_t, mode_t);
 #undef memmem
 #define memmem xmemmem
 void *memmem(const void *, size_t, const void *, size_t);
+
+uint32_t rng32(void);
+uint32_t rng32_bounded(uint32_t);
+void rng32_seed(void);
-- 
2.47.1

[hackers] [PATCH 2/2] cron: heavy refactor of parsefield() and matchentry()

2024-12-19 Thread Elie Le Vaillant

This patch heavily simplifies the parsing logic of parsefield(), and
makes the grammar more standards-compliant.  Before, this cron
implementation would only recognize repeats ("/n" at the end of a
range, or of a wildcard) for wildcards, and list elements could only
be numbers.  Now, the basic type is a range, which also includes and
simplifies numbers, wildcards and "repeats".  A field is thus a list of
these basic types.  So now we can have things such as "*/3,14-18" as a
field.  Every range (except for numbers) allows the "/n" syntax.

Also added: random fields, using ~. This is a non-standard addition,
but which matches many cron implementations, like OpenBSD's for
instance.

The new grammar is as follows:

field  = range (',' range)*
   ;
range  = number
   | number? '~' number? ('/' number)?
   | number '-' number ('/' number)?
   ;
number = [0-9]+
   ;
---
 cron.1 |  18 +
 cron.c | 246 -
 2 files changed, 141 insertions(+), 123 deletions(-)

diff --git a/cron.1 b/cron.1
index 1cb90a4..da8284e 100644
--- a/cron.1
+++ b/cron.1
@@ -21,3 +21,21 @@ instead of the default
 .It Fl n
 Do not daemonize.
 .El
+.Sh NOTES
+This
+.Nm
+accepts Vixie cron syntax.
+It also includes the '~' syntax for random ranges,
+which is non-standard.
+.Pp
+This
+.Nm
+doesn't support crontabs in /var/spool. If one wishes to have
+per-user crontabs, one should, at login time, or at boot time,
+start
+.Nm
+with the user-supplied file and the correct user permissions.
+Accordingly, crontab will not work with this
+.Nm .
+.Sh SEE ALSO
+.Xf crontab 5
diff --git a/cron.c b/cron.c
index 77304cc..59e7fbc 100644
--- a/cron.c
+++ b/cron.c
@@ -17,19 +17,13 @@
 #include "queue.h"
 #include "util.h"
 
-struct field {
-   enum {
-   ERROR,
-   WILDCARD,
-   NUMBER,
-   RANGE,
-   REPEAT,
-   LIST
-   } type;
-   long *val;
-   int len;
+struct range {
+   long low, high, repeat;
+   TAILQ_ENTRY(range) entry;
 };
 
+TAILQ_HEAD(field, range);
+
 struct ctabentry {
struct field min;
struct field hour;
@@ -202,143 +196,146 @@ matchentry(struct ctabentry *cte, struct tm *tm)
{ .f = &cte->wday, .tm = tm->tm_wday, .len = 7  },
};
size_t i;
-   int j;
+   int found, t;
+   struct range *r;
 
for (i = 0; i < LEN(matchtbl); i++) {
-   switch (matchtbl[i].f->type) {
-   case WILDCARD:
-   continue;
-   case NUMBER:
-   if (matchtbl[i].f->val[0] == matchtbl[i].tm)
-   continue;
-   break;
-   case RANGE:
-   if (matchtbl[i].f->val[0] <= matchtbl[i].tm)
-   if (matchtbl[i].f->val[1] >= matchtbl[i].tm)
-   continue;
-   break;
-   case REPEAT:
-   if (matchtbl[i].tm > 0) {
-   if (matchtbl[i].tm % matchtbl[i].f->val[0] == 0)
-   continue;
-   } else {
-   if (matchtbl[i].len % matchtbl[i].f->val[0] == 
0)
-   continue;
+   found = 0;
+   t = matchtbl[i].tm;
+   TAILQ_FOREACH(r, matchtbl[i].f, entry) {
+   if (r->low <= t && r->high >= t && t % r->repeat == 0) {
+   found = 1;
+   break;
}
-   break;
-   case LIST:
-   for (j = 0; j < matchtbl[i].f->len; j++)
-   if (matchtbl[i].f->val[j] == matchtbl[i].tm)
-   break;
-   if (j < matchtbl[i].f->len)
-   continue;
-   break;
-   default:
-   break;
}
-   break;
+   if (!found)
+   break;
}
if (i != LEN(matchtbl))
return 0;
+
return 1;
 }
 
 static int
-parsefield(const char *field, long low, long high, struct field *f)
+parserange(char *str, long low, long high, struct range *r)
 {
-   int i;
-   char *e1, *e2;
-   const char *p;
-
-   p = field;
-   while (isdigit(*p))
-   p++;
-
-   f->type = ERROR;
-
-   switch (*p) {
-   case '*':
-   if (strcmp(field, "*") == 0) {
-   f->val = NULL;
-   f->len = 0;
-   f->type = WILDCARD;
-   } else if (strncmp(field, "*/", 2) == 0) {
-   f->val = emalloc(sizeof(*f->val));
-   f->len = 1;
-
-

Re: [hackers] [sbase][PATCH] cron: fix parsing and '~' behavior

2024-12-19 Thread Roberto E . Vargas Caballero

Quoth Elie Le Vaillant :
> In parserange(), we tested whether range was null, to test whether or
> not the repeat number was the end of the string (to test if we had
> something like "*/3/34").  But it is str that we should be testing,
> not range, as its value as a pointer doesn't mean anything in the
> current context.
> 
> This makes this cron more in line with other interpretations concerning
> '~'.  In other crons, a random number is picked for the starting field,
> and it doesn't change during all of the program's lifetime, whereas this
> one used to change its random number every time it was matched.
> 
> 

I think you forgot the diff in this patch.

Regards,

Re: [hackers] [sbase] tar: fix long names crashing tar archiving

2024-12-19 Thread Roberto E . Vargas Caballero

Quoth Andrea Calligaris :
> It was already mentioned in Dec 2016 here:
> https://lists.suckless.org/dev/1612/30852.html
> but never fixed since.
> 
> *Extracting* long paths was added in May 2022 here:
> https://lists.suckless.org/hackers/2205/18321.html
> but not *archiving*.
> 
> Next thing to do should be supporting links while archiving.
> ---

I applied a different patch for the same purpose. I like your
change more, and if you resend it against the current master I will
commit it.

Regards,

[hackers] [sbase] cron: heavy refactor of parsefield() and matchentry() || Elie Le Vaillant

2024-12-19 Thread git

commit 6c8dc1522ca47f65260f9605b6003fae547f9454
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:35 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 11:40:06 2024 +0100

cron: heavy refactor of parsefield() and matchentry()

    This patch heavily simplifies the parsing logic of parsefield(), and
    makes the grammar more standards-compliant.  Before, this cron
    implementation would only recognize repeats ("/n" at the end of a
    range, or of a wildcard) for wildcards, and list elements could only
    be numbers.  Now, the basic type is a range, which also includes and
    simplifies numbers, wildcards and "repeats".  A field is thus a list of
    these basic types.  So now we can have things such as "*/3,14-18" as a
    field.  Every range (except for numbers) allows the "/n" syntax.

Also added: random fields, using ~.  This is a non-standard addition,
but which matches many cron implementations, like OpenBSD's for
instance.  Its usefulness is debatable.

diff --git a/cron.1 b/cron.1
index 1cb90a4..fe907f4 100644
--- a/cron.1
+++ b/cron.1
@@ -21,3 +21,11 @@ instead of the default
 .It Fl n
 Do not daemonize.
 .El
+.Sh NOTES
+This
+.Nm
+accepts normal, standard syntax.
+It also includes the '~' syntax for random ranges,
+which is non-standard.
+.Sh SEE ALSO
+.Xf crontab 5
diff --git a/cron.c b/cron.c
index 77304cc..5d940cd 100644
--- a/cron.c
+++ b/cron.c
@@ -17,19 +17,13 @@
 #include "queue.h"
 #include "util.h"
 
-struct field {
-   enum {
-   ERROR,
-   WILDCARD,
-   NUMBER,
-   RANGE,
-   REPEAT,
-   LIST
-   } type;
-   long *val;
-   int len;
+struct range {
+   long low, high, repeat, random;
+   TAILQ_ENTRY(range) entry;
 };
 
+TAILQ_HEAD(field, range);
+
 struct ctabentry {
struct field min;
struct field hour;
@@ -202,143 +196,159 @@ matchentry(struct ctabentry *cte, struct tm *tm)
{ .f = &cte->wday, .tm = tm->tm_wday, .len = 7  },
};
size_t i;
-   int j;
+   int found, t;
+   long low;
+   struct range *r;
 
for (i = 0; i < LEN(matchtbl); i++) {
-   switch (matchtbl[i].f->type) {
-   case WILDCARD:
-   continue;
-   case NUMBER:
-   if (matchtbl[i].f->val[0] == matchtbl[i].tm)
-   continue;
-   break;
-   case RANGE:
-   if (matchtbl[i].f->val[0] <= matchtbl[i].tm)
-   if (matchtbl[i].f->val[1] >= matchtbl[i].tm)
-   continue;
-   break;
-   case REPEAT:
-   if (matchtbl[i].tm > 0) {
-   if (matchtbl[i].tm % matchtbl[i].f->val[0] == 0)
-   continue;
-   } else {
-   if (matchtbl[i].len % matchtbl[i].f->val[0] == 
0)
-   continue;
+   found = 0;
+   t = matchtbl[i].tm;
+   TAILQ_FOREACH(r, matchtbl[i].f, entry) {
+   if (r->random)
+   low = r->random;
+   else
+   low = r->low;
+   if (low <= t && r->high >= t && t % r->repeat == 0) {
+   found = 1;
+   break;
}
-   break;
-   case LIST:
-   for (j = 0; j < matchtbl[i].f->len; j++)
-   if (matchtbl[i].f->val[j] == matchtbl[i].tm)
-   break;
-   if (j < matchtbl[i].f->len)
-   continue;
-   break;
-   default:
-   break;
}
-   break;
+   if (!found)
+   break;
}
if (i != LEN(matchtbl))
return 0;
+
+   for (i = 0; i < LEN(matchtbl); i++) { /* only if entry is matched */
+   TAILQ_FOREACH(r, matchtbl[i].f, entry) {
+   if (r->random)
+   r->random = random_uniform(r->high - r->low) + 
r->low;
+   }
+   }
+
return 1;
 }
 
+
 static int
-parsefield(const char *field, long low, long high, struct field *f)
+parserange(char *str, long low, long high, struct range *r)
 {
-   int i;
-   char *e1, *e2;
-   const char *p;
-
-   p = field;
-   while (isdigit(*p))
-   p++;
-
-   f->type = ERROR;
-
-   switch (*p) {
-   case '*':
-   if (strcmp(field, "*") == 0) {
-   f->val = NULL;
-   f->len = 0;
-

[hackers] [sbase] cron: fix parsing and '~' behavior || Elie Le Vaillant

2024-12-19 Thread git

commit 12c212e50580cbfa6d929ae444c67732af842cb7
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:36 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 11:40:06 2024 +0100

cron: fix parsing and '~' behavior

    In parserange(), we tested whether range was null, to test whether or
    not the repeat number was the end of the string (to test if we had
something like "*/3/34").  But it is str that we should be testing,
not range, as its value as a pointer doesn't mean anything in the
current context.

This makes this cron more in line with other interpretations concerning
'~'.  In other crons, a random number is picked for the starting field,
and it doesn't change during all of the program's lifetime, whereas this
    one used to change its random number every time it was matched.

diff --git a/cron.c b/cron.c
index 5d940cd..2f3cedc 100644
--- a/cron.c
+++ b/cron.c
@@ -18,7 +18,7 @@
 #include "util.h"
 
 struct range {
-   long low, high, repeat, random;
+   long low, high, repeat;
TAILQ_ENTRY(range) entry;
 };
 
@@ -204,11 +204,7 @@ matchentry(struct ctabentry *cte, struct tm *tm)
found = 0;
t = matchtbl[i].tm;
TAILQ_FOREACH(r, matchtbl[i].f, entry) {
-   if (r->random)
-   low = r->random;
-   else
-   low = r->low;
-   if (low <= t && r->high >= t && t % r->repeat == 0) {
+   if (r->low <= t && r->high >= t && t % r->repeat == 0) {
found = 1;
break;
}
@@ -219,13 +215,6 @@ matchentry(struct ctabentry *cte, struct tm *tm)
if (i != LEN(matchtbl))
return 0;
 
-   for (i = 0; i < LEN(matchtbl); i++) { /* only if entry is matched */
-   TAILQ_FOREACH(r, matchtbl[i].f, entry) {
-   if (r->random)
-   r->random = random_uniform(r->high - r->low) + 
r->low;
-   }
-   }
-
return 1;
 }
 
@@ -239,8 +228,9 @@ parserange(char *str, long low, long high, struct range *r)
 */
char *range, *repeat, *strlow, *strhigh;
char *e;
+   int random;
 
-   r->random = 0;
+   random = 0;
 
range = strsep(&str, "/");
repeat = strsep(&str, "/");
@@ -249,7 +239,7 @@ parserange(char *str, long low, long high, struct range *r)
 
switch (*range) {
case '~':
-   r->random = 1;
+   random = 1;
case '*': /* fallthru */
if (range[1] != '\0')
return -1;
@@ -281,7 +271,7 @@ parserange(char *str, long low, long high, struct range *r)
if (strhigh) {
if (!*strhigh || strlow != NULL) /* i.e. N~ or 
N~M~... */
return -1;
-   r->random = 1;
+   random = 1;
 
errno = 0;
r->high = strtol(strhigh, &e, 10);
@@ -293,7 +283,7 @@ parserange(char *str, long low, long high, struct range *r)
}
 
if (repeat) {
-   if (!*repeat || range != NULL)
+   if (!*repeat || str != NULL)
return -1;
errno = 0;
r->repeat = strtol(repeat, &e, 10);
@@ -303,9 +293,10 @@ parserange(char *str, long low, long high, struct range *r)
r->repeat = 1;
}
 
-   if (r->random) {
-   /* random replaces low in matchentry(), if it is >0 */
-   r->random = random_uniform(r->high - r->low) + r->low;
+   if (random) {
+   /* random replaces low in matchentry() */
+   r->repeat = r->low; /* so that it doesn't repeat */
+   r->low = random_uniform(r->high - r->low+1) + r->low;
}
 
if (r->low < low || r->low > high || r->high < low || r->high > high || 
r->repeat < low || r->repeat > high) {

[hackers] [sbase] sort: remove useless allocation || Elie Le Vaillant

2024-12-19 Thread git

commit 2e6e8fe4306abe501e572dc29c459e19abeec8c4
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:34 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 11:40:06 2024 +0100

sort: remove useless allocation

I'm not sure why we're doing malloc() then memcpy() here, when
    we could just make col->line.data point to start.data. This is costly
    for huge sorts (3 times slower than other implementations with no real
reason). Since we are now working with the original line.data we need
to revert the s/\n/\0/ that happens in columns().

diff --git a/sort.c b/sort.c
index fbb1abf..dd992e0 100644
--- a/sort.c
+++ b/sort.c
@@ -123,11 +123,7 @@ columns(struct line *line, const struct keydef *kd, struct 
column *col)
end.len = 1;
}
col->line.len = MAX(0, end.data - start.data);
-   if (!(col->line.data) || col->cap < col->line.len + 1) {
-   free(col->line.data);
-   col->line.data = emalloc(col->line.len + 1);
-   }
-   memcpy(col->line.data, start.data, col->line.len);
+   col->line.data = start.data;
col->line.data[col->line.len] = '\0';
 }
 
@@ -423,6 +419,7 @@ main(int argc, char *argv[])
for (i = 0; i < linebuf.nlines; i++) {
if (!uflag || i == 0 ||
slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) 
{
+   linebuf.lines[i].data[linebuf.lines[i].len-1] = 
'\n';
fwrite(linebuf.lines[i].data, 1,
   linebuf.lines[i].len, ofp);
}

[hackers] [sbase] cron: fix '~' range parsing || Elie Le Vaillant

2024-12-19 Thread git

commit 74a1fbed375bac92df1f3fea36127945f071aa84
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:47 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 11:40:06 2024 +0100

cron: fix '~' range parsing

'~' after number was recognized as abnormal.

diff --git a/cron.c b/cron.c
index 2f3cedc..bd9f247 100644
--- a/cron.c
+++ b/cron.c
@@ -254,7 +254,7 @@ parserange(char *str, long low, long high, struct range *r)
 
errno = 0;
r->low = strtol(strlow, &e, 10);
-   if (*e || errno != 0)
+   if ((*e && *e != '~') || errno != 0)
return -1;
if (strhigh) {
if (!*strhigh || range != NULL) /* i.e. N- or N-M-... */

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit bb367fd6c7a05bd60112ade77098b943a3ed62ef
Author: Roberto E. Vargas Caballero 
AuthorDate: Fri Mar 22 04:32:56 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 16:20:39 2024 +0100

Move more things around

diff --git a/ubase/fallocate.1 b/linux/fallocate.1
similarity index 100%
rename from ubase/fallocate.1
rename to linux/fallocate.1
diff --git a/ubase/fallocate.c b/linux/fallocate.c
similarity index 100%
rename from ubase/fallocate.c
rename to linux/fallocate.c
diff --git a/ubase/rtc.h b/linux/rtc.h
similarity index 100%
rename from ubase/rtc.h
rename to linux/rtc.h
diff --git a/ubase/arg.h b/ubase/arg.h
deleted file mode 100644
index aeab52a..000
--- a/ubase/arg.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copy me if you can.
- * by 20h
- */
-
-#ifndef ARG_H__
-#define ARG_H__
-
-extern char *argv0;
-
-/* use main(int argc, char *argv[]) */
-#define ARGBEGIN   for (argv0 = *argv, argv++, argc--;\
-   argv[0] && argv[0][0] == '-'\
-   && argv[0][1];\
-   argc--, argv++) {\
-   char argc_;\
-   char **argv_;\
-   int brk_;\
-   if (argv[0][1] == '-' && argv[0][2] == '\0') {\
-   argv++;\
-   argc--;\
-   break;\
-   }\
-   for (brk_ = 0, argv[0]++, argv_ = argv;\
-   argv[0][0] && !brk_;\
-   argv[0]++) {\
-   if (argv_ != argv)\
-   break;\
-   argc_ = argv[0][0];\
-   switch (argc_)
-
-/* Handles obsolete -NUM syntax */
-#define ARGNUM case '0':\
-   case '1':\
-   case '2':\
-   case '3':\
-   case '4':\
-   case '5':\
-   case '6':\
-   case '7':\
-   case '8':\
-   case '9'
-
-#define ARGEND }\
-   }
-
-#define ARGC() argc_
-
-#define ARGNUMF(base)  (brk_ = 1, estrtol(argv[0], (base)))
-
-#define EARGF(x)   ((argv[0][1] == '\0' && argv[1] == NULL)?\
-   ((x), abort(), (char *)0) :\
-   (brk_ = 1, (argv[0][1] != '\0')?\
-   (&argv[0][1]) :\
-   (argc--, argv++, argv[0])))
-
-#define ARGF() ((argv[0][1] == '\0' && argv[1] == NULL)?\
-   (char *)0 :\
-   (brk_ = 1, (argv[0][1] != '\0')?\
-   (&argv[0][1]) :\
-   (argc--, argv++, argv[0])))
-
-#endif
diff --git a/ubase/queue.h b/ubase/queue.h
deleted file mode 100644
index f8f09bf..000
--- a/ubase/queue.h
+++ /dev/null
@@ -1,648 +0,0 @@
-/* $OpenBSD: queue.h,v 1.38 2013/07/03 15:05:21 fgsch Exp $*/
-/* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $   */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *may be used to endorse or promote products derived from this software
- *without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSE

Re: [hackers] [sbase][PATCH] cron: fix repeat for random, and format code

2024-12-19 Thread Elie Le Vaillant

Hi,

On Thu Dec 19, 2024 at 12:55 PM CET, Roberto E. Vargas Caballero wrote:
> I am sorry, there are so many patches just fixing earlier ones
> that it is impossible to follow anything. I am going to revert
> the changes to cron, but I am going to keep the last version
> of the random functions.
>
> Can you resend your patches for cron with a clean history and
> the current head?

Will do. I made some tweaks to the random functions which improve
them somewhat (better constants, no reentrant versions). I also
added an ifdef that uses arc4random on OpenBSD.

Maybe those aren't essential. The constants are objectively better
(they make the underlying LCG succeed more at spectral tests).
I think it's better to remove code we don't use, and we don't use
the reentrant versions. I'm not sure about arc4random, maybe it's
overkill?

On a somewhat-related note, I have implementations for:
- realpath(1). Not POSIX, but present in most Linuxes and in
  Net, Free and OpenBSD.
- shuf(1). Not POSIX, nor BSD, but it makes sense as the
  alternative would be jot(1) from the BSDs, which I believe
  would render seq(1) useless, and be overcomplicated.
  Simplifies sort(1) (no -R flag), but also has a different
  behavior on identical lines.
- tac(1). Not POSIX nor BSD, but in coreutils, toybox and busybox.
  Simplifies tail(1) and sort(1) (no -r flag for either).
- ts(1). Moreutils, toybox and OpenBSD have it. I believe it
  can sometimes be useful.

What patches should I send?

With the sbase-ubase branch/unification, the project has somewhat
shifted in scope (a bit broader). Would it be adapted for this
branch?

I'm willing to work on a dc(1) implementation. Should I use
libzahl?

Cheers,
Elie Le Vaillant

Re: [hackers] [sbase][PATCH] sort: remove useless allocation

2024-12-19 Thread Roberto E . Vargas Caballero

Quoth Elie Le Vaillant :
> I'm not sure why we're doing malloc() then memcpy() here, when
> we could just make col->line.data point to start.data. This is costly
> for huge sorts (3 times slower than other implementations with no real
> reason). Since we are now working with the original line.data we need
> to revert the s/\n/\0/ that happens in columns().
> ---

Applied thanks,

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit 7c207664f8acba7228cf7b9234fded698e07ed84
Author: Roberto E. Vargas Caballero 
AuthorDate: Fri Mar 22 04:32:56 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 15:38:32 2024 +0100

Move more things around

diff --git a/ubase/mkswap.8 b/linux/mkswap.8
similarity index 100%
rename from ubase/mkswap.8
rename to linux/mkswap.8
diff --git a/ubase/mkswap.c b/linux/mkswap.c
similarity index 100%
rename from ubase/mkswap.c
rename to linux/mkswap.c
diff --git a/ubase/pivot_root.8 b/linux/pivot_root.8
similarity index 100%
rename from ubase/pivot_root.8
rename to linux/pivot_root.8
diff --git a/ubase/pivot_root.c b/linux/pivot_root.c
similarity index 100%
rename from ubase/pivot_root.c
rename to linux/pivot_root.c
diff --git a/ubase/swaplabel.8 b/linux/swaplabel.8
similarity index 100%
rename from ubase/swaplabel.8
rename to linux/swaplabel.8
diff --git a/ubase/swaplabel.c b/linux/swaplabel.c
similarity index 100%
rename from ubase/swaplabel.c
rename to linux/swaplabel.c
diff --git a/ubase/switch_root.8 b/linux/switch_root.8
similarity index 100%
rename from ubase/switch_root.8
rename to linux/switch_root.8
diff --git a/ubase/switch_root.c b/linux/switch_root.c
similarity index 100%
rename from ubase/switch_root.c
rename to linux/switch_root.c

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit 8291582b6ee219c6941dea3de87de1ed3f571636
Author: Roberto E. Vargas Caballero 
AuthorDate: Fri Mar 22 04:32:56 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 16:09:48 2024 +0100

Move more things around

diff --git a/posix/libutil/concat.c b/libutil/concat.c
similarity index 100%
rename from posix/libutil/concat.c
rename to libutil/concat.c
diff --git a/posix/libutil/cp.c b/libutil/cp.c
similarity index 100%
rename from posix/libutil/cp.c
rename to libutil/cp.c
diff --git a/posix/libutil/crypt.c b/libutil/crypt.c
similarity index 100%
rename from posix/libutil/crypt.c
rename to libutil/crypt.c
diff --git a/posix/libutil/ealloc.c b/libutil/ealloc.c
similarity index 100%
rename from posix/libutil/ealloc.c
rename to libutil/ealloc.c
diff --git a/posix/libutil/enmasse.c b/libutil/enmasse.c
similarity index 100%
rename from posix/libutil/enmasse.c
rename to libutil/enmasse.c
diff --git a/posix/libutil/eprintf.c b/libutil/eprintf.c
similarity index 100%
rename from posix/libutil/eprintf.c
rename to libutil/eprintf.c
diff --git a/posix/libutil/eregcomp.c b/libutil/eregcomp.c
similarity index 100%
rename from posix/libutil/eregcomp.c
rename to libutil/eregcomp.c
diff --git a/posix/libutil/estrtod.c b/libutil/estrtod.c
similarity index 100%
rename from posix/libutil/estrtod.c
rename to libutil/estrtod.c
diff --git a/posix/libutil/fnck.c b/libutil/fnck.c
similarity index 100%
rename from posix/libutil/fnck.c
rename to libutil/fnck.c
diff --git a/posix/libutil/fshut.c b/libutil/fshut.c
similarity index 100%
rename from posix/libutil/fshut.c
rename to libutil/fshut.c
diff --git a/posix/libutil/getlines.c b/libutil/getlines.c
similarity index 100%
rename from posix/libutil/getlines.c
rename to libutil/getlines.c
diff --git a/posix/libutil/human.c b/libutil/human.c
similarity index 100%
rename from posix/libutil/human.c
rename to libutil/human.c
diff --git a/posix/libutil/linecmp.c b/libutil/linecmp.c
similarity index 100%
rename from posix/libutil/linecmp.c
rename to libutil/linecmp.c
diff --git a/posix/libutil/md5.c b/libutil/md5.c
similarity index 100%
rename from posix/libutil/md5.c
rename to libutil/md5.c
diff --git a/posix/libutil/memmem.c b/libutil/memmem.c
similarity index 100%
rename from posix/libutil/memmem.c
rename to libutil/memmem.c
diff --git a/posix/libutil/mkdirp.c b/libutil/mkdirp.c
similarity index 100%
rename from posix/libutil/mkdirp.c
rename to libutil/mkdirp.c
diff --git a/posix/libutil/mode.c b/libutil/mode.c
similarity index 100%
rename from posix/libutil/mode.c
rename to libutil/mode.c
diff --git a/posix/libutil/parseoffset.c b/libutil/parseoffset.c
similarity index 100%
rename from posix/libutil/parseoffset.c
rename to libutil/parseoffset.c
diff --git a/posix/libutil/putword.c b/libutil/putword.c
similarity index 100%
rename from posix/libutil/putword.c
rename to libutil/putword.c
diff --git a/posix/libutil/reallocarray.c b/libutil/reallocarray.c
similarity index 100%
rename from posix/libutil/reallocarray.c
rename to libutil/reallocarray.c
diff --git a/posix/libutil/recurse.c b/libutil/recurse.c
similarity index 100%
rename from posix/libutil/recurse.c
rename to libutil/recurse.c
diff --git a/posix/libutil/rm.c b/libutil/rm.c
similarity index 100%
rename from posix/libutil/rm.c
rename to libutil/rm.c
diff --git a/posix/libutil/sha1.c b/libutil/sha1.c
similarity index 100%
rename from posix/libutil/sha1.c
rename to libutil/sha1.c
diff --git a/posix/libutil/sha224.c b/libutil/sha224.c
similarity index 100%
rename from posix/libutil/sha224.c
rename to libutil/sha224.c
diff --git a/posix/libutil/sha256.c b/libutil/sha256.c
similarity index 100%
rename from posix/libutil/sha256.c
rename to libutil/sha256.c
diff --git a/posix/libutil/sha384.c b/libutil/sha384.c
similarity index 100%
rename from posix/libutil/sha384.c
rename to libutil/sha384.c
diff --git a/posix/libutil/sha512-224.c b/libutil/sha512-224.c
similarity index 100%
rename from posix/libutil/sha512-224.c
rename to libutil/sha512-224.c
diff --git a/posix/libutil/sha512-256.c b/libutil/sha512-256.c
similarity index 100%
rename from posix/libutil/sha512-256.c
rename to libutil/sha512-256.c
diff --git a/posix/libutil/sha512.c b/libutil/sha512.c
similarity index 100%
rename from posix/libutil/sha512.c
rename to libutil/sha512.c
diff --git a/posix/libutil/strcasestr.c b/libutil/strcasestr.c
similarity index 100%
rename from posix/libutil/strcasestr.c
rename to libutil/strcasestr.c
diff --git a/posix/libutil/strlcat.c b/libutil/strlcat.c
similarity index 100%
rename from posix/libutil/strlcat.c
rename to libutil/strlcat.c
diff --git a/posix/libutil/strlcpy.c b/libutil/strlcpy.c
similarity index 100%
rename from posix/libutil/strlcpy.c
rename to libutil/strlcpy.c
diff --git a/posix/libutil/strnsubst.c b/libutil/strnsubst.c
similarity index 100%
rename from posix/libutil/strnsubst.c
rename to libutil/strnsubst.c
diff --git a/posix/libutil/strsep.c b/libutil/strsep.c
similarity index 100%
rename from posix

[hackers] [PATCH] rev, tail: replace hardcoded code by UTF8_POINT macro

2024-12-19 Thread Elie Le Vaillant

---
 rev.c  | 2 +-
 tail.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rev.c b/rev.c
index 2d89df1..9ac1da6 100644
--- a/rev.c
+++ b/rev.c
@@ -25,7 +25,7 @@ rev(FILE *fp)
lf = n && line[n - 1] == '\n';
i = n -= lf;
for (n = 0; i--;) {
-   if ((line[i] & 0xC0) == 0x80) {
+   if (UTF8_POINT(line[i])) {
n++;
} else {
fwrite(line + i, 1, n + 1, stdout);
diff --git a/tail.c b/tail.c
index ce3be9d..bbc5ad5 100644
--- a/tail.c
+++ b/tail.c
@@ -122,7 +122,7 @@ taketail(int fd, const char *fname, size_t count)
case 'm':  /* runes */
for (p = buf + len - 1, left = count; p >= buf; p--) {
/* skip utf-8 continuation bytes */
-   if ((*p & 0xc0) == 0x80)
+   if (UTF8_POINT(*p))
continue;
left--;
if (!left)
-- 
2.47.1

[hackers] [sbase] head: remove useless buffering || Elie Le Vaillant

2024-12-19 Thread git

commit 83182aa959b2100ea0cf6766e6ef3a553877a710
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:44 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 11:58:49 2024 +0100

head: remove useless buffering

getline isn't useful here, because we just need to read then output
lines. We do not need anything more complex than counting '\n's, so
we shouldn't use a buffer like we currently do.

diff --git a/head.c b/head.c
index ae550c0..230ad21 100644
--- a/head.c
+++ b/head.c
@@ -9,15 +9,16 @@
 static void
 head(FILE *fp, const char *fname, size_t n)
 {
-   char *buf = NULL;
-   size_t i = 0, size = 0;
-   ssize_t len;
+   int c;
+   size_t i = 0;
 
-   while (i < n && (len = getline(&buf, &size, fp)) > 0) {
-   fwrite(buf, 1, len, stdout);
-   i += (len && (buf[len - 1] == '\n'));
+   while (i < n && (c = fgetc(fp)) != EOF) {
+   if (putchar(c) == EOF)
+   eprintf("fputc:");
+   if (c == '\n')
+   i++;
}
-   free(buf);
+
if (ferror(fp))
eprintf("getline %s:", fname);
 }

Re: [hackers] [sbase][PATCH] head: remove useless buffering

2024-12-19 Thread Roberto E . Vargas Caballero

Quoth NRK :
> > +   while (i < n && (c = fgetc(fp)) != EOF) {
> > +   if (fputc(c, stdout) == EOF)
> 
> I don't see this as an improvement. Each one of the fgetc and fputc are
> going to go through a mutex lock (along with possibly going through a
> call into a dynamic function).

These kinds of things depend so much on the libc implementation that
we should not care about them. Code compiled without -pthread
can link with versions of the library without locks (making getc
equal to getc_unlocked), and in that scenario fputc can be inlined
(in fact, putc, which can be used in this case, used to be a macro,
and it still is in some libcs).

This new version seems more idiomatic to me, and it is the usual
way of doing this kind of thing. For this reason, I am going to
apply the patch, just changing fputc to putc.

Regards,

Re: [hackers] [sbase][PATCH] cron: fix repeat for random, and format code

2024-12-19 Thread Roberto E . Vargas Caballero

Quoth Elie Le Vaillant :
> ---
>  cron.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/cron.c b/cron.c
> index e95c661..9da0c8a 100644

I am sorry, there are so many patches just fixing earlier ones
that it is impossible to follow anything. I am going to revert
the changes to cron, but I am going to keep the last version
of the random functions.

Can you resend your patches for cron with a clean history and
the current head?

Regards,

[hackers] [sbase] libutil/random: rewrite whole algorithm || Elie Le Vaillant

2024-12-19 Thread git

commit 11c53a1739e17c6fe5fb233e187e35d9600e6c60
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:40 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 12:52:39 2024 +0100

libutil/random: rewrite whole algorithm

libutil/random.c now includes a custom PRNG, which is the PCG family.
It also overhauls random_uniform(), making it way faster. Names were
changed (s/random/rng32/g), and reentrant versions added.
The PRNG is faster than libc's random().
It is way faster than the previous version, which used

diff --git a/libutil/random.c b/libutil/random.c
index 48eeb79..db5e124 100644
--- a/libutil/random.c
+++ b/libutil/random.c
@@ -1,46 +1,77 @@
 #include 
-#include 
+#include 
 #include 
 
+static uint64_t globalstate;
+
 /*
- * Uniformity is achieved by generating new random numbers until the one
- * returned is outside the range [0, 2**32 % upper_bound).  This
- * guarantees the selected random number will be inside
- * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
- * after reduction modulo upper_bound.
- *
- * Copied off OpenBSD (original is arc4random_uniform)
+ * PCG construction
+ * seeding the RNG means merely setting the initial state.
+ * the increment could also be made part of the seed, just make sure it's odd.
  */
 uint32_t
-random_uniform(uint32_t upper_bound)
+rng32_r(uint64_t *state)
 {
-   uint32_t r, min;
-
-   if (upper_bound < 2)
-   return 0;
-
-   /* 2**32 % x == (2**32 - x) % x */
-   min = -upper_bound % upper_bound;
-
-   /*
-* This could theoretically loop forever but each retry has
-* p > 0.5 (worst case, usually far better) of selecting a
-* number inside the range we need, so it should rarely need
-* to re-roll.
-*/
-   for (;;) {
-   r = random(); /* arc4random() is better, but we don't always 
have it */
-   if (r >= min)
-   break;
+   uint64_t oldstate = *state;
+   uint32_t r, v;
+
+   *state *= UINT64_C(0x9E3793492EEDC3F7);
+   *state += 0x1337;
+
+   r = oldstate >> (64 - 5);
+   v = (oldstate ^ (oldstate >> 18)) >> (32 - 5);
+   v = (v >> (-r & 31)) | (v << r);
+   return v;
+}
+
+uint32_t
+rng32(void)
+{
+   return rng32_r(&globalstate);
+}
+
+/*
+ * Based on optimized Lemire's method
+ * https://pcg-random.org/posts/bounded-rands.html
+ */
+uint32_t
+rng32_bounded_r(uint64_t *state, uint32_t range) {
+   uint32_t x = rng32_r(state);
+   uint64_t m = (uint64_t)x * (uint64_t)range;
+   uint32_t l = (uint32_t)m;
+   if (l < range) {
+   uint32_t t = -range;
+   if (t >= range) {
+   t -= range;
+   if (t >= range)
+   t %= range;
+   }
+   while (l < t) {
+   x = rng32_r(state);
+   m = (uint64_t)x * (uint64_t)range;
+   l = (uint32_t)m;
+   }
}
+   return m >> 32;
+}
 
-   return r % upper_bound;
+uint64_t
+rng32_bounded(uint32_t range)
+{
+   return rng32_bounded_r(&globalstate, range);
 }
 
+/* Initialize state with somewhat random number */
 void
-random_seed(void)
+rng32_seed_r(uint64_t *state)
 {
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
-   srandom(ts.tv_nsec); /* not a good source of randomness, but eh */
+   *state = (intptr_t)&printf ^ ts.tv_sec ^ ((unsigned long)ts.tv_nsec * 
0xAC5533CD);
+}
+
+void
+rng32_seed(void)
+{
+   rng32_seed_r(&globalstate);
 }
diff --git a/util.h b/util.h
index 346f6ca..3818fbd 100644
--- a/util.h
+++ b/util.h
@@ -91,3 +91,6 @@ int mkdirp(const char *, mode_t, mode_t);
 #undef memmem
 #define memmem xmemmem
 void *memmem(const void *, size_t, const void *, size_t);
+uint32_t rng32_r(uint64_t*);
+uint32_t rng32(void);
+uint32_t rng32_bounded_r(uint64_t*, uint32_t);

[hackers] [sbase] cron: Revert to version before 6c8dc15 || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit 5f6a3dad770e6e57218c3b82f96a8ed9b7a5a23d
Author: Roberto E. Vargas Caballero 
AuthorDate: Thu Dec 19 12:53:24 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 12:53:24 2024 +0100

cron: Revert to version before 6c8dc15

The changes introduced after 6c8dc15 had some problems and it
was really hard to follow the chain of changes, and for that
reason I took the conservative decision of reverting to the
original code.

diff --git a/cron.c b/cron.c
index bd9f247..77304cc 100644
--- a/cron.c
+++ b/cron.c
@@ -17,13 +17,19 @@
 #include "queue.h"
 #include "util.h"
 
-struct range {
-   long low, high, repeat;
-   TAILQ_ENTRY(range) entry;
+struct field {
+   enum {
+   ERROR,
+   WILDCARD,
+   NUMBER,
+   RANGE,
+   REPEAT,
+   LIST
+   } type;
+   long *val;
+   int len;
 };
 
-TAILQ_HEAD(field, range);
-
 struct ctabentry {
struct field min;
struct field hour;
@@ -196,148 +202,141 @@ matchentry(struct ctabentry *cte, struct tm *tm)
{ .f = &cte->wday, .tm = tm->tm_wday, .len = 7  },
};
size_t i;
-   int found, t;
-   long low;
-   struct range *r;
+   int j;
 
for (i = 0; i < LEN(matchtbl); i++) {
-   found = 0;
-   t = matchtbl[i].tm;
-   TAILQ_FOREACH(r, matchtbl[i].f, entry) {
-   if (r->low <= t && r->high >= t && t % r->repeat == 0) {
-   found = 1;
-   break;
+   switch (matchtbl[i].f->type) {
+   case WILDCARD:
+   continue;
+   case NUMBER:
+   if (matchtbl[i].f->val[0] == matchtbl[i].tm)
+   continue;
+   break;
+   case RANGE:
+   if (matchtbl[i].f->val[0] <= matchtbl[i].tm)
+   if (matchtbl[i].f->val[1] >= matchtbl[i].tm)
+   continue;
+   break;
+   case REPEAT:
+   if (matchtbl[i].tm > 0) {
+   if (matchtbl[i].tm % matchtbl[i].f->val[0] == 0)
+   continue;
+   } else {
+   if (matchtbl[i].len % matchtbl[i].f->val[0] == 
0)
+   continue;
}
-   }
-   if (!found)
break;
+   case LIST:
+   for (j = 0; j < matchtbl[i].f->len; j++)
+   if (matchtbl[i].f->val[j] == matchtbl[i].tm)
+   break;
+   if (j < matchtbl[i].f->len)
+   continue;
+   break;
+   default:
+   break;
+   }
+   break;
}
if (i != LEN(matchtbl))
return 0;
-
return 1;
 }
 
-
 static int
-parserange(char *str, long low, long high, struct range *r)
+parsefield(const char *field, long low, long high, struct field *f)
 {
-   /* range = number |
-* [number] "~" [number] ["/" number] |
-* number "-" number ["/" number]
-*/
-   char *range, *repeat, *strlow, *strhigh;
-   char *e;
-   int random;
-
-   random = 0;
-
-   range = strsep(&str, "/");
-   repeat = strsep(&str, "/");
-   if (!range || !*range)
-   return -1;
+   int i;
+   char *e1, *e2;
+   const char *p;
+
+   p = field;
+   while (isdigit(*p))
+   p++;
+
+   f->type = ERROR;
+
+   switch (*p) {
+   case '*':
+   if (strcmp(field, "*") == 0) {
+   f->val = NULL;
+   f->len = 0;
+   f->type = WILDCARD;
+   } else if (strncmp(field, "*/", 2) == 0) {
+   f->val = emalloc(sizeof(*f->val));
+   f->len = 1;
 
-   switch (*range) {
-   case '~':
-   random = 1;
-   case '*': /* fallthru */
-   if (range[1] != '\0')
-   return -1;
-   r->low = low;
-   r->high = high;
+   errno = 0;
+   f->val[0] = strtol(field + 2, &e1, 10);
+   if (e1[0] != '\0' || errno != 0 || f->val[0] == 0)
+   break;
+
+   f->type = REPEAT;
+   }
break;
-   ARGNUM:
-   strlow = strsep(&range, "-");
-   strhigh = strsep(&range, "-");
-   if (!*strlow) /* i.e. - */
-   return -1;
+   case '\0':
+   f->val = emalloc(sizeof(*f->val))

[hackers] [sbase] libutil: add random.c || Elie Le Vaillant

2024-12-19 Thread git

commit 2677235ed7f86f26c817ca5eca7c730f2418638a
Author: Elie Le Vaillant 
AuthorDate: Fri Dec 6 10:37:35 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 12:28:05 2024 +0100

libutil: add random.c

Some programs need a good PRNG, such as shuf(1), or cron(1).  This adds
to libutil a random_uniform function which simply solves the problem of
creating integers uniformly in a range.  random_seed seeds the
generator.  arc4random would probably be a better PRNG than random, but
it is less portable unfortunately.

diff --git a/Makefile b/Makefile
index e3b6936..8b2728c 100644
--- a/Makefile
+++ b/Makefile
@@ -86,6 +86,7 @@ LIBUTILOBJ =\
libutil/strsep.o\
libutil/strnsubst.o\
libutil/strtonum.o\
+   libutil/random.o\
libutil/unescape.o\
libutil/writeall.o
 
diff --git a/libutil/random.c b/libutil/random.c
new file mode 100644
index 000..48eeb79
--- /dev/null
+++ b/libutil/random.c
@@ -0,0 +1,46 @@
+#include 
+#include 
+#include 
+
+/*
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound).  This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ *
+ * Copied off OpenBSD (original is arc4random_uniform)
+ */
+uint32_t
+random_uniform(uint32_t upper_bound)
+{
+   uint32_t r, min;
+
+   if (upper_bound < 2)
+   return 0;
+
+   /* 2**32 % x == (2**32 - x) % x */
+   min = -upper_bound % upper_bound;
+
+   /*
+* This could theoretically loop forever but each retry has
+* p > 0.5 (worst case, usually far better) of selecting a
+* number inside the range we need, so it should rarely need
+* to re-roll.
+*/
+   for (;;) {
+   r = random(); /* arc4random() is better, but we don't always 
have it */
+   if (r >= min)
+   break;
+   }
+
+   return r % upper_bound;
+}
+
+void
+random_seed(void)
+{
+   struct timespec ts;
+   clock_gettime(CLOCK_REALTIME, &ts);
+   srandom(ts.tv_nsec); /* not a good source of randomness, but eh */
+}

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit ca9f7351bbd0f8e6f9aa1cceb3e1910220aff399
Author: Roberto E. Vargas Caballero 
AuthorDate: Fri Mar 22 04:32:56 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 13:40:49 2024 +0100

Move more things around

diff --git a/ubase/libutil/cp.c b/ubase/libutil/cp.c
new file mode 100644
index 000..23275ac
--- /dev/null
+++ b/ubase/libutil/cp.c
@@ -0,0 +1,170 @@
+/* See LICENSE file for copyright and license details. */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../fs.h"
+#include "../util.h"
+
+int cp_aflag  = 0;
+int cp_fflag  = 0;
+int cp_pflag  = 0;
+int cp_rflag  = 0;
+int cp_vflag  = 0;
+int cp_status = 0;
+int cp_follow;
+
+int
+cp(const char *s1, const char *s2, int depth)
+{
+   DIR *dp;
+   int f1, f2, flags = 0;
+   struct dirent *d;
+   struct stat st;
+   struct timespec times[2];
+   ssize_t r;
+   char target[PATH_MAX], ns1[PATH_MAX], ns2[PATH_MAX];
+
+   if (cp_follow == 'P' || (cp_follow == 'H' && depth))
+   flags |= AT_SYMLINK_NOFOLLOW;
+
+   if (fstatat(AT_FDCWD, s1, &st, flags) < 0) {
+   weprintf("stat %s:", s1);
+   cp_status = 1;
+   return 0;
+   }
+
+   if (cp_vflag)
+   printf("%s -> %s\n", s1, s2);
+
+   if (S_ISLNK(st.st_mode)) {
+   if ((r = readlink(s1, target, sizeof(target) - 1)) >= 0) {
+   target[r] = '\0';
+   if (cp_fflag && unlink(s2) < 0 && errno != ENOENT) {
+   weprintf("unlink %s:", s2);
+   cp_status = 1;
+   return 0;
+   } else if (symlink(target, s2) < 0) {
+   weprintf("symlink %s -> %s:", s2, target);
+   cp_status = 1;
+   return 0;
+   }
+   }
+   } else if (S_ISDIR(st.st_mode)) {
+   if (!cp_rflag) {
+   weprintf("%s is a directory\n", s1);
+   cp_status = 1;
+   return 0;
+   }
+   if (!(dp = opendir(s1))) {
+   weprintf("opendir %s:", s1);
+   cp_status = 1;
+   return 0;
+   }
+   if (mkdir(s2, st.st_mode) < 0 && errno != EEXIST) {
+   weprintf("mkdir %s:", s2);
+   cp_status = 1;
+   closedir(dp);
+   return 0;
+   }
+
+   while ((d = readdir(dp))) {
+   if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+   continue;
+
+   estrlcpy(ns1, s1, sizeof(ns1));
+   if (s1[strlen(s1) - 1] != '/')
+   estrlcat(ns1, "/", sizeof(ns1));
+   estrlcat(ns1, d->d_name, sizeof(ns1));
+
+   estrlcpy(ns2, s2, sizeof(ns2));
+   if (s2[strlen(s2) - 1] != '/')
+   estrlcat(ns2, "/", sizeof(ns2));
+   estrlcat(ns2, d->d_name, sizeof(ns2));
+
+   fnck(ns1, ns2, cp, depth + 1);
+   }
+
+   closedir(dp);
+   } else if (cp_aflag && (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) ||
+  S_ISSOCK(st.st_mode) || S_ISFIFO(st.st_mode))) {
+   if (cp_fflag && unlink(s2) < 0 && errno != ENOENT) {
+   weprintf("unlink %s:", s2);
+   cp_status = 1;
+   return 0;
+   } else if (mknod(s2, st.st_mode, st.st_rdev) < 0) {
+   weprintf("mknod %s:", s2);
+   cp_status = 1;
+   return 0;
+   }
+   } else {
+   if ((f1 = open(s1, O_RDONLY)) < 0) {
+   weprintf("open %s:", s1);
+   cp_status = 1;
+   return 0;
+   }
+   if ((f2 = creat(s2, st.st_mode)) < 0 && cp_fflag) {
+   if (unlink(s2) < 0 && errno != ENOENT) {
+   weprintf("unlink %s:", s2);
+   cp_status = 1;
+   close(f1);
+   return 0;
+   }
+   f2 = creat(s2, st.st_mode);
+   }
+   if (f2 < 0) {
+   weprintf("creat %s:", s2);
+   cp_status = 1;
+   close(f1);
+   return 0;
+   }
+   if (concat(f1, s1, f2, s2) < 0) {
+   cp_status = 1;
+   close(f1);
+   close(f2);
+

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit 91aed23a5d5032b4a820a10bd3ddd6dd3968eefa
Author: Roberto E. Vargas Caballero 
AuthorDate: Fri Mar 22 04:32:56 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 13:30:32 2024 +0100

Move more things around

diff --git a/ubase/libutil/concat.c b/ubase/libutil/concat.c
index ef1e5b9..2e9aa52 100644
--- a/ubase/libutil/concat.c
+++ b/ubase/libutil/concat.c
@@ -1,21 +1,23 @@
 /* See LICENSE file for copyright and license details. */
-#include 
+#include 
 
-#include "../text.h"
 #include "../util.h"
 
-void
-concat(FILE *fp1, const char *s1, FILE *fp2, const char *s2)
+int
+concat(int f1, const char *s1, int f2, const char *s2)
 {
char buf[BUFSIZ];
-   size_t n;
+   ssize_t n;
 
-   while ((n = fread(buf, 1, sizeof(buf), fp1)) > 0) {
-   if (fwrite(buf, 1, n, fp2) != n)
-   eprintf("%s: write error:", s2);
-   if (feof(fp1))
-   break;
+   while ((n = read(f1, buf, sizeof(buf))) > 0) {
+   if (writeall(f2, buf, n) < 0) {
+   weprintf("write %s:", s2);
+   return -2;
+   }
}
-   if (ferror(fp1))
-   eprintf("%s: read error:", s1);
+   if (n < 0) {
+   weprintf("read %s:", s1);
+   return -1;
+   }
+   return 0;
 }

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

2024-12-19 Thread git

commit 6591fb94755d9f3c065add8ace0bf3747157
Author: Roberto E. Vargas Caballero 
AuthorDate: Fri Mar 22 04:32:56 2024 +0100
Commit: Roberto E. Vargas Caballero 
CommitDate: Thu Dec 19 13:35:08 2024 +0100

Move more things around

diff --git a/ubase/libutil/ealloc.c b/ubase/libutil/ealloc.c
index 05bdd62..320865d 100644
--- a/ubase/libutil/ealloc.c
+++ b/ubase/libutil/ealloc.c
@@ -6,42 +6,83 @@
 
 void *
 ecalloc(size_t nmemb, size_t size)
+{
+   return encalloc(1, nmemb, size);
+}
+
+void *
+emalloc(size_t size)
+{
+   return enmalloc(1, size);
+}
+
+void *
+erealloc(void *p, size_t size)
+{
+   return enrealloc(1, p, size);
+}
+
+char *
+estrdup(const char *s)
+{
+   return enstrdup(1, s);
+}
+
+char *
+estrndup(const char *s, size_t n)
+{
+   return enstrndup(1, s, n);
+}
+
+void *
+encalloc(int status, size_t nmemb, size_t size)
 {
void *p;
 
p = calloc(nmemb, size);
if (!p)
-   eprintf("calloc: out of memory\n");
+   enprintf(status, "calloc: out of memory\n");
return p;
 }
 
 void *
-emalloc(size_t size)
+enmalloc(int status, size_t size)
 {
void *p;
 
p = malloc(size);
if (!p)
-   eprintf("malloc: out of memory\n");
+   enprintf(status, "malloc: out of memory\n");
return p;
 }
 
 void *
-erealloc(void *p, size_t size)
+enrealloc(int status, void *p, size_t size)
 {
p = realloc(p, size);
if (!p)
-   eprintf("realloc: out of memory\n");
+   enprintf(status, "realloc: out of memory\n");
return p;
 }
 
 char *
-estrdup(const char *s)
+enstrdup(int status, const char *s)
 {
char *p;
 
p = strdup(s);
if (!p)
-   eprintf("strdup: out of memory\n");
+   enprintf(status, "strdup: out of memory\n");
+   return p;
+}
+
+char *
+enstrndup(int status, const char *s, size_t n)
+{
+   char *p;
+
+   p = strndup(s, n);
+   if (!p)
+   enprintf(status, "strndup: out of memory\n");
return p;
 }
diff --git a/ubase/libutil/putword.c b/ubase/libutil/putword.c
index c460703..80a9860 100644
--- a/ubase/libutil/putword.c
+++ b/ubase/libutil/putword.c
@@ -4,13 +4,13 @@
 #include "../util.h"
 
 void
-putword(const char *s)
+putword(FILE *fp, const char *s)
 {
static int first = 1;
 
if (!first)
-   putchar(' ');
+   fputc(' ', fp);
 
-   fputs(s, stdout);
+   fputs(s, fp);
first = 0;
 }
diff --git a/ubase/libutil/recurse.c b/ubase/libutil/recurse.c
index 318987d..e66efaf 100644
--- a/ubase/libutil/recurse.c
+++ b/ubase/libutil/recurse.c
@@ -1,5 +1,7 @@
 /* See LICENSE file for copyright and license details. */
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -8,35 +10,99 @@
 #include 
 #include 
 
+#include "../fs.h"
 #include "../util.h"
 
+int recurse_status = 0;
+
 void
-recurse(const char *path, void (*fn)(const char *))
+recurse(int dirfd, const char *name, void *data, struct recursor *r)
 {
-   char buf[PATH_MAX];
struct dirent *d;
-   struct stat st;
+   struct history *new, *h;
+   struct stat st, dst;
DIR *dp;
+   int flags = 0, fd;
+   size_t pathlen = r->pathlen;
+
+   if (dirfd == AT_FDCWD)
+   pathlen = estrlcpy(r->path, name, sizeof(r->path));
+
+   if (r->follow == 'P' || (r->follow == 'H' && r->depth))
+   flags |= AT_SYMLINK_NOFOLLOW;
 
-   if (lstat(path, &st) == -1 || S_ISDIR(st.st_mode) == 0)
+   if (fstatat(dirfd, name, &st, flags) < 0) {
+   if (!(r->flags & SILENT)) {
+   weprintf("stat %s:", r->path);
+   recurse_status = 1;
+   }
return;
+   }
+   if (!S_ISDIR(st.st_mode)) {
+   r->fn(dirfd, name, &st, data, r);
+   return;
+   }
 
-   if (!(dp = opendir(path)))
-   eprintf("opendir %s:", path);
-
-   while ((d = readdir(dp))) {
-   if (strcmp(d->d_name, ".") == 0 ||
-   strcmp(d->d_name, "..") == 0)
-   continue;
-   if (strlcpy(buf, path, sizeof(buf)) >= sizeof(buf))
-   eprintf("path too long\n");
-   if (buf[strlen(buf) - 1] != '/')
-   if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
-   eprintf("path too long\n");
-   if (strlcat(buf, d->d_name, sizeof(buf)) >= sizeof(buf))
-   eprintf("path too long\n");
-   fn(buf);
+   new = emalloc(sizeof(struct history));
+   new->prev  = r->hist;
+   r->hist= new;
+   new->dev   = st.st_dev;
+   new->ino   = st.st_ino;
+
+   for (h = new->prev; h; h = h->prev)
+   if (h->ino == st.st_ino && h->dev == st.st_dev)
+   return;
+
+   if (!r->depth && (r->

Re: [hackers] [sbase][PATCH] cron: fix repeat for random, and format code

2024-12-19 Thread Roberto E . Vargas Caballero

Hi,

Quoth Elie Le Vaillant :
> Will do. I made some tweaks to the random functions which improve
> them somewhat (better constants, no reentrant versions). I also
> added an ifdef that uses arc4random on OpenBSD.

That seems nice.

> 
> Maybe those aren't essential. The constants are objectively better
> (they make the underlying LCG succeed more at spectral tests).
> I think it's better to remove code we don't use, and we don't use
> the reentrant versions. I'm not sure about arc4random, maybe it's
> overkill?

I think here we should consider whether we need such a good PRNG
or whether we can live with normal random(). I think using things like
arc4random makes more sense when you have to deal with crypto
stuff, which as far as I know is not our case. That said,
I applied the first versions of your random functions, but I
was not very sure about them for the reasons that I mentioned
before (and having less code is a good thing).

> On a somewhat-related note, I have implementations for:
> - realpath(1). Not POSIX, but present in most Linuxes and in
>   Net, Free and OpenBSD.
> - shuf(1). Not POSIX, nor BSD, but it makes sense as the
>   alternative would be jot(1) from the BSDs, which I believe
>   would render seq(1) useless, and be overcomplicated.
>   Simplifies sort(1) (no -R flag), but also has a different
>   behavior on identical lines.
> - tac(1). Not POSIX nor BSD, but coreutils and toy,busy-box.
>   Simplifies tail(1) and sort(1) (no -r flag for either).
> - ts(1). Moreutils, toybox and OpenBSD have it. I believe it
>   can sometimes be useful.

I would delay these until the sbase-ubase unification is done.
We are very, very, very far from having a working branch; I am just
classifying things, and I can tell you that trying to compile
anything in that branch will fail. Sadly, I don't have as much
time as I had in the past, and I have too many projects to maintain,
so I think the unification will take time.

> I'm willing to work on a dc(1) implementation. Should I use
> libzahl?

That seems nice. I have a bc version almost finished that uses
the classical bc-dc pipe. If you write dc then I can resume my
work on bc and try to get it working with your dc version. I am
not sure about using libzahl, because that would mean having a
dependency on libzahl and I am not sure about that. I would suggest
that you read the original paper with the description of dc, where
they explain very well how they implemented the original version,
and it does not seem too complex.

Regards,

Re: [hackers] [sbase] head: remove useless buffering || Elie Le Vaillant

2024-12-19 Thread Hiltjo Posthuma

On Thu, Dec 19, 2024 at 11:59:35AM +0100, g...@suckless.org wrote:
> commit 83182aa959b2100ea0cf6766e6ef3a553877a710
> Author: Elie Le Vaillant 
> AuthorDate: Fri Dec 6 10:37:44 2024 +0100
> Commit: Roberto E. Vargas Caballero 
> CommitDate: Thu Dec 19 11:58:49 2024 +0100
> 
> head: remove useless buffering
> 
> getline isn't useful here, because we just need to read then output
> lines. We do not need anything more complex than counting '\n's, so
> we shouldn't use a buffer like we currently do.
> 

From a simplicity standpoint you're right, but this might also have performance
implications.

Was this tested?

> diff --git a/head.c b/head.c
> index ae550c0..230ad21 100644
> --- a/head.c
> +++ b/head.c
> @@ -9,15 +9,16 @@
>  static void
>  head(FILE *fp, const char *fname, size_t n)
>  {
> - char *buf = NULL;
> - size_t i = 0, size = 0;
> - ssize_t len;
> + int c;
> + size_t i = 0;
>  
> - while (i < n && (len = getline(&buf, &size, fp)) > 0) {
> - fwrite(buf, 1, len, stdout);
> - i += (len && (buf[len - 1] == '\n'));
> + while (i < n && (c = fgetc(fp)) != EOF) {
> + if (putchar(c) == EOF)
> + eprintf("fputc:");
> + if (c == '\n')
> + i++;
>   }
> - free(buf);
> +
>   if (ferror(fp))
>   eprintf("getline %s:", fname);
>  }
> 

-- 
Kind regards,
Hiltjo

[hackers] [PATCH 1/2] Add libutil/random.c for fast PCG-based PRNG

[hackers] [PATCH 2/2] cron: heavy refactor of parsefield() and matchentry()

Re: [hackers] [sbase][PATCH] cron: fix parsing and '~' behavior

Re: [hackers] [sbase] tar: fix long names crashing tar archiving

[hackers] [sbase] cron: heavy refactor of parsefield() and matchentry() || Elie Le Vaillant

[hackers] [sbase] cron: fix parsing and '~' behavior || Elie Le Vaillant

[hackers] [sbase] sort: remove useless allocation || Elie Le Vaillant

[hackers] [sbase] cron: fix '~' range parsing || Elie Le Vaillant

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

Re: [hackers] [sbase][PATCH] cron: fix repeat for random, and format code

Re: [hackers] [sbase][PATCH] sort: remove useless allocation

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

[hackers] [PATCH] rev, tail: replace hardcoded code by UTF8_POINT macro

[hackers] [sbase] head: remove useless buffering || Elie Le Vaillant

Re: [hackers] [sbase][PATCH] head: remove useless buffering

Re: [hackers] [sbase][PATCH] cron: fix repeat for random, and format code

[hackers] [sbase] libutil/random: rewrite whole algorithm || Elie Le Vaillant

[hackers] [sbase] cron: Revert to version before 6c8dc15 || Roberto E. Vargas Caballero

[hackers] [sbase] libutil: add random.c || Elie Le Vaillant

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

[hackers] [sbase/ubase-merge] Move more things around || Roberto E. Vargas Caballero

Re: [hackers] [sbase][PATCH] cron: fix repeat for random, and format code

Re: [hackers] [sbase] head: remove useless buffering || Elie Le Vaillant

25 matches

Site Navigation

Mail list logo

Footer information