On 01/17/17 09:34, Denys Vlasenko wrote:
> 
> 
> On 01/17/2017 06:15 PM, Theodore Ts'o wrote:
>> On Tue, Jan 17, 2017 at 09:21:31AM +0100, Denys Vlasenko wrote:
>>>> If someone wants to send me a patch, I'll happily take a look at it,
>>>
>>> Will something along these lines be accepted?
>>
>> The problem is that this won't work.  In the cases that we're talking
>> about, the entropy counter in the secondary pool is not zero but
>> close to zero, so we'll still have short reads.  And that's going to
>> happen a fair amount of the time.
>>
>> Perhaps the best *hacky* solution would be to say, ok if the entropy
>> count is less than some threshold, don't use the correct entropy
>> calculation, but rather assume that all of the new bits won't land on
>> top of existing entropy bits.
> 
> IOW, something like this:
> 
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
> @@ -653,6 +653,9 @@ static void credit_entropy_bits(struct
> entropy_store *r, int nbits)
>         if (nfrac < 0) {
>                 /* Debit */
>                 entropy_count += nfrac;
> +       } else if (entropy_count < ((8 * 8) << ENTROPY_SHIFT)) {
> +               /* Credit, and the pool is almost empty */
> +               entropy_count += nfrac;
>         } else {
>                 /*
>                  * Credit: we have to account for the possibility of
>                  * overwriting already present entropy.  Even in the
> 
> Want the patch? If yes, what name would you prefer for the constant? How about
> 

This seems very wrong.  The whole point is that we keep it conservative
-- always less than or equal to the correct number.  You could derate
the value based on the top part of the threshold using a more
conservative constant (i.e. smaller fill steps) than the 3/4 used in
the current derating algorithm, but first of all, you would only recover
<= 1/4 of the credit in the first place, so it is questionable whether
it really buys you all that much.
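
For reference, the derating in question works roughly as follows (an
illustrative sketch, not the kernel's exact code; the helper name is
made up).  credit_entropy_bits() approximates the asymptotic credit

        entropy <- entropy + (pool_size - entropy) * (1 - exp(-add/pool_size))

with the conservative lower bound 3/4 * add * (pool_size - entropy) /
pool_size, so a near-empty pool already receives at least ~3/4 of each
credit:

/*
 * Illustrative only: the derated credit for adding "add" fractional
 * bits to a pool currently holding "entropy" fractional bits.  With a
 * near-empty pool, (pool_size - entropy)/pool_size is ~1, so at most
 * about 1/4 of the credit is withheld by the derating.
 */
static unsigned int derated_credit(unsigned int pool_size,
                                   unsigned int entropy,
                                   unsigned int add)
{
        return (unsigned int)((3ULL * add * (pool_size - entropy)) /
                              (4ULL * pool_size));
}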

I really, really would hate to see something that introduces an active
error to cope with a broken application somewhere.

> On Mon, Jan 16, 2017 at 07:50:55PM +0100, Denys Vlasenko wrote:
>>
>> /dev/random can legitimately return short reads
>> when there is not enough entropy for the full request.
> 
> Yes, but callers of /dev/random should be able to handle short reads.
> So it's a bug in the application as well.

It's not a bug in the application "as well"; it is a bug in the
application, *period*.  There are a number of other conditions that
could cause this exact effect.
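
To illustrate the application-side fix (a hypothetical userspace helper,
not something from this thread), all a caller needs is a plain retry
loop around read():

#include <errno.h>
#include <unistd.h>

/* Keep reading until "len" bytes have arrived or a real error occurs. */
static int read_full(int fd, void *buf, size_t len)
{
        size_t done = 0;

        while (done < len) {
                ssize_t n = read(fd, (char *)buf + done, len - done);
                if (n < 0) {
                        if (errno == EINTR)
                                continue;       /* interrupted, retry */
                        return -1;              /* genuine error */
                }
                if (n == 0)
                        return -1;              /* unexpected EOF */
                done += n;                      /* short read: keep going */
        }
        return 0;
}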

If there is a real need to hack around this, then I would instead
suggest modifying random_read() to block rather than return if the user
requests below a certain value, O_NONBLOCK is not set, and the whole
request cannot be fulfilled.  It probably needs to be configurable via
sysctl, though, and most likely defaulting to 1, as it could just as
easily break properly functioning applications.

A *completely* untested patch attached...

        -hpa

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1ef2640..618ca9b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -320,6 +320,13 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
 static int random_min_urandom_seed = 60;
 
 /*
+ * If /dev/random can't fulfil a request, block unless we can return
+ * this many bytes.  If O_NONBLOCK is set, we always return,
+ * unconditionally.
+ */
+static int random_min_return = 1;
+
+/*
  * Originally, we used a primitive polynomial of degree .poolwords
  * over GF(2).  The taps for various sizes are defined below.  They
  * were chosen to be evenly spaced except for the last tap, which is 1
@@ -1702,24 +1709,26 @@ static ssize_t
 _random_read(int nonblock, char __user *buf, size_t nbytes)
 {
        ssize_t n;
+       size_t done = 0;
 
        if (nbytes == 0)
                return 0;
 
        nbytes = min_t(size_t, nbytes, SEC_XFER_SIZE);
-       while (1) {
-               n = extract_entropy_user(&blocking_pool, buf, nbytes);
+       while (done < nbytes) {
+               n = extract_entropy_user(&blocking_pool, buf, nbytes-done);
                if (n < 0)
-                       return n;
+                       return done ? done : n;
                trace_random_read(n*8, (nbytes-n)*8,
                                  ENTROPY_BITS(&blocking_pool),
                                  ENTROPY_BITS(&input_pool));
-               if (n > 0)
-                       return n;
+               done += n;
+               if (done >= min_t(size_t, nbytes, random_min_return))
+                       break;
 
                /* Pool is (near) empty.  Maybe wait and retry. */
                if (nonblock)
-                       return -EAGAIN;
+                       return done ? done : -EAGAIN;
 
                wait_event_interruptible(random_read_wait,
                        ENTROPY_BITS(&input_pool) >=
@@ -1727,6 +1736,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes)
                if (signal_pending(current))
                        return -ERESTARTSYS;
        }
+       return done;
 }
 
 static ssize_t
@@ -1909,6 +1919,8 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 
 #include <linux/sysctl.h>
 
+static int min_random_min_return = 1;
+static int max_random_min_return = SEC_XFER_SIZE;
 static int min_read_thresh = 8, min_write_thresh;
 static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
 static int max_write_thresh = INPUT_POOL_WORDS * 32;
@@ -2022,6 +2034,15 @@ struct ctl_table random_table[] = {
                .mode           = 0444,
                .proc_handler   = proc_do_uuid,
        },
+       {
+               .procname       = "random_min_return",
+               .data           = &random_min_return,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &min_random_min_return,
+               .extra2         = &max_random_min_return,
+       },
 #ifdef ADD_INTERRUPT_BENCH
        {
                .procname       = "add_interrupt_avg_cycles",
