From: Masami Hiramatsu (Google) <[email protected]>

Skip invalid sub-buffers when validating the persistent ring buffer
instead of discarding the entire ring buffer. Only skipped buffers
are invalidated (cleared).

If the cache data in memory fails to be synchronized during a reboot,
the persistent ring buffer may become partially corrupted, but other
sub-buffers may still contain readable event data. Only discard the
sub-buffers that are found to be corrupted.

Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
---
  Changes in v7:
  - Combined with Handling RB_MISSED_* flags patch, focus on validation at boot.
  - Remove checking subbuffer data when validating metadata, because it should
    be done later.
  - Do not mark the discarded sub buffer page but just reset it.
  Changes in v6:
  - Show invalid page detection message once per CPU.
  Changes in v5:
  - Instead of showing errors for each page, just show the number
    of discarded pages at the end.
  Changes in v3:
  - Record missed data event on commit.
---
 kernel/trace/ring_buffer.c |   63 +++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b6f3ac99834f..8599de5cf59b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -396,6 +396,12 @@ static __always_inline unsigned int rb_page_commit(struct 
buffer_page *bpage)
        return local_read(&bpage->page->commit);
 }
 
+/* Size is determined by what has been committed */
+static __always_inline unsigned int rb_page_size(struct buffer_page *bpage)
+{
+       return rb_page_commit(bpage) & ~RB_MISSED_MASK;
+}
+
 static void free_buffer_page(struct buffer_page *bpage)
 {
        /* Range pages are not to be freed */
@@ -1819,7 +1825,7 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta 
*meta, int cpu,
 
        bitmap_clear(subbuf_mask, 0, meta->nr_subbufs);
 
-       /* Is the meta buffers and the subbufs themselves have correct data? */
+       /* Do the meta buffers themselves have correct data? */
        for (i = 0; i < meta->nr_subbufs; i++) {
                if (meta->buffers[i] < 0 ||
                    meta->buffers[i] >= meta->nr_subbufs) {
@@ -1827,11 +1833,6 @@ static bool rb_cpu_meta_valid(struct 
ring_buffer_cpu_meta *meta, int cpu,
                        return false;
                }
 
-               if ((unsigned)local_read(&subbuf->commit) > subbuf_size) {
-                       pr_info("Ring buffer boot meta [%d] buffer invalid 
commit\n", cpu);
-                       return false;
-               }
-
                if (test_bit(meta->buffers[i], subbuf_mask)) {
                        pr_info("Ring buffer boot meta [%d] array has 
duplicates\n", cpu);
                        return false;
@@ -1902,13 +1903,16 @@ static int rb_read_data_buffer(struct buffer_data_page 
*dpage, int tail, int cpu
        return events;
 }
 
-static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
+static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
+                             struct ring_buffer_cpu_meta *meta)
 {
        unsigned long long ts;
        u64 delta;
        int tail;
 
        tail = local_read(&dpage->commit);
+       if (tail <= 0 || tail > meta->subbuf_size)
+               return -1;
        return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
 }
 
@@ -1919,6 +1923,7 @@ static void rb_meta_validate_events(struct 
ring_buffer_per_cpu *cpu_buffer)
        struct buffer_page *head_page, *orig_head;
        unsigned long entry_bytes = 0;
        unsigned long entries = 0;
+       int discarded = 0;
        int ret;
        u64 ts;
        int i;
@@ -1929,13 +1934,13 @@ static void rb_meta_validate_events(struct 
ring_buffer_per_cpu *cpu_buffer)
        orig_head = head_page = cpu_buffer->head_page;
 
        /* Do the reader page first */
-       ret = rb_validate_buffer(cpu_buffer->reader_page->page, 
cpu_buffer->cpu);
+       ret = rb_validate_buffer(cpu_buffer->reader_page->page, 
cpu_buffer->cpu, meta);
        if (ret < 0) {
                pr_info("Ring buffer reader page is invalid\n");
                goto invalid;
        }
        entries += ret;
-       entry_bytes += local_read(&cpu_buffer->reader_page->page->commit);
+       entry_bytes += rb_page_size(cpu_buffer->reader_page);
        local_set(&cpu_buffer->reader_page->entries, ret);
 
        ts = head_page->page->time_stamp;
@@ -1964,7 +1969,7 @@ static void rb_meta_validate_events(struct 
ring_buffer_per_cpu *cpu_buffer)
                        break;
 
                /* Stop rewind if the page is invalid. */
-               ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+               ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, 
meta);
                if (ret < 0)
                        break;
 
@@ -2043,21 +2048,24 @@ static void rb_meta_validate_events(struct 
ring_buffer_per_cpu *cpu_buffer)
                if (head_page == cpu_buffer->reader_page)
                        continue;
 
-               ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+               ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, 
meta);
                if (ret < 0) {
-                       pr_info("Ring buffer meta [%d] invalid buffer page\n",
-                               cpu_buffer->cpu);
-                       goto invalid;
-               }
-
-               /* If the buffer has content, update pages_touched */
-               if (ret)
-                       local_inc(&cpu_buffer->pages_touched);
-
-               entries += ret;
-               entry_bytes += local_read(&head_page->page->commit);
-               local_set(&cpu_buffer->head_page->entries, ret);
+                       if (!discarded)
+                               pr_info("Ring buffer meta [%d] invalid buffer 
page detected\n",
+                                       cpu_buffer->cpu);
+                       discarded++;
+                       /* Instead of discard whole ring buffer, discard only 
this sub-buffer. */
+                       local_set(&head_page->entries, 0);
+                       local_set(&head_page->page->commit, RB_MISSED_EVENTS);
+               } else {
+                       /* If the buffer has content, update pages_touched */
+                       if (ret)
+                               local_inc(&cpu_buffer->pages_touched);
 
+                       entries += ret;
+                       entry_bytes += rb_page_size(head_page);
+                       local_set(&cpu_buffer->head_page->entries, ret);
+               }
                if (head_page == cpu_buffer->commit_page)
                        break;
        }
@@ -2071,7 +2079,8 @@ static void rb_meta_validate_events(struct 
ring_buffer_per_cpu *cpu_buffer)
        local_set(&cpu_buffer->entries, entries);
        local_set(&cpu_buffer->entries_bytes, entry_bytes);
 
-       pr_info("Ring buffer meta [%d] is from previous boot!\n", 
cpu_buffer->cpu);
+       pr_info("Ring buffer meta [%d] is from previous boot! (%d pages 
discarded)\n",
+               cpu_buffer->cpu, discarded);
        return;
 
  invalid:
@@ -3258,12 +3267,6 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
        return NULL;
 }
 
-/* Size is determined by what has been committed */
-static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
-{
-       return rb_page_commit(bpage) & ~RB_MISSED_MASK;
-}
-
 static __always_inline unsigned
 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
 {


Reply via email to