Hello,

It looks like powertop requires a kernel patch to display VFS ops/sec.
The last patch was for kernel v3.3.0. The patch needs to be updated,
otherwise powertop always displays 0 VFS ops/sec.

Considering changes starting with kernel v3.9, the kernel side patch
would look like as below. I have tested it with powertop and
linux-linaro-tracking branch (v3.14.0) and with this patch powertop
displays and updates VFS ops/sec properly.

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5584bea..c6a9832 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1141,6 +1141,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
        if ((inode->i_state & flags) == flags)
                return;

+       if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES))
+               trace_writeback_inode_dirty(inode, flags);
+
        if (unlikely(block_dump > 1))
                block_dump___mark_inode_dirty(inode);

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 464ea82..7956d8f 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -101,6 +101,35 @@ DEFINE_EVENT(writeback_dirty_inode_template,
writeback_dirty_inode,
        TP_ARGS(inode, flags)
 );

+/*
+ * Tracepoint for dirtying an inode; used by PowerTOP
+ */
+TRACE_EVENT(writeback_inode_dirty,
+
+       TP_PROTO(struct inode *inode, int flags),
+
+       TP_ARGS(inode, flags),
+
+       TP_STRUCT__entry(
+                __field(        __kernel_dev_t, dev             )
+                __field(        ino_t,          ino             )
+                __field(        u32,            flags           )
+        ),
+
+       TP_fast_assign(
+                __entry->dev    = inode->i_sb->s_dev;
+                __entry->ino    = inode->i_ino;
+                __entry->flags  = flags;
+        ),
+
+       TP_printk("dev %d:%d ino %lu flags %d %s", MAJOR(__entry->dev),
+                 MINOR(__entry->dev),
+                  (unsigned long) __entry->ino,
+                  __entry->flags,
+                 show_inode_state(__entry->flags)
+       )
+);
+
 DECLARE_EVENT_CLASS(writeback_write_inode_template,

        TP_PROTO(struct inode *inode, struct writeback_control *wbc),


However, there is an alternative thought.

Starting with kernel v3.9, kernel provides a built in trace event
called - "writeback_dirty_inode" -

DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,

        TP_PROTO(struct inode *inode, int flags),

        TP_ARGS(inode, flags)
);


But since the event powertop expects is "writeback_inode_dirty" and
the names and output format don't match,
to take advantage of the built-in kernel trace event, the below two
patches together make it work.

powertop side patch is necessary to match the TRACE_EVENT name. Also,
checking for dev > 0 is not necessary,
 since now there is name instead of device major and minor number in
kernel trace.

kernel side patch is necessary to account for DIRTY_PAGES.


powertop side patch -

diff --git a/src/process/do_process.cpp b/src/process/do_process.cpp
index ba164eb..2e2cbea 100644
--- a/src/process/do_process.cpp
+++ b/src/process/do_process.cpp
@@ -625,21 +625,14 @@ void
perf_process_bundle::handle_trace_point(void *trace, int cpu, uint64_t
time
                        consumer->gpu_ops++;
                }
        }
-       else if (strcmp(event->name, "writeback_inode_dirty") == 0) {
-               static uint64_t prev_time;
+       else if (strcmp(event->name, "writeback_dirty_inode") == 0) {
+               static uint64_t prev_time = 0;
                class power_consumer *consumer = NULL;
-               int dev;

                consumer = current_consumer(cpu);

-               ret = pevent_get_field_val(NULL, event, "dev", &rec, &val, 0);
-               if (ret < 0)
-
-                       return;
-               dev = (int)val;
-
                if (consumer && strcmp(consumer->name(),
-                       "process")==0 && dev > 0) {
+                       "process")==0) {

                        consumer->disk_hits++;

@@ -674,7 +667,7 @@ void start_process_measurement(void)
                perf_events->add_event("workqueue:workqueue_execute_end");
                perf_events->add_event("i915:i915_gem_ring_dispatch");
                perf_events->add_event("i915:i915_gem_request_submit");
-               perf_events->add_event("writeback:writeback_inode_dirty");
+               perf_events->add_event("writeback:writeback_dirty_inode");
        }

        first_stamp = ~0ULL;


kernel side patch -

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5584bea..c2ab700 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1141,6 +1141,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
        if ((inode->i_state & flags) == flags)
                return;

+       if (flags & I_DIRTY_PAGES)
+               trace_writeback_dirty_inode(inode, flags);
+
        if (unlikely(block_dump > 1))
                block_dump___mark_inode_dirty(inode);


I have tested both solutions and both displays and updates VFS
ops/sec. However, since second solution changes powertop,
that means powertop won't work with pre v3.9 kernel.


Any comment or thought on this?



--
Thanks,
-Meraj

_______________________________________________
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev

Reply via email to