On Thu, 2010-03-04 at 09:00 +0800, Zhang, Yanmin wrote:
> On Wed, 2010-03-03 at 11:15 +0100, Peter Zijlstra wrote:
> > On Wed, 2010-03-03 at 17:27 +0800, Zhang, Yanmin wrote:
> > > -#ifndef perf_misc_flags
> > > -#define perf_misc_flags(regs)  (user_mode(regs) ? PERF_RECORD_MISC_USER 
> > > : \
> > > -                                PERF_RECORD_MISC_KERNEL)
> > > -#define perf_instruction_pointer(regs) instruction_pointer(regs)
> > > -#endif 
> > 
> > Ah, that #ifndef is for powerpc, which I think you just broke.
> Thanks for the reminder. I had deleted the powerpc code when building my
> cscope lib.
> 
> The perf_save_virt_ip/perf_reset_virt_ip interfaces seem ugly. I plan to
> replace them with a struct of callback functions, and have kvm register its
> implementation with perf.
> 
> Something like:
> struct perf_guest_info_callbacks {
>       int (*is_in_guest)();
>       u64 (*get_guest_ip)();
>       int (*copy_guest_stack)();
>       int (*reset_in_guest)();
>       ...
> };
> int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *);
> int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *);
> 
> It's more scalable and neater.
In case you are losing patience, I have worked out a new patch against
2.6.34-rc1.

It works with:
#perf kvm --guest --guestkallsyms /guest/os/kernel/proc/kallsyms --guestmodules /guest/os/proc/modules top
It also supports collecting host-side and guest-side data at the same time:
#perf kvm --host --guest --guestkallsyms /guest/os/kernel/proc/kallsyms --guestmodules /guest/os/proc/modules top

The first output line of top shows the guest kernel/user space percentages.

Or just host side:
#perf kvm --host

Because the perf tool source code has changed a lot, I am still working on
perf kvm record and report.

---

diff -Nraup linux-2.6.34-rc1/arch/x86/include/asm/ptrace.h 
linux-2.6.34-rc1_work/arch/x86/include/asm/ptrace.h
--- linux-2.6.34-rc1/arch/x86/include/asm/ptrace.h      2010-03-09 
13:04:20.730596079 +0800
+++ linux-2.6.34-rc1_work/arch/x86/include/asm/ptrace.h 2010-03-10 
17:06:34.228953260 +0800
@@ -167,6 +167,15 @@ static inline int user_mode(struct pt_re
 #endif
 }
 
+static inline int user_mode_cs(u16 cs)
+{
+#ifdef CONFIG_X86_32
+       return (cs & SEGMENT_RPL_MASK) == USER_RPL;
+#else
+       return !!(cs & 3);
+#endif
+}
+
 static inline int user_mode_vm(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
diff -Nraup linux-2.6.34-rc1/arch/x86/kvm/vmx.c 
linux-2.6.34-rc1_work/arch/x86/kvm/vmx.c
--- linux-2.6.34-rc1/arch/x86/kvm/vmx.c 2010-03-09 13:04:20.758593132 +0800
+++ linux-2.6.34-rc1_work/arch/x86/kvm/vmx.c    2010-03-10 17:11:49.709019136 
+0800
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
 #include <linux/ftrace_event.h>
+#include <linux/perf_event.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -3632,6 +3633,43 @@ static void update_cr8_intercept(struct 
        vmcs_write32(TPR_THRESHOLD, irr);
 }
 
+DEFINE_PER_CPU(int, kvm_in_guest) = {0};
+
+static void kvm_set_in_guest(void)
+{
+       percpu_write(kvm_in_guest, 1);
+}
+
+static int kvm_is_in_guest(void)
+{
+       return percpu_read(kvm_in_guest);
+}
+
+static int kvm_is_user_mode(void)
+{
+       int user_mode;
+       user_mode = user_mode_cs(vmcs_read16(GUEST_CS_SELECTOR));
+       return user_mode;
+}
+
+static u64 kvm_get_guest_ip(void)
+{
+       return vmcs_readl(GUEST_RIP);
+}
+
+static void kvm_reset_in_guest(void)
+{
+       if (percpu_read(kvm_in_guest))
+               percpu_write(kvm_in_guest, 0);
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+       .is_in_guest            = kvm_is_in_guest,
+       .is_user_mode           = kvm_is_user_mode,
+       .get_guest_ip           = kvm_get_guest_ip,
+       .reset_in_guest         = kvm_reset_in_guest
+};
+
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
        u32 exit_intr_info;
@@ -3653,8 +3691,11 @@ static void vmx_complete_interrupts(stru
 
        /* We need to handle NMIs before interrupts are enabled */
        if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK))
+               (exit_intr_info & INTR_INFO_VALID_MASK)) {
+               kvm_set_in_guest();
                asm("int $2");
+               kvm_reset_in_guest();
+       }
 
        idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
@@ -4251,6 +4292,8 @@ static int __init vmx_init(void)
        if (bypass_guest_pf)
                kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
 
+       perf_register_guest_info_callbacks(&kvm_guest_cbs);
+
        return 0;
 
 out3:
@@ -4266,6 +4309,8 @@ out:
 
 static void __exit vmx_exit(void)
 {
+       perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+
        free_page((unsigned long)vmx_msr_bitmap_legacy);
        free_page((unsigned long)vmx_msr_bitmap_longmode);
        free_page((unsigned long)vmx_io_bitmap_b);
diff -Nraup linux-2.6.34-rc1/include/linux/perf_event.h 
linux-2.6.34-rc1_work/include/linux/perf_event.h
--- linux-2.6.34-rc1/include/linux/perf_event.h 2010-03-09 13:04:28.905944253 
+0800
+++ linux-2.6.34-rc1_work/include/linux/perf_event.h    2010-03-10 
17:06:34.228953260 +0800
@@ -287,11 +287,13 @@ struct perf_event_mmap_page {
        __u64   data_tail;              /* user-space written tail */
 };
 
-#define PERF_RECORD_MISC_CPUMODE_MASK          (3 << 0)
+#define PERF_RECORD_MISC_CPUMODE_MASK          (7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN       (0 << 0)
 #define PERF_RECORD_MISC_KERNEL                        (1 << 0)
 #define PERF_RECORD_MISC_USER                  (2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR            (3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL          (4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER            (5 << 0)
 
 struct perf_event_header {
        __u32   type;
@@ -439,6 +441,13 @@ enum perf_callchain_context {
 # include <asm/perf_event.h>
 #endif
 
+struct perf_guest_info_callbacks {
+       int (*is_in_guest) (void);
+       int (*is_user_mode) (void);
+       u64 (*get_guest_ip) (void);
+       void (*reset_in_guest) (void);
+};
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <asm/hw_breakpoint.h>
 #endif
@@ -849,6 +858,10 @@ static inline void perf_event_mmap(struc
                __perf_event_mmap(vma);
 }
 
+extern u64 perf_instruction_pointer(struct pt_regs *regs);
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *);
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *);
+
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
@@ -862,12 +875,6 @@ extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int 
entry_size);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
-#ifndef perf_misc_flags
-#define perf_misc_flags(regs)  (user_mode(regs) ? PERF_RECORD_MISC_USER : \
-                                PERF_RECORD_MISC_KERNEL)
-#define perf_instruction_pointer(regs) instruction_pointer(regs)
-#endif
-
 extern int perf_output_begin(struct perf_output_handle *handle,
                             struct perf_event *event, unsigned int size,
                             int nmi, int sample);
@@ -902,6 +909,13 @@ perf_sw_event(u32 event_id, u64 nr, int 
 static inline void
 perf_bp_event(struct perf_event *event, void *data)                    { }
 
+static inline int perf_register_guest_info_callbacks
+(struct perf_guest_info_callbacks *cbs)        { return 0; }
+static inline int perf_unregister_guest_info_callbacks
+(struct perf_guest_info_callbacks *cbs)        { return 0; }
+/* Takes only regs, like the extern perf_instruction_pointer() declaration. */
+#define perf_instruction_pointer(regs) instruction_pointer(regs)
+
 static inline void perf_event_mmap(struct vm_area_struct *vma)         { }
 static inline void perf_event_comm(struct task_struct *tsk)            { }
 static inline void perf_event_fork(struct task_struct *tsk)            { }
diff -Nraup linux-2.6.34-rc1/kernel/perf_event.c 
linux-2.6.34-rc1_work/kernel/perf_event.c
--- linux-2.6.34-rc1/kernel/perf_event.c        2010-03-09 13:04:30.085942017 
+0800
+++ linux-2.6.34-rc1_work/kernel/perf_event.c   2010-03-10 17:06:34.232905199 
+0800
@@ -2807,6 +2807,50 @@ __weak struct perf_callchain_entry *perf
 }
 
 /*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+static struct perf_guest_info_callbacks *perf_guest_cbs;
+
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks * cbs)
+{
+       perf_guest_cbs = cbs;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks * 
cbs)
+{
+       perf_guest_cbs = NULL;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
+u64 perf_instruction_pointer(struct pt_regs *regs)
+{
+       u64 ip;
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               ip = perf_guest_cbs->get_guest_ip();
+       } else
+               ip = instruction_pointer(regs);
+       return ip;
+}
+
+#ifndef perf_misc_flags
+static inline unsigned int perf_misc_flags(struct pt_regs *regs)
+{
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               return perf_guest_cbs->is_user_mode() ?
+                       PERF_RECORD_MISC_GUEST_USER :
+                       PERF_RECORD_MISC_GUEST_KERNEL;
+       } else
+               return user_mode(regs) ? PERF_RECORD_MISC_USER :
+                       PERF_RECORD_MISC_KERNEL;
+}
+#endif
+
+/*
  * Output
  */
 static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
diff -Nraup linux-2.6.34-rc1/tools/perf/builtin-diff.c 
linux-2.6.34-rc1_work/tools/perf/builtin-diff.c
--- linux-2.6.34-rc1/tools/perf/builtin-diff.c  2010-03-09 13:04:31.373942905 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/builtin-diff.c     2010-03-10 
17:06:34.232905199 +0800
@@ -222,6 +222,9 @@ int cmd_diff(int argc, const char **argv
                        input_new = argv[1];
                } else
                        input_new = argv[0];
+       } else if (symbol_conf.guest_vmlinux_name || 
symbol_conf.guest_kallsyms) {
+               input_old = "perf.data.host";
+               input_new = "perf.data.guest";
        }
 
        symbol_conf.exclude_other = false;
diff -Nraup linux-2.6.34-rc1/tools/perf/builtin.h 
linux-2.6.34-rc1_work/tools/perf/builtin.h
--- linux-2.6.34-rc1/tools/perf/builtin.h       2010-03-09 13:04:31.377861392 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/builtin.h  2010-03-10 17:06:34.232905199 
+0800
@@ -32,5 +32,6 @@ extern int cmd_version(int argc, const c
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
 extern int cmd_lock(int argc, const char **argv, const char *prefix);
+extern int cmd_kvm(int argc, const char **argv, const char *prefix);
 
 #endif
diff -Nraup linux-2.6.34-rc1/tools/perf/builtin-kvm.c 
linux-2.6.34-rc1_work/tools/perf/builtin-kvm.c
--- linux-2.6.34-rc1/tools/perf/builtin-kvm.c   1970-01-01 08:00:00.000000000 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/builtin-kvm.c      2010-03-10 
17:06:34.232905199 +0800
@@ -0,0 +1,123 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+
+#include <sys/prctl.h>
+
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
+
+static char                    *file_name = NULL;
+static char                    name_buffer[256];
+
+int                            perf_host = 1;
+int                            perf_guest = 0;
+
+static const char * const kvm_usage[] = {
+       "perf kvm [<options>] {top|record|report|diff}",
+       NULL
+};
+
+static const struct option kvm_options[] = {
+       OPT_STRING('i', "input", &file_name, "file",
+                   "Input file name"),
+       OPT_STRING('o', "output", &file_name, "file",
+                   "Output file name"),
+       OPT_BOOLEAN(0, "guest", &perf_guest,
+                   "Collect guest os data"),
+       OPT_BOOLEAN(0, "host", &perf_host,
+                   "Collect host os data"),
+       OPT_STRING(0, "guestvmlinux", &symbol_conf.guest_vmlinux_name, "file",
+                   "file saving guest os vmlinux"),
+       OPT_STRING(0, "guestkallsyms", &symbol_conf.guest_kallsyms, "file",
+                   "file saving guest os /proc/kallsyms"),
+       OPT_STRING(0, "guestmodules", &symbol_conf.guest_modules, "file",
+                   "file saving guest os /proc/modules"),
+       OPT_END()
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+       int rec_argc, i = 0, j;
+       const char **rec_argv;
+
+       rec_argc = argc + 2;
+       rec_argv = calloc(rec_argc + 1, sizeof(char *));
+       rec_argv[i++] = strdup("record");
+       rec_argv[i++] = strdup("-o");
+       rec_argv[i++] = strdup(file_name);
+       for (j = 1; j < argc; j++, i++)
+               rec_argv[i] = argv[j];
+
+       BUG_ON(i != rec_argc);
+
+       return cmd_record(i, rec_argv, NULL);
+}
+
+static int __cmd_report(int argc, const char **argv)
+{
+       int rec_argc, i = 0, j;
+       const char **rec_argv;
+
+       rec_argc = argc + 2;
+       rec_argv = calloc(rec_argc + 1, sizeof(char *));
+       rec_argv[i++] = strdup("report");
+       rec_argv[i++] = strdup("-i");
+       rec_argv[i++] = strdup(file_name);
+       for (j = 1; j < argc; j++, i++)
+               rec_argv[i] = argv[j];
+
+       BUG_ON(i != rec_argc);
+
+       return cmd_report(i, rec_argv, NULL);
+}
+
+int cmd_kvm(int argc, const char **argv, const char *prefix __used)
+{
+       perf_host = perf_guest = 0;
+
+       argc = parse_options(argc, argv, kvm_options, kvm_usage,
+                       PARSE_OPT_STOP_AT_NON_OPTION);
+       if (!argc)
+               usage_with_options(kvm_usage, kvm_options);
+
+       if (!perf_host)
+               perf_guest = 1;
+
+       if (!file_name) {
+               if (perf_host && !perf_guest)
+                       sprintf(name_buffer, "perf.data.host");
+               else if (!perf_host && perf_guest)
+                       sprintf(name_buffer, "perf.data.guest");
+               else
+                       sprintf(name_buffer, "perf.data.kvm");
+               file_name = name_buffer;
+       }
+
+       if (!strncmp(argv[0], "rec", 3)) {
+               return __cmd_record(argc, argv);
+       } else if (!strncmp(argv[0], "rep", 3)) {
+               return __cmd_report(argc, argv);
+       } else if (!strncmp(argv[0], "diff", 4)) {
+               return cmd_diff(argc, argv, NULL);
+       } else if (!strncmp(argv[0], "top", 3)) {
+               return cmd_top(argc, argv, NULL);
+       } else {
+               usage_with_options(kvm_usage, kvm_options);
+       }
+
+       return 0;
+}
+
diff -Nraup linux-2.6.34-rc1/tools/perf/builtin-top.c 
linux-2.6.34-rc1_work/tools/perf/builtin-top.c
--- linux-2.6.34-rc1/tools/perf/builtin-top.c   2010-03-09 13:04:31.377861392 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/builtin-top.c      2010-03-10 
17:06:34.232905199 +0800
@@ -409,7 +409,8 @@ static double sym_weight(const struct sy
 }
 
 static long                    samples;
-static long                    userspace_samples;
+static long                    kernel_samples, userspace_samples;
+static long                    guest_us_samples, guest_kernel_samples;
 static const char              CONSOLE_CLEAR[] = "";
 
 static void __list_insert_active_sym(struct sym_entry *syme)
@@ -449,7 +450,10 @@ static void print_sym_table(void)
        int printed = 0, j;
        int counter, snap = !display_weighted ? sym_counter : 0;
        float samples_per_sec = samples/delay_secs;
-       float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
+       float ksamples_per_sec = (kernel_samples)/delay_secs;
+       float userspace_samples_per_sec = (userspace_samples)/delay_secs;
+       float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
+       float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
        float sum_ksamples = 0.0;
        struct sym_entry *syme, *n;
        struct rb_root tmp = RB_ROOT;
@@ -457,7 +461,8 @@ static void print_sym_table(void)
        int sym_width = 0, dso_width = 0, max_dso_width;
        const int win_width = winsize.ws_col - 1;
 
-       samples = userspace_samples = 0;
+       samples = kernel_samples = userspace_samples = 0;
+       guest_kernel_samples = guest_us_samples = 0;
 
        /* Sort the active symbols */
        pthread_mutex_lock(&active_symbols_lock);
@@ -488,9 +493,19 @@ static void print_sym_table(void)
        puts(CONSOLE_CLEAR);
 
        printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
-       printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
-               samples_per_sec,
-               100.0 - 
(100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
+       if (!perf_guest) {
+               printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
+                       samples_per_sec,
+                       100.0 - 
(100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
+       } else {
+               printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% user:%4.1f%% 
guest kernel:%4.1f%% guest user:%4.1f%% [",
+                       samples_per_sec,
+                       100.0 - 
(100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)),
+                       100.0 - 
(100.0*((samples_per_sec-userspace_samples_per_sec)/samples_per_sec)),
+                       100.0 - 
(100.0*((samples_per_sec-guest_kernel_samples_per_sec)/samples_per_sec)),
+                       100.0 - 
(100.0*((samples_per_sec-guest_us_samples_per_sec)/samples_per_sec))
+                       );
+       }
 
        if (nr_counters == 1 || !display_weighted) {
                printf("%Ld", (u64)attrs[0].sample_period);
@@ -947,9 +962,17 @@ static void event__process_sample(const 
                        return;
                break;
        case PERF_RECORD_MISC_KERNEL:
+               ++kernel_samples;
                if (hide_kernel_symbols)
                        return;
                break;
+       case PERF_RECORD_MISC_GUEST_KERNEL:
+               ++guest_kernel_samples;
+               break;
+       case PERF_RECORD_MISC_GUEST_USER:
+               ++guest_us_samples;
+               /* TODO: we don't process guest user from host side. */
+               return;
        default:
                return;
        }
diff -Nraup linux-2.6.34-rc1/tools/perf/Makefile 
linux-2.6.34-rc1_work/tools/perf/Makefile
--- linux-2.6.34-rc1/tools/perf/Makefile        2010-03-09 13:04:31.341942020 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/Makefile   2010-03-10 17:06:34.232905199 
+0800
@@ -458,6 +458,7 @@ BUILTIN_OBJS += builtin-trace.o
 BUILTIN_OBJS += builtin-probe.o
 BUILTIN_OBJS += builtin-kmem.o
 BUILTIN_OBJS += builtin-lock.o
+BUILTIN_OBJS += builtin-kvm.o
 
 PERFLIBS = $(LIB_FILE)
 
diff -Nraup linux-2.6.34-rc1/tools/perf/perf.c 
linux-2.6.34-rc1_work/tools/perf/perf.c
--- linux-2.6.34-rc1/tools/perf/perf.c  2010-03-09 13:04:31.377861392 +0800
+++ linux-2.6.34-rc1_work/tools/perf/perf.c     2010-03-10 17:06:34.232905199 
+0800
@@ -304,6 +304,7 @@ static void handle_internal_command(int 
                { "probe",      cmd_probe,      0 },
                { "kmem",       cmd_kmem,       0 },
                { "lock",       cmd_lock,       0 },
+               { "kvm",        cmd_kvm,        0 },
        };
        unsigned int i;
        static const char ext[] = STRIP_EXTENSION;
diff -Nraup linux-2.6.34-rc1/tools/perf/perf.h 
linux-2.6.34-rc1_work/tools/perf/perf.h
--- linux-2.6.34-rc1/tools/perf/perf.h  2010-03-09 13:04:16.357945701 +0800
+++ linux-2.6.34-rc1_work/tools/perf/perf.h     2010-03-10 17:06:34.236904596 
+0800
@@ -131,4 +131,6 @@ struct ip_callchain {
        u64 ips[0];
 };
 
+extern int perf_host, perf_guest;
+
 #endif
diff -Nraup linux-2.6.34-rc1/tools/perf/util/event.c 
linux-2.6.34-rc1_work/tools/perf/util/event.c
--- linux-2.6.34-rc1/tools/perf/util/event.c    2010-03-09 13:04:31.381941876 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/util/event.c       2010-03-10 
17:06:34.236904596 +0800
@@ -442,12 +442,16 @@ void thread__find_addr_map(struct thread
        al->thread = self;
        al->addr = addr;
 
-       if (cpumode == PERF_RECORD_MISC_KERNEL) {
+       if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
                al->level = 'k';
                mg = &session->kmaps;
-       } else if (cpumode == PERF_RECORD_MISC_USER)
+       } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
                al->level = '.';
-       else {
+       } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
+               al->level = 'g';
+               mg = &session->guest_kmaps;
+       } else {
+               /* TODO: We don't support guest user space. Might support later 
*/
                al->level = 'H';
                al->map = NULL;
                return;
@@ -464,10 +468,18 @@ try_again:
                 * "[vdso]" dso, but for now lets use the old trick of looking
                 * in the whole kernel symbol list.
                 */
-               if ((long long)al->addr < 0 && mg != &session->kmaps) {
+               if ((long long)al->addr < 0 &&
+                       mg != &session->kmaps &&
+                       cpumode == PERF_RECORD_MISC_KERNEL) {
                        mg = &session->kmaps;
                        goto try_again;
                }
+               if ((long long)al->addr < 0 &&
+                               mg != &session->guest_kmaps &&
+                               cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
+                       mg = &session->guest_kmaps;
+                       goto try_again;
+               }
        } else
                al->addr = al->map->map_ip(al->map, al->addr);
 }
diff -Nraup linux-2.6.34-rc1/tools/perf/util/session.c 
linux-2.6.34-rc1_work/tools/perf/util/session.c
--- linux-2.6.34-rc1/tools/perf/util/session.c  2010-03-09 13:04:31.385942104 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/util/session.c     2010-03-10 
17:06:34.236904596 +0800
@@ -54,7 +54,12 @@ out_close:
 
 static inline int perf_session__create_kernel_maps(struct perf_session *self)
 {
-       return map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps);
+       int ret;
+       ret = map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps);
+       if (ret >= 0)
+               ret = map_groups__create_guest_kernel_maps(&self->guest_kmaps,
+                               self->guest_vmlinux_maps);
+       return ret;
 }
 
 struct perf_session *perf_session__new(const char *filename, int mode, bool 
force)
@@ -76,6 +81,7 @@ struct perf_session *perf_session__new(c
        self->cwdlen = 0;
        self->unknown_events = 0;
        map_groups__init(&self->kmaps);
+       map_groups__init(&self->guest_kmaps);
 
        if (mode == O_RDONLY) {
                if (perf_session__open(self, force) < 0)
diff -Nraup linux-2.6.34-rc1/tools/perf/util/session.h 
linux-2.6.34-rc1_work/tools/perf/util/session.h
--- linux-2.6.34-rc1/tools/perf/util/session.h  2010-03-09 13:04:31.385942104 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/util/session.h     2010-03-10 
17:06:34.236904596 +0800
@@ -16,9 +16,11 @@ struct perf_session {
        unsigned long           size;
        unsigned long           mmap_window;
        struct map_groups       kmaps;
+       struct map_groups       guest_kmaps;
        struct rb_root          threads;
        struct thread           *last_match;
        struct map              *vmlinux_maps[MAP__NR_TYPES];
+       struct map              *guest_vmlinux_maps[MAP__NR_TYPES];
        struct events_stats     events_stats;
        unsigned long           event_total[PERF_RECORD_MAX];
        unsigned long           unknown_events;
@@ -83,6 +85,6 @@ static inline struct map *
        perf_session__new_module_map(struct perf_session *self,
                                     u64 start, const char *filename)
 {
-       return map_groups__new_module(&self->kmaps, start, filename);
+       return map_groups__new_module(&self->kmaps, start, filename, 0);
 }
 #endif /* __PERF_SESSION_H */
diff -Nraup linux-2.6.34-rc1/tools/perf/util/symbol.c 
linux-2.6.34-rc1_work/tools/perf/util/symbol.c
--- linux-2.6.34-rc1/tools/perf/util/symbol.c   2010-03-09 13:04:31.385942104 
+0800
+++ linux-2.6.34-rc1_work/tools/perf/util/symbol.c      2010-03-10 
17:06:34.236904596 +0800
@@ -27,6 +27,8 @@ enum dso_origin {
        DSO__ORIG_BUILDID,
        DSO__ORIG_DSO,
        DSO__ORIG_KMODULE,
+       DSO__ORIG_GUEST_KERNEL,
+       DSO__ORIG_GUEST_KMODULE,
        DSO__ORIG_NOT_FOUND,
 };
 
@@ -34,6 +36,8 @@ static void dsos__add(struct list_head *
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
 static int dso__load_kernel_sym(struct dso *self, struct map *map,
                                symbol_filter_t filter);
+static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
+                       symbol_filter_t filter);
 static int vmlinux_path__nr_entries;
 static char **vmlinux_path;
 
@@ -184,6 +188,7 @@ struct dso *dso__new(const char *name)
                self->loaded = 0;
                self->sorted_by_name = 0;
                self->has_build_id = 0;
+               self->kernel = DSO_TYPE_USER;
        }
 
        return self;
@@ -523,13 +528,19 @@ static int dso__split_kallsyms(struct ds
                        char dso_name[PATH_MAX];
                        struct dso *dso;
 
-                       snprintf(dso_name, sizeof(dso_name), "[kernel].%d",
-                                kernel_range++);
+                       if (self->kernel == DSO_TYPE_GUEST_KERNEL)
+                               snprintf(dso_name, sizeof(dso_name), 
"[guest.kernel].%d",
+                                               kernel_range++);
+                       else
+                               snprintf(dso_name, sizeof(dso_name), 
"[kernel].%d",
+                                               kernel_range++);
 
                        dso = dso__new(dso_name);
                        if (dso == NULL)
                                return -1;
 
+                       dso->kernel = self->kernel;
+
                        curr_map = map__new2(pos->start, dso, map->type);
                        if (curr_map == NULL) {
                                dso__delete(dso);
@@ -563,7 +574,10 @@ int dso__load_kallsyms(struct dso *self,
                return -1;
 
        symbols__fixup_end(&self->symbols[map->type]);
-       self->origin = DSO__ORIG_KERNEL;
+       if (self->kernel == DSO_TYPE_GUEST_KERNEL)
+               self->origin = DSO__ORIG_GUEST_KERNEL;
+       else
+               self->origin = DSO__ORIG_KERNEL;
 
        return dso__split_kallsyms(self, map, filter);
 }
@@ -951,7 +965,7 @@ static int dso__load_sym(struct dso *sel
        nr_syms = shdr.sh_size / shdr.sh_entsize;
 
        memset(&sym, 0, sizeof(sym));
-       if (!self->kernel) {
+       if (self->kernel == DSO_TYPE_USER) {
                self->adjust_symbols = (ehdr.e_type == ET_EXEC ||
                                elf_section_by_name(elf, &ehdr, &shdr,
                                                     ".gnu.prelink_undo",
@@ -983,7 +997,7 @@ static int dso__load_sym(struct dso *sel
 
                section_name = elf_sec__name(&shdr, secstrs);
 
-               if (self->kernel || kmodule) {
+               if (self->kernel != DSO_TYPE_USER || kmodule) {
                        char dso_name[PATH_MAX];
 
                        if (strcmp(section_name,
@@ -1009,6 +1023,7 @@ static int dso__load_sym(struct dso *sel
                                curr_dso = dso__new(dso_name);
                                if (curr_dso == NULL)
                                        goto out_elf_end;
+                               curr_dso->kernel = self->kernel;
                                curr_map = map__new2(start, curr_dso,
                                                     map->type);
                                if (curr_map == NULL) {
@@ -1017,9 +1032,15 @@ static int dso__load_sym(struct dso *sel
                                }
                                curr_map->map_ip = identity__map_ip;
                                curr_map->unmap_ip = identity__map_ip;
-                               curr_dso->origin = DSO__ORIG_KERNEL;
+                               if (curr_dso->kernel == DSO_TYPE_GUEST_KERNEL) {
+                                       curr_dso->origin = 
DSO__ORIG_GUEST_KERNEL;
+                                       dsos__add(&dsos__guest_kernel, 
curr_dso);
+                               } else {
+                                       curr_dso->origin = DSO__ORIG_KERNEL;
+                                       dsos__add(&dsos__kernel, curr_dso);
+                               }
+
                                map_groups__insert(kmap->kmaps, curr_map);
-                               dsos__add(&dsos__kernel, curr_dso);
                                dso__set_loaded(curr_dso, map->type);
                        } else
                                curr_dso = curr_map->dso;
@@ -1240,6 +1261,8 @@ char dso__symtab_origin(const struct dso
                [DSO__ORIG_BUILDID] =  'b',
                [DSO__ORIG_DSO] =      'd',
                [DSO__ORIG_KMODULE] =  'K',
+               [DSO__ORIG_GUEST_KERNEL] =  'g',
+               [DSO__ORIG_GUEST_KMODULE] =  'G',
        };
 
        if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
@@ -1258,8 +1281,10 @@ int dso__load(struct dso *self, struct m
 
        dso__set_loaded(self, map->type);
 
-       if (self->kernel)
+       if (self->kernel == DSO_TYPE_KERNEL)
                return dso__load_kernel_sym(self, map, filter);
+       else if (self->kernel == DSO_TYPE_GUEST_KERNEL)
+               return dso__load_guest_kernel_sym(self, map, filter);
 
        name = malloc(size);
        if (!name)
@@ -1463,7 +1488,7 @@ static int map_groups__set_modules_path(
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 {
        struct map *self = zalloc(sizeof(*self) +
-                                 (dso->kernel ? sizeof(struct kmap) : 0));
+                         (dso->kernel != DSO_TYPE_USER ? sizeof(struct kmap) : 
0));
        if (self != NULL) {
                /*
                 * ->end will be filled after we load all the symbols
@@ -1475,11 +1500,15 @@ static struct map *map__new2(u64 start, 
 }
 
 struct map *map_groups__new_module(struct map_groups *self, u64 start,
-                                  const char *filename)
+                                  const char *filename, int guest)
 {
        struct map *map;
-       struct dso *dso = __dsos__findnew(&dsos__kernel, filename);
+       struct dso *dso;
 
+       if (!guest)
+               dso = __dsos__findnew(&dsos__kernel, filename);
+       else
+               dso = __dsos__findnew(&dsos__guest_kernel, filename);
        if (dso == NULL)
                return NULL;
 
@@ -1487,16 +1516,20 @@ struct map *map_groups__new_module(struc
        if (map == NULL)
                return NULL;
 
-       dso->origin = DSO__ORIG_KMODULE;
+       if (guest)
+               dso->origin = DSO__ORIG_GUEST_KMODULE;
+       else
+               dso->origin = DSO__ORIG_KMODULE;
        map_groups__insert(self, map);
        return map;
 }
 
-static int map_groups__create_modules(struct map_groups *self)
+static int __map_groups__create_modules(struct map_groups *self,
+                       const char * filename, int guest)
 {
        char *line = NULL;
        size_t n;
-       FILE *file = fopen("/proc/modules", "r");
+       FILE *file = fopen(filename, "r");
        struct map *map;
 
        if (file == NULL)
@@ -1530,16 +1563,17 @@ static int map_groups__create_modules(st
                *sep = '\0';
 
                snprintf(name, sizeof(name), "[%s]", line);
-               map = map_groups__new_module(self, start, name);
+               map = map_groups__new_module(self, start, name, guest);
                if (map == NULL)
                        goto out_delete_line;
-               dso__kernel_module_get_build_id(map->dso);
+               if (!guest)
+                       dso__kernel_module_get_build_id(map->dso);
        }
 
        free(line);
        fclose(file);
 
-       return map_groups__set_modules_path(self);
+       return 0;
 
 out_delete_line:
        free(line);
@@ -1547,6 +1581,21 @@ out_failure:
        return -1;
 }
 
+/* Host modules: parse /proc/modules and, only if that succeeded, go on
+ * to map_groups__set_modules_path(); either failure is returned as-is. */
+static int map_groups__create_modules(struct map_groups *self)
+{
+       int err = __map_groups__create_modules(self, "/proc/modules", 0);
+
+       return err < 0 ? err : map_groups__set_modules_path(self);
+}
+
+/* Guest modules: parse the file named by symbol_conf.guest_modules. */
+static int map_groups__create_guest_modules(struct map_groups *self)
+{
+       return __map_groups__create_modules(self, symbol_conf.guest_modules, 1);
+}
+
 static int dso__load_vmlinux(struct dso *self, struct map *map,
                             const char *vmlinux, symbol_filter_t filter)
 {
@@ -1706,8 +1755,44 @@ out_fixup:
        return err;
 }
 
+static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
+                               symbol_filter_t filter)
+{
+       int err;
+       const char *kallsyms_filename = NULL;
+       /*
+        * A user-named guest vmlinux is used alone, errors included; else the
+        * guest kallsyms file is loaded.  kallsyms_filename stays NULL on the
+        * vmlinux path so only a kallsyms load renames the dso below.
+        */
+       if (symbol_conf.guest_vmlinux_name != NULL) {
+               err = dso__load_vmlinux(self, map,
+                                       symbol_conf.guest_vmlinux_name, filter);
+               goto out_try_fixup;
+       }
+
+       kallsyms_filename = symbol_conf.guest_kallsyms;
+       if (!kallsyms_filename)
+               return -1;
+       err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
+       if (err > 0)
+               pr_debug("Using %s for symbols\n", kallsyms_filename);
+
+out_try_fixup:
+       if (err > 0) {
+               if (kallsyms_filename != NULL)
+                       dso__set_long_name(self, strdup("[guest.kernel.kallsyms]"));
+               map__fixup_start(map);
+               map__fixup_end(map);
+       }
+
+       return err;
+}
+
 LIST_HEAD(dsos__user);
 LIST_HEAD(dsos__kernel);
+LIST_HEAD(dsos__guest_user);
+LIST_HEAD(dsos__guest_kernel);
 
 static void dsos__add(struct list_head *head, struct dso *dso)
 {
@@ -1754,6 +1839,8 @@ void dsos__fprintf(FILE *fp)
 {
        __dsos__fprintf(&dsos__kernel, fp);
        __dsos__fprintf(&dsos__user, fp);
+       __dsos__fprintf(&dsos__guest_kernel, fp);
+       __dsos__fprintf(&dsos__guest_user, fp);
 }
 
 static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
@@ -1783,7 +1870,19 @@ struct dso *dso__new_kernel(const char *
 
        if (self != NULL) {
                self->short_name = "[kernel]";
-               self->kernel     = 1;
+               self->kernel     = DSO_TYPE_KERNEL;
+       }
+
+       return self;
+}
+
+struct dso *dso__new_guest_kernel(const char *name)
+{
+       struct dso *self = dso__new(name ?: "[guest.kernel.kallsyms]");
+
+       if (self != NULL) {
+               self->short_name = "[guest.kernel]";
+               self->kernel     = DSO_TYPE_GUEST_KERNEL;
        }
 
        return self;
@@ -1808,6 +1907,16 @@ static struct dso *dsos__create_kernel(c
        return kernel;
 }
 
+static struct dso *dsos__create_guest_kernel(const char *vmlinux)
+{
+       struct dso *kernel = dso__new_guest_kernel(vmlinux);
+
+       /* dso__new_guest_kernel() sets ->kernel; never touch a NULL dso here */
+       if (kernel != NULL)
+               dsos__add(&dsos__guest_kernel, kernel);
+       return kernel;
+}
+
 int __map_groups__create_kernel_maps(struct map_groups *self,
                                     struct map *vmlinux_maps[MAP__NR_TYPES],
                                     struct dso *kernel)
@@ -1956,3 +2065,24 @@ int map_groups__create_kernel_maps(struc
        map_groups__fixup_end(self);
        return 0;
 }
+
+/* Create the guest kernel dso and its maps; 0 on success, -1 on failure. */
+int map_groups__create_guest_kernel_maps(struct map_groups *self,
+                                  struct map *vmlinux_maps[MAP__NR_TYPES])
+{
+       struct dso *guest_dso;
+
+       guest_dso = dsos__create_guest_kernel(symbol_conf.guest_vmlinux_name);
+       if (guest_dso == NULL ||
+           __map_groups__create_kernel_maps(self, vmlinux_maps, guest_dso) < 0)
+               return -1;
+
+       /* Guest module maps are best effort: a failure is only logged. */
+       if (symbol_conf.use_modules && map_groups__create_guest_modules(self) < 0)
+               pr_debug("Problems creating module maps, continuing anyway...\n");
+
+       /* With every map created, their ->end values can now be filled in. */
+       map_groups__fixup_end(self);
+       return 0;
+}
+
diff -Nraup linux-2.6.34-rc1/tools/perf/util/symbol.h linux-2.6.34-rc1_work/tools/perf/util/symbol.h
--- linux-2.6.34-rc1/tools/perf/util/symbol.h   2010-03-09 13:04:31.385942104 +0800
+++ linux-2.6.34-rc1_work/tools/perf/util/symbol.h      2010-03-10 17:06:34.236904596 +0800
@@ -66,7 +66,10 @@ struct symbol_conf {
                        full_paths;
        const char      *vmlinux_name,
                        *field_sep;
-       char            *dso_list_str,
+       const char      *guest_vmlinux_name,
+                       *guest_kallsyms,
+                       *guest_modules;
+       char            *dso_list_str,
                        *comm_list_str,
                        *sym_list_str,
                        *col_width_list_str;
@@ -97,6 +100,12 @@ struct addr_location {
        bool          filtered;
 };
 
+enum dso_kernel_type { /* type of the ->kernel field in struct dso */
+       DSO_TYPE_USER = 0,      /* map__new2() reserves no kmap for this type */
+       DSO_TYPE_KERNEL,        /* assigned by dso__new_kernel() */
+       DSO_TYPE_GUEST_KERNEL   /* assigned by dso__new_guest_kernel() */
+};
+
 struct dso {
        struct list_head node;
        struct rb_root   symbols[MAP__NR_TYPES];
@@ -104,7 +113,7 @@ struct dso {
        u8               adjust_symbols:1;
        u8               slen_calculated:1;
        u8               has_build_id:1;
-       u8               kernel:1;
+       enum dso_kernel_type    kernel;
        u8               hit:1;
        unsigned char    origin;
        u8               sorted_by_name;
@@ -118,6 +127,7 @@ struct dso {
 
 struct dso *dso__new(const char *name);
 struct dso *dso__new_kernel(const char *name);
+struct dso *dso__new_guest_kernel(const char *name);
 void dso__delete(struct dso *self);
 
 bool dso__loaded(const struct dso *self, enum map_type type);
@@ -130,7 +140,7 @@ static inline void dso__set_loaded(struc
 
 void dso__sort_by_name(struct dso *self, enum map_type type);
 
-extern struct list_head dsos__user, dsos__kernel;
+extern struct list_head dsos__user, dsos__kernel, dsos__guest_user, dsos__guest_kernel;
 
 struct dso *__dsos__findnew(struct list_head *head, const char *name);
 
diff -Nraup linux-2.6.34-rc1/tools/perf/util/thread.h linux-2.6.34-rc1_work/tools/perf/util/thread.h
--- linux-2.6.34-rc1/tools/perf/util/thread.h   2010-03-09 13:04:31.385942104 +0800
+++ linux-2.6.34-rc1_work/tools/perf/util/thread.h      2010-03-10 17:06:34.236904596 +0800
@@ -79,6 +79,9 @@ int __map_groups__create_kernel_maps(str
 int map_groups__create_kernel_maps(struct map_groups *self,
                                   struct map *vmlinux_maps[MAP__NR_TYPES]);
 
+int map_groups__create_guest_kernel_maps(struct map_groups *self,
+                                  struct map *vmlinux_maps[MAP__NR_TYPES]);
+
 struct map *map_groups__new_module(struct map_groups *self, u64 start,
-                                  const char *filename);
+                                  const char *filename, int guest);
 #endif /* __PERF_THREAD_H */


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to