> On Mar 9, 2018, at 3:25 PM, Daniel Borkmann <dan...@iogearbox.net> wrote: > > Hi Song, > > On 03/06/2018 08:42 PM, Song Liu wrote: > [...]> +/* >> + * Parse build id from the note segment. This logic can be shared between >> + * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are >> + * identical. >> + */ >> +static inline int stack_map_parse_build_id(void *vm_start, >> + unsigned char *build_id, >> + void *note_start, >> + Elf32_Word note_size) >> +{ >> + Elf32_Word note_offs = 0, new_offs; >> + >> + /* check for overflow */ >> + if (note_start < vm_start || note_start + note_size < note_start) >> + return -EINVAL; >> + >> + /* only supports note that fits in the first page */ >> + if (note_start + note_size > vm_start + PAGE_SIZE) >> + return -EINVAL; >> + >> + while (note_offs + sizeof(Elf32_Nhdr) < note_size) { >> + Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); >> + >> + if (nhdr->n_type == BPF_BUILD_ID && >> + nhdr->n_namesz == sizeof("GNU") && >> + nhdr->n_descsz == BPF_BUILD_ID_SIZE) { >> + memcpy(build_id, >> + note_start + note_offs + >> + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), >> + BPF_BUILD_ID_SIZE); >> + return 0; >> + } >> + new_offs = note_offs + sizeof(Elf32_Nhdr) + >> + ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); >> + if (new_offs <= note_offs) /* overflow */ >> + break; >> + note_offs = new_offs; >> + }; >> + return -EINVAL; >> +} >> + >> +/* Parse build ID from 32-bit ELF */ >> +static int stack_map_get_build_id_32(void *vm_start, >> + unsigned char *build_id) >> +{ >> + Elf32_Ehdr *ehdr = (Elf32_Ehdr *)vm_start; >> + Elf32_Phdr *phdr; >> + int i; >> + >> + /* only supports phdr that fits in one page */ >> + if (ehdr->e_phnum > >> + (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) >> + return -EINVAL; >> + >> + phdr = (Elf32_Phdr *)(vm_start + sizeof(Elf32_Ehdr)); >> + >> + for (i = 0; i < ehdr->e_phnum; ++i) >> + if (phdr[i].p_type == PT_NOTE) >> + return stack_map_parse_build_id(vm_start, build_id, >> + vm_start + phdr[i].p_offset, >> + phdr[i].p_filesz); >> + return -EINVAL; >> +} >> + >> +/* Parse build ID from 64-bit ELF */ >> +static int stack_map_get_build_id_64(void *vm_start, >> + unsigned char *build_id) >> +{ >> + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)vm_start; >> + Elf64_Phdr *phdr; >> + int i; >> + >> + /* only supports phdr that fits in one page */ >> + if (ehdr->e_phnum > >> + (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) >> + return -EINVAL; >> + >> + phdr = (Elf64_Phdr *)(vm_start + sizeof(Elf64_Ehdr)); >> + >> + for (i = 0; i < ehdr->e_phnum; ++i) >> + if (phdr[i].p_type == PT_NOTE) >> + return stack_map_parse_build_id(vm_start, build_id, >> + vm_start + phdr[i].p_offset, >> + phdr[i].p_filesz); >> + return -EINVAL; >> +} >> + >> +/* Parse build ID of ELF file mapped to vma */ >> +static int stack_map_get_build_id(struct vm_area_struct *vma, >> + unsigned char *build_id) >> +{ >> + Elf32_Ehdr *ehdr; >> + struct page *page; >> + int ret; >> + >> + /* >> + * vm_start is user memory, so we need to be careful with it. >> + * We don't want too many copy_from_user to reduce overhead. >> + * Most ELF file is at least one page long, and the build_id >> + * is stored in the first page. Therefore, we limit the search of >> + * build_id to the first page only. This can be made safe with >> + * get_user_pages_fast(). If the file is smaller than PAGE_SIZE, >> + * or the build_id is not in the first page, the look up fails. >> + */ >> + if (vma->vm_end - vma->vm_start < PAGE_SIZE || >> + vma->vm_start & (PAGE_SIZE - 1)) /* page aligned */ >> + return -EINVAL; >> + >> + if (get_user_pages_fast(vma->vm_start, 1, 0, &page) != 1) > > Shouldn't this throw a splat? Implementations of get_user_pages_fast() > call down_read() which has might_sleep() and we're under RCU read side > here. > >> + return -EFAULT; >> + >> + ret = -EINVAL; >> + ehdr = (Elf32_Ehdr *)page_address(page); >> + >> + /* compare magic x7f "ELF" */ >> + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) >> + goto out; >> + >> + /* only support executable file and shared object file */ >> + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) >> + goto out; >> + >> + if (ehdr->e_ident[EI_CLASS] == 1) > > Minor nit: ehdr->e_ident[EI_CLASS] == ELFCLASS32 > >> + ret = stack_map_get_build_id_32(page_address(page), build_id); >> + else if (ehdr->e_ident[EI_CLASS] == 2) > > Minor nit: ehdr->e_ident[EI_CLASS] == ELFCLASS64 > >> + ret = stack_map_get_build_id_64(page_address(page), build_id); >> +out: >> + put_page(page); >> + return ret; >> +} >> + >> +static int stack_map_get_build_id_offset(struct bpf_map *map, >> + struct stack_map_bucket *bucket, >> + u64 *ips, u32 trace_nr) >> +{ >> + int i; >> + struct vm_area_struct *vma; >> + struct bpf_stack_build_id *id_offs; >> + int err; >> + int successful_lookup = 0; >> + > [...] > > Thanks, > Daniel
Thanks Daniel! I will fix these. I also chatted with Teng about this, that we may miss some cases. I will also include that fix in the next version. Song