Hello Nathan,

Would it be possible to do the sort "in place" (without the extra
buffer)? I mean would that upset any other part of the kernel?


Thanks,
Emil.


> -----Original Message-----
> From: Nathan Lynch [mailto:[EMAIL PROTECTED] 
> Sent: Monday, November 05, 2007 8:34 PM
> To: Medve Emilian
> Cc: linuxppc-dev@ozlabs.org
> Subject: [RFC/PATCH] reduce load time for modules with lots of relocs
> 
> Nathan Lynch wrote:
> > Medve Emilian-EMMEDVE1 wrote:
> > > 
> > > I have module with 36K relocation entries (I know, I 
> know, how could
> > > that be...) and the count_relocs() function takes ~17 
> seconds to crunch
> > > through the relocation table from the .rela.text section. 
> I don't think
> > > I can reduce the number of entries in the relocation 
> table (can I?) so
> > > I'm thinking at improving the performance of 
> count_relocs() (currently
> > > O(n^2)). Does anybody have a simpler idea? Does anyone have any
> > > constructive suggestion on how to improve the situation?
> > 
> > This seems to come up every few months.  There was a patch submitted
> > here:
> > 
> > http://ozlabs.org/pipermail/linuxppc-dev/2007-June/037641.html
> 
> I think this comes up often enough for us to fix it (IIRC unionfs
> people complained about it long ago too), and it's kind of lame to
> spend 17 seconds in the kernel without scheduling.  So I dug up some
> old patches that should reduce the complexity to O(n logn), using the
> sort() function, which IMO is preferable to doing our own hash thing
> as that patch from June does.  Only the 64-bit code is tested.  Does
> this help your situation?
> 
>  arch/powerpc/kernel/module_32.c |   85 
> +++++++++++++++++++++++++++++++---------
>  arch/powerpc/kernel/module_64.c |   80 
> ++++++++++++++++++++++++++++++-------
>  2 files changed, 132 insertions(+), 33 deletions(-)
> 
> 
> diff --git a/arch/powerpc/kernel/module_32.c 
> b/arch/powerpc/kernel/module_32.c
> index 07a89a3..001b941 100644
> --- a/arch/powerpc/kernel/module_32.c
> +++ b/arch/powerpc/kernel/module_32.c
> @@ -24,6 +24,7 @@
>  #include <linux/kernel.h>
>  #include <linux/cache.h>
>  #include <linux/bug.h>
> +#include <linux/sort.h>
>  
>  #include "setup.h"
>  
> @@ -50,26 +51,64 @@ void module_free(struct module *mod, void 
> *module_region)
>             table entries. */
>  }
>  
> +static int reloc_cmp(const void *_a, const void *_b)
> +{
> +     const Elf32_Rela *a = *(Elf32_Rela **)_a, *b = 
> *(Elf32_Rela **)_b;
> +
> +     if (a->r_info < b->r_info)
> +             return -1;
> +     else if (a->r_info > b->r_info)
> +             return 1;
> +     else if (a->r_addend < b->r_addend)
> +             return -1;
> +     else if (a->r_addend > b->r_addend)
> +             return 1;
> +
> +     return 0;
> +}
> +
> +
>  /* Count how many different relocations (different symbol, different
>     addend) */
>  static unsigned int count_relocs(const Elf32_Rela *rela, 
> unsigned int num)
>  {
> -     unsigned int i, j, ret = 0;
> +     unsigned int i, sorted_count = 0;
> +     Elf32_Word last_info;
> +     Elf32_Sword last_addend;
> +     Elf32_Rela **sortbuf = NULL;
> +
> +     if (num == 0)
> +             return 0;
> +
> +     sortbuf = vmalloc(num * sizeof(*sortbuf));
> +
> +     if (!sortbuf)
> +             return -ENOMEM;
> +
> +     for (i = 0; i < num; i++)
> +             sortbuf[i] = (Elf32_Rela *)&rela[i];
> +
> +     sort(sortbuf, i, sizeof(sortbuf[0]), reloc_cmp, NULL);
> +
> +     last_info = sortbuf[0]->r_info;
> +     last_addend = sortbuf[0]->r_addend;
> +     sorted_count = 1;
>  
> -     /* Sure, this is order(n^2), but it's usually short, and not
> -           time critical */
>       for (i = 0; i < num; i++) {
> -             for (j = 0; j < i; j++) {
> -                     /* If this addend appeared before, it's
> -                           already been counted */
> -                     if (ELF32_R_SYM(rela[i].r_info)
> -                         == ELF32_R_SYM(rela[j].r_info)
> -                         && rela[i].r_addend == rela[j].r_addend)
> -                             break;
> -             }
> -             if (j == i) ret++;
> +             /* If this r_info,r_addend tuple matches the previous
> +              * entry, don't count it again
> +              */
> +             if (sortbuf[i]->r_info == last_info &&
> +                 sortbuf[i]->r_addend == last_addend)
> +                     continue;
> +
> +             last_info = sortbuf[i]->r_info;
> +             last_addend = sortbuf[i]->r_addend;
> +             sorted_count++;
>       }
> -     return ret;
> +
> +     vfree(sortbuf);
> +     return sorted_count;
>  }
>  
>  /* Get the potential trampolines size required of the init and
> @@ -96,15 +135,19 @@ static unsigned long get_plt_size(const 
> Elf32_Ehdr *hdr,
>                       continue;
>  
>               if (sechdrs[i].sh_type == SHT_RELA) {
> +                     int count;
>                       DEBUGP("Found relocations in section %u\n", i);
>                       DEBUGP("Ptr: %p.  Number: %u\n",
>                              (void *)hdr + sechdrs[i].sh_offset,
>                              sechdrs[i].sh_size / sizeof(Elf32_Rela));
> -                     ret += count_relocs((void *)hdr
> +                     count = count_relocs((void *)hdr
>                                            + sechdrs[i].sh_offset,
>                                            sechdrs[i].sh_size
>                                            / sizeof(Elf32_Rela))
>                               * sizeof(struct ppc_plt_entry);
> +                     if (count < 0)
> +                             return count;
> +                     ret += count;
>               }
>       }
>  
> @@ -117,6 +160,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
>                             struct module *me)
>  {
>       unsigned int i;
> +     int ret;
>  
>       /* Find .plt and .init.plt sections */
>       for (i = 0; i < hdr->e_shnum; i++) {
> @@ -131,10 +175,15 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
>       }
>  
>       /* Override their sizes */
> -     sechdrs[me->arch.core_plt_section].sh_size
> -             = get_plt_size(hdr, sechdrs, secstrings, 0);
> -     sechdrs[me->arch.init_plt_section].sh_size
> -             = get_plt_size(hdr, sechdrs, secstrings, 1);
> +     ret = get_plt_size(hdr, sechdrs, secstrings, 0);
> +     if (ret < 0)
> +             return ret;
> +     sechdrs[me->arch.core_plt_section].sh_size = ret;
> +
> +     ret = get_plt_size(hdr, sechdrs, secstrings, 1);
> +     if (ret < 0)
> +             return ret;
> +     sechdrs[me->arch.init_plt_section].sh_size = ret;
>       return 0;
>  }
>  
> diff --git a/arch/powerpc/kernel/module_64.c 
> b/arch/powerpc/kernel/module_64.c
> index 75c7c4f..bfc5b98 100644
> --- a/arch/powerpc/kernel/module_64.c
> +++ b/arch/powerpc/kernel/module_64.c
> @@ -21,6 +21,7 @@
>  #include <linux/err.h>
>  #include <linux/vmalloc.h>
>  #include <linux/bug.h>
> +#include <linux/sort.h>
>  #include <asm/module.h>
>  #include <asm/uaccess.h>
>  #include <asm/firmware.h>
> @@ -77,28 +78,68 @@ static struct ppc64_stub_entry ppc64_stub =
>       0x4e, 0x80, 0x04, 0x20  /* bctr */
>  } };
>  
> +static int reloc_cmp(const void *_a, const void *_b)
> +{
> +     const Elf64_Rela *a = *(Elf64_Rela **)_a, *b = 
> *(Elf64_Rela **)_b;
> +
> +     if (a->r_info < b->r_info)
> +             return -1;
> +     else if (a->r_info > b->r_info)
> +             return 1;
> +     else if (a->r_addend < b->r_addend)
> +             return -1;
> +     else if (a->r_addend > b->r_addend)
> +             return 1;
> +
> +     return 0;
> +}
> +
>  /* Count how many different 24-bit relocations (different symbol,
>     different addend) */
> -static unsigned int count_relocs(const Elf64_Rela *rela, 
> unsigned int num)
> +static int count_relocs(const Elf64_Rela *rela, unsigned int num)
>  {
>       unsigned int i, j, ret = 0;
> +     Elf64_Xword last_info;
> +     Elf64_Sxword last_addend;
> +     Elf64_Rela **sortbuf = NULL;
> +
> +     if (num == 0)
> +             return 0;
> +
> +     sortbuf = vmalloc(num * sizeof(*sortbuf));
>  
> -     /* FIXME: Only count external ones --RR */
> -     /* Sure, this is order(n^2), but it's usually short, and not
> -           time critical */
> -     for (i = 0; i < num; i++) {
> +     if (!sortbuf)
> +             return -ENOMEM;
> +
> +     for (j = 0, i = 0; i < num; i++) {
>               /* Only count 24-bit relocs, others don't need stubs */
>               if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
>                       continue;
> -             for (j = 0; j < i; j++) {
> -                     /* If this addend appeared before, it's
> -                           already been counted */
> -                     if (rela[i].r_info == rela[j].r_info
> -                         && rela[i].r_addend == rela[j].r_addend)
> -                             break;
> -             }
> -             if (j == i) ret++;
> +             sortbuf[j++] = (Elf64_Rela *)&rela[i];
>       }
> +
> +     if (j == 0)
> +             goto out;
> +
> +     sort(sortbuf, j, sizeof(sortbuf[0]), reloc_cmp, NULL);
> +
> +     last_info = sortbuf[0]->r_info;
> +     last_addend = sortbuf[0]->r_addend;
> +     ret = 1;
> +     for (i = 0; i < j; i++) {
> +             /* If this r_info,r_addend tuple matches the previous
> +              * entry, don't count it again
> +              */
> +             if (sortbuf[i]->r_info == last_info &&
> +                 sortbuf[i]->r_addend == last_addend)
> +                     continue;
> +
> +             last_info = sortbuf[i]->r_info;
> +             last_addend = sortbuf[i]->r_addend;
> +             ret++;
> +     }
> +out:
> +     vfree(sortbuf);
>       return ret;
>  }
>  
> @@ -129,13 +170,17 @@ static unsigned long 
> get_stubs_size(const Elf64_Ehdr *hdr,
>       /* Every relocated section... */
>       for (i = 1; i < hdr->e_shnum; i++) {
>               if (sechdrs[i].sh_type == SHT_RELA) {
> +                     int count;
>                       DEBUGP("Found relocations in section %u\n", i);
>                       DEBUGP("Ptr: %p.  Number: %lu\n",
>                              (void *)sechdrs[i].sh_addr,
>                              sechdrs[i].sh_size / sizeof(Elf64_Rela));
> -                     relocs += count_relocs((void 
> *)sechdrs[i].sh_addr,
> +                     count = count_relocs((void *)sechdrs[i].sh_addr,
>                                              sechdrs[i].sh_size
>                                              / sizeof(Elf64_Rela));
> +                     if (count < 0)
> +                             return count;
> +                     relocs += count;
>               }
>       }
>  
> @@ -173,6 +218,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
>                             struct module *me)
>  {
>       unsigned int i;
> +     int stubs_size;
>  
>       /* Find .toc and .stubs sections, symtab and strtab */
>       for (i = 1; i < hdr->e_shnum; i++) {
> @@ -209,7 +255,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
>               me->arch.toc_section = me->arch.stubs_section;
>  
>       /* Override the stubs size */
> -     sechdrs[me->arch.stubs_section].sh_size = 
> get_stubs_size(hdr, sechdrs);
> +     stubs_size = get_stubs_size(hdr, sechdrs);
> +     if (stubs_size < 0)
> +             return stubs_size;
> +
> +     sechdrs[me->arch.stubs_section].sh_size = stubs_size;
>       return 0;
>  }
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Reply via email to