First of all, nice set of patches. On Sat, 2007-03-31 at 23:10 -0800, Christoph Lameter wrote: > --- linux-2.6.21-rc5-mm2.orig/include/asm-generic/memory_model.h > 2007-03-31 22:47:14.000000000 -0700 > +++ linux-2.6.21-rc5-mm2/include/asm-generic/memory_model.h 2007-03-31 > 22:59:35.000000000 -0700 > @@ -47,6 +47,13 @@ > }) > > #elif defined(CONFIG_SPARSEMEM) > +#ifdef CONFIG_SPARSE_VIRTUAL > +/* > + * We have a virtual memmap that makes lookups very simple > + */ > +#define __pfn_to_page(pfn) (vmemmap + (pfn)) > +#define __page_to_pfn(page) ((page) - vmemmap) > +#else > /* > * Note: section's mem_map is encorded to reflect its start_pfn. > * section[i].section_mem_map == mem_map's address - start_pfn; > @@ -62,6 +69,7 @@ > struct mem_section *__sec = __pfn_to_section(__pfn); \ > __section_mem_map_addr(__sec) + __pfn; \ > }) > +#endif > #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
Any chance this can be done without embedding this inside another #ifdef? I really hate untangling the mess when an #endif goes missing. Any reason this can't just be another #elif? > #ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE > Index: linux-2.6.21-rc5-mm2/mm/sparse.c > =================================================================== > --- linux-2.6.21-rc5-mm2.orig/mm/sparse.c 2007-03-31 22:47:14.000000000 > -0700 > +++ linux-2.6.21-rc5-mm2/mm/sparse.c 2007-03-31 22:59:35.000000000 -0700 > @@ -9,6 +9,7 @@ > #include <linux/spinlock.h> > #include <linux/vmalloc.h> > #include <asm/dma.h> > +#include <asm/pgalloc.h> > > /* > * Permanent SPARSEMEM data: > @@ -101,7 +102,7 @@ static inline int sparse_index_init(unsi > > /* > * Although written for the SPARSEMEM_EXTREME case, this happens > - * to also work for the flat array case becase > + * to also work for the flat array case because > * NR_SECTION_ROOTS==NR_MEM_SECTIONS. > */ > int __section_nr(struct mem_section* ms) > @@ -211,6 +212,90 @@ static int sparse_init_one_section(struc > return 1; > } > > +#ifdef CONFIG_SPARSE_VIRTUAL > + > +void *vmemmap_alloc_block(unsigned long size, int node) > +{ > + if (slab_is_available()) { > + struct page *page = > + alloc_pages_node(node, GFP_KERNEL, > + get_order(size)); > + > + BUG_ON(!page); > + return page_address(page); > + } else > + return __alloc_bootmem_node(NODE_DATA(node), size, size, > + __pa(MAX_DMA_ADDRESS)); > +} Hmmmmmmm. Can we combine this with sparse_index_alloc()? Also, why not just use the slab for this? Let's get rid of the _block() part, too. I'm not sure it does any good. At least make it _bytes() so that we know what the units are. Also, if you're just going to round up internally and _not_ use the slab, can you just make the argument in pages, or even order? Can you think of any times when we'd want that BUG_ON() to be a WARN_ON(), instead? I can see preferring having my mem_map[] on the wrong node than hitting a BUG(). > +#ifndef ARCH_POPULATES_VIRTUAL_MEMMAP > +/* > + * Virtual memmap populate functionality for architectures that support > + * PMDs for huge pages like i386, x86_64 etc. > + */ How about: /* * Virtual memmap support for architectures that use Linux pagetables * natively in hardware, and support mapping huge pages with PMD * entries. */ It wouldn't make sense to map the vmemmap area with Linux pagetables on an arch that didn't use them in hardware, right? So, perhaps this doesn't quite belong in mm/sparse.c. Perhaps we need arch/x86/sparse.c. ;) > +static void vmemmap_pop_pmd(pud_t *pud, unsigned long addr, > + unsigned long end, int node) > +{ > + pmd_t *pmd; > + > + end = pmd_addr_end(addr, end); > + > + for (pmd = pmd_offset(pud, addr); addr < end; > + pmd++, addr += PMD_SIZE) { > + if (pmd_none(*pmd)) { > + void *block; > + pte_t pte; > + > + block = vmemmap_alloc_block(PMD_SIZE, node); > + pte = pfn_pte(__pa(block) >> PAGE_SHIFT, > + PAGE_KERNEL); > + pte_mkdirty(pte); > + pte_mkwrite(pte); > + pte_mkyoung(pte); > + mk_pte_huge(pte); > + set_pmd(pmd, __pmd(pte_val(pte))); > + } > + } > +} Nitpick: I think this would look quite a bit neater with a little less indentation. How about making the loop start with if (!pmd_none(*pmd)) continue; It should bring the rest of the code in a bit and make that long line more readable. > +static void vmemmap_pop_pud(pgd_t *pgd, unsigned long addr, > + unsigned long end, int node) > +{ > + pud_t *pud; > + > + end = pud_addr_end(addr, end); > + for (pud = pud_offset(pgd, addr); addr < end; > + pud++, addr += PUD_SIZE) { > + > + if (pud_none(*pud)) > + pud_populate(&init_mm, pud, > + vmemmap_alloc_block(PAGE_SIZE, node)); > + > + vmemmap_pop_pmd(pud, addr, end, node); > + } > +} > + > +static void vmemmap_populate(struct page *start_page, unsigned long nr, > + int node) > +{ > + pgd_t *pgd; > + unsigned long addr = (unsigned long)(start_page); > + unsigned long end = pgd_addr_end(addr, > + (unsigned long)((start_page + nr))); There appear to be a few extra parentheses on these lines. > + for (pgd = pgd_offset_k(addr); addr < end; > + pgd++, addr += PGDIR_SIZE) { > + > + if (pgd_none(*pgd)) > + pgd_populate(&init_mm, pgd, > + vmemmap_alloc_block(PAGE_SIZE, node)); > + vmemmap_pop_pud(pgd, addr, end, node); > + } > +} > +#endif > +#endif /* CONFIG_SPARSE_VIRTUAL */ We don't really need these #ifdefs embedded inside of each other, either, right? Kconfig should take care of enforcing the dependency. > static struct page *sparse_early_mem_map_alloc(unsigned long pnum) > { > struct page *map; > @@ -221,8 +306,13 @@ static struct page *sparse_early_mem_map > if (map) > return map; > > +#ifdef CONFIG_SPARSE_VIRTUAL > + map = pfn_to_page(pnum * PAGES_PER_SECTION); > + vmemmap_populate(map, PAGES_PER_SECTION, nid); > +#else > map = alloc_bootmem_node(NODE_DATA(nid), > sizeof(struct page) * PAGES_PER_SECTION); > +#endif We really worked hard to keep #ifdefs out of the code flow in that file and keep it as clean as possible. Could we hide this behind a helper? map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); if (map) return map; + map = alloc_vmemmap(map, PAGES_PER_SECTION, nid); + if (map) + return map; + map = alloc_bootmem_node(NODE_DATA(nid), sizeof(struct page) * PAGES_PER_SECTION); if (map) return map; Then, do whatever magic you want in alloc_vmemmap(). -- Dave - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/