On Tue, 2012-07-31 at 21:12 +0200, Peter Zijlstra wrote:
> +#ifdef CONFIG_NUMA
> +	/*
> +	 * XXX fold this into flags for 64bit or so...
> +	 */
> +	int nid_last;
> +#endif
Something like the below? I still ought to update all the various comments about page flag layout etc.. Also, that #warning gives a very noisy build indeed, I guess we should either make it silent or increase the page frame size for those configs.. 32bit NUMA is quite rare for normal people (sorry Paul) :) --- --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -611,10 +611,19 @@ static inline pte_t maybe_mkwrite(pte_t #define NODES_WIDTH 0 #endif +#if NODES_WIDTH && (SECTIONS_WIDTH+ZONES_WIDTH+2*NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS) +#define LAST_NID_WIDTH NODES_SHIFT +#else +#warning "faking page_xchg_last_nid" +#define LAST_NID_NOT_IN_PAGE_FLAGS +#define LAST_NID_WIDTH 0 +#endif + /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) +#define LAST_NID_PGOFF (ZONES_PGOFF - LAST_NID_WIDTH) /* * We are going to use the flags for the page to node mapping if its in @@ -632,6 +641,7 @@ static inline pte_t maybe_mkwrite(pte_t #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) +#define LAST_NID_PGSHIFT (LAST_NID_PGOFF * (LAST_NID_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -653,6 +663,7 @@ static inline pte_t maybe_mkwrite(pte_t #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) +#define LAST_NID_MASK ((1UL << LAST_NID_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) @@ -691,6 +702,39 @@ static inline int page_to_nid(const stru } #endif +#ifdef LAST_NID_NOT_IN_PAGE_FLAGS +static inline int page_xchg_last_nid(struct 
page *page, int nid) +{ + return nid; /* fakin' it */ +} + +static inline int page_last_nid(struct page *page) +{ + return page_to_nid(page); +} +#else +static inline int page_xchg_last_nid(struct page *page, int nid) +{ + unsigned long old_flags, flags; + int last_nid; + + old_flags = flags = page->flags; + last_nid = (flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; + + flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); + flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; + + (void)cmpxchg(&page->flags, old_flags, flags); + + return last_nid; +} + +static inline int page_last_nid(struct page *page) +{ + return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; +} +#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ + static inline struct zone *page_zone(const struct page *page) { return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -176,12 +176,6 @@ struct page { */ void *shadow; #endif -#ifdef CONFIG_NUMA - /* - * XXX fold this into flags for 64bit or so... - */ - int nid_last; -#endif } /* * The struct page can be forced to be double word aligned so that atomic ops --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1366,6 +1366,7 @@ static void __split_huge_page_refcount(s page_tail->mapping = page->mapping; page_tail->index = page->index + i; + page_xchg_last_nid(page, page_last_nid(tail_page)); page_tail->nid_last = page->nid_last; BUG_ON(!PageAnon(page_tail)); --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2265,10 +2265,9 @@ int mpol_misplaced(struct page *page, st * task_tick_numa(). 
*/ if (multi && (pol->flags & MPOL_F_HOME)) { - if (page->nid_last != polnid) { - page->nid_last = polnid; + int last_nid = page_xchg_last_nid(page, polnid); + if (last_nid != polnid) goto out; - } } if (curnid != polnid) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/