mm,hugetlb: use folio fields in second tail page
author	Hugh Dickins <hughd@google.com>
Thu, 3 Nov 2022 01:48:45 +0000 (18:48 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
Wed, 30 Nov 2022 23:58:46 +0000 (15:58 -0800)
Patch series "mm,huge,rmap: unify and speed up compound mapcounts".

This patch (of 3):

We want to declare one more int in the first tail of a compound page: that
first tail page being valuable property, since every compound page has a
first tail, but perhaps no more than that.
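
For illustration only, a minimal userspace sketch (toy types, abbreviated
fields; nothing here is the kernel's real layout, see the mm_types.h hunk
below for that) of how compound-page metadata rides in the first tail:

/* Toy model: every compound page has a head and at least one tail,
 * and otherwise-unused fields of that first tail carry the metadata.
 */
#include <stdio.h>

struct toy_page {
	unsigned long flags;
	unsigned long compound_head;	/* low bit set => tail page */
	unsigned char compound_dtor;	/* first tail only */
	unsigned char compound_order;	/* first tail only */
	int compound_mapcount;		/* first tail only */
	int compound_pincount;		/* first tail only */
};

int main(void)
{
	struct toy_page hpage[4] = {{0}};	/* order-2 compound page */

	for (int i = 1; i < 4; i++)		/* tails point at head */
		hpage[i].compound_head = (unsigned long)&hpage[0] | 1;
	hpage[1].compound_order = 2;		/* metadata in first tail */
	printf("order %u\n", hpage[1].compound_order);
	return 0;
}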

No problem on 64-bit: there is already space for it.  No problem with
32-bit THPs: 5.18 commit 5232c63f46fd ("mm: Make compound_pincount always
available") kindly cleared the space for it, apparently not realizing that
only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail
page->private might conflict) - but make sure of that in its Kconfig.
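
A compile-time sketch of that 32-bit constraint, assuming the usual
8-word struct page and an abbreviated 5.18-era first-tail layout (the
extra int's name here is made up, not the series' actual field name):

#include <assert.h>

/* On 32-bit, struct page is 8 words of 4 bytes.  Once commit
 * 5232c63f46fd made compound_pincount unconditional, the first
 * tail has room to spare for one more int.
 */
struct toy_first_tail {
	unsigned long compound_head;
	unsigned char compound_dtor, compound_order;
	int compound_mapcount;
	int compound_pincount;
	int one_more_int;	/* what this series wants to add */
};

static_assert(sizeof(struct toy_first_tail) <= 8 * sizeof(unsigned long),
	      "must still fit within a struct page");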

But hugetlb pages use tail page->private of the first tail page for a
subpool pointer, which will conflict; and they also use page->private of
the 2nd, 3rd and 4th tails.
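
Modelled in userspace C (toy types; the index values mirror the
SUBPAGE_INDEX_* enum deleted below), the old scheme looked roughly like:

#include <stdio.h>

struct toy_page { unsigned long private; };

enum {				/* as removed from hugetlb.h */
	SUBPAGE_INDEX_SUBPOOL = 1,	/* first tail */
	SUBPAGE_INDEX_CGROUP,		/* second tail */
	SUBPAGE_INDEX_CGROUP_RSVD,	/* third tail */
	SUBPAGE_INDEX_HWPOISON,		/* fourth tail */
};

static void *old_subpool(struct toy_page *hpage)
{
	return (void *)hpage[SUBPAGE_INDEX_SUBPOOL].private;
}

int main(void)
{
	struct toy_page hpage[8] = {{0}};
	int fake_subpool;

	hpage[SUBPAGE_INDEX_SUBPOOL].private = (unsigned long)&fake_subpool;
	printf("subpool: %p\n", old_subpool(hpage));
	return 0;
}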

Undo "mm: add private field of first tail to struct page and struct
folio"'s recent addition of private_1 to the folio tail: instead add
hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison to
a second tail page of the folio: THP has long been using several fields of
that tail, so make better use of it for hugetlb too.  This is not how a
generic folio should be declared in future, but it is an effective
transitional way to make use of it.
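
Reduced to a userspace sketch, the overlay technique works like this
(abbreviated; the real definitions and the FOLIO_MATCH assertions are in
the mm_types.h hunk below):

#include <assert.h>
#include <stddef.h>

struct toy_page { unsigned long words[8]; };

struct toy_folio {
	struct toy_page page;		/* overlays the head page */
	union {				/* overlays the first tail */
		struct {
			unsigned long _flags_1;
			unsigned long _head_1;
			/* dtor, order, mapcount, pincount ... */
		};
		struct toy_page __page_1;
	};
	union {				/* overlays the second tail */
		struct {
			unsigned long _flags_2;
			unsigned long _head_2;
			void *_hugetlb_subpool;
			void *_hugetlb_cgroup;
			void *_hugetlb_cgroup_rsvd;
			void *_hugetlb_hwpoison;
		};
		struct toy_page __page_2;
	};
};

/* the kind of check FOLIO_MATCH makes for each second-tail field */
static_assert(offsetof(struct toy_folio, _hugetlb_subpool) ==
	      2 * sizeof(struct toy_page) + 2 * sizeof(unsigned long),
	      "field must land in the second tail page");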

Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3.
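
The arithmetic comes out the same as before: order_base_2(3) == 2 ==
order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1), so HUGETLB_CGROUP_MIN_ORDER
stays at order 2 (four pages).  A toy verification (order_base_2()
reimplemented here for illustration, not the kernel's):

#include <stdio.h>

static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	/* new: order_base_2(__NR_USED_SUBPAGE) */
	printf("new MIN_ORDER = %u\n", order_base_2(3));
	/* old: order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1) */
	printf("old MIN_ORDER = %u\n", order_base_2(3 + 1));
	return 0;
}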

[hughd@google.com: prefix folio's page_1 and page_2 with double underscore,
  give folio's _flags_2 and _head_2 a line documentation each]
Link: https://lkml.kernel.org/r/9e2cb6b-5b58-d3f2-b5ee-5f8a14e8f10@google.com
Link: https://lkml.kernel.org/r/5f52de70-975-e94f-f141-543765736181@google.com
Link: https://lkml.kernel.org/r/3818cc9a-9999-d064-d778-9c94c5911e6@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/hugetlb.h
include/linux/hugetlb_cgroup.h
include/linux/mm_types.h
mm/Kconfig
mm/memory-failure.c

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 58a30938a9b19b14878abe49b6b43023064f7361..551834cd529951e37a73cba4ebbf1840f5579db3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
 /*
  * For HugeTLB page, there are more metadata to save in the struct page. But
  * the head struct page cannot meet our needs, so we have to abuse other tail
- * struct page to store the metadata. In order to avoid conflicts caused by
- * subsequent use of more tail struct pages, we gather these discrete indexes
- * of tail struct page here.
+ * struct page to store the metadata.
  */
-enum {
-       SUBPAGE_INDEX_SUBPOOL = 1,      /* reuse page->private */
-#ifdef CONFIG_CGROUP_HUGETLB
-       SUBPAGE_INDEX_CGROUP,           /* reuse page->private */
-       SUBPAGE_INDEX_CGROUP_RSVD,      /* reuse page->private */
-       __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-       SUBPAGE_INDEX_HWPOISON,
-#endif
-       __NR_USED_SUBPAGE,
-};
+#define __NR_USED_SUBPAGE 3
 
 struct hugepage_subpool {
        spinlock_t lock;
@@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx;
 
 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
 {
-       return (void *)folio_get_private_1(folio);
+       return folio->_hugetlb_subpool;
 }
 
 /*
- * hugetlb page subpool pointer located in hpage[1].private
+ * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
  */
 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 {
@@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 static inline void hugetlb_set_folio_subpool(struct folio *folio,
                                        struct hugepage_subpool *subpool)
 {
-       folio_set_private_1(folio, (unsigned long)subpool);
+       folio->_hugetlb_subpool = subpool;
 }
 
 static inline void hugetlb_set_page_subpool(struct page *hpage,
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c70f92fe493ef103a8c6e513a48f9810b087c57b..f706626a80635f2df49fe1054e03477fcfd6046b 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -24,12 +24,10 @@ struct file_region;
 #ifdef CONFIG_CGROUP_HUGETLB
 /*
  * Minimum page order trackable by hugetlb cgroup.
- * At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
- * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
- * is the reservation usage cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ * The second tail page contains all of the hugetlb-specific fields.
  */
-#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
+#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
 
 enum hugetlb_memory_event {
        HUGETLB_MAX,
@@ -69,21 +67,13 @@ struct hugetlb_cgroup {
 static inline struct hugetlb_cgroup *
 __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
 {
-       struct page *tail;
-
        VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
        if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
                return NULL;
-
-       if (rsvd) {
-               tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD);
-               return (void *)page_private(tail);
-       }
-
-       else {
-               tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
-               return (void *)page_private(tail);
-       }
+       if (rsvd)
+               return folio->_hugetlb_cgroup_rsvd;
+       else
+               return folio->_hugetlb_cgroup;
 }
 
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
@@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
                                       struct hugetlb_cgroup *h_cg, bool rsvd)
 {
        VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
-
        if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
                return;
        if (rsvd)
-               set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD),
-                                (unsigned long)h_cg);
+               folio->_hugetlb_cgroup_rsvd = h_cg;
        else
-               set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP),
-                                (unsigned long)h_cg);
+               folio->_hugetlb_cgroup = h_cg;
 }
 
 static inline void set_hugetlb_cgroup(struct folio *folio,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e86861ff5bbdb3f323183b1aa9db9aa8fc0f1a05..44f1f8b6be02bc55f03d3d10c34d9b3c905068e5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -145,15 +145,22 @@ struct page {
                        atomic_t compound_pincount;
 #ifdef CONFIG_64BIT
                        unsigned int compound_nr; /* 1 << compound_order */
-                       unsigned long _private_1;
 #endif
                };
-               struct {        /* Second tail page of compound page */
+               struct {        /* Second tail page of transparent huge page */
                        unsigned long _compound_pad_1;  /* compound_head */
                        unsigned long _compound_pad_2;
                        /* For both global and memcg */
                        struct list_head deferred_list;
                };
+               struct {        /* Second tail page of hugetlb page */
+                       unsigned long _hugetlb_pad_1;   /* compound_head */
+                       void *hugetlb_subpool;
+                       void *hugetlb_cgroup;
+                       void *hugetlb_cgroup_rsvd;
+                       void *hugetlb_hwpoison;
+                       /* No more space on 32-bit: use third tail if more */
+               };
                struct {        /* Page table pages */
                        unsigned long _pt_pad_1;        /* compound_head */
                        pgtable_t pmd_huge_pte; /* protected by page->ptl */
@@ -260,13 +267,18 @@ struct page {
  *    to find how many references there are to this folio.
  * @memcg_data: Memory Control Group data.
  * @_flags_1: For large folios, additional page flags.
- * @__head: Points to the folio.  Do not use.
+ * @_head_1: Points to the folio.  Do not use.
  * @_folio_dtor: Which destructor to use for this folio.
  * @_folio_order: Do not use directly, call folio_order().
  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
- * @_private_1: Do not use directly, call folio_get_private_1().
+ * @_flags_2: For alignment.  Do not use.
+ * @_head_2: Points to the folio.  Do not use.
+ * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
+ * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -305,16 +317,31 @@ struct folio {
                };
                struct page page;
        };
-       unsigned long _flags_1;
-       unsigned long __head;
-       unsigned char _folio_dtor;
-       unsigned char _folio_order;
-       atomic_t _total_mapcount;
-       atomic_t _pincount;
+       union {
+               struct {
+                       unsigned long _flags_1;
+                       unsigned long _head_1;
+                       unsigned char _folio_dtor;
+                       unsigned char _folio_order;
+                       atomic_t _total_mapcount;
+                       atomic_t _pincount;
 #ifdef CONFIG_64BIT
-       unsigned int _folio_nr_pages;
+                       unsigned int _folio_nr_pages;
 #endif
-       unsigned long _private_1;
+               };
+               struct page __page_1;
+       };
+       union {
+               struct {
+                       unsigned long _flags_2;
+                       unsigned long _head_2;
+                       void *_hugetlb_subpool;
+                       void *_hugetlb_cgroup;
+                       void *_hugetlb_cgroup_rsvd;
+                       void *_hugetlb_hwpoison;
+               };
+               struct page __page_2;
+       };
 };
 
 #define FOLIO_MATCH(pg, fl)                                            \
@@ -335,16 +362,25 @@ FOLIO_MATCH(memcg_data, memcg_data);
        static_assert(offsetof(struct folio, fl) ==                     \
                        offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
-FOLIO_MATCH(compound_head, __head);
+FOLIO_MATCH(compound_head, _head_1);
 FOLIO_MATCH(compound_dtor, _folio_dtor);
 FOLIO_MATCH(compound_order, _folio_order);
 FOLIO_MATCH(compound_mapcount, _total_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
-FOLIO_MATCH(_private_1, _private_1);
 #endif
 #undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl)                                            \
+       static_assert(offsetof(struct folio, fl) ==                     \
+                       offsetof(struct page, pg) + 2 * sizeof(struct page))
+FOLIO_MATCH(flags, _flags_2);
+FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
+FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
+FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
+FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
+#undef FOLIO_MATCH
 
 static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 {
@@ -388,16 +424,6 @@ static inline void *folio_get_private(struct folio *folio)
        return folio->private;
 }
 
-static inline void folio_set_private_1(struct folio *folio, unsigned long private)
-{
-       folio->_private_1 = private;
-}
-
-static inline unsigned long folio_get_private_1(struct folio *folio)
-{
-       return folio->_private_1;
-}
-
 struct page_frag_cache {
        void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/mm/Kconfig b/mm/Kconfig
index 4b28800d9be1651c3938700cfaa0b99d0f0ba23a..c86b69aff7d4dad26f0d8faf1c4b12a1c133a24c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -775,7 +775,7 @@ endchoice
 
 config THP_SWAP
        def_bool y
-       depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
+       depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
        help
          Swap transparent huge pages in one piece, without splitting.
          XXX: For now, swap cluster backing transparent huge page
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 779a426d2caba1ad2c37aabc64b1ec16185b2f22..63d8501001c6fe87e7c7b618c2ea087e8bb2800c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list originated from ->private field of
- * SUBPAGE_INDEX_HWPOISON-th tail page.
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
  */
 struct raw_hwp_page {
        struct llist_node node;
@@ -1697,7 +1696,7 @@ struct raw_hwp_page {
 
 static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
 {
-       return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+       return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
 }
 
 static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)