| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _LINUX_MM_H |
| 3 | #define _LINUX_MM_H |
| 4 | |
| 5 | #include <linux/errno.h> |
| 6 | #include <linux/mmdebug.h> |
| 7 | #include <linux/gfp.h> |
| 8 | #include <linux/pgalloc_tag.h> |
| 9 | #include <linux/bug.h> |
| 10 | #include <linux/list.h> |
| 11 | #include <linux/mmzone.h> |
| 12 | #include <linux/rbtree.h> |
| 13 | #include <linux/atomic.h> |
| 14 | #include <linux/debug_locks.h> |
| 15 | #include <linux/compiler.h> |
| 16 | #include <linux/mm_types.h> |
| 17 | #include <linux/mmap_lock.h> |
| 18 | #include <linux/range.h> |
| 19 | #include <linux/pfn.h> |
| 20 | #include <linux/percpu-refcount.h> |
| 21 | #include <linux/bit_spinlock.h> |
| 22 | #include <linux/shrinker.h> |
| 23 | #include <linux/resource.h> |
| 24 | #include <linux/page_ext.h> |
| 25 | #include <linux/err.h> |
| 26 | #include <linux/page-flags.h> |
| 27 | #include <linux/page_ref.h> |
| 28 | #include <linux/overflow.h> |
| 29 | #include <linux/sizes.h> |
| 30 | #include <linux/sched.h> |
| 31 | #include <linux/pgtable.h> |
| 32 | #include <linux/kasan.h> |
| 33 | #include <linux/memremap.h> |
| 34 | #include <linux/slab.h> |
| 35 | #include <linux/cacheinfo.h> |
| 36 | #include <linux/rcuwait.h> |
| 37 | #include <linux/bitmap.h> |
| 38 | #include <linux/bitops.h> |
| 39 | |
| 40 | struct mempolicy; |
| 41 | struct anon_vma; |
| 42 | struct anon_vma_chain; |
| 43 | struct user_struct; |
| 44 | struct pt_regs; |
| 45 | struct folio_batch; |
| 46 | |
| 47 | void arch_mm_preinit(void); |
| 48 | void mm_core_init(void); |
| 49 | void init_mm_internals(void); |
| 50 | |
| 51 | extern atomic_long_t _totalram_pages; |
| 52 | static inline unsigned long totalram_pages(void) |
| 53 | { |
| 54 | return (unsigned long)atomic_long_read(&_totalram_pages); |
| 55 | } |
| 56 | |
| 57 | static inline void totalram_pages_inc(void) |
| 58 | { |
| 59 | atomic_long_inc(&_totalram_pages); |
| 60 | } |
| 61 | |
| 62 | static inline void totalram_pages_dec(void) |
| 63 | { |
| 64 | atomic_long_dec(&_totalram_pages); |
| 65 | } |
| 66 | |
| 67 | static inline void totalram_pages_add(long count) |
| 68 | { |
| 69 | atomic_long_add(count, &_totalram_pages); |
| 70 | } |
| 71 | |
| 72 | extern void * high_memory; |
| 73 | |
| 74 | /* |
| 75 | * Convert between pages and MB |
| 76 | * 20 is the shift for 1MB (2^20 = 1MB) |
| 77 | * PAGE_SHIFT is the shift for page size (e.g., 12 for 4KB pages) |
| 78 | * So (20 - PAGE_SHIFT) converts between pages and MB |
| 79 | */ |
| 80 | #define PAGES_TO_MB(pages) ((pages) >> (20 - PAGE_SHIFT)) |
| 81 | #define MB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) |
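/*
 * Illustrative example (not part of the original header): with 4KB pages,
 * PAGE_SHIFT is 12, so both conversions shift by 8 bits (a factor of 256):
 *
 *	unsigned long mb    = PAGES_TO_MB(totalram_pages());
 *	unsigned long pages = MB_TO_PAGES(512);	(512MB -> 131072 pages)
 */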
| 82 | |
| 83 | #ifdef CONFIG_SYSCTL |
| 84 | extern int sysctl_legacy_va_layout; |
| 85 | #else |
| 86 | #define sysctl_legacy_va_layout 0 |
| 87 | #endif |
| 88 | |
| 89 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS |
| 90 | extern const int mmap_rnd_bits_min; |
| 91 | extern int mmap_rnd_bits_max __ro_after_init; |
| 92 | extern int mmap_rnd_bits __read_mostly; |
| 93 | #endif |
| 94 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS |
| 95 | extern const int mmap_rnd_compat_bits_min; |
| 96 | extern const int mmap_rnd_compat_bits_max; |
| 97 | extern int mmap_rnd_compat_bits __read_mostly; |
| 98 | #endif |
| 99 | |
| 100 | #ifndef DIRECT_MAP_PHYSMEM_END |
| 101 | # ifdef MAX_PHYSMEM_BITS |
| 102 | # define DIRECT_MAP_PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) |
| 103 | # else |
| 104 | # define DIRECT_MAP_PHYSMEM_END (((phys_addr_t)-1)&~(1ULL<<63)) |
| 105 | # endif |
| 106 | #endif |
| 107 | |
| 108 | #define INVALID_PHYS_ADDR (~(phys_addr_t)0) |
| 109 | |
| 110 | #include <asm/page.h> |
| 111 | #include <asm/processor.h> |
| 112 | |
| 113 | #ifndef __pa_symbol |
| 114 | #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) |
| 115 | #endif |
| 116 | |
| 117 | #ifndef page_to_virt |
| 118 | #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) |
| 119 | #endif |
| 120 | |
| 121 | #ifndef lm_alias |
| 122 | #define lm_alias(x) __va(__pa_symbol(x)) |
| 123 | #endif |
| 124 | |
| 125 | /* |
| 126 | * To prevent common memory management code from establishing |
| 127 | * a zero page mapping on a read fault. |
| 128 | * This macro should be defined within <asm/pgtable.h>. |
| 129 | * s390 does this to prevent multiplexing of hardware bits |
| 130 | * related to the physical page in case of virtualization. |
| 131 | */ |
| 132 | #ifndef mm_forbids_zeropage |
| 133 | #define mm_forbids_zeropage(X) (0) |
| 134 | #endif |
| 135 | |
| 136 | /* |
| 137 | * On some architectures it is expensive to call memset() for small sizes. |
| 138 | * If an architecture decides to implement its own version of |
| 139 | * mm_zero_struct_page, it should wrap the defines below in an #ifndef and |
| 140 | * define its own version of this macro in <asm/pgtable.h>. |
| 141 | */ |
| 142 | #if BITS_PER_LONG == 64 |
| 143 | /* This function must be updated when the size of struct page grows above 96 |
| 144 | * bytes or shrinks below 56 bytes. The idea is that the compiler optimizes out |
| 145 | * the switch() statement and leaves only move/store instructions. The compiler |
| 146 | * can also combine write statements if they are both assignments and can be |
| 147 | * reordered, which can result in several of the writes here being dropped. |
| 148 | */ |
| 149 | #define mm_zero_struct_page(pp) __mm_zero_struct_page(pp) |
| 150 | static inline void __mm_zero_struct_page(struct page *page) |
| 151 | { |
| 152 | unsigned long *_pp = (void *)page; |
| 153 | |
| 154 | /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */ |
| 155 | BUILD_BUG_ON(sizeof(struct page) & 7); |
| 156 | BUILD_BUG_ON(sizeof(struct page) < 56); |
| 157 | BUILD_BUG_ON(sizeof(struct page) > 96); |
| 158 | |
| 159 | switch (sizeof(struct page)) { |
| 160 | case 96: |
| 161 | _pp[11] = 0; |
| 162 | fallthrough; |
| 163 | case 88: |
| 164 | _pp[10] = 0; |
| 165 | fallthrough; |
| 166 | case 80: |
| 167 | _pp[9] = 0; |
| 168 | fallthrough; |
| 169 | case 72: |
| 170 | _pp[8] = 0; |
| 171 | fallthrough; |
| 172 | case 64: |
| 173 | _pp[7] = 0; |
| 174 | fallthrough; |
| 175 | case 56: |
| 176 | _pp[6] = 0; |
| 177 | _pp[5] = 0; |
| 178 | _pp[4] = 0; |
| 179 | _pp[3] = 0; |
| 180 | _pp[2] = 0; |
| 181 | _pp[1] = 0; |
| 182 | _pp[0] = 0; |
| 183 | } |
| 184 | } |
| 185 | #else |
| 186 | #define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page))) |
| 187 | #endif |
| 188 | |
| 189 | /* |
| 190 | * Default maximum number of active map areas; this limits the number of vmas |
| 191 | * per mm struct. Users can override this number via sysctl, but there is a |
| 192 | * problem. |
| 193 | * |
| 194 | * When a program's coredump is generated in ELF format, a section is created |
| 195 | * per vma. In ELF, the number of sections is represented as an unsigned short, |
| 196 | * so the number of sections must be smaller than 65535 at coredump time. |
| 197 | * Because the kernel adds some informative sections to the program image when |
| 198 | * generating a coredump, we need some margin. The number of extra sections is |
| 199 | * currently 1-3 and depends on the arch; we use "5" as a safe margin here. |
| 200 | * |
| 201 | * ELF extended numbering allows more than 65535 sections, so the 16-bit bound |
| 202 | * is no longer a hard limit, although some userspace tools can be surprised |
| 203 | * by it. |
| 204 | */ |
| 205 | #define MAPCOUNT_ELF_CORE_MARGIN (5) |
| 206 | #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) |
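/* With USHRT_MAX == 65535, this yields a default limit of 65530 map areas. */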
| 207 | |
| 208 | extern int sysctl_max_map_count; |
| 209 | |
| 210 | extern unsigned long sysctl_user_reserve_kbytes; |
| 211 | extern unsigned long sysctl_admin_reserve_kbytes; |
| 212 | |
| 213 | #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) |
| 214 | bool page_range_contiguous(const struct page *page, unsigned long nr_pages); |
| 215 | #else |
| 216 | static inline bool page_range_contiguous(const struct page *page, |
| 217 | unsigned long nr_pages) |
| 218 | { |
| 219 | return true; |
| 220 | } |
| 221 | #endif |
| 222 | |
| 223 | /* to align the pointer to the (next) page boundary */ |
| 224 | #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) |
| 225 | |
| 226 | /* to align the pointer to the (prev) page boundary */ |
| 227 | #define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE) |
| 228 | |
| 229 | /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ |
| 230 | #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) |
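/*
 * Illustrative example (assuming 4KB pages): PAGE_ALIGN(0x1234) == 0x2000,
 * PAGE_ALIGN_DOWN(0x1234) == 0x1000, and PAGE_ALIGNED(0x1000) is true while
 * PAGE_ALIGNED(0x1234) is false.
 */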
| 231 | |
| 232 | /** |
| 233 | * folio_page_idx - Return the number of a page in a folio. |
| 234 | * @folio: The folio. |
| 235 | * @page: The folio page. |
| 236 | * |
| 237 | * This function expects that the page is actually part of the folio. |
| 238 | * The returned number is relative to the start of the folio. |
| 239 | */ |
| 240 | static inline unsigned long folio_page_idx(const struct folio *folio, |
| 241 | const struct page *page) |
| 242 | { |
| 243 | return page - &folio->page; |
| 244 | } |
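/*
 * Illustrative example: for the third page of a large folio,
 * folio_page_idx(folio, folio_page(folio, 2)) == 2.
 */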
| 245 | |
| 246 | static inline struct folio *lru_to_folio(struct list_head *head) |
| 247 | { |
| 248 | return list_entry((head)->prev, struct folio, lru); |
| 249 | } |
| 250 | |
| 251 | void setup_initial_init_mm(void *start_code, void *end_code, |
| 252 | void *end_data, void *brk); |
| 253 | |
| 254 | /* |
| 255 | * Linux kernel virtual memory manager primitives. |
| 256 | * The idea being to have a "virtual" mm in the same way |
| 257 | * we have a virtual fs - giving a cleaner interface to the |
| 258 | * mm details, and allowing different kinds of memory mappings |
| 259 | * (from shared memory to executable loading to arbitrary |
| 260 | * mmap() functions). |
| 261 | */ |
| 262 | |
| 263 | struct vm_area_struct *vm_area_alloc(struct mm_struct *); |
| 264 | struct vm_area_struct *vm_area_dup(struct vm_area_struct *); |
| 265 | void vm_area_free(struct vm_area_struct *); |
| 266 | |
| 267 | #ifndef CONFIG_MMU |
| 268 | extern struct rb_root nommu_region_tree; |
| 269 | extern struct rw_semaphore nommu_region_sem; |
| 270 | |
| 271 | extern unsigned int kobjsize(const void *objp); |
| 272 | #endif |
| 273 | |
| 274 | /* |
| 275 | * vm_flags in vm_area_struct, see mm_types.h. |
| 276 | * When changing, update also include/trace/events/mmflags.h |
| 277 | */ |
| 278 | |
| 279 | #define VM_NONE 0x00000000 |
| 280 | |
| 281 | /** |
| 282 | * typedef vma_flag_t - specifies an individual VMA flag by bit number. |
| 283 | * |
| 284 | * This value is made type safe by sparse to avoid passing invalid flag values |
| 285 | * around. |
| 286 | */ |
| 287 | typedef int __bitwise vma_flag_t; |
| 288 | |
| 289 | #define DECLARE_VMA_BIT(name, bitnum) \ |
| 290 | VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum) |
| 291 | #define DECLARE_VMA_BIT_ALIAS(name, aliased) \ |
| 292 | VMA_ ## name ## _BIT = (VMA_ ## aliased ## _BIT) |
| 293 | enum { |
| 294 | DECLARE_VMA_BIT(READ, 0), |
| 295 | DECLARE_VMA_BIT(WRITE, 1), |
| 296 | DECLARE_VMA_BIT(EXEC, 2), |
| 297 | DECLARE_VMA_BIT(SHARED, 3), |
| 298 | /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ |
| 299 | DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */ |
| 300 | DECLARE_VMA_BIT(MAYWRITE, 5), |
| 301 | DECLARE_VMA_BIT(MAYEXEC, 6), |
| 302 | DECLARE_VMA_BIT(MAYSHARE, 7), |
| 303 | DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */ |
| 304 | #ifdef CONFIG_MMU |
| 305 | DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */ |
| 306 | #else |
| 307 | /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ |
| 308 | DECLARE_VMA_BIT(MAYOVERLAY, 9), |
| 309 | #endif /* CONFIG_MMU */ |
| 310 | /* Page-ranges managed without "struct page", just pure PFN */ |
| 311 | DECLARE_VMA_BIT(PFNMAP, 10), |
| 312 | DECLARE_VMA_BIT(MAYBE_GUARD, 11), |
| 313 | DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */ |
| 314 | DECLARE_VMA_BIT(LOCKED, 13), |
| 315 | DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */ |
| 316 | DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */ |
| 317 | DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */ |
| 318 | DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */ |
| 319 | DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */ |
| 320 | DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */ |
| 321 | DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */ |
| 322 | DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */ |
| 323 | DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */ |
| 324 | DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */ |
| 325 | DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */ |
| 326 | DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */ |
| 327 | DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */ |
| 328 | DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */ |
| 329 | DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */ |
| 330 | DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */ |
| 331 | DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */ |
| 332 | DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */ |
| 333 | /* These bits are reused, we define specific uses below. */ |
| 334 | DECLARE_VMA_BIT(HIGH_ARCH_0, 32), |
| 335 | DECLARE_VMA_BIT(HIGH_ARCH_1, 33), |
| 336 | DECLARE_VMA_BIT(HIGH_ARCH_2, 34), |
| 337 | DECLARE_VMA_BIT(HIGH_ARCH_3, 35), |
| 338 | DECLARE_VMA_BIT(HIGH_ARCH_4, 36), |
| 339 | DECLARE_VMA_BIT(HIGH_ARCH_5, 37), |
| 340 | DECLARE_VMA_BIT(HIGH_ARCH_6, 38), |
| 341 | /* |
| 342 | * This flag is used to connect VFIO to arch specific KVM code. It |
| 343 | * indicates that the memory under this VMA is safe for use with any |
| 344 | * non-cacheable memory type inside KVM. Some VFIO devices, on some |
| 345 | * platforms, are thought to be unsafe and can cause machine crashes |
| 346 | * if KVM does not lock down the memory type. |
| 347 | */ |
| 348 | DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), |
| 349 | #ifdef CONFIG_PPC32 |
| 350 | DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), |
| 351 | #else |
| 352 | DECLARE_VMA_BIT(DROPPABLE, 40), |
| 353 | #endif |
| 354 | DECLARE_VMA_BIT(UFFD_MINOR, 41), |
| 355 | DECLARE_VMA_BIT(SEALED, 42), |
| 356 | /* Flags that reuse flags above. */ |
| 357 | DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0), |
| 358 | DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1), |
| 359 | DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2), |
| 360 | DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3), |
| 361 | DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4), |
| 362 | #if defined(CONFIG_X86_USER_SHADOW_STACK) |
| 363 | /* |
| 364 | * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of |
| 365 | * support in core mm. |
| 366 | * |
| 367 | * These VMAs will get a single end guard page. This helps userspace |
| 368 | * protect itself from attacks. A single page is enough for current |
| 369 | * shadow stack archs (x86). See the comments near alloc_shstk() in |
| 370 | * arch/x86/kernel/shstk.c for more details on the guard size. |
| 371 | */ |
| 372 | DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5), |
| 373 | #elif defined(CONFIG_ARM64_GCS) |
| 374 | /* |
| 375 | * arm64's Guarded Control Stack implements similar functionality and |
| 376 | * has similar constraints to shadow stacks. |
| 377 | */ |
| 378 | DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6), |
| 379 | #endif |
| 380 | DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */ |
| 381 | DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */ |
| 382 | DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */ |
| 383 | DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */ |
| 384 | DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */ |
| 385 | DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */ |
| 386 | DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */ |
| 387 | DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */ |
| 388 | #ifdef CONFIG_STACK_GROWSUP |
| 389 | DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP), |
| 390 | DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN), |
| 391 | #else |
| 392 | DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN), |
| 393 | #endif |
| 394 | }; |
| 395 | #undef DECLARE_VMA_BIT |
| 396 | #undef DECLARE_VMA_BIT_ALIAS |
| 397 | |
| 398 | #define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT) |
| 399 | #define VM_READ INIT_VM_FLAG(READ) |
| 400 | #define VM_WRITE INIT_VM_FLAG(WRITE) |
| 401 | #define VM_EXEC INIT_VM_FLAG(EXEC) |
| 402 | #define VM_SHARED INIT_VM_FLAG(SHARED) |
| 403 | #define VM_MAYREAD INIT_VM_FLAG(MAYREAD) |
| 404 | #define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE) |
| 405 | #define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC) |
| 406 | #define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE) |
| 407 | #define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN) |
| 408 | #ifdef CONFIG_MMU |
| 409 | #define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING) |
| 410 | #else |
| 411 | #define VM_UFFD_MISSING VM_NONE |
| 412 | #define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY) |
| 413 | #endif |
| 414 | #define VM_PFNMAP INIT_VM_FLAG(PFNMAP) |
| 415 | #define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD) |
| 416 | #define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP) |
| 417 | #define VM_LOCKED INIT_VM_FLAG(LOCKED) |
| 418 | #define VM_IO INIT_VM_FLAG(IO) |
| 419 | #define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ) |
| 420 | #define VM_RAND_READ INIT_VM_FLAG(RAND_READ) |
| 421 | #define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY) |
| 422 | #define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND) |
| 423 | #define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT) |
| 424 | #define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT) |
| 425 | #define VM_NORESERVE INIT_VM_FLAG(NORESERVE) |
| 426 | #define VM_HUGETLB INIT_VM_FLAG(HUGETLB) |
| 427 | #define VM_SYNC INIT_VM_FLAG(SYNC) |
| 428 | #define VM_ARCH_1 INIT_VM_FLAG(ARCH_1) |
| 429 | #define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK) |
| 430 | #define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP) |
| 431 | #ifdef CONFIG_MEM_SOFT_DIRTY |
| 432 | #define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY) |
| 433 | #else |
| 434 | #define VM_SOFTDIRTY VM_NONE |
| 435 | #endif |
| 436 | #define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP) |
| 437 | #define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE) |
| 438 | #define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE) |
| 439 | #define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE) |
| 440 | #define VM_STACK INIT_VM_FLAG(STACK) |
| 441 | #ifdef CONFIG_STACK_GROWSUP |
| 442 | #define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY) |
| 443 | #else |
| 444 | #define VM_STACK_EARLY VM_NONE |
| 445 | #endif |
| 446 | #ifdef CONFIG_ARCH_HAS_PKEYS |
| 447 | #define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT) |
| 448 | /* Despite the naming, these are FLAGS not bits. */ |
| 449 | #define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0) |
| 450 | #define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1) |
| 451 | #define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2) |
| 452 | #if CONFIG_ARCH_PKEY_BITS > 3 |
| 453 | #define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3) |
| 454 | #else |
| 455 | #define VM_PKEY_BIT3 VM_NONE |
| 456 | #endif /* CONFIG_ARCH_PKEY_BITS > 3 */ |
| 457 | #if CONFIG_ARCH_PKEY_BITS > 4 |
| 458 | #define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4) |
| 459 | #else |
| 460 | #define VM_PKEY_BIT4 VM_NONE |
| 461 | #endif /* CONFIG_ARCH_PKEY_BITS > 4 */ |
| 462 | #endif /* CONFIG_ARCH_HAS_PKEYS */ |
| 463 | #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) |
| 464 | #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) |
| 465 | #else |
| 466 | #define VM_SHADOW_STACK VM_NONE |
| 467 | #endif |
| 468 | #if defined(CONFIG_PPC64) |
| 469 | #define VM_SAO INIT_VM_FLAG(SAO) |
| 470 | #elif defined(CONFIG_PARISC) |
| 471 | #define VM_GROWSUP INIT_VM_FLAG(GROWSUP) |
| 472 | #elif defined(CONFIG_SPARC64) |
| 473 | #define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI) |
| 474 | #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) |
| 475 | #elif defined(CONFIG_ARM64) |
| 476 | #define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI) |
| 477 | #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) |
| 478 | #elif !defined(CONFIG_MMU) |
| 479 | #define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY) |
| 480 | #endif |
| 481 | #ifndef VM_GROWSUP |
| 482 | #define VM_GROWSUP VM_NONE |
| 483 | #endif |
| 484 | #ifdef CONFIG_ARM64_MTE |
| 485 | #define VM_MTE INIT_VM_FLAG(MTE) |
| 486 | #define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED) |
| 487 | #else |
| 488 | #define VM_MTE VM_NONE |
| 489 | #define VM_MTE_ALLOWED VM_NONE |
| 490 | #endif |
| 491 | #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR |
| 492 | #define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR) |
| 493 | #else |
| 494 | #define VM_UFFD_MINOR VM_NONE |
| 495 | #endif |
| 496 | #ifdef CONFIG_64BIT |
| 497 | #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) |
| 498 | #define VM_SEALED INIT_VM_FLAG(SEALED) |
| 499 | #else |
| 500 | #define VM_ALLOW_ANY_UNCACHED VM_NONE |
| 501 | #define VM_SEALED VM_NONE |
| 502 | #endif |
| 503 | #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) |
| 504 | #define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) |
| 505 | #else |
| 506 | #define VM_DROPPABLE VM_NONE |
| 507 | #endif |
| 508 | |
| 509 | /* Bits set in the VMA until the stack is in its final location */ |
| 510 | #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) |
| 511 | |
| 512 | #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) |
| 513 | |
| 514 | /* Common data flag combinations */ |
| 515 | #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ |
| 516 | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) |
| 517 | #define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ |
| 518 | VM_MAYWRITE | VM_MAYEXEC) |
| 519 | #define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ |
| 520 | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) |
| 521 | |
| 522 | #ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ |
| 523 | #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC |
| 524 | #endif |
| 525 | |
| 526 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ |
| 527 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS |
| 528 | #endif |
| 529 | |
| 530 | #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) |
| 531 | |
| 532 | #ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS |
| 533 | #define VM_SEALED_SYSMAP VM_SEALED |
| 534 | #else |
| 535 | #define VM_SEALED_SYSMAP VM_NONE |
| 536 | #endif |
| 537 | |
| 538 | #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) |
| 539 | |
| 540 | /* VMA basic access permission flags */ |
| 541 | #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) |
| 542 | |
| 543 | /* |
| 544 | * Special vmas that are non-mergable, non-mlock()able. |
| 545 | */ |
| 546 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) |
| 547 | |
| 548 | /* |
| 549 | * Physically remapped pages are special. Tell the |
| 550 | * rest of the world about it: |
| 551 | * VM_IO tells people not to look at these pages |
| 552 | * (accesses can have side effects). |
| 553 | * VM_PFNMAP tells the core MM that the base pages are just |
| 554 | * raw PFN mappings, and do not have a "struct page" associated |
| 555 | * with them. |
| 556 | * VM_DONTEXPAND |
| 557 | * Disable vma merging and expanding with mremap(). |
| 558 | * VM_DONTDUMP |
| 559 | * Omit vma from core dump, even when VM_IO turned off. |
| 560 | */ |
| 561 | #define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) |
| 562 | |
| 563 | /* This mask prevents VMA from being scanned with khugepaged */ |
| 564 | #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) |
| 565 | |
| 566 | /* This mask defines which mm->def_flags a process can inherit from its parent */ |
| 567 | #define VM_INIT_DEF_MASK VM_NOHUGEPAGE |
| 568 | |
| 569 | /* This mask represents all the VMA flag bits used by mlock */ |
| 570 | #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) |
| 571 | |
| 572 | /* These flags can be updated atomically via VMA/mmap read lock. */ |
| 573 | #define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD |
| 574 | |
| 575 | /* Arch-specific flags to clear when updating VM flags on protection change */ |
| 576 | #ifndef VM_ARCH_CLEAR |
| 577 | #define VM_ARCH_CLEAR VM_NONE |
| 578 | #endif |
| 579 | #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) |
| 580 | |
| 581 | /* |
| 582 | * Flags which should be 'sticky' on merge - that is, flags which, when one VMA |
| 583 | * possesses them but the other does not, should nonetheless be applied to the |
| 584 | * merged VMA: |
| 585 | * |
| 586 | * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is, has not had its |
| 587 | * references cleared via /proc/$pid/clear_refs, any merged VMA |
| 588 | * should be considered soft-dirty also as it operates at a VMA |
| 589 | * granularity. |
| 590 | * |
| 591 | * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that |
| 592 | * mapped page tables may contain metadata not described by the |
| 593 | * VMA and thus any merged VMA may also contain this metadata, |
| 594 | * and thus we must make this flag sticky. |
| 595 | */ |
| 596 | #define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) |
| 597 | |
| 598 | /* |
| 599 | * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one |
| 600 | * of these flags and the other not does not preclude a merge. |
| 601 | * |
| 602 | * VM_STICKY - When merging VMAs, VMA flags must match, unless they are |
| 603 | * 'sticky'. If any sticky flags exist in either VMA, we simply |
| 604 | * set all of them on the merged VMA. |
| 605 | */ |
| 606 | #define VM_IGNORE_MERGE VM_STICKY |
| 607 | |
| 608 | /* |
| 609 | * Flags which should result in page tables being copied on fork. These are |
| 610 | * flags which indicate that the VMA maps page tables which cannot be |
| 611 | * reconstituted upon page fault, and so necessitate page table copying upon fork. |
| 612 | * |
| 613 | * Note that these flags should be compared with the DESTINATION VMA not the |
| 614 | * source, as VM_UFFD_WP may not be propagated to destination, while all other |
| 615 | * flags will be. |
| 616 | * |
| 617 | * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be |
| 618 | * reasonably reconstructed on page fault. |
| 619 | * |
| 620 | * VM_UFFD_WP - Encodes metadata about an installed uffd |
| 621 | * write protect handler, which cannot be |
| 622 | * reconstructed on page fault. |
| 623 | * |
| 624 | * We always copy pgtables when dst_vma has uffd-wp |
| 625 | * enabled even if it's file-backed |
| 626 | * (e.g. shmem), because when uffd-wp is enabled the |
| 627 | * pgtable contains uffd-wp protection information |
| 628 | * that we can't retrieve from the page cache, so |
| 629 | * skipping the copy would lose that information. |
| 630 | * |
| 631 | * VM_MAYBE_GUARD - Could contain page guard region markers which |
| 632 | * by design are a property of the page tables |
| 633 | * only and thus cannot be reconstructed on page |
| 634 | * fault. |
| 635 | */ |
| 636 | #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) |
| 637 | |
| 638 | /* |
| 639 | * mapping from the currently active vm_flags protection bits (the |
| 640 | * low four bits) to a page protection mask. |
| 641 | */ |
| 642 | |
| 643 | /* |
| 644 | * The default fault flags that should be used by most of the |
| 645 | * arch-specific page fault handlers. |
| 646 | */ |
| 647 | #define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \ |
| 648 | FAULT_FLAG_KILLABLE | \ |
| 649 | FAULT_FLAG_INTERRUPTIBLE) |
| 650 | |
| 651 | /** |
| 652 | * fault_flag_allow_retry_first - check ALLOW_RETRY the first time |
| 653 | * @flags: Fault flags. |
| 654 | * |
| 655 | * This is mostly used for places where we want to try to avoid taking |
| 656 | * the mmap_lock for too long a time when waiting for another condition |
| 657 | * to change, in which case we can try to be polite and release the |
| 658 | * mmap_lock in the first round to avoid potential starvation of other |
| 659 | * processes that would also want the mmap_lock. |
| 660 | * |
| 661 | * Return: true if the page fault allows retry and this is the first |
| 662 | * attempt of the fault handling; false otherwise. |
| 663 | */ |
| 664 | static inline bool fault_flag_allow_retry_first(enum fault_flag flags) |
| 665 | { |
| 666 | return (flags & FAULT_FLAG_ALLOW_RETRY) && |
| 667 | (!(flags & FAULT_FLAG_TRIED)); |
| 668 | } |
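/*
 * Illustrative sketch of how a fault path might use this helper; the exact
 * surrounding code is hypothetical:
 *
 *	if (fault_flag_allow_retry_first(vmf->flags) &&
 *	    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
 *		mmap_read_unlock(vmf->vma->vm_mm);
 *		(wait for the condition without holding mmap_lock)
 *		return VM_FAULT_RETRY;
 *	}
 */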
| 669 | |
| 670 | #define FAULT_FLAG_TRACE \ |
| 671 | { FAULT_FLAG_WRITE, "WRITE" }, \ |
| 672 | { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ |
| 673 | { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \ |
| 674 | { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \ |
| 675 | { FAULT_FLAG_KILLABLE, "KILLABLE" }, \ |
| 676 | { FAULT_FLAG_TRIED, "TRIED" }, \ |
| 677 | { FAULT_FLAG_USER, "USER" }, \ |
| 678 | { FAULT_FLAG_REMOTE, "REMOTE" }, \ |
| 679 | { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ |
| 680 | { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ |
| 681 | { FAULT_FLAG_VMA_LOCK, "VMA_LOCK" } |
| 682 | |
| 683 | /* |
| 684 | * vm_fault is filled by the pagefault handler and passed to the vma's |
| 685 | * ->fault function. The vma's ->fault is responsible for returning a bitmask |
| 686 | * of VM_FAULT_xxx flags that give details about how the fault was handled. |
| 687 | * |
| 688 | * MM layer fills up gfp_mask for page allocations but fault handler might |
| 689 | * alter it if its implementation requires a different allocation context. |
| 690 | * |
| 691 | * pgoff should be used in favour of virtual_address, if possible. |
| 692 | */ |
| 693 | struct vm_fault { |
| 694 | const struct { |
| 695 | struct vm_area_struct *vma; /* Target VMA */ |
| 696 | gfp_t gfp_mask; /* gfp mask to be used for allocations */ |
| 697 | pgoff_t pgoff; /* Logical page offset based on vma */ |
| 698 | unsigned long address; /* Faulting virtual address - masked */ |
| 699 | unsigned long real_address; /* Faulting virtual address - unmasked */ |
| 700 | }; |
| 701 | enum fault_flag flags; /* FAULT_FLAG_xxx flags |
| 702 | * XXX: should really be 'const' */ |
| 703 | pmd_t *pmd; /* Pointer to pmd entry matching |
| 704 | * the 'address' */ |
| 705 | pud_t *pud; /* Pointer to pud entry matching |
| 706 | * the 'address' |
| 707 | */ |
| 708 | union { |
| 709 | pte_t orig_pte; /* Value of PTE at the time of fault */ |
| 710 | pmd_t orig_pmd; /* Value of PMD at the time of fault, |
| 711 | * used by PMD fault only. |
| 712 | */ |
| 713 | }; |
| 714 | |
| 715 | struct page *cow_page; /* Page handler may use for COW fault */ |
| 716 | struct page *page; /* ->fault handlers should return a |
| 717 | * page here, unless VM_FAULT_NOPAGE |
| 718 | * is set (which is also implied by |
| 719 | * VM_FAULT_ERROR). |
| 720 | */ |
| 721 | /* These three entries are valid only while holding ptl lock */ |
| 722 | pte_t *pte; /* Pointer to pte entry matching |
| 723 | * the 'address'. NULL if the page |
| 724 | * table hasn't been allocated. |
| 725 | */ |
| 726 | spinlock_t *ptl; /* Page table lock. |
| 727 | * Protects pte page table if 'pte' |
| 728 | * is not NULL, otherwise pmd. |
| 729 | */ |
| 730 | pgtable_t prealloc_pte; /* Pre-allocated pte page table. |
| 731 | * vm_ops->map_pages() sets up a page |
| 732 | * table from atomic context. |
| 733 | * do_fault_around() pre-allocates |
| 734 | * page table to avoid allocation from |
| 735 | * atomic context. |
| 736 | */ |
| 737 | }; |
| 738 | |
| 739 | /* |
| 740 | * These are the virtual MM functions - opening of an area, closing and |
| 741 | * unmapping it (needed to keep files on disk up-to-date etc), pointer |
| 742 | * to the functions called when a no-page or a wp-page exception occurs. |
| 743 | */ |
| 744 | struct vm_operations_struct { |
| 745 | void (*open)(struct vm_area_struct * area); |
| 746 | /** |
| 747 | * @close: Called when the VMA is being removed from the MM. |
| 748 | * Context: User context. May sleep. Caller holds mmap_lock. |
| 749 | */ |
| 750 | void (*close)(struct vm_area_struct * area); |
| 751 | /* Called any time before splitting to check if it's allowed */ |
| 752 | int (*may_split)(struct vm_area_struct *area, unsigned long addr); |
| 753 | int (*mremap)(struct vm_area_struct *area); |
| 754 | /* |
| 755 | * Called by mprotect() to make driver-specific permission |
| 756 | * checks before mprotect() is finalised. The VMA must not |
| 757 | * be modified. Returns 0 if mprotect() can proceed. |
| 758 | */ |
| 759 | int (*mprotect)(struct vm_area_struct *vma, unsigned long start, |
| 760 | unsigned long end, unsigned long newflags); |
| 761 | vm_fault_t (*fault)(struct vm_fault *vmf); |
| 762 | vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); |
| 763 | vm_fault_t (*map_pages)(struct vm_fault *vmf, |
| 764 | pgoff_t start_pgoff, pgoff_t end_pgoff); |
| 765 | unsigned long (*pagesize)(struct vm_area_struct * area); |
| 766 | |
| 767 | /* notification that a previously read-only page is about to become |
| 768 | * writable; if an error is returned it will cause a SIGBUS */ |
| 769 | vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); |
| 770 | |
| 771 | /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ |
| 772 | vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); |
| 773 | |
| 774 | /* called by access_process_vm when get_user_pages() fails, typically |
| 775 | * for use by special VMAs. See also generic_access_phys() for a generic |
| 776 | * implementation useful for any iomem mapping. |
| 777 | */ |
| 778 | int (*access)(struct vm_area_struct *vma, unsigned long addr, |
| 779 | void *buf, int len, int write); |
| 780 | |
| 781 | /* Called by the /proc/PID/maps code to ask the vma whether it |
| 782 | * has a special name. Returning non-NULL will also cause this |
| 783 | * vma to be dumped unconditionally. */ |
| 784 | const char *(*name)(struct vm_area_struct *vma); |
| 785 | |
| 786 | #ifdef CONFIG_NUMA |
| 787 | /* |
| 788 | * set_policy() op must add a reference to any non-NULL @new mempolicy |
| 789 | * to hold the policy upon return. Caller should pass NULL @new to |
| 790 | * remove a policy and fall back to surrounding context--i.e. do not |
| 791 | * install a MPOL_DEFAULT policy, nor the task or system default |
| 792 | * mempolicy. |
| 793 | */ |
| 794 | int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); |
| 795 | |
| 796 | /* |
| 797 | * get_policy() op must add reference [mpol_get()] to any policy at |
| 798 | * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure |
| 799 | * in mm/mempolicy.c will do this automatically. |
| 800 | * get_policy() must NOT add a ref if the policy at (vma,addr) is not |
| 801 | * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. |
| 802 | * If no [shared/vma] mempolicy exists at the addr, get_policy() op |
| 803 | * must return NULL--i.e., do not "fallback" to task or system default |
| 804 | * policy. |
| 805 | */ |
| 806 | struct mempolicy *(*get_policy)(struct vm_area_struct *vma, |
| 807 | unsigned long addr, pgoff_t *ilx); |
| 808 | #endif |
| 809 | #ifdef CONFIG_FIND_NORMAL_PAGE |
| 810 | /* |
| 811 | * Called by vm_normal_page() for special PTEs in @vma at @addr. This |
| 812 | * allows for returning a "normal" page from vm_normal_page() even |
| 813 | * though the PTE indicates that the "struct page" either does not exist |
| 814 | * or should not be touched: "special". |
| 815 | * |
| 816 | * Do not add new users: this really only works when a "normal" page |
| 817 | * was mapped, but then the PTE got changed to something weird (+ |
| 818 | * marked special) that would not make pte_pfn() identify the originally |
| 819 | * inserted page. |
| 820 | */ |
| 821 | struct page *(*find_normal_page)(struct vm_area_struct *vma, |
| 822 | unsigned long addr); |
| 823 | #endif /* CONFIG_FIND_NORMAL_PAGE */ |
| 824 | }; |
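/*
 * Illustrative example of a minimal driver vm_operations_struct; the names
 * my_vm_fault() and my_lookup_page() are hypothetical:
 *
 *	static vm_fault_t my_vm_fault(struct vm_fault *vmf)
 *	{
 *		struct page *page = my_lookup_page(vmf->vma, vmf->pgoff);
 *
 *		if (!page)
 *			return VM_FAULT_SIGBUS;
 *		get_page(page);
 *		vmf->page = page;
 *		return 0;
 *	}
 *
 *	static const struct vm_operations_struct my_vm_ops = {
 *		.fault = my_vm_fault,
 *	};
 */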
| 825 | |
| 826 | #ifdef CONFIG_NUMA_BALANCING |
| 827 | static inline void vma_numab_state_init(struct vm_area_struct *vma) |
| 828 | { |
| 829 | vma->numab_state = NULL; |
| 830 | } |
| 831 | static inline void vma_numab_state_free(struct vm_area_struct *vma) |
| 832 | { |
| 833 | kfree(vma->numab_state); |
| 834 | } |
| 835 | #else |
| 836 | static inline void vma_numab_state_init(struct vm_area_struct *vma) {} |
| 837 | static inline void vma_numab_state_free(struct vm_area_struct *vma) {} |
| 838 | #endif /* CONFIG_NUMA_BALANCING */ |
| 839 | |
| 840 | /* |
| 841 | * These must be here rather than mmap_lock.h as they depend on the vm_fault type, |
| 842 | * declared in this header. |
| 843 | */ |
| 844 | #ifdef CONFIG_PER_VMA_LOCK |
| 845 | static inline void release_fault_lock(struct vm_fault *vmf) |
| 846 | { |
| 847 | if (vmf->flags & FAULT_FLAG_VMA_LOCK) |
| 848 | vma_end_read(vmf->vma); |
| 849 | else |
| 850 | mmap_read_unlock(vmf->vma->vm_mm); |
| 851 | } |
| 852 | |
| 853 | static inline void assert_fault_locked(const struct vm_fault *vmf) |
| 854 | { |
| 855 | if (vmf->flags & FAULT_FLAG_VMA_LOCK) |
| 856 | vma_assert_locked(vmf->vma); |
| 857 | else |
| 858 | mmap_assert_locked(vmf->vma->vm_mm); |
| 859 | } |
| 860 | #else |
| 861 | static inline void release_fault_lock(struct vm_fault *vmf) |
| 862 | { |
| 863 | mmap_read_unlock(vmf->vma->vm_mm); |
| 864 | } |
| 865 | |
| 866 | static inline void assert_fault_locked(const struct vm_fault *vmf) |
| 867 | { |
| 868 | mmap_assert_locked(vmf->vma->vm_mm); |
| 869 | } |
| 870 | #endif /* CONFIG_PER_VMA_LOCK */ |
| 871 | |
| 872 | static inline bool mm_flags_test(int flag, const struct mm_struct *mm) |
| 873 | { |
| 874 | return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); |
| 875 | } |
| 876 | |
| 877 | static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm) |
| 878 | { |
| 879 | return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); |
| 880 | } |
| 881 | |
| 882 | static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm) |
| 883 | { |
| 884 | return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); |
| 885 | } |
| 886 | |
| 887 | static inline void mm_flags_set(int flag, struct mm_struct *mm) |
| 888 | { |
| 889 | set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); |
| 890 | } |
| 891 | |
| 892 | static inline void mm_flags_clear(int flag, struct mm_struct *mm) |
| 893 | { |
| 894 | clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); |
| 895 | } |
| 896 | |
| 897 | static inline void mm_flags_clear_all(struct mm_struct *mm) |
| 898 | { |
| 899 | bitmap_zero(ACCESS_PRIVATE(&mm->flags, __mm_flags), NUM_MM_FLAG_BITS); |
| 900 | } |
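/*
 * Illustrative example (note the flag-first argument order); MMF_HAS_PINNED
 * is one of the MMF_* mm flag constants defined elsewhere:
 *
 *	if (!mm_flags_test(MMF_HAS_PINNED, mm))
 *		mm_flags_set(MMF_HAS_PINNED, mm);
 */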
| 901 | |
| 902 | extern const struct vm_operations_struct vma_dummy_vm_ops; |
| 903 | |
| 904 | static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) |
| 905 | { |
| 906 | memset(vma, 0, sizeof(*vma)); |
| 907 | vma->vm_mm = mm; |
| 908 | vma->vm_ops = &vma_dummy_vm_ops; |
| 909 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
| 910 | vma_lock_init(vma, false); |
| 911 | } |
| 912 | |
| 913 | /* Use when VMA is not part of the VMA tree and needs no locking */ |
| 914 | static inline void vm_flags_init(struct vm_area_struct *vma, |
| 915 | vm_flags_t flags) |
| 916 | { |
| 917 | VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); |
| 918 | vma_flags_clear_all(&vma->flags); |
| 919 | vma_flags_overwrite_word(&vma->flags, flags); |
| 920 | } |
| 921 | |
| 922 | /* |
| 923 | * Use when VMA is part of the VMA tree and modifications need coordination. |
| 924 | * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and |
| 925 | * it should be locked explicitly beforehand. |
| 926 | */ |
| 927 | static inline void vm_flags_reset(struct vm_area_struct *vma, |
| 928 | vm_flags_t flags) |
| 929 | { |
| 930 | VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); |
| 931 | vma_assert_write_locked(vma); |
| 932 | vm_flags_init(vma, flags); |
| 933 | } |
| 934 | |
| 935 | static inline void vm_flags_reset_once(struct vm_area_struct *vma, |
| 936 | vm_flags_t flags) |
| 937 | { |
| 938 | vma_assert_write_locked(vma); |
| 939 | /* |
| 940 | * If VMA flags exist beyond the first system word, also clear these. It |
| 941 | * is assumed the write once behaviour is required only for the first |
| 942 | * system word. |
| 943 | */ |
| 944 | if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) { |
| 945 | unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags); |
| 946 | |
| 947 | bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG); |
| 948 | } |
| 949 | |
| 950 | vma_flags_overwrite_word_once(&vma->flags, flags); |
| 951 | } |
| 952 | |
| 953 | static inline void vm_flags_set(struct vm_area_struct *vma, |
| 954 | vm_flags_t flags) |
| 955 | { |
| 956 | vma_start_write(vma); |
| 957 | vma_flags_set_word(&vma->flags, flags); |
| 958 | } |
| 959 | |
| 960 | static inline void vm_flags_clear(struct vm_area_struct *vma, |
| 961 | vm_flags_t flags) |
| 962 | { |
| 963 | VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); |
| 964 | vma_start_write(vma); |
| 965 | vma_flags_clear_word(&vma->flags, flags); |
| 966 | } |
| 967 | |
| 968 | /* |
| 969 | * Use only if VMA is not part of the VMA tree or has no other users and |
| 970 | * therefore needs no locking. |
| 971 | */ |
| 972 | static inline void __vm_flags_mod(struct vm_area_struct *vma, |
| 973 | vm_flags_t set, vm_flags_t clear) |
| 974 | { |
| 975 | vm_flags_init(vma, (vma->vm_flags | set) & ~clear); |
| 976 | } |
| 977 | |
| 978 | /* |
| 979 | * Use only when the order of set/clear operations is unimportant, otherwise |
| 980 | * use vm_flags_{set|clear} explicitly. |
| 981 | */ |
| 982 | static inline void vm_flags_mod(struct vm_area_struct *vma, |
| 983 | vm_flags_t set, vm_flags_t clear) |
| 984 | { |
| 985 | vma_start_write(vma); |
| 986 | __vm_flags_mod(vma, set, clear); |
| 987 | } |
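/*
 * Illustrative example: updating flags of a VMA that is in the VMA tree.
 * vm_flags_set()/vm_flags_clear() write-lock the VMA internally, so the
 * caller must hold the mmap lock for writing:
 *
 *	mmap_assert_write_locked(vma->vm_mm);
 *	vm_flags_set(vma, VM_LOCKED);		(mlock-style update)
 *	...
 *	vm_flags_clear(vma, VM_LOCKED_MASK);	(munlock-style update)
 */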
| 988 | |
| 989 | static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma, |
| 990 | vma_flag_t bit) |
| 991 | { |
| 992 | const vm_flags_t mask = BIT((__force int)bit); |
| 993 | |
| 994 | /* Only specific flags are permitted */ |
| 995 | if (WARN_ON_ONCE(!(mask & VM_ATOMIC_SET_ALLOWED))) |
| 996 | return false; |
| 997 | |
| 998 | return true; |
| 999 | } |
| 1000 | |
| 1001 | /* |
| 1002 | * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific |
| 1003 | * valid flags are allowed to do this. |
| 1004 | */ |
| 1005 | static inline void vma_flag_set_atomic(struct vm_area_struct *vma, |
| 1006 | vma_flag_t bit) |
| 1007 | { |
| 1008 | unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags); |
| 1009 | |
| 1010 | /* mmap read lock/VMA read lock must be held. */ |
| 1011 | if (!rwsem_is_locked(&vma->vm_mm->mmap_lock)) |
| 1012 | vma_assert_locked(vma); |
| 1013 | |
| 1014 | if (__vma_flag_atomic_valid(vma, bit)) |
| 1015 | set_bit((__force int)bit, bitmap); |
| 1016 | } |
| 1017 | |
| 1018 | /* |
| 1019 | * Test for VMA flag atomically. Requires no locks. Only specific valid flags |
| 1020 | * are allowed to do this. |
| 1021 | * |
| 1022 | * This is necessarily racy, so callers must ensure that serialisation is |
| 1023 | * achieved through some other means, or that races are permissible. |
| 1024 | */ |
| 1025 | static inline bool vma_flag_test_atomic(struct vm_area_struct *vma, |
| 1026 | vma_flag_t bit) |
| 1027 | { |
| 1028 | if (__vma_flag_atomic_valid(vma, bit)) |
| 1029 | return test_bit((__force int)bit, &vma->vm_flags); |
| 1030 | |
| 1031 | return false; |
| 1032 | } |
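/*
 * Illustrative example: mark a VMA as possibly containing guard regions while
 * holding only the mmap/VMA read lock, and test it again later:
 *
 *	vma_flag_set_atomic(vma, VMA_MAYBE_GUARD_BIT);
 *	...
 *	if (vma_flag_test_atomic(vma, VMA_MAYBE_GUARD_BIT))
 *		(handle possible guard regions)
 */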
| 1033 | |
| 1034 | static inline void vma_set_anonymous(struct vm_area_struct *vma) |
| 1035 | { |
| 1036 | vma->vm_ops = NULL; |
| 1037 | } |
| 1038 | |
| 1039 | static inline bool vma_is_anonymous(struct vm_area_struct *vma) |
| 1040 | { |
| 1041 | return !vma->vm_ops; |
| 1042 | } |
| 1043 | |
| 1044 | /* |
| 1045 | * Indicate if the VMA is a heap for the given task; for |
| 1046 | * /proc/PID/maps that is the heap of the main task. |
| 1047 | */ |
| 1048 | static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) |
| 1049 | { |
| 1050 | return vma->vm_start < vma->vm_mm->brk && |
| 1051 | vma->vm_end > vma->vm_mm->start_brk; |
| 1052 | } |
| 1053 | |
| 1054 | /* |
| 1055 | * Indicate if the VMA is a stack for the given task; for |
| 1056 | * /proc/PID/maps that is the stack of the main task. |
| 1057 | */ |
| 1058 | static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) |
| 1059 | { |
| 1060 | /* |
| 1061 | * We make no effort to guess what a given thread considers to be |
| 1062 | * its "stack". It's not even well-defined for programs written |
| 1063 | * in languages like Go. |
| 1064 | */ |
| 1065 | return vma->vm_start <= vma->vm_mm->start_stack && |
| 1066 | vma->vm_end >= vma->vm_mm->start_stack; |
| 1067 | } |
| 1068 | |
| 1069 | static inline bool vma_is_temporary_stack(const struct vm_area_struct *vma) |
| 1070 | { |
| 1071 | int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); |
| 1072 | |
| 1073 | if (!maybe_stack) |
| 1074 | return false; |
| 1075 | |
| 1076 | if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == |
| 1077 | VM_STACK_INCOMPLETE_SETUP) |
| 1078 | return true; |
| 1079 | |
| 1080 | return false; |
| 1081 | } |
| 1082 | |
| 1083 | static inline bool vma_is_foreign(const struct vm_area_struct *vma) |
| 1084 | { |
| 1085 | if (!current->mm) |
| 1086 | return true; |
| 1087 | |
| 1088 | if (current->mm != vma->vm_mm) |
| 1089 | return true; |
| 1090 | |
| 1091 | return false; |
| 1092 | } |
| 1093 | |
| 1094 | static inline bool vma_is_accessible(const struct vm_area_struct *vma) |
| 1095 | { |
| 1096 | return vma->vm_flags & VM_ACCESS_FLAGS; |
| 1097 | } |
| 1098 | |
| 1099 | static inline bool is_shared_maywrite(vm_flags_t vm_flags) |
| 1100 | { |
| 1101 | return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == |
| 1102 | (VM_SHARED | VM_MAYWRITE); |
| 1103 | } |
| 1104 | |
| 1105 | static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma) |
| 1106 | { |
| 1107 | return is_shared_maywrite(vma->vm_flags); |
| 1108 | } |
| 1109 | |
| 1110 | static inline |
| 1111 | struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) |
| 1112 | { |
| 1113 | return mas_find(&vmi->mas, max - 1); |
| 1114 | } |
| 1115 | |
| 1116 | static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) |
| 1117 | { |
| 1118 | /* |
| 1119 | * Uses mas_find() to get the first VMA when the iterator starts. |
| 1120 | * Calling mas_next() could skip the first entry. |
| 1121 | */ |
| 1122 | return mas_find(&vmi->mas, ULONG_MAX); |
| 1123 | } |
| 1124 | |
| 1125 | static inline |
| 1126 | struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) |
| 1127 | { |
| 1128 | return mas_next_range(&vmi->mas, ULONG_MAX); |
| 1129 | } |
| 1130 | |
| 1131 | |
| 1132 | static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) |
| 1133 | { |
| 1134 | return mas_prev(&vmi->mas, 0); |
| 1135 | } |
| 1136 | |
| 1137 | static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, |
| 1138 | unsigned long start, unsigned long end, gfp_t gfp) |
| 1139 | { |
| 1140 | __mas_set_range(&vmi->mas, start, end - 1); |
| 1141 | mas_store_gfp(&vmi->mas, NULL, gfp); |
| 1142 | if (unlikely(mas_is_err(&vmi->mas))) |
| 1143 | return -ENOMEM; |
| 1144 | |
| 1145 | return 0; |
| 1146 | } |
| 1147 | |
| 1148 | /* Free any unused preallocations */ |
| 1149 | static inline void vma_iter_free(struct vma_iterator *vmi) |
| 1150 | { |
| 1151 | mas_destroy(&vmi->mas); |
| 1152 | } |
| 1153 | |
| 1154 | static inline int vma_iter_bulk_store(struct vma_iterator *vmi, |
| 1155 | struct vm_area_struct *vma) |
| 1156 | { |
| 1157 | vmi->mas.index = vma->vm_start; |
| 1158 | vmi->mas.last = vma->vm_end - 1; |
| 1159 | mas_store(&vmi->mas, vma); |
| 1160 | if (unlikely(mas_is_err(&vmi->mas))) |
| 1161 | return -ENOMEM; |
| 1162 | |
| 1163 | vma_mark_attached(vma); |
| 1164 | return 0; |
| 1165 | } |
| 1166 | |
| 1167 | static inline void vma_iter_invalidate(struct vma_iterator *vmi) |
| 1168 | { |
| 1169 | mas_pause(&vmi->mas); |
| 1170 | } |
| 1171 | |
| 1172 | static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) |
| 1173 | { |
| 1174 | mas_set(&vmi->mas, addr); |
| 1175 | } |
| 1176 | |
| 1177 | #define for_each_vma(__vmi, __vma) \ |
| 1178 | while (((__vma) = vma_next(&(__vmi))) != NULL) |
| 1179 | |
| 1180 | /* The MM code likes to work with exclusive end addresses */ |
| 1181 | #define for_each_vma_range(__vmi, __vma, __end) \ |
| 1182 | while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) |
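/*
 * Illustrative example of walking every VMA in an mm with the VMA iterator
 * (VMA_ITERATOR is declared alongside struct vma_iterator):
 *
 *	struct vm_area_struct *vma;
 *	VMA_ITERATOR(vmi, mm, 0);
 *
 *	mmap_read_lock(mm);
 *	for_each_vma(vmi, vma) {
 *		(operate on vma)
 *	}
 *	mmap_read_unlock(mm);
 */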
| 1183 | |
| 1184 | #ifdef CONFIG_SHMEM |
| 1185 | /* |
| 1186 | * vma_is_shmem() is not inline because it is used only by slow |
| 1187 | * paths in userfault. |
| 1188 | */ |
| 1189 | bool vma_is_shmem(const struct vm_area_struct *vma); |
| 1190 | bool vma_is_anon_shmem(const struct vm_area_struct *vma); |
| 1191 | #else |
| 1192 | static inline bool vma_is_shmem(const struct vm_area_struct *vma) { return false; } |
| 1193 | static inline bool vma_is_anon_shmem(const struct vm_area_struct *vma) { return false; } |
| 1194 | #endif |
| 1195 | |
| 1196 | int vma_is_stack_for_current(const struct vm_area_struct *vma); |
| 1197 | |
| 1198 | /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ |
| 1199 | #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } |
| 1200 | |
| 1201 | struct mmu_gather; |
| 1202 | struct inode; |
| 1203 | |
| 1204 | extern void prep_compound_page(struct page *page, unsigned int order); |
| 1205 | |
| 1206 | static inline unsigned int folio_large_order(const struct folio *folio) |
| 1207 | { |
| 1208 | return folio->_flags_1 & 0xff; |
| 1209 | } |
| 1210 | |
| 1211 | #ifdef NR_PAGES_IN_LARGE_FOLIO |
| 1212 | static inline unsigned long folio_large_nr_pages(const struct folio *folio) |
| 1213 | { |
| 1214 | return folio->_nr_pages; |
| 1215 | } |
| 1216 | #else |
| 1217 | static inline unsigned long folio_large_nr_pages(const struct folio *folio) |
| 1218 | { |
| 1219 | return 1L << folio_large_order(folio); |
| 1220 | } |
| 1221 | #endif |
| 1222 | |
| 1223 | /* |
| 1224 | * compound_order() can be called without holding a reference, which means |
| 1225 | * that niceties like page_folio() don't work. These callers should be |
| 1226 | * prepared to handle wild return values. For example, PG_head may be |
| 1227 | * set before the order is initialised, or this may be a tail page. |
| 1228 | * See compaction.c for some good examples. |
| 1229 | */ |
| 1230 | static inline unsigned int compound_order(const struct page *page) |
| 1231 | { |
| 1232 | const struct folio *folio = (struct folio *)page; |
| 1233 | |
| 1234 | if (!test_bit(PG_head, &folio->flags.f)) |
| 1235 | return 0; |
| 1236 | return folio_large_order(folio); |
| 1237 | } |
| 1238 | |
| 1239 | /** |
| 1240 | * folio_order - The allocation order of a folio. |
| 1241 | * @folio: The folio. |
| 1242 | * |
| 1243 | * A folio is composed of 2^order pages. See get_order() for the definition |
| 1244 | * of order. |
| 1245 | * |
| 1246 | * Return: The order of the folio. |
| 1247 | */ |
| 1248 | static inline unsigned int folio_order(const struct folio *folio) |
| 1249 | { |
| 1250 | if (!folio_test_large(folio)) |
| 1251 | return 0; |
| 1252 | return folio_large_order(folio); |
| 1253 | } |
| 1254 | |
| 1255 | /** |
| 1256 | * folio_reset_order - Reset the folio order and derived _nr_pages |
| 1257 | * @folio: The folio. |
| 1258 | * |
| 1259 | * Reset the order and derived _nr_pages to 0. Must only be used in the |
| 1260 | * process of splitting large folios. |
| 1261 | */ |
| 1262 | static inline void folio_reset_order(struct folio *folio) |
| 1263 | { |
| 1264 | if (WARN_ON_ONCE(!folio_test_large(folio))) |
| 1265 | return; |
| 1266 | folio->_flags_1 &= ~0xffUL; |
| 1267 | #ifdef NR_PAGES_IN_LARGE_FOLIO |
| 1268 | folio->_nr_pages = 0; |
| 1269 | #endif |
| 1270 | } |
| 1271 | |
| 1272 | #include <linux/huge_mm.h> |
| 1273 | |
| 1274 | /* |
| 1275 | * Methods to modify the page usage count. |
| 1276 | * |
| 1277 | * What counts for a page usage: |
| 1278 | * - cache mapping (page->mapping) |
| 1279 | * - private data (page->private) |
| 1280 | * - page mapped in a task's page tables, each mapping |
| 1281 | * is counted separately |
| 1282 | * |
| 1283 | * Also, many kernel routines increase the page count before a critical |
| 1284 | * routine so they can be sure the page doesn't go away from under them. |
| 1285 | */ |
| 1286 | |
| 1287 | /* |
| 1288 | * Drop a ref, return true if the refcount fell to zero (the page has no users) |
| 1289 | */ |
| 1290 | static inline int put_page_testzero(struct page *page) |
| 1291 | { |
| 1292 | VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); |
| 1293 | return page_ref_dec_and_test(page); |
| 1294 | } |
| 1295 | |
| 1296 | static inline int folio_put_testzero(struct folio *folio) |
| 1297 | { |
| 1298 | return put_page_testzero(&folio->page); |
| 1299 | } |
| 1300 | |
| 1301 | /* |
| 1302 | * Try to grab a ref unless the page has a refcount of zero, return false if |
| 1303 | * that is the case. |
| 1304 | * This can be called when MMU is off so it must not access |
| 1305 | * any of the virtual mappings. |
| 1306 | */ |
| 1307 | static inline bool get_page_unless_zero(struct page *page) |
| 1308 | { |
| 1309 | return page_ref_add_unless(page, 1, 0); |
| 1310 | } |
| 1311 | |
| 1312 | static inline struct folio *folio_get_nontail_page(struct page *page) |
| 1313 | { |
| 1314 | if (unlikely(!get_page_unless_zero(page))) |
| 1315 | return NULL; |
| 1316 | return (struct folio *)page; |
| 1317 | } |
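/*
 * Illustrative sketch of the usual speculative-reference pattern built on
 * get_page_unless_zero(); the revalidation step depends on the caller:
 *
 *	if (!get_page_unless_zero(page))
 *		goto retry;	(page was already being freed)
 *	(revalidate that the page is still the one we expect)
 *	...
 *	put_page(page);
 */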
| 1318 | |
| 1319 | extern int page_is_ram(unsigned long pfn); |
| 1320 | |
| 1321 | enum { |
| 1322 | REGION_INTERSECTS, |
| 1323 | REGION_DISJOINT, |
| 1324 | REGION_MIXED, |
| 1325 | }; |
| 1326 | |
| 1327 | int region_intersects(resource_size_t offset, size_t size, unsigned long flags, |
| 1328 | unsigned long desc); |
| 1329 | |
| 1330 | /* Support for virtually mapped pages */ |
| 1331 | struct page *vmalloc_to_page(const void *addr); |
| 1332 | unsigned long vmalloc_to_pfn(const void *addr); |
| 1333 | |
| 1334 | /* |
| 1335 | * Determine if an address is within the vmalloc range |
| 1336 | * |
| 1337 | * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there |
| 1338 | * is no special casing required. |
| 1339 | */ |
| 1340 | #ifdef CONFIG_MMU |
| 1341 | extern bool is_vmalloc_addr(const void *x); |
| 1342 | extern int is_vmalloc_or_module_addr(const void *x); |
| 1343 | #else |
| 1344 | static inline bool is_vmalloc_addr(const void *x) |
| 1345 | { |
| 1346 | return false; |
| 1347 | } |
| 1348 | static inline int is_vmalloc_or_module_addr(const void *x) |
| 1349 | { |
| 1350 | return 0; |
| 1351 | } |
| 1352 | #endif |
| 1353 | |
| 1354 | /* |
| 1355 | * How many times the entire folio is mapped as a single unit (eg by a |
| 1356 | * PMD or PUD entry). This is probably not what you want, except for |
| 1357 | * debugging purposes or implementation of other core folio_*() primitives. |
| 1358 | */ |
| 1359 | static inline int folio_entire_mapcount(const struct folio *folio) |
| 1360 | { |
| 1361 | VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); |
| 1362 | if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == 1)) |
| 1363 | return 0; |
| 1364 | return atomic_read(&folio->_entire_mapcount) + 1; |
| 1365 | } |
| 1366 | |
| 1367 | static inline int folio_large_mapcount(const struct folio *folio) |
| 1368 | { |
| 1369 | VM_WARN_ON_FOLIO(!folio_test_large(folio), folio); |
| 1370 | return atomic_read(&folio->_large_mapcount) + 1; |
| 1371 | } |
| 1372 | |
| 1373 | /** |
| 1374 | * folio_mapcount() - Number of mappings of this folio. |
| 1375 | * @folio: The folio. |
| 1376 | * |
| 1377 | * The folio mapcount corresponds to the number of present user page table |
| 1378 | * entries that reference any part of a folio. Each such present user page |
| 1379 | * table entry must be paired with exactly one folio reference. |
| 1380 | * |
| 1381 | * For ordinary folios, each user page table entry (PTE/PMD/PUD/...) counts |
| 1382 | * exactly once. |
| 1383 | * |
| 1384 | * For hugetlb folios, each abstracted "hugetlb" user page table entry that |
| 1385 | * references the entire folio counts exactly once, even when such special |
| 1386 | * page table entries are comprised of multiple ordinary page table entries. |
| 1387 | * |
| 1388 | * Will report 0 for pages which cannot be mapped into userspace, such as |
| 1389 | * slab, page tables and similar. |
| 1390 | * |
| 1391 | * Return: The number of times this folio is mapped. |
| 1392 | */ |
| 1393 | static inline int folio_mapcount(const struct folio *folio) |
| 1394 | { |
| 1395 | int mapcount; |
| 1396 | |
| 1397 | if (likely(!folio_test_large(folio))) { |
| 1398 | mapcount = atomic_read(v: &folio->_mapcount) + 1; |
| 1399 | if (page_mapcount_is_type(mapcount)) |
| 1400 | mapcount = 0; |
| 1401 | return mapcount; |
| 1402 | } |
| 1403 | return folio_large_mapcount(folio); |
| 1404 | } |
| 1405 | |
| 1406 | /** |
| 1407 | * folio_mapped - Is this folio mapped into userspace? |
| 1408 | * @folio: The folio. |
| 1409 | * |
| 1410 | * Return: True if any page in this folio is referenced by user page tables. |
| 1411 | */ |
| 1412 | static inline bool folio_mapped(const struct folio *folio) |
| 1413 | { |
| 1414 | return folio_mapcount(folio) >= 1; |
| 1415 | } |
| 1416 | |
| 1417 | /* |
| 1418 | * Return true if this page is mapped into pagetables.
| 1419 | * For a compound page, this returns true if any sub-page of the compound page is
| 1420 | * mapped, even if this particular sub-page is not itself mapped by any PTE or PMD.
| 1421 | */ |
| 1422 | static inline bool page_mapped(const struct page *page) |
| 1423 | { |
| 1424 | return folio_mapped(page_folio(page)); |
| 1425 | } |
| 1426 | |
| 1427 | static inline struct page *virt_to_head_page(const void *x) |
| 1428 | { |
| 1429 | struct page *page = virt_to_page(x); |
| 1430 | |
| 1431 | return compound_head(page); |
| 1432 | } |
| 1433 | |
| 1434 | static inline struct folio *virt_to_folio(const void *x) |
| 1435 | { |
| 1436 | struct page *page = virt_to_page(x); |
| 1437 | |
| 1438 | return page_folio(page); |
| 1439 | } |
| 1440 | |
| 1441 | void __folio_put(struct folio *folio); |
| 1442 | |
| 1443 | void split_page(struct page *page, unsigned int order); |
| 1444 | void folio_copy(struct folio *dst, struct folio *src); |
| 1445 | int folio_mc_copy(struct folio *dst, struct folio *src); |
| 1446 | |
| 1447 | unsigned long nr_free_buffer_pages(void); |
| 1448 | |
| 1449 | /* Returns the number of bytes in this potentially compound page. */ |
| 1450 | static inline unsigned long page_size(const struct page *page) |
| 1451 | { |
| 1452 | return PAGE_SIZE << compound_order(page); |
| 1453 | } |
| 1454 | |
| 1455 | /* Returns the number of bits needed for the number of bytes in a page */ |
| 1456 | static inline unsigned int page_shift(struct page *page) |
| 1457 | { |
| 1458 | return PAGE_SHIFT + compound_order(page); |
| 1459 | } |
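/*
 * Informational sketch with a hypothetical helper name: page_size() and
 * page_shift() are both derived from compound_order(), so for an order-2
 * compound page with 4KiB base pages, page_size() is 16KiB and
 * page_shift() is 14.
 */
static inline void page_geometry_example(struct page *page)
{
	VM_WARN_ON_ONCE(page_size(page) != (1UL << page_shift(page)));
}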
| 1460 | |
| 1461 | /** |
| 1462 | * thp_order - Order of a transparent huge page. |
| 1463 | * @page: Head page of a transparent huge page. |
| 1464 | */ |
| 1465 | static inline unsigned int thp_order(struct page *page) |
| 1466 | { |
| 1467 | VM_BUG_ON_PGFLAGS(PageTail(page), page); |
| 1468 | return compound_order(page); |
| 1469 | } |
| 1470 | |
| 1471 | /** |
| 1472 | * thp_size - Size of a transparent huge page. |
| 1473 | * @page: Head page of a transparent huge page. |
| 1474 | * |
| 1475 | * Return: Number of bytes in this page. |
| 1476 | */ |
| 1477 | static inline unsigned long thp_size(struct page *page) |
| 1478 | { |
| 1479 | return PAGE_SIZE << thp_order(page); |
| 1480 | } |
| 1481 | |
| 1482 | #ifdef CONFIG_MMU |
| 1483 | /* |
| 1484 | * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when |
| 1485 | * servicing faults for write access. In the normal case, we always want
| 1486 | * pte_mkwrite. But get_user_pages can cause write faults for mappings |
| 1487 | * that do not have writing enabled, when used by access_process_vm. |
| 1488 | */ |
| 1489 | static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) |
| 1490 | { |
| 1491 | if (likely(vma->vm_flags & VM_WRITE)) |
| 1492 | pte = pte_mkwrite(pte, vma); |
| 1493 | return pte; |
| 1494 | } |
| 1495 | |
| 1496 | vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *page); |
| 1497 | void set_pte_range(struct vm_fault *vmf, struct folio *folio, |
| 1498 | struct page *page, unsigned int nr, unsigned long addr); |
| 1499 | |
| 1500 | vm_fault_t finish_fault(struct vm_fault *vmf); |
| 1501 | #endif |
| 1502 | |
| 1503 | /* |
| 1504 | * Multiple processes may "see" the same page. E.g. for untouched |
| 1505 | * mappings of /dev/null, all processes see the same page full of |
| 1506 | * zeroes, and text pages of executables and shared libraries have |
| 1507 | * only one copy in memory, at most, normally. |
| 1508 | * |
| 1509 | * For the non-reserved pages, page_count(page) denotes a reference count. |
| 1510 | * page_count() == 0 means the page is free. page->lru is then used for |
| 1511 | * freelist management in the buddy allocator. |
| 1512 | * page_count() > 0 means the page has been allocated. |
| 1513 | * |
| 1514 | * Pages are allocated by the slab allocator in order to provide memory |
| 1515 | * to kmalloc and kmem_cache_alloc. In this case, the management of the |
| 1516 | * page, and the fields in 'struct page' are the responsibility of mm/slab.c |
| 1517 | * unless a particular usage is carefully commented. (the responsibility of |
| 1518 | * freeing the kmalloc memory is the caller's, of course). |
| 1519 | * |
| 1520 | * A page may be used by anyone else who does a __get_free_page(). |
| 1521 | * In this case, page_count still tracks the references, and should only |
| 1522 | * be used through the normal accessor functions. The top bits of page->flags |
| 1523 | * and page->virtual store page management information, but all other fields |
| 1524 | * are unused and could be used privately, carefully. The management of this |
| 1525 | * page is the responsibility of the one who allocated it, and those who have |
| 1526 | * subsequently been given references to it. |
| 1527 | * |
| 1528 | * The other pages (we may call them "pagecache pages") are completely |
| 1529 | * managed by the Linux memory manager: I/O, buffers, swapping etc. |
| 1530 | * The following discussion applies only to them. |
| 1531 | * |
| 1532 | * A pagecache page contains an opaque `private' member, which belongs to the |
| 1533 | * page's address_space. Usually, this is the address of a circular list of |
| 1534 | * the page's disk buffers. PG_private must be set to tell the VM to call |
| 1535 | * into the filesystem to release these pages. |
| 1536 | * |
| 1537 | * A folio may belong to an inode's memory mapping. In this case, |
| 1538 | * folio->mapping points to the inode, and folio->index is the file |
| 1539 | * offset of the folio, in units of PAGE_SIZE. |
| 1540 | * |
| 1541 | * If pagecache pages are not associated with an inode, they are said to be |
| 1542 | * anonymous pages. These may become associated with the swapcache, and in that |
| 1543 | * case PG_swapcache is set, and page->private is an offset into the swapcache. |
| 1544 | * |
| 1545 | * In either case (swapcache or inode backed), the pagecache itself holds one |
| 1546 | * reference to the page. Setting PG_private should also increment the |
| 1547 | * refcount. Each user mapping also has a reference to the page.
| 1548 | * |
| 1549 | * The pagecache pages are stored in a per-mapping radix tree, which is |
| 1550 | * rooted at mapping->i_pages, and indexed by offset. |
| 1551 | * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space |
| 1552 | * lists, we instead now tag pages as dirty/writeback in the radix tree. |
| 1553 | * |
| 1554 | * All pagecache pages may be subject to I/O: |
| 1555 | * - inode pages may need to be read from disk, |
| 1556 | * - inode pages which have been modified and are MAP_SHARED may need |
| 1557 | * to be written back to the inode on disk, |
| 1558 | * - anonymous pages (including MAP_PRIVATE file mappings) which have been |
| 1559 | * modified may need to be swapped out to swap space and (later) to be read |
| 1560 | * back into memory. |
| 1561 | */ |
| 1562 | |
| 1563 | /* 127: arbitrary random number, small enough to assemble well */ |
| 1564 | #define folio_ref_zero_or_close_to_overflow(folio) \ |
| 1565 | ((unsigned int) folio_ref_count(folio) + 127u <= 127u) |
| 1566 | |
| 1567 | /** |
| 1568 | * folio_get - Increment the reference count on a folio. |
| 1569 | * @folio: The folio. |
| 1570 | * |
| 1571 | * Context: May be called in any context, as long as you know that |
| 1572 | * you have a refcount on the folio. If you do not already have one, |
| 1573 | * folio_try_get() may be the right interface for you to use. |
| 1574 | */ |
| 1575 | static inline void folio_get(struct folio *folio) |
| 1576 | { |
| 1577 | VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); |
| 1578 | folio_ref_inc(folio); |
| 1579 | } |
| 1580 | |
| 1581 | static inline void get_page(struct page *page) |
| 1582 | { |
| 1583 | struct folio *folio = page_folio(page); |
| 1584 | if (WARN_ON_ONCE(folio_test_slab(folio))) |
| 1585 | return; |
| 1586 | if (WARN_ON_ONCE(folio_test_large_kmalloc(folio))) |
| 1587 | return; |
| 1588 | folio_get(folio); |
| 1589 | } |
| 1590 | |
| 1591 | static inline __must_check bool try_get_page(struct page *page) |
| 1592 | { |
| 1593 | page = compound_head(page); |
| 1594 | if (WARN_ON_ONCE(page_ref_count(page) <= 0)) |
| 1595 | return false; |
| 1596 | page_ref_inc(page); |
| 1597 | return true; |
| 1598 | } |
| 1599 | |
| 1600 | /** |
| 1601 | * folio_put - Decrement the reference count on a folio. |
| 1602 | * @folio: The folio. |
| 1603 | * |
| 1604 | * If the folio's reference count reaches zero, the memory will be |
| 1605 | * released back to the page allocator and may be used by another |
| 1606 | * allocation immediately. Do not access the memory or the struct folio |
| 1607 | * after calling folio_put() unless you can be sure that it wasn't the |
| 1608 | * last reference. |
| 1609 | * |
| 1610 | * Context: May be called in process or interrupt context, but not in NMI |
| 1611 | * context. May be called while holding a spinlock. |
| 1612 | */ |
| 1613 | static inline void folio_put(struct folio *folio) |
| 1614 | { |
| 1615 | if (folio_put_testzero(folio)) |
| 1616 | __folio_put(folio); |
| 1617 | } |
| 1618 | |
| 1619 | /** |
| 1620 | * folio_put_refs - Reduce the reference count on a folio. |
| 1621 | * @folio: The folio. |
| 1622 | * @refs: The amount to subtract from the folio's reference count. |
| 1623 | * |
| 1624 | * If the folio's reference count reaches zero, the memory will be |
| 1625 | * released back to the page allocator and may be used by another |
| 1626 | * allocation immediately. Do not access the memory or the struct folio |
| 1627 | * after calling folio_put_refs() unless you can be sure that these weren't |
| 1628 | * the last references. |
| 1629 | * |
| 1630 | * Context: May be called in process or interrupt context, but not in NMI |
| 1631 | * context. May be called while holding a spinlock. |
| 1632 | */ |
| 1633 | static inline void folio_put_refs(struct folio *folio, int refs) |
| 1634 | { |
| 1635 | if (folio_ref_sub_and_test(folio, nr: refs)) |
| 1636 | __folio_put(folio); |
| 1637 | } |
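/*
 * Illustrative sketch only (hypothetical helper): references taken with
 * folio_get() must be balanced by folio_put(); several references on the
 * same folio can be dropped in one go with folio_put_refs().
 */
static inline void folio_ref_pairing_example(struct folio *folio)
{
	folio_get(folio);
	folio_get(folio);
	/* ... use the folio ... */
	folio_put_refs(folio, 2);	/* equivalent to two folio_put() calls */
}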
| 1638 | |
| 1639 | void folios_put_refs(struct folio_batch *folios, unsigned int *refs); |
| 1640 | |
| 1641 | /* |
| 1642 | * union release_pages_arg - an array of pages or folios |
| 1643 | * |
| 1644 | * release_pages() releases a simple array of multiple pages, and |
| 1645 | * accepts various different forms of said page array: either |
| 1646 | * a regular old boring array of pages, an array of folios, or |
| 1647 | * an array of encoded page pointers. |
| 1648 | * |
| 1649 | * The transparent union syntax for this kind of "any of these |
| 1650 | * argument types" is all kinds of ugly, so look away. |
| 1651 | */ |
| 1652 | typedef union { |
| 1653 | struct page **pages; |
| 1654 | struct folio **folios; |
| 1655 | struct encoded_page **encoded_pages; |
| 1656 | } release_pages_arg __attribute__ ((__transparent_union__)); |
| 1657 | |
| 1658 | void release_pages(release_pages_arg, int nr); |
| 1659 | |
| 1660 | /** |
| 1661 | * folios_put - Decrement the reference count on an array of folios. |
| 1662 | * @folios: The folios. |
| 1663 | * |
| 1664 | * Like folio_put(), but for a batch of folios. This is more efficient |
| 1665 | * than writing the loop yourself as it will optimise the locks which need |
| 1666 | * to be taken if the folios are freed. The folios batch is returned |
| 1667 | * empty and ready to be reused for another batch; there is no need to |
| 1668 | * reinitialise it. |
| 1669 | * |
| 1670 | * Context: May be called in process or interrupt context, but not in NMI |
| 1671 | * context. May be called while holding a spinlock. |
| 1672 | */ |
| 1673 | static inline void folios_put(struct folio_batch *folios) |
| 1674 | { |
| 1675 | folios_put_refs(folios, NULL); |
| 1676 | } |
| 1677 | |
| 1678 | static inline void put_page(struct page *page) |
| 1679 | { |
| 1680 | struct folio *folio = page_folio(page); |
| 1681 | |
| 1682 | if (folio_test_slab(folio) || folio_test_large_kmalloc(folio)) |
| 1683 | return; |
| 1684 | |
| 1685 | folio_put(folio); |
| 1686 | } |
| 1687 | |
| 1688 | /* |
| 1689 | * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload |
| 1690 | * the page's refcount so that two separate items are tracked: the original page |
| 1691 | * reference count, and also a new count of how many pin_user_pages() calls were |
| 1692 | * made against the page. ("gup-pinned" is another term for the latter). |
| 1693 | * |
| 1694 | * With this scheme, pin_user_pages() becomes special: such pages are marked as |
| 1695 | * distinct from normal pages. As such, the unpin_user_page() call (and its |
| 1696 | * variants) must be used in order to release gup-pinned pages. |
| 1697 | * |
| 1698 | * Choice of value: |
| 1699 | * |
| 1700 | * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference |
| 1701 | * counts with respect to pin_user_pages() and unpin_user_page() becomes |
| 1702 | * simpler, due to the fact that adding an even power of two to the page |
| 1703 | * refcount has the effect of using only the upper N bits, for the code that |
| 1704 | * counts up using the bias value. This means that the lower bits are left for |
| 1705 | * the exclusive use of the original code that increments and decrements by one |
| 1706 | * (or at least, by much smaller values than the bias value). |
| 1707 | * |
| 1708 | * Of course, once the lower bits overflow into the upper bits (and this is |
| 1709 | * OK, because subtraction recovers the original values), then visual inspection |
| 1710 | * no longer suffices to directly view the separate counts. However, for normal |
| 1711 | * applications that don't have huge page reference counts, this won't be an |
| 1712 | * issue. |
| 1713 | * |
| 1714 | * Locking: the lockless algorithm described in folio_try_get_rcu() |
| 1715 | * provides safe operation for get_user_pages(), folio_mkclean() and |
| 1716 | * other calls that race to set up page table entries. |
| 1717 | */ |
| 1718 | #define GUP_PIN_COUNTING_BIAS (1U << 10) |
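/*
 * Worked example (informational only): with GUP_PIN_COUNTING_BIAS == 1024,
 * a small folio holding 3 ordinary references that has additionally been
 * pinned twice via pin_user_pages() has a refcount of 3 + 2 * 1024 == 2051,
 * and the approximate pin count can be recovered as refcount / 1024.
 */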
| 1719 | |
| 1720 | void unpin_user_page(struct page *page); |
| 1721 | void unpin_folio(struct folio *folio); |
| 1722 | void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, |
| 1723 | bool make_dirty); |
| 1724 | void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, |
| 1725 | bool make_dirty); |
| 1726 | void unpin_user_pages(struct page **pages, unsigned long npages); |
| 1727 | void unpin_user_folio(struct folio *folio, unsigned long npages); |
| 1728 | void unpin_folios(struct folio **folios, unsigned long nfolios); |
| 1729 | |
| 1730 | static inline bool is_cow_mapping(vm_flags_t flags) |
| 1731 | { |
| 1732 | return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; |
| 1733 | } |
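/*
 * Example (informational): a typical MAP_PRIVATE file mapping has
 * VM_MAYWRITE set but not VM_SHARED, so is_cow_mapping() returns true;
 * a MAP_SHARED mapping has VM_SHARED set and is therefore never a COW
 * mapping.
 */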
| 1734 | |
| 1735 | #ifndef CONFIG_MMU |
| 1736 | static inline bool is_nommu_shared_mapping(vm_flags_t flags) |
| 1737 | { |
| 1738 | /* |
| 1739 | * NOMMU shared mappings are ordinary MAP_SHARED mappings and selected |
| 1740 | * R/O MAP_PRIVATE file mappings that are an effective R/O overlay of |
| 1741 | * a file mapping. R/O MAP_PRIVATE mappings might still modify |
| 1742 | * underlying memory if ptrace is active, so this is only possible if |
| 1743 | * ptrace does not apply. Note that there is no mprotect() to upgrade |
| 1744 | * write permissions later. |
| 1745 | */ |
| 1746 | return flags & (VM_MAYSHARE | VM_MAYOVERLAY); |
| 1747 | } |
| 1748 | #endif |
| 1749 | |
| 1750 | #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) |
| 1751 | #define SECTION_IN_PAGE_FLAGS |
| 1752 | #endif |
| 1753 | |
| 1754 | /* |
| 1755 | * The identification function is mainly used by the buddy allocator for |
| 1756 | * determining if two pages could be buddies. We are not really identifying |
| 1757 | * the zone since we could be using the section number id if we do not have |
| 1758 | * node id available in page flags. |
| 1759 | * We only guarantee that it will return the same value for two combinable |
| 1760 | * pages in a zone. |
| 1761 | */ |
| 1762 | static inline int page_zone_id(struct page *page) |
| 1763 | { |
| 1764 | return (page->flags.f >> ZONEID_PGSHIFT) & ZONEID_MASK; |
| 1765 | } |
| 1766 | |
| 1767 | #ifdef NODE_NOT_IN_PAGE_FLAGS |
| 1768 | int memdesc_nid(memdesc_flags_t mdf); |
| 1769 | #else |
| 1770 | static inline int memdesc_nid(memdesc_flags_t mdf) |
| 1771 | { |
| 1772 | return (mdf.f >> NODES_PGSHIFT) & NODES_MASK; |
| 1773 | } |
| 1774 | #endif |
| 1775 | |
| 1776 | static inline int page_to_nid(const struct page *page) |
| 1777 | { |
| 1778 | return memdesc_nid(PF_POISONED_CHECK(page)->flags); |
| 1779 | } |
| 1780 | |
| 1781 | static inline int folio_nid(const struct folio *folio) |
| 1782 | { |
| 1783 | return memdesc_nid(mdf: folio->flags); |
| 1784 | } |
| 1785 | |
| 1786 | #ifdef CONFIG_NUMA_BALANCING |
| 1787 | /* page access time bits need to hold at least 4 seconds */
| 1788 | #define PAGE_ACCESS_TIME_MIN_BITS 12 |
| 1789 | #if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS |
| 1790 | #define PAGE_ACCESS_TIME_BUCKETS \ |
| 1791 | (PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT) |
| 1792 | #else |
| 1793 | #define PAGE_ACCESS_TIME_BUCKETS 0 |
| 1794 | #endif |
| 1795 | |
| 1796 | #define PAGE_ACCESS_TIME_MASK \ |
| 1797 | (LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS) |
| 1798 | |
| 1799 | static inline int cpu_pid_to_cpupid(int cpu, int pid) |
| 1800 | { |
| 1801 | return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); |
| 1802 | } |
| 1803 | |
| 1804 | static inline int cpupid_to_pid(int cpupid) |
| 1805 | { |
| 1806 | return cpupid & LAST__PID_MASK; |
| 1807 | } |
| 1808 | |
| 1809 | static inline int cpupid_to_cpu(int cpupid) |
| 1810 | { |
| 1811 | return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; |
| 1812 | } |
| 1813 | |
| 1814 | static inline int cpupid_to_nid(int cpupid) |
| 1815 | { |
| 1816 | return cpu_to_node(cpu: cpupid_to_cpu(cpupid)); |
| 1817 | } |
| 1818 | |
| 1819 | static inline bool cpupid_pid_unset(int cpupid) |
| 1820 | { |
| 1821 | return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); |
| 1822 | } |
| 1823 | |
| 1824 | static inline bool cpupid_cpu_unset(int cpupid) |
| 1825 | { |
| 1826 | return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); |
| 1827 | } |
| 1828 | |
| 1829 | static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) |
| 1830 | { |
| 1831 | return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); |
| 1832 | } |
| 1833 | |
| 1834 | #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) |
| 1835 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS |
| 1836 | static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) |
| 1837 | { |
| 1838 | return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK); |
| 1839 | } |
| 1840 | |
| 1841 | static inline int folio_last_cpupid(struct folio *folio) |
| 1842 | { |
| 1843 | return folio->_last_cpupid; |
| 1844 | } |
| 1845 | static inline void page_cpupid_reset_last(struct page *page) |
| 1846 | { |
| 1847 | page->_last_cpupid = -1 & LAST_CPUPID_MASK; |
| 1848 | } |
| 1849 | #else |
| 1850 | static inline int folio_last_cpupid(struct folio *folio) |
| 1851 | { |
| 1852 | return (folio->flags.f >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; |
| 1853 | } |
| 1854 | |
| 1855 | int folio_xchg_last_cpupid(struct folio *folio, int cpupid); |
| 1856 | |
| 1857 | static inline void page_cpupid_reset_last(struct page *page) |
| 1858 | { |
| 1859 | page->flags.f |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; |
| 1860 | } |
| 1861 | #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ |
| 1862 | |
| 1863 | static inline int folio_xchg_access_time(struct folio *folio, int time) |
| 1864 | { |
| 1865 | int last_time; |
| 1866 | |
| 1867 | last_time = folio_xchg_last_cpupid(folio, |
| 1868 | time >> PAGE_ACCESS_TIME_BUCKETS); |
| 1869 | return last_time << PAGE_ACCESS_TIME_BUCKETS; |
| 1870 | } |
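/*
 * Worked example (informational, assuming PAGE_ACCESS_TIME_BUCKETS == 2):
 * an access time of 1000 is stored as 1000 >> 2 == 250 and scaled back to
 * 250 << 2 == 1000 on the next exchange, i.e. access times are tracked at
 * a granularity of 2^PAGE_ACCESS_TIME_BUCKETS.
 */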
| 1871 | |
| 1872 | static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) |
| 1873 | { |
| 1874 | unsigned int pid_bit; |
| 1875 | |
| 1876 | pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); |
| 1877 | if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { |
| 1878 | __set_bit(pid_bit, &vma->numab_state->pids_active[1]); |
| 1879 | } |
| 1880 | } |
| 1881 | |
| 1882 | bool folio_use_access_time(struct folio *folio); |
| 1883 | #else /* !CONFIG_NUMA_BALANCING */ |
| 1884 | static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) |
| 1885 | { |
| 1886 | return folio_nid(folio); /* XXX */ |
| 1887 | } |
| 1888 | |
| 1889 | static inline int folio_xchg_access_time(struct folio *folio, int time) |
| 1890 | { |
| 1891 | return 0; |
| 1892 | } |
| 1893 | |
| 1894 | static inline int folio_last_cpupid(struct folio *folio) |
| 1895 | { |
| 1896 | return folio_nid(folio); /* XXX */ |
| 1897 | } |
| 1898 | |
| 1899 | static inline int cpupid_to_nid(int cpupid) |
| 1900 | { |
| 1901 | return -1; |
| 1902 | } |
| 1903 | |
| 1904 | static inline int cpupid_to_pid(int cpupid) |
| 1905 | { |
| 1906 | return -1; |
| 1907 | } |
| 1908 | |
| 1909 | static inline int cpupid_to_cpu(int cpupid) |
| 1910 | { |
| 1911 | return -1; |
| 1912 | } |
| 1913 | |
| 1914 | static inline int cpu_pid_to_cpupid(int nid, int pid) |
| 1915 | { |
| 1916 | return -1; |
| 1917 | } |
| 1918 | |
| 1919 | static inline bool cpupid_pid_unset(int cpupid) |
| 1920 | { |
| 1921 | return true; |
| 1922 | } |
| 1923 | |
| 1924 | static inline void page_cpupid_reset_last(struct page *page) |
| 1925 | { |
| 1926 | } |
| 1927 | |
| 1928 | static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) |
| 1929 | { |
| 1930 | return false; |
| 1931 | } |
| 1932 | |
| 1933 | static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) |
| 1934 | { |
| 1935 | } |
| 1936 | static inline bool folio_use_access_time(struct folio *folio) |
| 1937 | { |
| 1938 | return false; |
| 1939 | } |
| 1940 | #endif /* CONFIG_NUMA_BALANCING */ |
| 1941 | |
| 1942 | #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) |
| 1943 | |
| 1944 | /* |
| 1945 | * KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid |
| 1946 | * setting tags for all pages to native kernel tag value 0xff, as the default |
| 1947 | * value 0x00 maps to 0xff. |
| 1948 | */ |
| 1949 | |
| 1950 | static inline u8 page_kasan_tag(const struct page *page) |
| 1951 | { |
| 1952 | u8 tag = KASAN_TAG_KERNEL; |
| 1953 | |
| 1954 | if (kasan_enabled()) { |
| 1955 | tag = (page->flags.f >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; |
| 1956 | tag ^= 0xff; |
| 1957 | } |
| 1958 | |
| 1959 | return tag; |
| 1960 | } |
| 1961 | |
| 1962 | static inline void page_kasan_tag_set(struct page *page, u8 tag) |
| 1963 | { |
| 1964 | unsigned long old_flags, flags; |
| 1965 | |
| 1966 | if (!kasan_enabled()) |
| 1967 | return; |
| 1968 | |
| 1969 | tag ^= 0xff; |
| 1970 | old_flags = READ_ONCE(page->flags.f); |
| 1971 | do { |
| 1972 | flags = old_flags; |
| 1973 | flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); |
| 1974 | flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; |
| 1975 | } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); |
| 1976 | } |
| 1977 | |
| 1978 | static inline void page_kasan_tag_reset(struct page *page) |
| 1979 | { |
| 1980 | if (kasan_enabled()) |
| 1981 | page_kasan_tag_set(page, KASAN_TAG_KERNEL); |
| 1982 | } |
| 1983 | |
| 1984 | #else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ |
| 1985 | |
| 1986 | static inline u8 page_kasan_tag(const struct page *page) |
| 1987 | { |
| 1988 | return 0xff; |
| 1989 | } |
| 1990 | |
| 1991 | static inline void page_kasan_tag_set(struct page *page, u8 tag) { } |
| 1992 | static inline void page_kasan_tag_reset(struct page *page) { } |
| 1993 | |
| 1994 | #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ |
| 1995 | |
| 1996 | static inline struct zone *page_zone(const struct page *page) |
| 1997 | { |
| 1998 | return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; |
| 1999 | } |
| 2000 | |
| 2001 | static inline pg_data_t *page_pgdat(const struct page *page) |
| 2002 | { |
| 2003 | return NODE_DATA(page_to_nid(page)); |
| 2004 | } |
| 2005 | |
| 2006 | static inline pg_data_t *folio_pgdat(const struct folio *folio) |
| 2007 | { |
| 2008 | return NODE_DATA(folio_nid(folio)); |
| 2009 | } |
| 2010 | |
| 2011 | static inline struct zone *folio_zone(const struct folio *folio) |
| 2012 | { |
| 2013 | return &folio_pgdat(folio)->node_zones[folio_zonenum(folio)]; |
| 2014 | } |
| 2015 | |
| 2016 | #ifdef SECTION_IN_PAGE_FLAGS |
| 2017 | static inline void set_page_section(struct page *page, unsigned long section) |
| 2018 | { |
| 2019 | page->flags.f &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); |
| 2020 | page->flags.f |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; |
| 2021 | } |
| 2022 | |
| 2023 | static inline unsigned long memdesc_section(memdesc_flags_t mdf) |
| 2024 | { |
| 2025 | return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; |
| 2026 | } |
| 2027 | #else /* !SECTION_IN_PAGE_FLAGS */ |
| 2028 | static inline unsigned long memdesc_section(memdesc_flags_t mdf) |
| 2029 | { |
| 2030 | return 0; |
| 2031 | } |
| 2032 | #endif /* SECTION_IN_PAGE_FLAGS */ |
| 2033 | |
| 2034 | /** |
| 2035 | * folio_pfn - Return the Page Frame Number of a folio. |
| 2036 | * @folio: The folio. |
| 2037 | * |
| 2038 | * A folio may contain multiple pages. The pages have consecutive |
| 2039 | * Page Frame Numbers. |
| 2040 | * |
| 2041 | * Return: The Page Frame Number of the first page in the folio. |
| 2042 | */ |
| 2043 | static inline unsigned long folio_pfn(const struct folio *folio) |
| 2044 | { |
| 2045 | return page_to_pfn(&folio->page); |
| 2046 | } |
| 2047 | |
| 2048 | static inline struct folio *pfn_folio(unsigned long pfn) |
| 2049 | { |
| 2050 | return page_folio(pfn_to_page(pfn)); |
| 2051 | } |
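/*
 * Example (informational): folio_pfn() and pfn_folio() invert each other
 * for the first page of a folio, i.e. pfn_folio(folio_pfn(folio)) == folio.
 */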
| 2052 | |
| 2053 | #ifdef CONFIG_MMU |
| 2054 | static inline pte_t mk_pte(const struct page *page, pgprot_t pgprot) |
| 2055 | { |
| 2056 | return pfn_pte(page_to_pfn(page), pgprot); |
| 2057 | } |
| 2058 | |
| 2059 | /** |
| 2060 | * folio_mk_pte - Create a PTE for this folio |
| 2061 | * @folio: The folio to create a PTE for |
| 2062 | * @pgprot: The page protection bits to use |
| 2063 | * |
| 2064 | * Create a page table entry for the first page of this folio. |
| 2065 | * This is suitable for passing to set_ptes(). |
| 2066 | * |
| 2067 | * Return: A page table entry suitable for mapping this folio. |
| 2068 | */ |
| 2069 | static inline pte_t folio_mk_pte(const struct folio *folio, pgprot_t pgprot) |
| 2070 | { |
| 2071 | return pfn_pte(page_nr: folio_pfn(folio), pgprot); |
| 2072 | } |
| 2073 | |
| 2074 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 2075 | /** |
| 2076 | * folio_mk_pmd - Create a PMD for this folio |
| 2077 | * @folio: The folio to create a PMD for |
| 2078 | * @pgprot: The page protection bits to use |
| 2079 | * |
| 2080 | * Create a page table entry for the first page of this folio. |
| 2081 | * This is suitable for passing to set_pmd_at(). |
| 2082 | * |
| 2083 | * Return: A page table entry suitable for mapping this folio. |
| 2084 | */ |
| 2085 | static inline pmd_t folio_mk_pmd(const struct folio *folio, pgprot_t pgprot) |
| 2086 | { |
| 2087 | return pmd_mkhuge(pmd: pfn_pmd(page_nr: folio_pfn(folio), pgprot)); |
| 2088 | } |
| 2089 | |
| 2090 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
| 2091 | /** |
| 2092 | * folio_mk_pud - Create a PUD for this folio |
| 2093 | * @folio: The folio to create a PUD for |
| 2094 | * @pgprot: The page protection bits to use |
| 2095 | * |
| 2096 | * Create a page table entry for the first page of this folio. |
| 2097 | * This is suitable for passing to set_pud_at(). |
| 2098 | * |
| 2099 | * Return: A page table entry suitable for mapping this folio. |
| 2100 | */ |
| 2101 | static inline pud_t folio_mk_pud(const struct folio *folio, pgprot_t pgprot) |
| 2102 | { |
| 2103 | return pud_mkhuge(pud: pfn_pud(page_nr: folio_pfn(folio), pgprot)); |
| 2104 | } |
| 2105 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
| 2106 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
| 2107 | #endif /* CONFIG_MMU */ |
| 2108 | |
| 2109 | static inline bool folio_has_pincount(const struct folio *folio) |
| 2110 | { |
| 2111 | if (IS_ENABLED(CONFIG_64BIT)) |
| 2112 | return folio_test_large(folio); |
| 2113 | return folio_order(folio) > 1; |
| 2114 | } |
| 2115 | |
| 2116 | /** |
| 2117 | * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. |
| 2118 | * @folio: The folio. |
| 2119 | * |
| 2120 | * This function checks if a folio has been pinned via a call to |
| 2121 | * a function in the pin_user_pages() family. |
| 2122 | * |
| 2123 | * For small folios, the return value is partially fuzzy: false is not fuzzy, |
| 2124 | * because it means "definitely not pinned for DMA", but true means "probably |
| 2125 | * pinned for DMA, but possibly a false positive due to having at least |
| 2126 | * GUP_PIN_COUNTING_BIAS worth of normal folio references". |
| 2127 | * |
| 2128 | * False positives are OK, because: a) it's unlikely for a folio to |
| 2129 | * get that many refcounts, and b) all the callers of this routine are |
| 2130 | * expected to be able to deal gracefully with a false positive. |
| 2131 | * |
| 2132 | * For most large folios, the result will be exactly correct. That's because |
| 2133 | * we have more tracking data available: the _pincount field is used |
| 2134 | * instead of the GUP_PIN_COUNTING_BIAS scheme. |
| 2135 | * |
| 2136 | * For more information, please see Documentation/core-api/pin_user_pages.rst. |
| 2137 | * |
| 2138 | * Return: True, if it is likely that the folio has been "dma-pinned". |
| 2139 | * False, if the folio is definitely not dma-pinned. |
| 2140 | */ |
| 2141 | static inline bool folio_maybe_dma_pinned(struct folio *folio) |
| 2142 | { |
| 2143 | if (folio_has_pincount(folio)) |
| 2144 | return atomic_read(v: &folio->_pincount) > 0; |
| 2145 | |
| 2146 | /* |
| 2147 | * folio_ref_count() is signed. If that refcount overflows, then |
| 2148 | * folio_ref_count() returns a negative value, and callers will avoid |
| 2149 | * further incrementing the refcount. |
| 2150 | * |
| 2151 | * Here, for that overflow case, use the sign bit to count a little |
| 2152 | * bit higher via unsigned math, and thus still get an accurate result. |
| 2153 | */ |
| 2154 | return ((unsigned int)folio_ref_count(folio)) >= |
| 2155 | GUP_PIN_COUNTING_BIAS; |
| 2156 | } |
| 2157 | |
| 2158 | /* |
| 2159 | * This should most likely only be called during fork() to see whether we |
| 2160 | * should break the cow immediately for an anon page on the src mm. |
| 2161 | * |
| 2162 | * The caller has to hold the PT lock and the vma->vm_mm->write_protect_seq.
| 2163 | */ |
| 2164 | static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma, |
| 2165 | struct folio *folio) |
| 2166 | { |
| 2167 | VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); |
| 2168 | |
| 2169 | if (!mm_flags_test(MMF_HAS_PINNED, mm: vma->vm_mm)) |
| 2170 | return false; |
| 2171 | |
| 2172 | return folio_maybe_dma_pinned(folio); |
| 2173 | } |
| 2174 | |
| 2175 | /** |
| 2176 | * is_zero_page - Query if a page is a zero page |
| 2177 | * @page: The page to query |
| 2178 | * |
| 2179 | * This returns true if @page is one of the permanent zero pages. |
| 2180 | */ |
| 2181 | static inline bool is_zero_page(const struct page *page) |
| 2182 | { |
| 2183 | return is_zero_pfn(page_to_pfn(page)); |
| 2184 | } |
| 2185 | |
| 2186 | /** |
| 2187 | * is_zero_folio - Query if a folio is a zero page |
| 2188 | * @folio: The folio to query |
| 2189 | * |
| 2190 | * This returns true if @folio is one of the permanent zero pages. |
| 2191 | */ |
| 2192 | static inline bool is_zero_folio(const struct folio *folio) |
| 2193 | { |
| 2194 | return is_zero_page(page: &folio->page); |
| 2195 | } |
| 2196 | |
| 2197 | /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin folios */ |
| 2198 | #ifdef CONFIG_MIGRATION |
| 2199 | static inline bool folio_is_longterm_pinnable(struct folio *folio) |
| 2200 | { |
| 2201 | #ifdef CONFIG_CMA |
| 2202 | int mt = folio_migratetype(folio); |
| 2203 | |
| 2204 | if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) |
| 2205 | return false; |
| 2206 | #endif |
| 2207 | /* The zero page can be "pinned" but gets special handling. */ |
| 2208 | if (is_zero_folio(folio)) |
| 2209 | return true; |
| 2210 | |
| 2211 | /* Coherent device memory must always allow eviction. */ |
| 2212 | if (folio_is_device_coherent(folio)) |
| 2213 | return false; |
| 2214 | |
| 2215 | /* |
| 2216 | * Filesystems can only tolerate transient delays to truncate and |
| 2217 | * hole-punch operations.
| 2218 | */ |
| 2219 | if (folio_is_fsdax(folio)) |
| 2220 | return false; |
| 2221 | |
| 2222 | /* Otherwise, non-movable zone folios can be pinned. */ |
| 2223 | return !folio_is_zone_movable(folio); |
| 2225 | } |
| 2226 | #else |
| 2227 | static inline bool folio_is_longterm_pinnable(struct folio *folio) |
| 2228 | { |
| 2229 | return true; |
| 2230 | } |
| 2231 | #endif |
| 2232 | |
| 2233 | static inline void set_page_zone(struct page *page, enum zone_type zone) |
| 2234 | { |
| 2235 | page->flags.f &= ~(ZONES_MASK << ZONES_PGSHIFT); |
| 2236 | page->flags.f |= (zone & ZONES_MASK) << ZONES_PGSHIFT; |
| 2237 | } |
| 2238 | |
| 2239 | static inline void set_page_node(struct page *page, unsigned long node) |
| 2240 | { |
| 2241 | page->flags.f &= ~(NODES_MASK << NODES_PGSHIFT); |
| 2242 | page->flags.f |= (node & NODES_MASK) << NODES_PGSHIFT; |
| 2243 | } |
| 2244 | |
| 2245 | static inline void set_page_links(struct page *page, enum zone_type zone, |
| 2246 | unsigned long node, unsigned long pfn) |
| 2247 | { |
| 2248 | set_page_zone(page, zone); |
| 2249 | set_page_node(page, node); |
| 2250 | #ifdef SECTION_IN_PAGE_FLAGS |
| 2251 | set_page_section(page, pfn_to_section_nr(pfn)); |
| 2252 | #endif |
| 2253 | } |
| 2254 | |
| 2255 | /** |
| 2256 | * folio_nr_pages - The number of pages in the folio. |
| 2257 | * @folio: The folio. |
| 2258 | * |
| 2259 | * Return: A positive power of two. |
| 2260 | */ |
| 2261 | static inline unsigned long folio_nr_pages(const struct folio *folio) |
| 2262 | { |
| 2263 | if (!folio_test_large(folio)) |
| 2264 | return 1; |
| 2265 | return folio_large_nr_pages(folio); |
| 2266 | } |
| 2267 | |
| 2268 | #if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS) |
| 2269 | /* |
| 2270 | * We don't expect any folios that exceed buddy sizes (and consequently |
| 2271 | * memory sections). |
| 2272 | */ |
| 2273 | #define MAX_FOLIO_ORDER MAX_PAGE_ORDER |
| 2274 | #elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) |
| 2275 | /* |
| 2276 | * Only pages within a single memory section are guaranteed to be |
| 2277 | * contiguous. By limiting folios to a single memory section, all folio |
| 2278 | * pages are guaranteed to be contiguous. |
| 2279 | */ |
| 2280 | #define MAX_FOLIO_ORDER PFN_SECTION_SHIFT |
| 2281 | #elif defined(CONFIG_HUGETLB_PAGE) |
| 2282 | /* |
| 2283 | * There is no real limit on the folio size. We limit them to the maximum we |
| 2284 | * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect |
| 2285 | * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit. |
| 2286 | */ |
| 2287 | #define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G) |
| 2288 | #else |
| 2289 | /* |
| 2290 | * Without hugetlb, gigantic folios that are bigger than a single PUD are |
| 2291 | * currently impossible. |
| 2292 | */ |
| 2293 | #define MAX_FOLIO_ORDER PUD_ORDER |
| 2294 | #endif |
| 2295 | |
| 2296 | #define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) |
| 2297 | |
| 2298 | /* |
| 2299 | * compound_nr() returns the number of pages in this potentially compound |
| 2300 | * page. compound_nr() can be called on a tail page, and is defined to |
| 2301 | * return 1 in that case. |
| 2302 | */ |
| 2303 | static inline unsigned long compound_nr(const struct page *page) |
| 2304 | { |
| 2305 | const struct folio *folio = (struct folio *)page; |
| 2306 | |
| 2307 | if (!test_bit(PG_head, &folio->flags.f)) |
| 2308 | return 1; |
| 2309 | return folio_large_nr_pages(folio); |
| 2310 | } |
| 2311 | |
| 2312 | /** |
| 2313 | * folio_next - Move to the next physical folio. |
| 2314 | * @folio: The folio we're currently operating on. |
| 2315 | * |
| 2316 | * If you have physically contiguous memory which may span more than |
| 2317 | * one folio (eg a &struct bio_vec), use this function to move from one |
| 2318 | * folio to the next. Do not use it if the memory is only virtually |
| 2319 | * contiguous as the folios are almost certainly not adjacent to each |
| 2320 | * other. This is the folio equivalent to writing ``page++``. |
| 2321 | * |
| 2322 | * Context: We assume that the folios are refcounted and/or locked at a |
| 2323 | * higher level and do not adjust the reference counts. |
| 2324 | * Return: The next struct folio. |
| 2325 | */ |
| 2326 | static inline struct folio *folio_next(struct folio *folio) |
| 2327 | { |
| 2328 | return (struct folio *)folio_page(folio, folio_nr_pages(folio)); |
| 2329 | } |
| 2330 | |
| 2331 | /** |
| 2332 | * folio_shift - The size of the memory described by this folio. |
| 2333 | * @folio: The folio. |
| 2334 | * |
| 2335 | * A folio represents a number of bytes which is a power-of-two in size. |
| 2336 | * This function tells you which power-of-two the folio is. See also |
| 2337 | * folio_size() and folio_order(). |
| 2338 | * |
| 2339 | * Context: The caller should have a reference on the folio to prevent |
| 2340 | * it from being split. It is not necessary for the folio to be locked. |
| 2341 | * Return: The base-2 logarithm of the size of this folio. |
| 2342 | */ |
| 2343 | static inline unsigned int folio_shift(const struct folio *folio) |
| 2344 | { |
| 2345 | return PAGE_SHIFT + folio_order(folio); |
| 2346 | } |
| 2347 | |
| 2348 | /** |
| 2349 | * folio_size - The number of bytes in a folio. |
| 2350 | * @folio: The folio. |
| 2351 | * |
| 2352 | * Context: The caller should have a reference on the folio to prevent |
| 2353 | * it from being split. It is not necessary for the folio to be locked. |
| 2354 | * Return: The number of bytes in this folio. |
| 2355 | */ |
| 2356 | static inline size_t folio_size(const struct folio *folio) |
| 2357 | { |
| 2358 | return PAGE_SIZE << folio_order(folio); |
| 2359 | } |
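/*
 * Illustrative sketch with a hypothetical helper name: the folio geometry
 * helpers are all derived from folio_order(), so these identities hold.
 */
static inline void folio_geometry_example(const struct folio *folio)
{
	VM_WARN_ON_ONCE(folio_size(folio) != folio_nr_pages(folio) * PAGE_SIZE);
	VM_WARN_ON_ONCE(folio_size(folio) != 1UL << folio_shift(folio));
}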
| 2360 | |
| 2361 | /** |
| 2362 | * folio_maybe_mapped_shared - Whether the folio is mapped into the page |
| 2363 | * tables of more than one MM |
| 2364 | * @folio: The folio. |
| 2365 | * |
| 2366 | * This function checks if the folio may currently be mapped into more than one
| 2367 | * MM ("maybe mapped shared"), or if the folio is certainly mapped into a single |
| 2368 | * MM ("mapped exclusively"). |
| 2369 | * |
| 2370 | * For KSM folios, this function also returns "mapped shared" when a folio is |
| 2371 | * mapped multiple times into the same MM, because the individual page mappings |
| 2372 | * are independent. |
| 2373 | * |
| 2374 | * For small anonymous folios and anonymous hugetlb folios, the return |
| 2375 | * value will be exactly correct: non-KSM folios can only be mapped at most once |
| 2376 | * into an MM, and they cannot be partially mapped. KSM folios are |
| 2377 | * considered shared even if mapped multiple times into the same MM. |
| 2378 | * |
| 2379 | * For other folios, the result can be fuzzy: |
| 2380 | * #. For partially-mappable large folios (THP), the return value can wrongly |
| 2381 | * indicate "mapped shared" (false positive) if a folio was mapped by |
| 2382 | * more than two MMs at one point in time. |
| 2383 | * #. For pagecache folios (including hugetlb), the return value can wrongly |
| 2384 | * indicate "mapped shared" (false positive) when two VMAs in the same MM |
| 2385 | * cover the same file range. |
| 2386 | * |
| 2387 | * Further, this function only considers current page table mappings that |
| 2388 | * are tracked using the folio mapcount(s). |
| 2389 | * |
| 2390 | * This function does not consider: |
| 2391 | * #. If the folio might get mapped in the (near) future (e.g., swapcache, |
| 2392 | * pagecache, temporary unmapping for migration). |
| 2393 | * #. If the folio is mapped differently (VM_PFNMAP). |
| 2394 | * #. If hugetlb page table sharing applies. Callers might want to check |
| 2395 | * hugetlb_pmd_shared(). |
| 2396 | * |
| 2397 | * Return: Whether the folio is estimated to be mapped into more than one MM. |
| 2398 | */ |
| 2399 | static inline bool folio_maybe_mapped_shared(struct folio *folio) |
| 2400 | { |
| 2401 | int mapcount = folio_mapcount(folio); |
| 2402 | |
| 2403 | /* Only partially-mappable folios require more care. */ |
| 2404 | if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio))) |
| 2405 | return mapcount > 1; |
| 2406 | |
| 2407 | /* |
| 2408 | * vm_insert_page() without CONFIG_TRANSPARENT_HUGEPAGE ... |
| 2409 | * simply assume "mapped shared"; nobody should really care
| 2410 | * about this for arbitrary kernel allocations. |
| 2411 | */ |
| 2412 | if (!IS_ENABLED(CONFIG_MM_ID)) |
| 2413 | return true; |
| 2414 | |
| 2415 | /* |
| 2416 | * A single mapping implies "mapped exclusively", even if the |
| 2417 | * folio flag says something different: it's easier to handle this |
| 2418 | * case here instead of on the RMAP hot path. |
| 2419 | */ |
| 2420 | if (mapcount <= 1) |
| 2421 | return false; |
| 2422 | return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); |
| 2423 | } |
| 2424 | |
| 2425 | /** |
| 2426 | * folio_expected_ref_count - calculate the expected folio refcount |
| 2427 | * @folio: the folio |
| 2428 | * |
| 2429 | * Calculate the expected folio refcount, taking references from the pagecache, |
| 2430 | * swapcache, PG_private and page table mappings into account. Useful in |
| 2431 | * combination with folio_ref_count() to detect unexpected references (e.g., |
| 2432 | * GUP or other temporary references). |
| 2433 | * |
| 2434 | * This currently does not consider references from the LRU cache. If the folio
| 2435 | * was isolated from the LRU (which is the case during migration or split), |
| 2436 | * the LRU cache does not apply. |
| 2437 | * |
| 2438 | * Calling this function on an unmapped folio -- !folio_mapped() -- that is |
| 2439 | * locked will return a stable result. |
| 2440 | * |
| 2441 | * Calling this function on a mapped folio will not result in a stable result, |
| 2442 | * because nothing stops additional page table mappings from coming (e.g., |
| 2443 | * fork()) or going (e.g., munmap()). |
| 2444 | * |
| 2445 | * Calling this function without the folio lock will also not result in a |
| 2446 | * stable result: for example, the folio might get dropped from the swapcache |
| 2447 | * concurrently. |
| 2448 | * |
| 2449 | * However, even when called without the folio lock or on a mapped folio, |
| 2450 | * this function can be used to detect unexpected references early (for example, |
| 2451 | * to decide whether it even makes sense to lock the folio and unmap it).
| 2452 | * |
| 2453 | * The caller must add any reference (e.g., from folio_try_get()) it might be |
| 2454 | * holding itself to the result. |
| 2455 | * |
| 2456 | * Returns the expected folio refcount. |
| 2457 | */ |
| 2458 | static inline int folio_expected_ref_count(const struct folio *folio) |
| 2459 | { |
| 2460 | const int order = folio_order(folio); |
| 2461 | int ref_count = 0; |
| 2462 | |
| 2463 | if (WARN_ON_ONCE(page_has_type(&folio->page) && !folio_test_hugetlb(folio))) |
| 2464 | return 0; |
| 2465 | |
| 2466 | /* One reference per page from the swapcache. */ |
| 2467 | ref_count += folio_test_swapcache(folio) << order; |
| 2468 | |
| 2469 | if (!folio_test_anon(folio)) { |
| 2470 | /* One reference per page from the pagecache. */ |
| 2471 | ref_count += !!folio->mapping << order; |
| 2472 | /* One reference from PG_private. */ |
| 2473 | ref_count += folio_test_private(folio); |
| 2474 | } |
| 2475 | |
| 2476 | /* One reference per page table mapping. */ |
| 2477 | return ref_count + folio_mapcount(folio); |
| 2478 | } |
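/*
 * Illustrative sketch only (hypothetical helper): a caller holding exactly
 * one reference of its own can compare the actual refcount against
 * folio_expected_ref_count() to spot unexpected references (e.g. GUP pins)
 * before trying to migrate or split the folio.
 */
static inline bool folio_has_unexpected_refs_example(const struct folio *folio)
{
	/* +1 accounts for the reference the caller itself is assumed to hold. */
	return folio_ref_count(folio) != folio_expected_ref_count(folio) + 1;
}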
| 2479 | |
| 2480 | #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE |
| 2481 | static inline int arch_make_folio_accessible(struct folio *folio) |
| 2482 | { |
| 2483 | return 0; |
| 2484 | } |
| 2485 | #endif |
| 2486 | |
| 2487 | /* |
| 2488 | * Some inline functions in vmstat.h depend on page_zone() |
| 2489 | */ |
| 2490 | #include <linux/vmstat.h> |
| 2491 | |
| 2492 | #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) |
| 2493 | #define HASHED_PAGE_VIRTUAL |
| 2494 | #endif |
| 2495 | |
| 2496 | #if defined(WANT_PAGE_VIRTUAL) |
| 2497 | static inline void *page_address(const struct page *page) |
| 2498 | { |
| 2499 | return page->virtual; |
| 2500 | } |
| 2501 | static inline void set_page_address(struct page *page, void *address) |
| 2502 | { |
| 2503 | page->virtual = address; |
| 2504 | } |
| 2505 | #define page_address_init() do { } while(0) |
| 2506 | #endif |
| 2507 | |
| 2508 | #if defined(HASHED_PAGE_VIRTUAL) |
| 2509 | void *page_address(const struct page *page); |
| 2510 | void set_page_address(struct page *page, void *virtual); |
| 2511 | void page_address_init(void); |
| 2512 | #endif |
| 2513 | |
| 2514 | static __always_inline void *lowmem_page_address(const struct page *page) |
| 2515 | { |
| 2516 | return page_to_virt(page); |
| 2517 | } |
| 2518 | |
| 2519 | #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) |
| 2520 | #define page_address(page) lowmem_page_address(page) |
| 2521 | #define set_page_address(page, address) do { } while(0) |
| 2522 | #define page_address_init() do { } while(0) |
| 2523 | #endif |
| 2524 | |
| 2525 | static inline void *folio_address(const struct folio *folio) |
| 2526 | { |
| 2527 | return page_address(&folio->page); |
| 2528 | } |
| 2529 | |
| 2530 | /* |
| 2531 | * Return true only if the page has been allocated with |
| 2532 | * ALLOC_NO_WATERMARKS and the low watermark was not |
| 2533 | * met, implying that the system is under some pressure.
| 2534 | */ |
| 2535 | static inline bool page_is_pfmemalloc(const struct page *page) |
| 2536 | { |
| 2537 | /* |
| 2538 | * lru.next has bit 1 set if the page is allocated from the |
| 2539 | * pfmemalloc reserves. Callers may simply overwrite it if |
| 2540 | * they do not need to preserve that information. |
| 2541 | */ |
| 2542 | return (uintptr_t)page->lru.next & BIT(1); |
| 2543 | } |
| 2544 | |
| 2545 | /* |
| 2546 | * Return true only if the folio has been allocated with |
| 2547 | * ALLOC_NO_WATERMARKS and the low watermark was not |
| 2548 | * met, implying that the system is under some pressure.
| 2549 | */ |
| 2550 | static inline bool folio_is_pfmemalloc(const struct folio *folio) |
| 2551 | { |
| 2552 | /* |
| 2553 | * lru.next has bit 1 set if the page is allocated from the |
| 2554 | * pfmemalloc reserves. Callers may simply overwrite it if |
| 2555 | * they do not need to preserve that information. |
| 2556 | */ |
| 2557 | return (uintptr_t)folio->lru.next & BIT(1); |
| 2558 | } |
| 2559 | |
| 2560 | /* |
| 2561 | * Only to be called by the page allocator on a freshly allocated |
| 2562 | * page. |
| 2563 | */ |
| 2564 | static inline void set_page_pfmemalloc(struct page *page) |
| 2565 | { |
| 2566 | page->lru.next = (void *)BIT(1); |
| 2567 | } |
| 2568 | |
| 2569 | static inline void clear_page_pfmemalloc(struct page *page) |
| 2570 | { |
| 2571 | page->lru.next = NULL; |
| 2572 | } |
| 2573 | |
| 2574 | /* |
| 2575 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. |
| 2576 | */ |
| 2577 | extern void pagefault_out_of_memory(void); |
| 2578 | |
| 2579 | #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) |
| 2580 | #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) |
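/*
 * Worked example (informational, assuming 4KiB pages and a naturally
 * aligned order-2 folio): for a pointer 5000 bytes into the folio,
 * offset_in_page() is 5000 % 4096 == 904, while offset_in_folio() is 5000,
 * because the folio spans 16KiB.
 */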
| 2581 | |
| 2582 | /* |
| 2583 | * Parameter block passed down to zap_pte_range in exceptional cases. |
| 2584 | */ |
| 2585 | struct zap_details { |
| 2586 | struct folio *single_folio; /* Locked folio to be unmapped */ |
| 2587 | bool even_cows; /* Zap COWed private pages too? */ |
| 2588 | bool reclaim_pt; /* Need reclaim page tables? */ |
| 2589 | zap_flags_t zap_flags; /* Extra flags for zapping */ |
| 2590 | }; |
| 2591 | |
| 2592 | /* |
| 2593 | * Whether to drop the pte markers, for example, the uffd-wp information for |
| 2594 | * file-backed memory. This should only be specified when we will completely |
| 2595 | * drop the page in the mm, either by truncation or unmapping of the vma. By |
| 2596 | * default, the flag is not set. |
| 2597 | */ |
| 2598 | #define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) |
| 2599 | /* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ |
| 2600 | #define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) |
| 2601 | |
| 2602 | #ifdef CONFIG_MMU |
| 2603 | extern bool can_do_mlock(void); |
| 2604 | #else |
| 2605 | static inline bool can_do_mlock(void) { return false; } |
| 2606 | #endif |
| 2607 | extern int user_shm_lock(size_t, struct ucounts *); |
| 2608 | extern void user_shm_unlock(size_t, struct ucounts *); |
| 2609 | |
| 2610 | struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, |
| 2611 | pte_t pte); |
| 2612 | struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, |
| 2613 | pte_t pte); |
| 2614 | struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, |
| 2615 | unsigned long addr, pmd_t pmd); |
| 2616 | struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, |
| 2617 | pmd_t pmd); |
| 2618 | struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr, |
| 2619 | pud_t pud); |
| 2620 | |
| 2621 | void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, |
| 2622 | unsigned long size); |
| 2623 | void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, |
| 2624 | unsigned long size, struct zap_details *details); |
| 2625 | static inline void zap_vma_pages(struct vm_area_struct *vma) |
| 2626 | { |
| 2627 | zap_page_range_single(vma, address: vma->vm_start, |
| 2628 | size: vma->vm_end - vma->vm_start, NULL); |
| 2629 | } |
| 2630 | void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, |
| 2631 | struct vm_area_struct *start_vma, unsigned long start, |
| 2632 | unsigned long end, unsigned long tree_end); |
| 2633 | |
| 2634 | struct mmu_notifier_range; |
| 2635 | |
| 2636 | void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, |
| 2637 | unsigned long end, unsigned long floor, unsigned long ceiling); |
| 2638 | int |
| 2639 | copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); |
| 2640 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, |
| 2641 | void *buf, int len, int write); |
| 2642 | |
| 2643 | struct follow_pfnmap_args { |
| 2644 | /** |
| 2645 | * Inputs: |
| 2646 | * @vma: Pointer to @vm_area_struct struct |
| 2647 | * @address: the virtual address to walk |
| 2648 | */ |
| 2649 | struct vm_area_struct *vma; |
| 2650 | unsigned long address; |
| 2651 | /** |
| 2652 | * Internals: |
| 2653 | * |
| 2654 | * The caller shouldn't touch any of these. |
| 2655 | */ |
| 2656 | spinlock_t *lock; |
| 2657 | pte_t *ptep; |
| 2658 | /** |
| 2659 | * Outputs: |
| 2660 | * |
| 2661 | * @pfn: the PFN of the address |
| 2662 | * @addr_mask: address mask covering pfn |
| 2663 | * @pgprot: the pgprot_t of the mapping |
| 2664 | * @writable: whether the mapping is writable |
| 2665 | * @special: whether the mapping is a special mapping (real PFN maps) |
| 2666 | */ |
| 2667 | unsigned long pfn; |
| 2668 | unsigned long addr_mask; |
| 2669 | pgprot_t pgprot; |
| 2670 | bool writable; |
| 2671 | bool special; |
| 2672 | }; |
| 2673 | int follow_pfnmap_start(struct follow_pfnmap_args *args); |
| 2674 | void follow_pfnmap_end(struct follow_pfnmap_args *args); |
| 2675 | |
| 2676 | extern void truncate_pagecache(struct inode *inode, loff_t new); |
| 2677 | extern void truncate_setsize(struct inode *inode, loff_t newsize); |
| 2678 | void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); |
| 2679 | void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); |
| 2680 | int generic_error_remove_folio(struct address_space *mapping, |
| 2681 | struct folio *folio); |
| 2682 | |
| 2683 | struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, |
| 2684 | unsigned long address, struct pt_regs *regs); |
| 2685 | |
| 2686 | #ifdef CONFIG_MMU |
| 2687 | extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, |
| 2688 | unsigned long address, unsigned int flags, |
| 2689 | struct pt_regs *regs); |
| 2690 | extern int fixup_user_fault(struct mm_struct *mm, |
| 2691 | unsigned long address, unsigned int fault_flags, |
| 2692 | bool *unlocked); |
| 2693 | void unmap_mapping_pages(struct address_space *mapping, |
| 2694 | pgoff_t start, pgoff_t nr, bool even_cows); |
| 2695 | void unmap_mapping_range(struct address_space *mapping, |
| 2696 | loff_t const holebegin, loff_t const holelen, int even_cows); |
| 2697 | #else |
| 2698 | static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, |
| 2699 | unsigned long address, unsigned int flags, |
| 2700 | struct pt_regs *regs) |
| 2701 | { |
| 2702 | /* should never happen if there's no MMU */ |
| 2703 | BUG(); |
| 2704 | return VM_FAULT_SIGBUS; |
| 2705 | } |
| 2706 | static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, |
| 2707 | unsigned int fault_flags, bool *unlocked) |
| 2708 | { |
| 2709 | /* should never happen if there's no MMU */ |
| 2710 | BUG(); |
| 2711 | return -EFAULT; |
| 2712 | } |
| 2713 | static inline void unmap_mapping_pages(struct address_space *mapping, |
| 2714 | pgoff_t start, pgoff_t nr, bool even_cows) { } |
| 2715 | static inline void unmap_mapping_range(struct address_space *mapping, |
| 2716 | loff_t const holebegin, loff_t const holelen, int even_cows) { } |
| 2717 | #endif |
| 2718 | |
| 2719 | static inline void unmap_shared_mapping_range(struct address_space *mapping, |
| 2720 | loff_t const holebegin, loff_t const holelen) |
| 2721 | { |
| 2722 | unmap_mapping_range(mapping, holebegin, holelen, even_cows: 0); |
| 2723 | } |
| 2724 | |
| 2725 | static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm, |
| 2726 | unsigned long addr); |
| 2727 | |
| 2728 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, |
| 2729 | void *buf, int len, unsigned int gup_flags); |
| 2730 | extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, |
| 2731 | void *buf, int len, unsigned int gup_flags); |
| 2732 | |
| 2733 | #ifdef CONFIG_BPF_SYSCALL |
| 2734 | extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, |
| 2735 | void *buf, int len, unsigned int gup_flags); |
| 2736 | #endif |
| 2737 | |
| 2738 | long get_user_pages_remote(struct mm_struct *mm, |
| 2739 | unsigned long start, unsigned long nr_pages, |
| 2740 | unsigned int gup_flags, struct page **pages, |
| 2741 | int *locked); |
| 2742 | long pin_user_pages_remote(struct mm_struct *mm, |
| 2743 | unsigned long start, unsigned long nr_pages, |
| 2744 | unsigned int gup_flags, struct page **pages, |
| 2745 | int *locked); |
| 2746 | |
| 2747 | /* |
| 2748 | * Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT. |
| 2749 | */ |
| 2750 | static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, |
| 2751 | unsigned long addr, |
| 2752 | int gup_flags, |
| 2753 | struct vm_area_struct **vmap) |
| 2754 | { |
| 2755 | struct page *page; |
| 2756 | struct vm_area_struct *vma; |
| 2757 | int got; |
| 2758 | |
| 2759 | if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT))) |
| 2760 | return ERR_PTR(error: -EINVAL); |
| 2761 | |
| 2762 | got = get_user_pages_remote(mm, start: addr, nr_pages: 1, gup_flags, pages: &page, NULL); |
| 2763 | |
| 2764 | if (got < 0) |
| 2765 | return ERR_PTR(error: got); |
| 2766 | |
| 2767 | vma = vma_lookup(mm, addr); |
| 2768 | if (WARN_ON_ONCE(!vma)) { |
| 2769 | put_page(page); |
| 2770 | return ERR_PTR(error: -EINVAL); |
| 2771 | } |
| 2772 | |
| 2773 | *vmap = vma; |
| 2774 | return page; |
| 2775 | } |
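| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical, not part of |
| | * this header): typical use of get_user_page_vma_remote(), checking the |
| | * ERR_PTR-encoded return value and dropping the page reference when done. |
| | * The caller must hold the mmap lock of @mm. |
| | */ |
| | static inline int example_touch_remote_page(struct mm_struct *mm, |
| |                                             unsigned long addr) |
| | { |
| |         struct vm_area_struct *vma; |
| |         struct page *page = get_user_page_vma_remote(mm, addr, FOLL_WRITE, &vma); |
| | |
| |         if (IS_ERR(page)) |
| |                 return PTR_ERR(page); |
| |         /* ... use page and vma ... */ |
| |         put_page(page); |
| |         return 0; |
| | } |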
| 2776 | |
| 2777 | long get_user_pages(unsigned long start, unsigned long nr_pages, |
| 2778 | unsigned int gup_flags, struct page **pages); |
| 2779 | long pin_user_pages(unsigned long start, unsigned long nr_pages, |
| 2780 | unsigned int gup_flags, struct page **pages); |
| 2781 | long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, |
| 2782 | struct page **pages, unsigned int gup_flags); |
| 2783 | long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, |
| 2784 | struct page **pages, unsigned int gup_flags); |
| 2785 | long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, |
| 2786 | struct folio **folios, unsigned int max_folios, |
| 2787 | pgoff_t *offset); |
| 2788 | int folio_add_pins(struct folio *folio, unsigned int pins); |
| 2789 | |
| 2790 | int get_user_pages_fast(unsigned long start, int nr_pages, |
| 2791 | unsigned int gup_flags, struct page **pages); |
| 2792 | int pin_user_pages_fast(unsigned long start, int nr_pages, |
| 2793 | unsigned int gup_flags, struct page **pages); |
| 2794 | void folio_add_pin(struct folio *folio); |
| 2795 | |
| 2796 | int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); |
| 2797 | int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, |
| 2798 | const struct task_struct *task, bool bypass_rlim); |
| 2799 | |
| 2800 | struct kvec; |
| 2801 | struct page *get_dump_page(unsigned long addr, int *locked); |
| 2802 | |
| 2803 | bool folio_mark_dirty(struct folio *folio); |
| 2804 | bool folio_mark_dirty_lock(struct folio *folio); |
| 2805 | bool set_page_dirty(struct page *page); |
| 2806 | int set_page_dirty_lock(struct page *page); |
| 2807 | |
| 2808 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); |
| 2809 | |
| 2810 | /* |
| 2811 | * Flags used by change_protection(). For now we make it a bitmap so |
| 2812 | * that we can pass in multiple flags just like parameters. However |
| 2813 | * for now all the callers only use one of the flags at a |
| 2814 | * time. |
| 2815 | */ |
| 2816 | /* |
| 2817 | * Whether we should manually check if we can map individual PTEs writable, |
| 2818 | * because something (e.g., COW, uffd-wp) blocks that from happening for all |
| 2819 | * PTEs automatically in a writable mapping. |
| 2820 | */ |
| 2821 | #define MM_CP_TRY_CHANGE_WRITABLE (1UL << 0) |
| 2822 | /* Whether this protection change is for NUMA hints */ |
| 2823 | #define MM_CP_PROT_NUMA (1UL << 1) |
| 2824 | /* Whether this change is for write protecting */ |
| 2825 | #define MM_CP_UFFD_WP (1UL << 2) /* do wp */ |
| 2826 | #define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */ |
| 2827 | #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ |
| 2828 | MM_CP_UFFD_WP_RESOLVE) |
| 2829 | |
| 2830 | bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, |
| 2831 | pte_t pte); |
| 2832 | extern long change_protection(struct mmu_gather *tlb, |
| 2833 | struct vm_area_struct *vma, unsigned long start, |
| 2834 | unsigned long end, unsigned long cp_flags); |
| 2835 | extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, |
| 2836 | struct vm_area_struct *vma, struct vm_area_struct **pprev, |
| 2837 | unsigned long start, unsigned long end, vm_flags_t newflags); |
| 2838 | |
| 2839 | /* |
| 2840 | * Doesn't attempt to fault; may return a short count instead. |
| 2841 | */ |
| 2842 | int get_user_pages_fast_only(unsigned long start, int nr_pages, |
| 2843 | unsigned int gup_flags, struct page **pages); |
| 2844 | |
| 2845 | static inline bool get_user_page_fast_only(unsigned long addr, |
| 2846 | unsigned int gup_flags, struct page **pagep) |
| 2847 | { |
| 2848 | return get_user_pages_fast_only(start: addr, nr_pages: 1, gup_flags, pages: pagep) == 1; |
| 2849 | } |
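| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): the |
| | * *_fast_only variants may return fewer pages than requested, so callers |
| | * either fall back to a sleeping GUP variant or, as here, drop the partial |
| | * pin and report failure. |
| | */ |
| | static inline bool example_pin_range_fast(unsigned long start, int nr, |
| |                                           struct page **pages) |
| | { |
| |         int got = get_user_pages_fast_only(start, nr, FOLL_WRITE, pages); |
| | |
| |         if (got == nr) |
| |                 return true; |
| |         while (got > 0) |
| |                 put_page(pages[--got]); |
| |         return false; |
| | } |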
| 2850 | /* |
| 2851 | * per-process (per-mm_struct) statistics. |
| 2852 | */ |
| 2853 | static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) |
| 2854 | { |
| 2855 | return percpu_counter_read_positive(fbc: &mm->rss_stat[member]); |
| 2856 | } |
| 2857 | |
| 2858 | static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) |
| 2859 | { |
| 2860 | return percpu_counter_sum_positive(fbc: &mm->rss_stat[member]); |
| 2861 | } |
| 2862 | |
| 2863 | void mm_trace_rss_stat(struct mm_struct *mm, int member); |
| 2864 | |
| 2865 | static inline void add_mm_counter(struct mm_struct *mm, int member, long value) |
| 2866 | { |
| 2867 | percpu_counter_add(fbc: &mm->rss_stat[member], amount: value); |
| 2868 | |
| 2869 | mm_trace_rss_stat(mm, member); |
| 2870 | } |
| 2871 | |
| 2872 | static inline void inc_mm_counter(struct mm_struct *mm, int member) |
| 2873 | { |
| 2874 | percpu_counter_inc(fbc: &mm->rss_stat[member]); |
| 2875 | |
| 2876 | mm_trace_rss_stat(mm, member); |
| 2877 | } |
| 2878 | |
| 2879 | static inline void dec_mm_counter(struct mm_struct *mm, int member) |
| 2880 | { |
| 2881 | percpu_counter_dec(fbc: &mm->rss_stat[member]); |
| 2882 | |
| 2883 | mm_trace_rss_stat(mm, member); |
| 2884 | } |
| 2885 | |
| 2886 | /* Optimized variant when folio is already known not to be anon */ |
| 2887 | static inline int mm_counter_file(struct folio *folio) |
| 2888 | { |
| 2889 | if (folio_test_swapbacked(folio)) |
| 2890 | return MM_SHMEMPAGES; |
| 2891 | return MM_FILEPAGES; |
| 2892 | } |
| 2893 | |
| 2894 | static inline int mm_counter(struct folio *folio) |
| 2895 | { |
| 2896 | if (folio_test_anon(folio)) |
| 2897 | return MM_ANONPAGES; |
| 2898 | return mm_counter_file(folio); |
| 2899 | } |
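| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): when a |
| | * folio is (un)mapped, the RSS counter to adjust is selected with |
| | * mm_counter()/mm_counter_file(), e.g.: |
| | */ |
| | static inline void example_account_folio_mapped(struct mm_struct *mm, |
| |                                                 struct folio *folio, long nr) |
| | { |
| |         add_mm_counter(mm, mm_counter(folio), nr); |
| | } |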
| 2900 | |
| 2901 | static inline unsigned long get_mm_rss(struct mm_struct *mm) |
| 2902 | { |
| 2903 | return get_mm_counter(mm, member: MM_FILEPAGES) + |
| 2904 | get_mm_counter(mm, member: MM_ANONPAGES) + |
| 2905 | get_mm_counter(mm, member: MM_SHMEMPAGES); |
| 2906 | } |
| 2907 | |
| 2908 | static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) |
| 2909 | { |
| 2910 | return max(mm->hiwater_rss, get_mm_rss(mm)); |
| 2911 | } |
| 2912 | |
| 2913 | static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) |
| 2914 | { |
| 2915 | return max(mm->hiwater_vm, mm->total_vm); |
| 2916 | } |
| 2917 | |
| 2918 | static inline void update_hiwater_rss(struct mm_struct *mm) |
| 2919 | { |
| 2920 | unsigned long _rss = get_mm_rss(mm); |
| 2921 | |
| 2922 | if (data_race(mm->hiwater_rss) < _rss) |
| 2923 | data_race(mm->hiwater_rss = _rss); |
| 2924 | } |
| 2925 | |
| 2926 | static inline void update_hiwater_vm(struct mm_struct *mm) |
| 2927 | { |
| 2928 | if (mm->hiwater_vm < mm->total_vm) |
| 2929 | mm->hiwater_vm = mm->total_vm; |
| 2930 | } |
| 2931 | |
| 2932 | static inline void reset_hiwater_rss(struct mm_struct *mm) |
| 2933 | { |
| 2934 | mm->hiwater_rss = get_mm_rss(mm); |
| 2935 | } |
| 2936 | |
| 2937 | static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, |
| 2938 | struct mm_struct *mm) |
| 2939 | { |
| 2940 | unsigned long hiwater_rss = get_mm_hiwater_rss(mm); |
| 2941 | |
| 2942 | if (*maxrss < hiwater_rss) |
| 2943 | *maxrss = hiwater_rss; |
| 2944 | } |
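| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): reporting |
| | * current and peak RSS in KiB from the helpers above, in the style of |
| | * fs/proc. |
| | */ |
| | static inline void example_report_rss_kb(struct mm_struct *mm, |
| |                                          unsigned long *cur_kb, |
| |                                          unsigned long *peak_kb) |
| | { |
| |         *cur_kb = get_mm_rss(mm) << (PAGE_SHIFT - 10); |
| |         *peak_kb = get_mm_hiwater_rss(mm) << (PAGE_SHIFT - 10); |
| | } |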
| 2945 | |
| 2946 | #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL |
| 2947 | static inline int pte_special(pte_t pte) |
| 2948 | { |
| 2949 | return 0; |
| 2950 | } |
| 2951 | |
| 2952 | static inline pte_t pte_mkspecial(pte_t pte) |
| 2953 | { |
| 2954 | return pte; |
| 2955 | } |
| 2956 | #endif |
| 2957 | |
| 2958 | #ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP |
| 2959 | static inline bool pmd_special(pmd_t pmd) |
| 2960 | { |
| 2961 | return false; |
| 2962 | } |
| 2963 | |
| 2964 | static inline pmd_t pmd_mkspecial(pmd_t pmd) |
| 2965 | { |
| 2966 | return pmd; |
| 2967 | } |
| 2968 | #endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ |
| 2969 | |
| 2970 | #ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP |
| 2971 | static inline bool pud_special(pud_t pud) |
| 2972 | { |
| 2973 | return false; |
| 2974 | } |
| 2975 | |
| 2976 | static inline pud_t pud_mkspecial(pud_t pud) |
| 2977 | { |
| 2978 | return pud; |
| 2979 | } |
| 2980 | #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ |
| 2981 | |
| 2982 | extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, |
| 2983 | spinlock_t **ptl); |
| 2984 | static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, |
| 2985 | spinlock_t **ptl) |
| 2986 | { |
| 2987 | pte_t *ptep; |
| 2988 | __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); |
| 2989 | return ptep; |
| 2990 | } |
| 2991 | |
| 2992 | #ifdef __PAGETABLE_P4D_FOLDED |
| 2993 | static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, |
| 2994 | unsigned long address) |
| 2995 | { |
| 2996 | return 0; |
| 2997 | } |
| 2998 | #else |
| 2999 | int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); |
| 3000 | #endif |
| 3001 | |
| 3002 | #if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU) |
| 3003 | static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, |
| 3004 | unsigned long address) |
| 3005 | { |
| 3006 | return 0; |
| 3007 | } |
| 3008 | static inline void mm_inc_nr_puds(struct mm_struct *mm) {} |
| 3009 | static inline void mm_dec_nr_puds(struct mm_struct *mm) {} |
| 3010 | |
| 3011 | #else |
| 3012 | int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); |
| 3013 | |
| 3014 | static inline void mm_inc_nr_puds(struct mm_struct *mm) |
| 3015 | { |
| 3016 | if (mm_pud_folded(mm)) |
| 3017 | return; |
| 3018 | atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), v: &mm->pgtables_bytes); |
| 3019 | } |
| 3020 | |
| 3021 | static inline void mm_dec_nr_puds(struct mm_struct *mm) |
| 3022 | { |
| 3023 | if (mm_pud_folded(mm)) |
| 3024 | return; |
| 3025 | atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), v: &mm->pgtables_bytes); |
| 3026 | } |
| 3027 | #endif |
| 3028 | |
| 3029 | #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) |
| 3030 | static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, |
| 3031 | unsigned long address) |
| 3032 | { |
| 3033 | return 0; |
| 3034 | } |
| 3035 | |
| 3036 | static inline void mm_inc_nr_pmds(struct mm_struct *mm) {} |
| 3037 | static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} |
| 3038 | |
| 3039 | #else |
| 3040 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); |
| 3041 | |
| 3042 | static inline void mm_inc_nr_pmds(struct mm_struct *mm) |
| 3043 | { |
| 3044 | if (mm_pmd_folded(mm)) |
| 3045 | return; |
| 3046 | atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), v: &mm->pgtables_bytes); |
| 3047 | } |
| 3048 | |
| 3049 | static inline void mm_dec_nr_pmds(struct mm_struct *mm) |
| 3050 | { |
| 3051 | if (mm_pmd_folded(mm)) |
| 3052 | return; |
| 3053 | atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), v: &mm->pgtables_bytes); |
| 3054 | } |
| 3055 | #endif |
| 3056 | |
| 3057 | #ifdef CONFIG_MMU |
| 3058 | static inline void mm_pgtables_bytes_init(struct mm_struct *mm) |
| 3059 | { |
| 3060 | atomic_long_set(v: &mm->pgtables_bytes, i: 0); |
| 3061 | } |
| 3062 | |
| 3063 | static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) |
| 3064 | { |
| 3065 | return atomic_long_read(v: &mm->pgtables_bytes); |
| 3066 | } |
| 3067 | |
| 3068 | static inline void mm_inc_nr_ptes(struct mm_struct *mm) |
| 3069 | { |
| 3070 | atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), v: &mm->pgtables_bytes); |
| 3071 | } |
| 3072 | |
| 3073 | static inline void mm_dec_nr_ptes(struct mm_struct *mm) |
| 3074 | { |
| 3075 | atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), v: &mm->pgtables_bytes); |
| 3076 | } |
| 3077 | #else |
| 3078 | |
| 3079 | static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {} |
| 3080 | static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) |
| 3081 | { |
| 3082 | return 0; |
| 3083 | } |
| 3084 | |
| 3085 | static inline void mm_inc_nr_ptes(struct mm_struct *mm) {} |
| 3086 | static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} |
| 3087 | #endif |
| 3088 | |
| 3089 | int __pte_alloc(struct mm_struct *mm, pmd_t *pmd); |
| 3090 | int __pte_alloc_kernel(pmd_t *pmd); |
| 3091 | |
| 3092 | #if defined(CONFIG_MMU) |
| 3093 | |
| 3094 | static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, |
| 3095 | unsigned long address) |
| 3096 | { |
| 3097 | return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? |
| 3098 | NULL : p4d_offset(pgd, address); |
| 3099 | } |
| 3100 | |
| 3101 | static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, |
| 3102 | unsigned long address) |
| 3103 | { |
| 3104 | return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? |
| 3105 | NULL : pud_offset(p4d, address); |
| 3106 | } |
| 3107 | |
| 3108 | static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) |
| 3109 | { |
| 3110 | return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? |
| 3111 | NULL: pmd_offset(pud, address); |
| 3112 | } |
| 3113 | #endif /* CONFIG_MMU */ |
| 3114 | |
| 3115 | enum pt_flags { |
| 3116 | PT_kernel = PG_referenced, |
| 3117 | PT_reserved = PG_reserved, |
| 3118 | /* High bits are used for zone/node/section */ |
| 3119 | }; |
| 3120 | |
| 3121 | static inline struct ptdesc *virt_to_ptdesc(const void *x) |
| 3122 | { |
| 3123 | return page_ptdesc(virt_to_page(x)); |
| 3124 | } |
| 3125 | |
| 3126 | /** |
| 3127 | * ptdesc_address - Virtual address of page table. |
| 3128 | * @pt: Page table descriptor. |
| 3129 | * |
| 3130 | * Return: The first byte of the page table described by @pt. |
| 3131 | */ |
| 3132 | static inline void *ptdesc_address(const struct ptdesc *pt) |
| 3133 | { |
| 3134 | return folio_address(ptdesc_folio(pt)); |
| 3135 | } |
| 3136 | |
| 3137 | static inline bool pagetable_is_reserved(struct ptdesc *pt) |
| 3138 | { |
| 3139 | return test_bit(PT_reserved, &pt->pt_flags.f); |
| 3140 | } |
| 3141 | |
| 3142 | /** |
| 3143 | * ptdesc_set_kernel - Mark a ptdesc used to map the kernel |
| 3144 | * @ptdesc: The ptdesc to be marked |
| 3145 | * |
| 3146 | * Kernel page tables often need special handling. Set a flag so that |
| 3147 | * the handling code knows this ptdesc will not be used for userspace. |
| 3148 | */ |
| 3149 | static inline void ptdesc_set_kernel(struct ptdesc *ptdesc) |
| 3150 | { |
| 3151 | set_bit(nr: PT_kernel, addr: &ptdesc->pt_flags.f); |
| 3152 | } |
| 3153 | |
| 3154 | /** |
| 3155 | * ptdesc_clear_kernel - Mark a ptdesc as no longer used to map the kernel |
| 3156 | * @ptdesc: The ptdesc to be unmarked |
| 3157 | * |
| 3158 | * Use when the ptdesc is no longer used to map the kernel and no longer |
| 3159 | * needs special handling. |
| 3160 | */ |
| 3161 | static inline void ptdesc_clear_kernel(struct ptdesc *ptdesc) |
| 3162 | { |
| 3163 | /* |
| 3164 | * Note: the 'PG_referenced' bit does not strictly need to be |
| 3165 | * cleared before freeing the page. But this is nice for |
| 3166 | * symmetry. |
| 3167 | */ |
| 3168 | clear_bit(nr: PT_kernel, addr: &ptdesc->pt_flags.f); |
| 3169 | } |
| 3170 | |
| 3171 | /** |
| 3172 | * ptdesc_test_kernel - Check if a ptdesc is used to map the kernel |
| 3173 | * @ptdesc: The ptdesc being tested |
| 3174 | * |
| 3175 | * Call to tell if the ptdesc is used to map the kernel. |
| 3176 | */ |
| 3177 | static inline bool ptdesc_test_kernel(const struct ptdesc *ptdesc) |
| 3178 | { |
| 3179 | return test_bit(PT_kernel, &ptdesc->pt_flags.f); |
| 3180 | } |
| 3181 | |
| 3182 | /** |
| 3183 | * pagetable_alloc - Allocate pagetables |
| 3184 | * @gfp: GFP flags |
| 3185 | * @order: desired pagetable order |
| 3186 | * |
| 3187 | * pagetable_alloc allocates memory for page tables as well as a page table |
| 3188 | * descriptor to describe that memory. |
| 3189 | * |
| 3190 | * Return: The ptdesc describing the allocated page tables. |
| 3191 | */ |
| 3192 | static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order) |
| 3193 | { |
| 3194 | struct page *page = alloc_pages_noprof(gfp: gfp | __GFP_COMP, order); |
| 3195 | |
| 3196 | return page_ptdesc(page); |
| 3197 | } |
| 3198 | #define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__)) |
| 3199 | |
| 3200 | static inline void __pagetable_free(struct ptdesc *pt) |
| 3201 | { |
| 3202 | struct page *page = ptdesc_page(pt); |
| 3203 | |
| 3204 | __free_pages(page, order: compound_order(page)); |
| 3205 | } |
| 3206 | |
| 3207 | #ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE |
| 3208 | void pagetable_free_kernel(struct ptdesc *pt); |
| 3209 | #else |
| 3210 | static inline void pagetable_free_kernel(struct ptdesc *pt) |
| 3211 | { |
| 3212 | __pagetable_free(pt); |
| 3213 | } |
| 3214 | #endif |
| 3215 | /** |
| 3216 | * pagetable_free - Free pagetables |
| 3217 | * @pt: The page table descriptor |
| 3218 | * |
| 3219 | * pagetable_free frees the memory of all page tables described by a page |
| 3220 | * table descriptor and the memory for the descriptor itself. |
| 3221 | */ |
| 3222 | static inline void pagetable_free(struct ptdesc *pt) |
| 3223 | { |
| 3224 | if (ptdesc_test_kernel(ptdesc: pt)) { |
| 3225 | ptdesc_clear_kernel(ptdesc: pt); |
| 3226 | pagetable_free_kernel(pt); |
| 3227 | } else { |
| 3228 | __pagetable_free(pt); |
| 3229 | } |
| 3230 | } |
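| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): allocate a |
| | * zeroed page-table page for a kernel mapping and tag it with |
| | * ptdesc_set_kernel(), so that a later pagetable_free() routes it through |
| | * pagetable_free_kernel(). |
| | */ |
| | static inline struct ptdesc *example_alloc_kernel_pt(void) |
| | { |
| |         struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL | __GFP_ZERO, 0); |
| | |
| |         if (ptdesc) |
| |                 ptdesc_set_kernel(ptdesc); |
| |         return ptdesc; |
| | } |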
| 3231 | |
| 3232 | #if defined(CONFIG_SPLIT_PTE_PTLOCKS) |
| 3233 | #if ALLOC_SPLIT_PTLOCKS |
| 3234 | void __init ptlock_cache_init(void); |
| 3235 | bool ptlock_alloc(struct ptdesc *ptdesc); |
| 3236 | void ptlock_free(struct ptdesc *ptdesc); |
| 3237 | |
| 3238 | static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) |
| 3239 | { |
| 3240 | return ptdesc->ptl; |
| 3241 | } |
| 3242 | #else /* ALLOC_SPLIT_PTLOCKS */ |
| 3243 | static inline void ptlock_cache_init(void) |
| 3244 | { |
| 3245 | } |
| 3246 | |
| 3247 | static inline bool ptlock_alloc(struct ptdesc *ptdesc) |
| 3248 | { |
| 3249 | return true; |
| 3250 | } |
| 3251 | |
| 3252 | static inline void ptlock_free(struct ptdesc *ptdesc) |
| 3253 | { |
| 3254 | } |
| 3255 | |
| 3256 | static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) |
| 3257 | { |
| 3258 | return &ptdesc->ptl; |
| 3259 | } |
| 3260 | #endif /* ALLOC_SPLIT_PTLOCKS */ |
| 3261 | |
| 3262 | static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) |
| 3263 | { |
| 3264 | return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); |
| 3265 | } |
| 3266 | |
| 3267 | static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) |
| 3268 | { |
| 3269 | BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); |
| 3270 | BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); |
| 3271 | return ptlock_ptr(ptdesc: virt_to_ptdesc(x: pte)); |
| 3272 | } |
| 3273 | |
| 3274 | static inline bool ptlock_init(struct ptdesc *ptdesc) |
| 3275 | { |
| 3276 | /* |
| 3277 | * prep_new_page() initializes page->private (and therefore page->ptl) |
| 3278 | * with 0. Make sure nobody took it into use in between. |
| 3279 | * |
| 3280 | * That can happen if an arch tries to use slab for page table allocation: |
| 3281 | * slab code uses page->slab_cache, which shares storage with page->ptl. |
| 3282 | */ |
| 3283 | VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); |
| 3284 | if (!ptlock_alloc(ptdesc)) |
| 3285 | return false; |
| 3286 | spin_lock_init(ptlock_ptr(ptdesc)); |
| 3287 | return true; |
| 3288 | } |
| 3289 | |
| 3290 | #else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */ |
| 3291 | /* |
| 3292 | * We use mm->page_table_lock to guard all pagetable pages of the mm. |
| 3293 | */ |
| 3294 | static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) |
| 3295 | { |
| 3296 | return &mm->page_table_lock; |
| 3297 | } |
| 3298 | static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) |
| 3299 | { |
| 3300 | return &mm->page_table_lock; |
| 3301 | } |
| 3302 | static inline void ptlock_cache_init(void) {} |
| 3303 | static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } |
| 3304 | static inline void ptlock_free(struct ptdesc *ptdesc) {} |
| 3305 | #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ |
| 3306 | |
| 3307 | static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) |
| 3308 | { |
| 3309 | return compound_nr(ptdesc_page(ptdesc)); |
| 3310 | } |
| 3311 | |
| 3312 | static inline void __pagetable_ctor(struct ptdesc *ptdesc) |
| 3313 | { |
| 3314 | pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); |
| 3315 | |
| 3316 | __SetPageTable(ptdesc_page(ptdesc)); |
| 3317 | mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); |
| 3318 | } |
| 3319 | |
| 3320 | static inline void pagetable_dtor(struct ptdesc *ptdesc) |
| 3321 | { |
| 3322 | pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); |
| 3323 | |
| 3324 | ptlock_free(ptdesc); |
| 3325 | __ClearPageTable(ptdesc_page(ptdesc)); |
| 3326 | mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); |
| 3327 | } |
| 3328 | |
| 3329 | static inline void pagetable_dtor_free(struct ptdesc *ptdesc) |
| 3330 | { |
| 3331 | pagetable_dtor(ptdesc); |
| 3332 | pagetable_free(pt: ptdesc); |
| 3333 | } |
| 3334 | |
| 3335 | static inline bool pagetable_pte_ctor(struct mm_struct *mm, |
| 3336 | struct ptdesc *ptdesc) |
| 3337 | { |
| 3338 | if (mm != &init_mm && !ptlock_init(ptdesc)) |
| 3339 | return false; |
| 3340 | __pagetable_ctor(ptdesc); |
| 3341 | return true; |
| 3342 | } |
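| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): the usual |
| | * allocate/construct pairing for a user PTE table. On constructor failure |
| | * the bare pages are freed; a fully constructed table is later torn down |
| | * with pagetable_dtor_free(). GFP_KERNEL_ACCOUNT | __GFP_ZERO is assumed to |
| | * match what user page-table allocations typically use. |
| | */ |
| | static inline struct ptdesc *example_alloc_user_pte_table(struct mm_struct *mm) |
| | { |
| |         struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 0); |
| | |
| |         if (!ptdesc) |
| |                 return NULL; |
| |         if (!pagetable_pte_ctor(mm, ptdesc)) { |
| |                 pagetable_free(ptdesc); |
| |                 return NULL; |
| |         } |
| |         return ptdesc; |
| | } |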
| 3343 | |
| 3344 | pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); |
| 3345 | static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, |
| 3346 | pmd_t *pmdvalp) |
| 3347 | { |
| 3348 | pte_t *pte; |
| 3349 | |
| 3350 | __cond_lock(RCU, pte = ___pte_offset_map(pmd, addr, pmdvalp)); |
| 3351 | return pte; |
| 3352 | } |
| 3353 | static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr) |
| 3354 | { |
| 3355 | return __pte_offset_map(pmd, addr, NULL); |
| 3356 | } |
| 3357 | |
| 3358 | pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, |
| 3359 | unsigned long addr, spinlock_t **ptlp); |
| 3360 | static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, |
| 3361 | unsigned long addr, spinlock_t **ptlp) |
| 3362 | { |
| 3363 | pte_t *pte; |
| 3364 | |
| 3365 | __cond_lock(RCU, __cond_lock(*ptlp, |
| 3366 | pte = __pte_offset_map_lock(mm, pmd, addr, ptlp))); |
| 3367 | return pte; |
| 3368 | } |
| 3369 | |
| 3370 | pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd, |
| 3371 | unsigned long addr, spinlock_t **ptlp); |
| 3372 | pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, |
| 3373 | unsigned long addr, pmd_t *pmdvalp, |
| 3374 | spinlock_t **ptlp); |
| 3375 | |
| 3376 | #define pte_unmap_unlock(pte, ptl) do { \ |
| 3377 | spin_unlock(ptl); \ |
| 3378 | pte_unmap(pte); \ |
| 3379 | } while (0) |
| 3380 | |
| 3381 | #define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd)) |
| 3382 | |
| 3383 | #define pte_alloc_map(mm, pmd, address) \ |
| 3384 | (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address)) |
| 3385 | |
| 3386 | #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ |
| 3387 | (pte_alloc(mm, pmd) ? \ |
| 3388 | NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) |
| 3389 | |
| 3390 | #define pte_alloc_kernel(pmd, address) \ |
| 3391 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \ |
| 3392 | NULL: pte_offset_kernel(pmd, address)) |
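| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): the |
| | * canonical map-with-lock / unmap-and-unlock pairing for inspecting a |
| | * single PTE; a NULL return means the PTE table is not (or no longer) |
| | * present. |
| | */ |
| | static inline bool example_pte_is_present(struct mm_struct *mm, pmd_t *pmd, |
| |                                           unsigned long addr) |
| | { |
| |         spinlock_t *ptl; |
| |         pte_t *pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
| |         bool present; |
| | |
| |         if (!pte) |
| |                 return false; |
| |         present = pte_present(ptep_get(pte)); |
| |         pte_unmap_unlock(pte, ptl); |
| |         return present; |
| | } |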
| 3393 | |
| 3394 | #if defined(CONFIG_SPLIT_PMD_PTLOCKS) |
| 3395 | |
| 3396 | static inline struct page *pmd_pgtable_page(pmd_t *pmd) |
| 3397 | { |
| 3398 | unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); |
| 3399 | return virt_to_page((void *)((unsigned long) pmd & mask)); |
| 3400 | } |
| 3401 | |
| 3402 | static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) |
| 3403 | { |
| 3404 | return page_ptdesc(pmd_pgtable_page(pmd)); |
| 3405 | } |
| 3406 | |
| 3407 | static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) |
| 3408 | { |
| 3409 | return ptlock_ptr(ptdesc: pmd_ptdesc(pmd)); |
| 3410 | } |
| 3411 | |
| 3412 | static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) |
| 3413 | { |
| 3414 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 3415 | ptdesc->pmd_huge_pte = NULL; |
| 3416 | #endif |
| 3417 | return ptlock_init(ptdesc); |
| 3418 | } |
| 3419 | |
| 3420 | #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) |
| 3421 | |
| 3422 | #else |
| 3423 | |
| 3424 | static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) |
| 3425 | { |
| 3426 | return &mm->page_table_lock; |
| 3427 | } |
| 3428 | |
| 3429 | static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } |
| 3430 | |
| 3431 | #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) |
| 3432 | |
| 3433 | #endif |
| 3434 | |
| 3435 | static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) |
| 3436 | { |
| 3437 | spinlock_t *ptl = pmd_lockptr(mm, pmd); |
| 3438 | spin_lock(lock: ptl); |
| 3439 | return ptl; |
| 3440 | } |
| 3441 | |
| 3442 | static inline bool pagetable_pmd_ctor(struct mm_struct *mm, |
| 3443 | struct ptdesc *ptdesc) |
| 3444 | { |
| 3445 | if (mm != &init_mm && !pmd_ptlock_init(ptdesc)) |
| 3446 | return false; |
| 3447 | ptdesc_pmd_pts_init(ptdesc); |
| 3448 | __pagetable_ctor(ptdesc); |
| 3449 | return true; |
| 3450 | } |
| 3451 | |
| 3452 | /* |
| 3453 | * No scalability reason to split PUD locks yet, but follow the same pattern |
| 3454 | * as the PMD locks to make it easier if we decide to. The VM should not be |
| 3455 | * considered ready to switch to split PUD locks yet; there may be places |
| 3456 | * which need to be converted from page_table_lock. |
| 3457 | */ |
| 3458 | static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud) |
| 3459 | { |
| 3460 | return &mm->page_table_lock; |
| 3461 | } |
| 3462 | |
| 3463 | static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) |
| 3464 | { |
| 3465 | spinlock_t *ptl = pud_lockptr(mm, pud); |
| 3466 | |
| 3467 | spin_lock(lock: ptl); |
| 3468 | return ptl; |
| 3469 | } |
| 3470 | |
| 3471 | static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) |
| 3472 | { |
| 3473 | __pagetable_ctor(ptdesc); |
| 3474 | } |
| 3475 | |
| 3476 | static inline void pagetable_p4d_ctor(struct ptdesc *ptdesc) |
| 3477 | { |
| 3478 | __pagetable_ctor(ptdesc); |
| 3479 | } |
| 3480 | |
| 3481 | static inline void pagetable_pgd_ctor(struct ptdesc *ptdesc) |
| 3482 | { |
| 3483 | __pagetable_ctor(ptdesc); |
| 3484 | } |
| 3485 | |
| 3486 | extern void __init pagecache_init(void); |
| 3487 | extern void free_initmem(void); |
| 3488 | |
| 3489 | /* |
| 3490 | * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) |
| 3491 | * into the buddy system. The freed pages will be poisoned with the pattern |
| 3492 | * "poison" if it is within the range [0, UCHAR_MAX]. |
| 3493 | * Return the number of pages freed into the buddy system. |
| 3494 | */ |
| 3495 | extern unsigned long free_reserved_area(void *start, void *end, |
| 3496 | int poison, const char *s); |
| 3497 | |
| 3498 | extern void adjust_managed_page_count(struct page *page, long count); |
| 3499 | |
| 3500 | extern void reserve_bootmem_region(phys_addr_t start, |
| 3501 | phys_addr_t end, int nid); |
| 3502 | |
| 3503 | /* Free the reserved page into the buddy system, so it gets managed. */ |
| 3504 | void free_reserved_page(struct page *page); |
| 3505 | |
| 3506 | static inline void mark_page_reserved(struct page *page) |
| 3507 | { |
| 3508 | SetPageReserved(page); |
| 3509 | adjust_managed_page_count(page, count: -1); |
| 3510 | } |
| 3511 | |
| 3512 | static inline void free_reserved_ptdesc(struct ptdesc *pt) |
| 3513 | { |
| 3514 | free_reserved_page(ptdesc_page(pt)); |
| 3515 | } |
| 3516 | |
| 3517 | /* |
| 3518 | * Default method to free all the __init memory into the buddy system. |
| 3519 | * The freed pages will be poisoned with the pattern "poison" if it is |
| 3520 | * within the range [0, UCHAR_MAX]. |
| 3521 | * Return the number of pages freed into the buddy system. |
| 3522 | */ |
| 3523 | static inline unsigned long free_initmem_default(int poison) |
| 3524 | { |
| 3525 | extern char __init_begin[], __init_end[]; |
| 3526 | |
| 3527 | return free_reserved_area(start: &__init_begin, end: &__init_end, |
| 3528 | poison, s: "unused kernel image (initmem)" ); |
| 3529 | } |
| 3530 | |
| 3531 | static inline unsigned long get_num_physpages(void) |
| 3532 | { |
| 3533 | int nid; |
| 3534 | unsigned long phys_pages = 0; |
| 3535 | |
| 3536 | for_each_online_node(nid) |
| 3537 | phys_pages += node_present_pages(nid); |
| 3538 | |
| 3539 | return phys_pages; |
| 3540 | } |
| 3541 | |
| 3542 | /* |
| 3543 | * Using memblock node mappings, an architecture may initialise its |
| 3544 | * zones, allocate the backing mem_map and account for memory holes in an |
| 3545 | * architecture independent manner. |
| 3546 | * |
| 3547 | * An architecture is expected to register the range of page frames backed by |
| 3548 | * physical memory with memblock_add[_node]() before calling |
| 3549 | * free_area_init() passing in the PFN each zone ends at. At a basic |
| 3550 | * usage, an architecture is expected to do something like |
| 3551 | * |
| 3552 | * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, |
| 3553 | * max_highmem_pfn}; |
| 3554 | * for_each_valid_physical_page_range() |
| 3555 | * memblock_add_node(base, size, nid, MEMBLOCK_NONE) |
| 3556 | * free_area_init(max_zone_pfns); |
| 3557 | */ |
| 3558 | void free_area_init(unsigned long *max_zone_pfn); |
| 3559 | unsigned long node_map_pfn_alignment(void); |
| 3560 | extern unsigned long absent_pages_in_range(unsigned long start_pfn, |
| 3561 | unsigned long end_pfn); |
| 3562 | extern void get_pfn_range_for_nid(unsigned int nid, |
| 3563 | unsigned long *start_pfn, unsigned long *end_pfn); |
| 3564 | |
| 3565 | #ifndef CONFIG_NUMA |
| 3566 | static inline int early_pfn_to_nid(unsigned long pfn) |
| 3567 | { |
| 3568 | return 0; |
| 3569 | } |
| 3570 | #else |
| 3571 | /* please see mm/page_alloc.c */ |
| 3572 | extern int __meminit early_pfn_to_nid(unsigned long pfn); |
| 3573 | #endif |
| 3574 | |
| 3575 | extern void mem_init(void); |
| 3576 | extern void __init mmap_init(void); |
| 3577 | |
| 3578 | extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); |
| 3579 | static inline void show_mem(void) |
| 3580 | { |
| 3581 | __show_mem(flags: 0, NULL, max_zone_idx: MAX_NR_ZONES - 1); |
| 3582 | } |
| 3583 | extern long si_mem_available(void); |
| 3584 | extern void si_meminfo(struct sysinfo * val); |
| 3585 | extern void si_meminfo_node(struct sysinfo *val, int nid); |
| 3586 | |
| 3587 | extern __printf(3, 4) |
| 3588 | void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...); |
| 3589 | |
| 3590 | extern void setup_per_cpu_pageset(void); |
| 3591 | |
| 3592 | /* nommu.c */ |
| 3593 | extern atomic_long_t mmap_pages_allocated; |
| 3594 | extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); |
| 3595 | |
| 3596 | /* interval_tree.c */ |
| 3597 | void vma_interval_tree_insert(struct vm_area_struct *node, |
| 3598 | struct rb_root_cached *root); |
| 3599 | void vma_interval_tree_insert_after(struct vm_area_struct *node, |
| 3600 | struct vm_area_struct *prev, |
| 3601 | struct rb_root_cached *root); |
| 3602 | void vma_interval_tree_remove(struct vm_area_struct *node, |
| 3603 | struct rb_root_cached *root); |
| 3604 | struct vm_area_struct *vma_interval_tree_subtree_search(struct vm_area_struct *node, |
| 3605 | unsigned long start, unsigned long last); |
| 3606 | struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, |
| 3607 | unsigned long start, unsigned long last); |
| 3608 | struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, |
| 3609 | unsigned long start, unsigned long last); |
| 3610 | |
| 3611 | #define vma_interval_tree_foreach(vma, root, start, last) \ |
| 3612 | for (vma = vma_interval_tree_iter_first(root, start, last); \ |
| 3613 | vma; vma = vma_interval_tree_iter_next(vma, start, last)) |
| 3614 | |
| 3615 | void anon_vma_interval_tree_insert(struct anon_vma_chain *node, |
| 3616 | struct rb_root_cached *root); |
| 3617 | void anon_vma_interval_tree_remove(struct anon_vma_chain *node, |
| 3618 | struct rb_root_cached *root); |
| 3619 | struct anon_vma_chain * |
| 3620 | anon_vma_interval_tree_iter_first(struct rb_root_cached *root, |
| 3621 | unsigned long start, unsigned long last); |
| 3622 | struct anon_vma_chain *anon_vma_interval_tree_iter_next( |
| 3623 | struct anon_vma_chain *node, unsigned long start, unsigned long last); |
| 3624 | #ifdef CONFIG_DEBUG_VM_RB |
| 3625 | void anon_vma_interval_tree_verify(struct anon_vma_chain *node); |
| 3626 | #endif |
| 3627 | |
| 3628 | #define anon_vma_interval_tree_foreach(avc, root, start, last) \ |
| 3629 | for (avc = anon_vma_interval_tree_iter_first(root, start, last); \ |
| 3630 | avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) |
| 3631 | |
| 3632 | /* mmap.c */ |
| 3633 | extern int __vm_enough_memory(const struct mm_struct *mm, long pages, int cap_sys_admin); |
| 3634 | extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); |
| 3635 | extern void exit_mmap(struct mm_struct *); |
| 3636 | bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, |
| 3637 | unsigned long addr, bool write); |
| 3638 | |
| 3639 | static inline int check_data_rlimit(unsigned long rlim, |
| 3640 | unsigned long new, |
| 3641 | unsigned long start, |
| 3642 | unsigned long end_data, |
| 3643 | unsigned long start_data) |
| 3644 | { |
| 3645 | if (rlim < RLIM_INFINITY) { |
| 3646 | if (((new - start) + (end_data - start_data)) > rlim) |
| 3647 | return -ENOSPC; |
| 3648 | } |
| 3649 | |
| 3650 | return 0; |
| 3651 | } |
| 3652 | |
| 3653 | extern int mm_take_all_locks(struct mm_struct *mm); |
| 3654 | extern void mm_drop_all_locks(struct mm_struct *mm); |
| 3655 | |
| 3656 | extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); |
| 3657 | extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); |
| 3658 | extern struct file *get_mm_exe_file(struct mm_struct *mm); |
| 3659 | extern struct file *get_task_exe_file(struct task_struct *task); |
| 3660 | |
| 3661 | extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); |
| 3662 | extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); |
| 3663 | |
| 3664 | extern bool vma_is_special_mapping(const struct vm_area_struct *vma, |
| 3665 | const struct vm_special_mapping *sm); |
| 3666 | struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, |
| 3667 | unsigned long addr, unsigned long len, |
| 3668 | vm_flags_t vm_flags, |
| 3669 | const struct vm_special_mapping *spec); |
| 3670 | |
| 3671 | unsigned long randomize_stack_top(unsigned long stack_top); |
| 3672 | unsigned long randomize_page(unsigned long start, unsigned long range); |
| 3673 | |
| 3674 | unsigned long |
| 3675 | __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, |
| 3676 | unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); |
| 3677 | |
| 3678 | static inline unsigned long |
| 3679 | get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, |
| 3680 | unsigned long pgoff, unsigned long flags) |
| 3681 | { |
| 3682 | return __get_unmapped_area(file, addr, len, pgoff, flags, vm_flags: 0); |
| 3683 | } |
| 3684 | |
| 3685 | extern unsigned long do_mmap(struct file *file, unsigned long addr, |
| 3686 | unsigned long len, unsigned long prot, unsigned long flags, |
| 3687 | vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, |
| 3688 | struct list_head *uf); |
| 3689 | extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, |
| 3690 | unsigned long start, size_t len, struct list_head *uf, |
| 3691 | bool unlock); |
| 3692 | int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, |
| 3693 | struct mm_struct *mm, unsigned long start, |
| 3694 | unsigned long end, struct list_head *uf, bool unlock); |
| 3695 | extern int do_munmap(struct mm_struct *, unsigned long, size_t, |
| 3696 | struct list_head *uf); |
| 3697 | extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); |
| 3698 | |
| 3699 | #ifdef CONFIG_MMU |
| 3700 | extern int __mm_populate(unsigned long addr, unsigned long len, |
| 3701 | int ignore_errors); |
| 3702 | static inline void mm_populate(unsigned long addr, unsigned long len) |
| 3703 | { |
| 3704 | /* Ignore errors */ |
| 3705 | (void) __mm_populate(addr, len, ignore_errors: 1); |
| 3706 | } |
| 3707 | #else |
| 3708 | static inline void mm_populate(unsigned long addr, unsigned long len) {} |
| 3709 | #endif |
| 3710 | |
| 3711 | /* This takes the mm semaphore itself */ |
| 3712 | extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); |
| 3713 | extern int vm_munmap(unsigned long, size_t); |
| 3714 | extern unsigned long __must_check vm_mmap(struct file *, unsigned long, |
| 3715 | unsigned long, unsigned long, |
| 3716 | unsigned long, unsigned long); |
| 3717 | |
| 3718 | struct vm_unmapped_area_info { |
| 3719 | #define VM_UNMAPPED_AREA_TOPDOWN 1 |
| 3720 | unsigned long flags; |
| 3721 | unsigned long length; |
| 3722 | unsigned long low_limit; |
| 3723 | unsigned long high_limit; |
| 3724 | unsigned long align_mask; |
| 3725 | unsigned long align_offset; |
| 3726 | unsigned long start_gap; |
| 3727 | }; |
| 3728 | |
| 3729 | extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); |
| 3730 | |
| 3731 | /* truncate.c */ |
| 3732 | void truncate_inode_pages(struct address_space *mapping, loff_t lstart); |
| 3733 | void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart, |
| 3734 | loff_t lend); |
| 3735 | void truncate_inode_pages_final(struct address_space *mapping); |
| 3736 | |
| 3737 | /* generic vm_area_ops exported for stackable file systems */ |
| 3738 | extern vm_fault_t filemap_fault(struct vm_fault *vmf); |
| 3739 | extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, |
| 3740 | pgoff_t start_pgoff, pgoff_t end_pgoff); |
| 3741 | extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); |
| 3742 | |
| 3743 | extern unsigned long stack_guard_gap; |
| 3744 | /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ |
| 3745 | int expand_stack_locked(struct vm_area_struct *vma, unsigned long address); |
| 3746 | struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr); |
| 3747 | |
| 3748 | /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ |
| 3749 | extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); |
| 3750 | extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, |
| 3751 | struct vm_area_struct **pprev); |
| 3752 | |
| 3753 | /* |
| 3754 | * Look up the first VMA which intersects the interval [start_addr, end_addr) |
| 3755 | * NULL if none. Assume start_addr < end_addr. |
| 3756 | */ |
| 3757 | struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, |
| 3758 | unsigned long start_addr, unsigned long end_addr); |
| 3759 | |
| 3760 | /** |
| 3761 | * vma_lookup() - Find a VMA at a specific address |
| 3762 | * @mm: The process address space. |
| 3763 | * @addr: The user address. |
| 3764 | * |
| 3765 | * Return: The vm_area_struct at the given address, %NULL otherwise. |
| 3766 | */ |
| 3767 | static inline |
| 3768 | struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) |
| 3769 | { |
| 3770 | return mtree_load(mt: &mm->mm_mt, index: addr); |
| 3771 | } |
| 3772 | |
| 3773 | static inline unsigned long stack_guard_start_gap(const struct vm_area_struct *vma) |
| 3774 | { |
| 3775 | if (vma->vm_flags & VM_GROWSDOWN) |
| 3776 | return stack_guard_gap; |
| 3777 | |
| 3778 | /* See reasoning around the VM_SHADOW_STACK definition */ |
| 3779 | if (vma->vm_flags & VM_SHADOW_STACK) |
| 3780 | return PAGE_SIZE; |
| 3781 | |
| 3782 | return 0; |
| 3783 | } |
| 3784 | |
| 3785 | static inline unsigned long vm_start_gap(const struct vm_area_struct *vma) |
| 3786 | { |
| 3787 | unsigned long gap = stack_guard_start_gap(vma); |
| 3788 | unsigned long vm_start = vma->vm_start; |
| 3789 | |
| 3790 | vm_start -= gap; |
| 3791 | if (vm_start > vma->vm_start) |
| 3792 | vm_start = 0; |
| 3793 | return vm_start; |
| 3794 | } |
| 3795 | |
| 3796 | static inline unsigned long vm_end_gap(const struct vm_area_struct *vma) |
| 3797 | { |
| 3798 | unsigned long vm_end = vma->vm_end; |
| 3799 | |
| 3800 | if (vma->vm_flags & VM_GROWSUP) { |
| 3801 | vm_end += stack_guard_gap; |
| 3802 | if (vm_end < vma->vm_end) |
| 3803 | vm_end = -PAGE_SIZE; |
| 3804 | } |
| 3805 | return vm_end; |
| 3806 | } |
| 3807 | |
| 3808 | static inline unsigned long vma_pages(const struct vm_area_struct *vma) |
| 3809 | { |
| 3810 | return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
| 3811 | } |
| 3812 | |
| 3813 | static inline unsigned long vma_desc_size(const struct vm_area_desc *desc) |
| 3814 | { |
| 3815 | return desc->end - desc->start; |
| 3816 | } |
| 3817 | |
| 3818 | static inline unsigned long vma_desc_pages(const struct vm_area_desc *desc) |
| 3819 | { |
| 3820 | return vma_desc_size(desc) >> PAGE_SHIFT; |
| 3821 | } |
| 3822 | |
| 3823 | /** |
| 3824 | * mmap_action_remap - helper for mmap_prepare hook to specify that a pure PFN |
| 3825 | * remap is required. |
| 3826 | * @desc: The VMA descriptor for the VMA requiring remap. |
| 3827 | * @start: The virtual address to start the remap from, must be within the VMA. |
| 3828 | * @start_pfn: The first PFN in the range to remap. |
| 3829 | * @size: The size of the range to remap, in bytes, at most spanning to the end |
| 3830 | * of the VMA. |
| 3831 | */ |
| 3832 | static inline void mmap_action_remap(struct vm_area_desc *desc, |
| 3833 | unsigned long start, |
| 3834 | unsigned long start_pfn, |
| 3835 | unsigned long size) |
| 3836 | { |
| 3837 | struct mmap_action *action = &desc->action; |
| 3838 | |
| 3839 | /* [start, start + size) must be within the VMA. */ |
| 3840 | WARN_ON_ONCE(start < desc->start || start >= desc->end); |
| 3841 | WARN_ON_ONCE(start + size > desc->end); |
| 3842 | |
| 3843 | action->type = MMAP_REMAP_PFN; |
| 3844 | action->remap.start = start; |
| 3845 | action->remap.start_pfn = start_pfn; |
| 3846 | action->remap.size = size; |
| 3847 | action->remap.pgprot = desc->page_prot; |
| 3848 | } |
| 3849 | |
| 3850 | /** |
| 3851 | * mmap_action_remap_full - helper for mmap_prepare hook to specify that the |
| 3852 | * entirety of a VMA should be PFN remapped. |
| 3853 | * @desc: The VMA descriptor for the VMA requiring remap. |
| 3854 | * @start_pfn: The first PFN in the range to remap. |
| 3855 | */ |
| 3856 | static inline void mmap_action_remap_full(struct vm_area_desc *desc, |
| 3857 | unsigned long start_pfn) |
| 3858 | { |
| 3859 | mmap_action_remap(desc, start: desc->start, start_pfn, size: vma_desc_size(desc)); |
| 3860 | } |
| 3861 | |
| 3862 | /** |
| 3863 | * mmap_action_ioremap - helper for mmap_prepare hook to specify that a pure PFN |
| 3864 | * I/O remap is required. |
| 3865 | * @desc: The VMA descriptor for the VMA requiring remap. |
| 3866 | * @start: The virtual address to start the remap from, must be within the VMA. |
| 3867 | * @start_pfn: The first PFN in the range to remap. |
| 3868 | * @size: The size of the range to remap, in bytes, at most spanning to the end |
| 3869 | * of the VMA. |
| 3870 | */ |
| 3871 | static inline void mmap_action_ioremap(struct vm_area_desc *desc, |
| 3872 | unsigned long start, |
| 3873 | unsigned long start_pfn, |
| 3874 | unsigned long size) |
| 3875 | { |
| 3876 | mmap_action_remap(desc, start, start_pfn, size); |
| 3877 | desc->action.type = MMAP_IO_REMAP_PFN; |
| 3878 | } |
| 3879 | |
| 3880 | /** |
| 3881 | * mmap_action_ioremap_full - helper for mmap_prepare hook to specify that the |
| 3882 | * entirety of a VMA should be PFN I/O remapped. |
| 3883 | * @desc: The VMA descriptor for the VMA requiring remap. |
| 3884 | * @start_pfn: The first PFN in the range to remap. |
| 3885 | */ |
| 3886 | static inline void mmap_action_ioremap_full(struct vm_area_desc *desc, |
| 3887 | unsigned long start_pfn) |
| 3888 | { |
| 3889 | mmap_action_ioremap(desc, start: desc->start, start_pfn, size: vma_desc_size(desc)); |
| 3890 | } |
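| | /* |
| | * Illustrative sketch only (the example_* names and the BAR parameters are |
| | * hypothetical): an mmap_prepare-style hook that validates the requested |
| | * size and asks the core to I/O-remap the whole VMA to a device PFN range |
| | * once the VMA has been set up. |
| | */ |
| | static inline int example_mmap_prepare(struct vm_area_desc *desc, |
| |                                        unsigned long bar_start_pfn, |
| |                                        unsigned long bar_size) |
| | { |
| |         if (vma_desc_size(desc) > bar_size) |
| |                 return -EINVAL; |
| |         mmap_action_ioremap_full(desc, bar_start_pfn); |
| |         return 0; |
| | } |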
| 3891 | |
| 3892 | void mmap_action_prepare(struct mmap_action *action, |
| 3893 | struct vm_area_desc *desc); |
| 3894 | int mmap_action_complete(struct mmap_action *action, |
| 3895 | struct vm_area_struct *vma); |
| 3896 | |
| 3897 | /* Look up the first VMA which exactly matches the interval vm_start ... vm_end */ |
| 3898 | static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, |
| 3899 | unsigned long vm_start, unsigned long vm_end) |
| 3900 | { |
| 3901 | struct vm_area_struct *vma = vma_lookup(mm, addr: vm_start); |
| 3902 | |
| 3903 | if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) |
| 3904 | vma = NULL; |
| 3905 | |
| 3906 | return vma; |
| 3907 | } |
| 3908 | |
| 3909 | static inline bool range_in_vma(const struct vm_area_struct *vma, |
| 3910 | unsigned long start, unsigned long end) |
| 3911 | { |
| 3912 | return (vma && vma->vm_start <= start && end <= vma->vm_end); |
| 3913 | } |
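| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): combining |
| | * vma_lookup() with range_in_vma() to check that [start, end) is covered by |
| | * one VMA; range_in_vma() tolerates a NULL lookup result. |
| | */ |
| | static inline bool example_range_is_single_vma(struct mm_struct *mm, |
| |                                                unsigned long start, |
| |                                                unsigned long end) |
| | { |
| |         return range_in_vma(vma_lookup(mm, start), start, end); |
| | } |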
| 3914 | |
| 3915 | #ifdef CONFIG_MMU |
| 3916 | pgprot_t vm_get_page_prot(vm_flags_t vm_flags); |
| 3917 | void vma_set_page_prot(struct vm_area_struct *vma); |
| 3918 | #else |
| 3919 | static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) |
| 3920 | { |
| 3921 | return __pgprot(0); |
| 3922 | } |
| 3923 | static inline void vma_set_page_prot(struct vm_area_struct *vma) |
| 3924 | { |
| 3925 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
| 3926 | } |
| 3927 | #endif |
| 3928 | |
| 3929 | void vma_set_file(struct vm_area_struct *vma, struct file *file); |
| 3930 | |
| 3931 | #ifdef CONFIG_NUMA_BALANCING |
| 3932 | unsigned long change_prot_numa(struct vm_area_struct *vma, |
| 3933 | unsigned long start, unsigned long end); |
| 3934 | #endif |
| 3935 | |
| 3936 | struct vm_area_struct *find_extend_vma_locked(struct mm_struct *, |
| 3937 | unsigned long addr); |
| 3938 | int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, |
| 3939 | unsigned long pfn, unsigned long size, pgprot_t pgprot); |
| 3940 | |
| 3941 | int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); |
| 3942 | int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, |
| 3943 | struct page **pages, unsigned long *num); |
| 3944 | int vm_map_pages(struct vm_area_struct *vma, struct page **pages, |
| 3945 | unsigned long num); |
| 3946 | int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, |
| 3947 | unsigned long num); |
| 3948 | vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page, |
| 3949 | bool write); |
| 3950 | vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
| 3951 | unsigned long pfn); |
| 3952 | vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, |
| 3953 | unsigned long pfn, pgprot_t pgprot); |
| 3954 | vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
| 3955 | unsigned long pfn); |
| 3956 | vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, |
| 3957 | unsigned long addr, unsigned long pfn); |
| 3958 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
| 3959 | |
| 3960 | static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, |
| 3961 | unsigned long addr, struct page *page) |
| 3962 | { |
| 3963 | int err = vm_insert_page(vma, addr, page); |
| 3964 | |
| 3965 | if (err == -ENOMEM) |
| 3966 | return VM_FAULT_OOM; |
| 3967 | if (err < 0 && err != -EBUSY) |
| 3968 | return VM_FAULT_SIGBUS; |
| 3969 | |
| 3970 | return VM_FAULT_NOPAGE; |
| 3971 | } |
| 3972 | |
| 3973 | #ifndef io_remap_pfn_range_pfn |
| 3974 | static inline unsigned long io_remap_pfn_range_pfn(unsigned long pfn, |
| 3975 | unsigned long size) |
| 3976 | { |
| 3977 | return pfn; |
| 3978 | } |
| 3979 | #endif |
| 3980 | |
| 3981 | static inline int io_remap_pfn_range(struct vm_area_struct *vma, |
| 3982 | unsigned long addr, unsigned long orig_pfn, |
| 3983 | unsigned long size, pgprot_t orig_prot) |
| 3984 | { |
| 3985 | const unsigned long pfn = io_remap_pfn_range_pfn(pfn: orig_pfn, size); |
| 3986 | const pgprot_t prot = pgprot_decrypted(orig_prot); |
| 3987 | |
| 3988 | return remap_pfn_range(vma, addr, pfn, size, pgprot: prot); |
| 3989 | } |
| 3990 | |
| 3991 | static inline vm_fault_t vmf_error(int err) |
| 3992 | { |
| 3993 | if (err == -ENOMEM) |
| 3994 | return VM_FAULT_OOM; |
| 3995 | else if (err == -EHWPOISON) |
| 3996 | return VM_FAULT_HWPOISON; |
| 3997 | return VM_FAULT_SIGBUS; |
| 3998 | } |
| 3999 | |
| 4000 | /* |
| 4001 | * Convert errno to return value for ->page_mkwrite() calls. |
| 4002 | * |
| 4003 | * This should eventually be merged with vmf_error() above, but will need a |
| 4004 | * careful audit of all vmf_error() callers. |
| 4005 | */ |
| 4006 | static inline vm_fault_t vmf_fs_error(int err) |
| 4007 | { |
| 4008 | if (err == 0) |
| 4009 | return VM_FAULT_LOCKED; |
| 4010 | if (err == -EFAULT || err == -EAGAIN) |
| 4011 | return VM_FAULT_NOPAGE; |
| 4012 | if (err == -ENOMEM) |
| 4013 | return VM_FAULT_OOM; |
| 4014 | /* -ENOSPC, -EDQUOT, -EIO ... */ |
| 4015 | return VM_FAULT_SIGBUS; |
| 4016 | } |
| 4017 | |
| 4018 | static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) |
| 4019 | { |
| 4020 | if (vm_fault & VM_FAULT_OOM) |
| 4021 | return -ENOMEM; |
| 4022 | if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) |
| 4023 | return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT; |
| 4024 | if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) |
| 4025 | return -EFAULT; |
| 4026 | return 0; |
| 4027 | } |
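| | /* |
| | * Illustrative sketch only (the example_* name is hypothetical): a GUP-style |
| | * caller turning a handle_mm_fault() result back into an errno, honouring |
| | * FOLL_HWPOISON the way vm_fault_to_errno() expects. |
| | */ |
| | static inline int example_fault_in_page(struct vm_area_struct *vma, |
| |                                         unsigned long address, |
| |                                         unsigned int fault_flags, |
| |                                         unsigned int foll_flags, |
| |                                         struct pt_regs *regs) |
| | { |
| |         vm_fault_t ret = handle_mm_fault(vma, address, fault_flags, regs); |
| | |
| |         if (ret & VM_FAULT_ERROR) |
| |                 return vm_fault_to_errno(ret, foll_flags); |
| |         return 0; |
| | } |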
| 4028 | |
| 4029 | /* |
| 4030 | * Indicates whether GUP can follow a PROT_NONE mapped page, or whether |
| 4031 | * a (NUMA hinting) fault is required. |
| 4032 | */ |
| 4033 | static inline bool gup_can_follow_protnone(const struct vm_area_struct *vma, |
| 4034 | unsigned int flags) |
| 4035 | { |
| 4036 | /* |
| 4037 | * If callers don't want to honor NUMA hinting faults, no need to |
| 4038 | * determine if we would actually have to trigger a NUMA hinting fault. |
| 4039 | */ |
| 4040 | if (!(flags & FOLL_HONOR_NUMA_FAULT)) |
| 4041 | return true; |
| 4042 | |
| 4043 | /* |
| 4044 | * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs. |
| 4045 | * |
| 4046 | * Requiring a fault here even for inaccessible VMAs would mean that |
| 4047 | * FOLL_FORCE cannot make any progress, because handle_mm_fault() |
| 4048 | * refuses to process NUMA hinting faults in inaccessible VMAs. |
| 4049 | */ |
| 4050 | return !vma_is_accessible(vma); |
| 4051 | } |
| 4052 | |
| 4053 | typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); |
| 4054 | extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, |
| 4055 | unsigned long size, pte_fn_t fn, void *data); |
| 4056 | extern int apply_to_existing_page_range(struct mm_struct *mm, |
| 4057 | unsigned long address, unsigned long size, |
| 4058 | pte_fn_t fn, void *data); |
| 4059 | |
| 4060 | #ifdef CONFIG_PAGE_POISONING |
| 4061 | extern void __kernel_poison_pages(struct page *page, int numpages); |
| 4062 | extern void __kernel_unpoison_pages(struct page *page, int numpages); |
| 4063 | extern bool _page_poisoning_enabled_early; |
| 4064 | DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled); |
| 4065 | static inline bool page_poisoning_enabled(void) |
| 4066 | { |
| 4067 | return _page_poisoning_enabled_early; |
| 4068 | } |
| 4069 | /* |
| 4070 | * For use in fast paths after init_mem_debugging() has run, or when a |
| 4071 | * false negative result is not harmful when called too early. |
| 4072 | */ |
| 4073 | static inline bool page_poisoning_enabled_static(void) |
| 4074 | { |
| 4075 | return static_branch_unlikely(&_page_poisoning_enabled); |
| 4076 | } |
| 4077 | static inline void kernel_poison_pages(struct page *page, int numpages) |
| 4078 | { |
| 4079 | if (page_poisoning_enabled_static()) |
| 4080 | __kernel_poison_pages(page, numpages); |
| 4081 | } |
| 4082 | static inline void kernel_unpoison_pages(struct page *page, int numpages) |
| 4083 | { |
| 4084 | if (page_poisoning_enabled_static()) |
| 4085 | __kernel_unpoison_pages(page, numpages); |
| 4086 | } |
| 4087 | #else |
| 4088 | static inline bool page_poisoning_enabled(void) { return false; } |
| 4089 | static inline bool page_poisoning_enabled_static(void) { return false; } |
| 4090 | static inline void __kernel_poison_pages(struct page *page, int numpages) { } |
| 4091 | static inline void kernel_poison_pages(struct page *page, int numpages) { } |
| 4092 | static inline void kernel_unpoison_pages(struct page *page, int numpages) { } |
| 4093 | #endif |
| 4094 | |
| 4095 | DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc); |
| 4096 | static inline bool want_init_on_alloc(gfp_t flags) |
| 4097 | { |
| 4098 | if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, |
| 4099 | &init_on_alloc)) |
| 4100 | return true; |
| 4101 | return flags & __GFP_ZERO; |
| 4102 | } |
| 4103 | |
| 4104 | DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); |
| 4105 | static inline bool want_init_on_free(void) |
| 4106 | { |
| 4107 | return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, |
| 4108 | &init_on_free); |
| 4109 | } |
| 4110 | |
| 4111 | extern bool _debug_pagealloc_enabled_early; |
| 4112 | DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); |
| 4113 | |
| 4114 | static inline bool debug_pagealloc_enabled(void) |
| 4115 | { |
| 4116 | return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && |
| 4117 | _debug_pagealloc_enabled_early; |
| 4118 | } |
| 4119 | |
| 4120 | /* |
| 4121 | * For use in fast paths after mem_debugging_and_hardening_init() has run, |
| 4122 | * or when a false negative result is not harmful when called too early. |
| 4123 | */ |
| 4124 | static inline bool debug_pagealloc_enabled_static(void) |
| 4125 | { |
| 4126 | if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) |
| 4127 | return false; |
| 4128 | |
| 4129 | return static_branch_unlikely(&_debug_pagealloc_enabled); |
| 4130 | } |
| 4131 | |
| 4132 | /* |
| 4133 | * To support DEBUG_PAGEALLOC, an architecture must ensure that |
| 4134 | * __kernel_map_pages() never fails. |
| 4135 | */ |
| 4136 | extern void __kernel_map_pages(struct page *page, int numpages, int enable); |
| 4137 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 4138 | static inline void debug_pagealloc_map_pages(struct page *page, int numpages) |
| 4139 | { |
| 4140 | if (debug_pagealloc_enabled_static()) |
| 4141 | __kernel_map_pages(page, numpages, enable: 1); |
| 4142 | } |
| 4143 | |
| 4144 | static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) |
| 4145 | { |
| 4146 | if (debug_pagealloc_enabled_static()) |
| 4147 | __kernel_map_pages(page, numpages, enable: 0); |
| 4148 | } |
| 4149 | |
| 4150 | extern unsigned int _debug_guardpage_minorder; |
| 4151 | DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled); |
| 4152 | |
| 4153 | static inline unsigned int debug_guardpage_minorder(void) |
| 4154 | { |
| 4155 | return _debug_guardpage_minorder; |
| 4156 | } |
| 4157 | |
| 4158 | static inline bool debug_guardpage_enabled(void) |
| 4159 | { |
| 4160 | return static_branch_unlikely(&_debug_guardpage_enabled); |
| 4161 | } |
| 4162 | |
| 4163 | static inline bool page_is_guard(const struct page *page) |
| 4164 | { |
| 4165 | if (!debug_guardpage_enabled()) |
| 4166 | return false; |
| 4167 | |
| 4168 | return PageGuard(page); |
| 4169 | } |
| 4170 | |
| 4171 | bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order); |
| 4172 | static inline bool set_page_guard(struct zone *zone, struct page *page, |
| 4173 | unsigned int order) |
| 4174 | { |
| 4175 | if (!debug_guardpage_enabled()) |
| 4176 | return false; |
| 4177 | return __set_page_guard(zone, page, order); |
| 4178 | } |
| 4179 | |
| 4180 | void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order); |
| 4181 | static inline void clear_page_guard(struct zone *zone, struct page *page, |
| 4182 | unsigned int order) |
| 4183 | { |
| 4184 | if (!debug_guardpage_enabled()) |
| 4185 | return; |
| 4186 | __clear_page_guard(zone, page, order); |
| 4187 | } |
| 4188 | |
| 4189 | #else /* CONFIG_DEBUG_PAGEALLOC */ |
| 4190 | static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} |
| 4191 | static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} |
| 4192 | static inline unsigned int debug_guardpage_minorder(void) { return 0; } |
| 4193 | static inline bool debug_guardpage_enabled(void) { return false; } |
| 4194 | static inline bool page_is_guard(const struct page *page) { return false; } |
| 4195 | static inline bool set_page_guard(struct zone *zone, struct page *page, |
| 4196 | unsigned int order) { return false; } |
| 4197 | static inline void clear_page_guard(struct zone *zone, struct page *page, |
| 4198 | unsigned int order) {} |
| 4199 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
| 4200 | |
| 4201 | #ifdef __HAVE_ARCH_GATE_AREA |
| 4202 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); |
| 4203 | extern int in_gate_area_no_mm(unsigned long addr); |
| 4204 | extern int in_gate_area(struct mm_struct *mm, unsigned long addr); |
| 4205 | #else |
| 4206 | static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) |
| 4207 | { |
| 4208 | return NULL; |
| 4209 | } |
| 4210 | static inline int in_gate_area_no_mm(unsigned long addr) { return 0; } |
| 4211 | static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) |
| 4212 | { |
| 4213 | return 0; |
| 4214 | } |
| 4215 | #endif /* __HAVE_ARCH_GATE_AREA */ |
| 4216 | |
| 4217 | bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm); |
| 4218 | |
| 4219 | void drop_slab(void); |
| 4220 | |
| 4221 | #ifndef CONFIG_MMU |
| 4222 | #define randomize_va_space 0 |
| 4223 | #else |
| 4224 | extern int randomize_va_space; |
| 4225 | #endif |
| 4226 | |
| 4227 | const char * arch_vma_name(struct vm_area_struct *vma); |
| 4228 | #ifdef CONFIG_MMU |
| 4229 | void print_vma_addr(char *prefix, unsigned long rip); |
| 4230 | #else |
| 4231 | static inline void print_vma_addr(char *prefix, unsigned long rip) |
| 4232 | { |
| 4233 | } |
| 4234 | #endif |
| 4235 | |
| 4236 | void *sparse_buffer_alloc(unsigned long size); |
| 4237 | unsigned long section_map_size(void); |
| 4238 | struct page * __populate_section_memmap(unsigned long pfn, |
| 4239 | unsigned long nr_pages, int nid, struct vmem_altmap *altmap, |
| 4240 | struct dev_pagemap *pgmap); |
| 4241 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); |
| 4242 | p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); |
| 4243 | pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); |
| 4244 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); |
| 4245 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, |
| 4246 | struct vmem_altmap *altmap, unsigned long ptpfn, |
| 4247 | unsigned long flags); |
| 4248 | void *vmemmap_alloc_block(unsigned long size, int node); |
| 4249 | struct vmem_altmap; |
| 4250 | void *vmemmap_alloc_block_buf(unsigned long size, int node, |
| 4251 | struct vmem_altmap *altmap); |
| 4252 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); |
| 4253 | void vmemmap_set_pmd(pmd_t *pmd, void *p, int node, |
| 4254 | unsigned long addr, unsigned long next); |
| 4255 | int vmemmap_check_pmd(pmd_t *pmd, int node, |
| 4256 | unsigned long addr, unsigned long next); |
| 4257 | int vmemmap_populate_basepages(unsigned long start, unsigned long end, |
| 4258 | int node, struct vmem_altmap *altmap); |
| 4259 | int vmemmap_populate_hugepages(unsigned long start, unsigned long end, |
| 4260 | int node, struct vmem_altmap *altmap); |
| 4261 | int vmemmap_populate(unsigned long start, unsigned long end, int node, |
| 4262 | struct vmem_altmap *altmap); |
| 4263 | int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node, |
| 4264 | unsigned long headsize); |
| 4265 | int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node, |
| 4266 | unsigned long headsize); |
| 4267 | void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node, |
| 4268 | unsigned long headsize); |
| 4269 | void vmemmap_populate_print_last(void); |
| 4270 | #ifdef CONFIG_MEMORY_HOTPLUG |
| 4271 | void vmemmap_free(unsigned long start, unsigned long end, |
| 4272 | struct vmem_altmap *altmap); |
| 4273 | #endif |
| 4274 | |
| 4275 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
| 4276 | static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) |
| 4277 | { |
| 4278 | /* number of pfns from base where pfn_to_page() is valid */ |
| 4279 | if (altmap) |
| 4280 | return altmap->reserve + altmap->free; |
| 4281 | return 0; |
| 4282 | } |
| 4283 | |
| 4284 | static inline void vmem_altmap_free(struct vmem_altmap *altmap, |
| 4285 | unsigned long nr_pfns) |
| 4286 | { |
| 4287 | altmap->alloc -= nr_pfns; |
| 4288 | } |
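
/*
 * Worked example with illustrative numbers: for an altmap set up with
 * .reserve = 2 and .free = 1022, vmem_altmap_offset() returns 1024, i.e. the
 * first 1024 pfns of the range are consumed by the reserved area and by
 * backing the memmap itself, and only pfns from base_pfn + 1024 onwards have
 * a usable pfn_to_page() translation.
 */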
| 4289 | #else |
| 4290 | static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) |
| 4291 | { |
| 4292 | return 0; |
| 4293 | } |
| 4294 | |
| 4295 | static inline void vmem_altmap_free(struct vmem_altmap *altmap, |
| 4296 | unsigned long nr_pfns) |
| 4297 | { |
| 4298 | } |
| 4299 | #endif |
| 4300 | |
| 4301 | #define VMEMMAP_RESERVE_NR 2 |
| 4302 | #ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP |
| 4303 | static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, |
| 4304 | struct dev_pagemap *pgmap) |
| 4305 | { |
| 4306 | unsigned long nr_pages; |
| 4307 | unsigned long nr_vmemmap_pages; |
| 4308 | |
| 4309 | 	if (!pgmap || !is_power_of_2(sizeof(struct page)))
| 4310 | return false; |
| 4311 | |
| 4312 | nr_pages = pgmap_vmemmap_nr(pgmap); |
| 4313 | nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT); |
| 4314 | 	/*
| 4315 | 	 * For vmemmap optimization with DAX we need a minimum of 2 vmemmap
| 4316 | 	 * pages. See the layout diagram in Documentation/mm/vmemmap_dedup.rst.
| 4317 | 	 */
| 4318 | return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR); |
| 4319 | } |
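
/*
 * Worked example (assuming 4 KiB pages and a 64-byte struct page, so one
 * vmemmap page describes 64 base pages): a pgmap whose compound unit spans
 * 512 pages (2 MiB) needs 512 * 64 / 4096 = 8 vmemmap pages, which exceeds
 * VMEMMAP_RESERVE_NR and can therefore be optimized, whereas a 128-page unit
 * yields only 2 vmemmap pages and cannot.
 */
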
| 4320 | /* |
| 4321 | * If we don't have an architecture override, use the generic rule |
| 4322 | */ |
| 4323 | #ifndef vmemmap_can_optimize |
| 4324 | #define vmemmap_can_optimize __vmemmap_can_optimize |
| 4325 | #endif |
| 4326 | |
| 4327 | #else |
| 4328 | static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, |
| 4329 | struct dev_pagemap *pgmap) |
| 4330 | { |
| 4331 | return false; |
| 4332 | } |
| 4333 | #endif |
| 4334 | |
| 4335 | enum mf_flags { |
| 4336 | MF_COUNT_INCREASED = 1 << 0, |
| 4337 | MF_ACTION_REQUIRED = 1 << 1, |
| 4338 | MF_MUST_KILL = 1 << 2, |
| 4339 | MF_SOFT_OFFLINE = 1 << 3, |
| 4340 | MF_UNPOISON = 1 << 4, |
| 4341 | MF_SW_SIMULATED = 1 << 5, |
| 4342 | MF_NO_RETRY = 1 << 6, |
| 4343 | MF_MEM_PRE_REMOVE = 1 << 7, |
| 4344 | }; |
| 4345 | int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, |
| 4346 | unsigned long count, int mf_flags); |
| 4347 | extern int memory_failure(unsigned long pfn, int flags); |
| 4348 | extern int unpoison_memory(unsigned long pfn); |
| 4349 | extern atomic_long_t num_poisoned_pages __read_mostly; |
| 4350 | extern int soft_offline_page(unsigned long pfn, int flags); |
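
/*
 * Illustrative call site (a sketch; see the x86 MCE handling and
 * mm/memory-failure.c for the real logic): a synchronous machine-check
 * handler that must not let the consuming task continue would typically
 * report the faulting pfn as
 *
 *	memory_failure(pfn, MF_ACTION_REQUIRED | MF_MUST_KILL);
 *
 * while asynchronous (patrol-scrub style) reports usually pass flags == 0 or
 * go through memory_failure_queue() instead.
 */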
| 4351 | #ifdef CONFIG_MEMORY_FAILURE |
| 4352 | /* |
| 4353 | * Sysfs entries for memory failure handling statistics. |
| 4354 | */ |
| 4355 | extern const struct attribute_group memory_failure_attr_group; |
| 4356 | extern void memory_failure_queue(unsigned long pfn, int flags); |
| 4357 | extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, |
| 4358 | bool *migratable_cleared); |
| 4359 | void num_poisoned_pages_inc(unsigned long pfn); |
| 4360 | void num_poisoned_pages_sub(unsigned long pfn, long i); |
| 4361 | #else |
| 4362 | static inline void memory_failure_queue(unsigned long pfn, int flags) |
| 4363 | { |
| 4364 | } |
| 4365 | |
| 4366 | static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, |
| 4367 | bool *migratable_cleared) |
| 4368 | { |
| 4369 | return 0; |
| 4370 | } |
| 4371 | |
| 4372 | static inline void num_poisoned_pages_inc(unsigned long pfn) |
| 4373 | { |
| 4374 | } |
| 4375 | |
| 4376 | static inline void num_poisoned_pages_sub(unsigned long pfn, long i) |
| 4377 | { |
| 4378 | } |
| 4379 | #endif |
| 4380 | |
| 4381 | #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) |
| 4382 | extern void memblk_nr_poison_inc(unsigned long pfn); |
| 4383 | extern void memblk_nr_poison_sub(unsigned long pfn, long i); |
| 4384 | #else |
| 4385 | static inline void memblk_nr_poison_inc(unsigned long pfn) |
| 4386 | { |
| 4387 | } |
| 4388 | |
| 4389 | static inline void memblk_nr_poison_sub(unsigned long pfn, long i) |
| 4390 | { |
| 4391 | } |
| 4392 | #endif |
| 4393 | |
| 4394 | #ifndef arch_memory_failure |
| 4395 | static inline int arch_memory_failure(unsigned long pfn, int flags) |
| 4396 | { |
| 4397 | return -ENXIO; |
| 4398 | } |
| 4399 | #endif |
| 4400 | |
| 4401 | #ifndef arch_is_platform_page |
| 4402 | static inline bool arch_is_platform_page(u64 paddr) |
| 4403 | { |
| 4404 | return false; |
| 4405 | } |
| 4406 | #endif |
| 4407 | |
| 4408 | /* |
| 4409 | * Error handlers for various types of pages. |
| 4410 | */ |
| 4411 | enum mf_result { |
| 4412 | MF_IGNORED, /* Error: cannot be handled */ |
| 4413 | MF_FAILED, /* Error: handling failed */ |
| 4414 | MF_DELAYED, /* Will be handled later */ |
| 4415 | MF_RECOVERED, /* Successfully recovered */ |
| 4416 | }; |
| 4417 | |
| 4418 | enum mf_action_page_type { |
| 4419 | MF_MSG_KERNEL, |
| 4420 | MF_MSG_KERNEL_HIGH_ORDER, |
| 4421 | MF_MSG_DIFFERENT_COMPOUND, |
| 4422 | MF_MSG_HUGE, |
| 4423 | MF_MSG_FREE_HUGE, |
| 4424 | MF_MSG_GET_HWPOISON, |
| 4425 | MF_MSG_UNMAP_FAILED, |
| 4426 | MF_MSG_DIRTY_SWAPCACHE, |
| 4427 | MF_MSG_CLEAN_SWAPCACHE, |
| 4428 | MF_MSG_DIRTY_MLOCKED_LRU, |
| 4429 | MF_MSG_CLEAN_MLOCKED_LRU, |
| 4430 | MF_MSG_DIRTY_UNEVICTABLE_LRU, |
| 4431 | MF_MSG_CLEAN_UNEVICTABLE_LRU, |
| 4432 | MF_MSG_DIRTY_LRU, |
| 4433 | MF_MSG_CLEAN_LRU, |
| 4434 | MF_MSG_TRUNCATED_LRU, |
| 4435 | MF_MSG_BUDDY, |
| 4436 | MF_MSG_DAX, |
| 4437 | MF_MSG_UNSPLIT_THP, |
| 4438 | MF_MSG_ALREADY_POISONED, |
| 4439 | MF_MSG_PFN_MAP, |
| 4440 | MF_MSG_UNKNOWN, |
| 4441 | }; |
| 4442 | |
| 4443 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) |
| 4444 | void folio_zero_user(struct folio *folio, unsigned long addr_hint); |
| 4445 | int copy_user_large_folio(struct folio *dst, struct folio *src, |
| 4446 | unsigned long addr_hint, |
| 4447 | struct vm_area_struct *vma); |
| 4448 | long copy_folio_from_user(struct folio *dst_folio, |
| 4449 | const void __user *usr_src, |
| 4450 | bool allow_pagefault); |
| 4451 | |
| 4452 | /** |
| 4453 | * vma_is_special_huge - Are transhuge page-table entries considered special? |
| 4454 | * @vma: Pointer to the struct vm_area_struct to consider |
| 4455 | * |
| 4456 | * Whether transhuge page-table entries are considered "special" following |
| 4457 | * the definition in vm_normal_page(). |
| 4458 | * |
| 4459 | * Return: true if transhuge page-table entries should be considered special, |
| 4460 | * false otherwise. |
| 4461 | */ |
| 4462 | static inline bool vma_is_special_huge(const struct vm_area_struct *vma) |
| 4463 | { |
| 4464 | return vma_is_dax(vma) || (vma->vm_file && |
| 4465 | (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); |
| 4466 | } |
| 4467 | |
| 4468 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ |
| 4469 | |
| 4470 | #if MAX_NUMNODES > 1 |
| 4471 | void __init setup_nr_node_ids(void); |
| 4472 | #else |
| 4473 | static inline void setup_nr_node_ids(void) {} |
| 4474 | #endif |
| 4475 | |
| 4476 | extern int memcmp_pages(struct page *page1, struct page *page2); |
| 4477 | |
| 4478 | static inline int pages_identical(struct page *page1, struct page *page2) |
| 4479 | { |
| 4480 | return !memcmp_pages(page1, page2); |
| 4481 | } |
| 4482 | |
| 4483 | #ifdef CONFIG_MAPPING_DIRTY_HELPERS |
| 4484 | unsigned long clean_record_shared_mapping_range(struct address_space *mapping, |
| 4485 | pgoff_t first_index, pgoff_t nr, |
| 4486 | pgoff_t bitmap_pgoff, |
| 4487 | unsigned long *bitmap, |
| 4488 | pgoff_t *start, |
| 4489 | pgoff_t *end); |
| 4490 | |
| 4491 | unsigned long wp_shared_mapping_range(struct address_space *mapping, |
| 4492 | pgoff_t first_index, pgoff_t nr); |
| 4493 | #endif |
| 4494 | |
| 4495 | #ifdef CONFIG_ANON_VMA_NAME |
| 4496 | int set_anon_vma_name(unsigned long addr, unsigned long size, |
| 4497 | const char __user *uname); |
| 4498 | #else |
| 4499 | static inline |
| 4500 | int set_anon_vma_name(unsigned long addr, unsigned long size, |
| 4501 | const char __user *uname) |
| 4502 | { |
| 4503 | return -EINVAL; |
| 4504 | } |
| 4505 | #endif |
| 4506 | |
| 4507 | #ifdef CONFIG_UNACCEPTED_MEMORY |
| 4508 | |
| 4509 | bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size); |
| 4510 | void accept_memory(phys_addr_t start, unsigned long size); |
| 4511 | |
| 4512 | #else |
| 4513 | |
| 4514 | static inline bool range_contains_unaccepted_memory(phys_addr_t start, |
| 4515 | unsigned long size) |
| 4516 | { |
| 4517 | return false; |
| 4518 | } |
| 4519 | |
| 4520 | static inline void accept_memory(phys_addr_t start, unsigned long size) |
| 4521 | { |
| 4522 | } |
| 4523 | |
| 4524 | #endif |
| 4525 | |
| 4526 | static inline bool pfn_is_unaccepted_memory(unsigned long pfn) |
| 4527 | { |
| 4528 | 	return range_contains_unaccepted_memory(pfn << PAGE_SHIFT, PAGE_SIZE);
| 4529 | } |
| 4530 | |
| 4531 | void vma_pgtable_walk_begin(struct vm_area_struct *vma); |
| 4532 | void vma_pgtable_walk_end(struct vm_area_struct *vma); |
| 4533 | |
| 4534 | int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); |
| 4535 | int reserve_mem_release_by_name(const char *name); |
| 4536 | |
| 4537 | #ifdef CONFIG_64BIT |
| 4538 | int do_mseal(unsigned long start, size_t len_in, unsigned long flags); |
| 4539 | #else |
| 4540 | static inline int do_mseal(unsigned long start, size_t len_in, unsigned long flags) |
| 4541 | { |
| 4542 | /* noop on 32 bit */ |
| 4543 | return 0; |
| 4544 | } |
| 4545 | #endif |
| 4546 | |
| 4547 | /* |
| 4548 |  * user_alloc_needs_zeroing checks whether a user folio obtained from the
| 4549 |  * page allocator still needs to be zeroed by the caller.
| 4550 | */ |
| 4551 | static inline bool user_alloc_needs_zeroing(void) |
| 4552 | { |
| 4553 | 	/*
| 4554 | 	 * For user folios, an arch with cache aliasing requires a cache flush,
| 4555 | 	 * and ARC changes folio->flags to make the icache coherent with the
| 4556 | 	 * dcache; in those cases return true so the caller zeroes the folio
| 4557 | 	 * itself via clear_user_page()/clear_user_highpage().
| 4558 | 	 */
| 4559 | return cpu_dcache_is_aliasing() || cpu_icache_is_aliasing() || |
| 4560 | !static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, |
| 4561 | &init_on_alloc); |
| 4562 | } |
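
/*
 * Typical caller pattern (a sketch; the real helper is
 * vma_alloc_zeroed_movable_folio() in include/linux/highmem.h): after
 * allocating a movable folio for an anonymous fault, only clear it by hand
 * when the allocator's pre-zeroing cannot be relied upon:
 *
 *	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr);
 *	if (folio && user_alloc_needs_zeroing())
 *		clear_user_highpage(&folio->page, vaddr);
 */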
| 4563 | |
| 4564 | int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status); |
| 4565 | int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); |
| 4566 | int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); |
| 4567 | |
| 4568 | /* |
| 4569 | * DMA mapping IDs for page_pool |
| 4570 | * |
| 4571 | * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and |
| 4572 | * stashes it in the upper bits of page->pp_magic. We always want to be able to |
| 4573 | * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP |
| 4574 | * pages can have arbitrary kernel pointers stored in the same field as pp_magic |
| 4575 | * (since it overlaps with page->lru.next), so we must ensure that we cannot |
| 4576 |  * mistake a valid kernel pointer for any of the values we write into this
| 4577 | * field. |
| 4578 | * |
| 4579 | * On architectures that set POISON_POINTER_DELTA, this is already ensured, |
| 4580 | * since this value becomes part of PP_SIGNATURE; meaning we can just use the |
| 4581 | * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the |
| 4582 | * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is |
| 4583 | * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is |
| 4584 | * known at compile-time. |
| 4585 | * |
| 4586 | * If the value of PAGE_OFFSET is not known at compile time, or if it is too |
| 4587 | * small to leave at least 8 bits available above PP_SIGNATURE, we define the |
| 4588 | * number of bits to be 0, which turns off the DMA index tracking altogether |
| 4589 | * (see page_pool_register_dma_index()). |
| 4590 | */ |
| 4591 | #define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) |
| 4592 | #if POISON_POINTER_DELTA > 0 |
| 4593 | /* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
| 4594 |  * index so that it does not overlap with POISON_POINTER_DELTA when it is set.
| 4595 |  */
| 4596 | #define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) |
| 4597 | #else |
| 4598 | /* Use the lowest bit of PAGE_OFFSET if there are at least 8 bits available; see above */
| 4599 | #define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8)) |
| 4600 | #define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \ |
| 4601 | PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \ |
| 4602 | !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? \ |
| 4603 | MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0) |
| 4604 | |
| 4605 | #endif |
| 4606 | |
| 4607 | #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ |
| 4608 | PP_DMA_INDEX_SHIFT) |
| 4609 | |
| 4610 | /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is |
| 4611 | * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for |
| 4612 |  * the head page of a compound page and bit 1 for a pfmemalloc page, as well
| 4613 |  * as the bits used for the DMA index. page_is_pfmemalloc() is checked in
| 4614 |  * __page_pool_put_page() to avoid recycling a pfmemalloc page.
| 4615 | */ |
| 4616 | #define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) |
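
/*
 * Worked example (illustrative, assuming x86-64 defaults where
 * POISON_POINTER_DELTA is 0xdead000000000000 and PP_SIGNATURE is
 * 0x40 + POISON_POINTER_DELTA): __fls(0x40) == 6, so PP_DMA_INDEX_SHIFT is 7;
 * __ffs(POISON_POINTER_DELTA) == 48, so PP_DMA_INDEX_BITS is
 * MIN(32, 48 - 7) == 32 and the DMA index occupies bits 7..38 of
 * page->pp_magic. Bits 0-1 remain available for the compound-head and
 * pfmemalloc flags, and the remaining high bits are what PP_MAGIC_MASK
 * compares against PP_SIGNATURE.
 */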
| 4617 | |
| 4618 | #ifdef CONFIG_PAGE_POOL |
| 4619 | static inline bool page_pool_page_is_pp(const struct page *page) |
| 4620 | { |
| 4621 | return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; |
| 4622 | } |
| 4623 | #else |
| 4624 | static inline bool page_pool_page_is_pp(const struct page *page) |
| 4625 | { |
| 4626 | return false; |
| 4627 | } |
| 4628 | #endif |
| 4629 | |
| 4630 | #define PAGE_SNAPSHOT_FAITHFUL (1 << 0) |
| 4631 | #define PAGE_SNAPSHOT_PG_BUDDY (1 << 1) |
| 4632 | #define PAGE_SNAPSHOT_PG_IDLE (1 << 2) |
| 4633 | |
| 4634 | struct page_snapshot { |
| 4635 | struct folio folio_snapshot; |
| 4636 | struct page page_snapshot; |
| 4637 | unsigned long pfn; |
| 4638 | unsigned long idx; |
| 4639 | unsigned long flags; |
| 4640 | }; |
| 4641 | |
| 4642 | static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) |
| 4643 | { |
| 4644 | return ps->flags & PAGE_SNAPSHOT_FAITHFUL; |
| 4645 | } |
| 4646 | |
| 4647 | void snapshot_page(struct page_snapshot *ps, const struct page *page); |
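
/*
 * Illustrative usage (a sketch): diagnostic code that wants a stable view of
 * a page that may be changing concurrently can take a snapshot and then
 * inspect the copies without further synchronization:
 *
 *	struct page_snapshot ps;
 *
 *	snapshot_page(&ps, page);
 *	if (snapshot_page_is_faithful(&ps) &&
 *	    (ps.flags & PAGE_SNAPSHOT_PG_BUDDY))
 *		pr_info("pfn %#lx is a free buddy page\n", ps.pfn);
 */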
| 4648 | |
| 4649 | #endif /* _LINUX_MM_H */ |
| 4650 | |