sched.h source code [linux/include/linux/sched.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _LINUX_SCHED_H
3	#define _LINUX_SCHED_H
4
5	/*
6	* Define 'struct task_struct' and provide the main scheduler
7	* APIs (schedule(), wakeup variants, etc.)
8	*/
9
10	#include <uapi/linux/sched.h>
11
12	#include <asm/current.h>
13	#include <asm/processor.h>
14	#include <linux/thread_info.h>
15	#include <linux/preempt.h>
16	#include <linux/cpumask_types.h>
17
18	#include <linux/cache.h>
19	#include <linux/irqflags_types.h>
20	#include <linux/smp_types.h>
21	#include <linux/pid_types.h>
22	#include <linux/sem_types.h>
23	#include <linux/shm.h>
24	#include <linux/kmsan_types.h>
25	#include <linux/mutex_types.h>
26	#include <linux/plist_types.h>
27	#include <linux/hrtimer_types.h>
28	#include <linux/timer_types.h>
29	#include <linux/seccomp_types.h>
30	#include <linux/nodemask_types.h>
31	#include <linux/refcount_types.h>
32	#include <linux/resource.h>
33	#include <linux/latencytop.h>
34	#include <linux/sched/prio.h>
35	#include <linux/sched/types.h>
36	#include <linux/signal_types.h>
37	#include <linux/spinlock.h>
38	#include <linux/syscall_user_dispatch_types.h>
39	#include <linux/mm_types_task.h>
40	#include <linux/netdevice_xmit.h>
41	#include <linux/task_io_accounting.h>
42	#include <linux/posix-timers_types.h>
43	#include <linux/restart_block.h>
44	#include <linux/rseq_types.h>
45	#include <linux/seqlock_types.h>
46	#include <linux/kcsan.h>
47	#include <linux/rv.h>
48	#include <linux/uidgid_types.h>
49	#include <linux/tracepoint-defs.h>
50	#include <linux/unwind_deferred_types.h>
51	#include <asm/kmap_size.h>
52	#ifndef COMPILE_OFFSETS
53	#include <generated/rq-offsets.h>
54	#endif
55
/*
 * task_struct member predeclarations (sorted alphabetically).
 *
 * Only the struct tags are introduced here; the full definitions live
 * elsewhere.
 */
struct audit_context;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct bpf_net_context;
struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
struct futex_pi_state;
struct io_context;
struct io_uring_task;
struct mempolicy;
struct nameidata;
struct nsproxy;
struct perf_ctx_data;
struct perf_event_context;
struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
struct reclaim_state;
struct robust_list_head;
struct root_domain;
struct rq;
struct sched_attr;
struct sched_dl_entity;
struct seq_file;
struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
struct task_struct;
struct user_event_mm;

90
91	#include <linux/sched/ext.h>
92
93	/*
94	* Task state bitmask. NOTE! These bits are also
95	* encoded in fs/proc/array.c: get_task_state().
96	*
97	* We have two separate sets of flags: task->__state
98	* is about runnability, while task->exit_state are
99	* about the task exiting. Confusing, but this way
100	* modifying one set can't modify the other one by
101	* mistake.
102	*/
103
/* Used in tsk->__state: */
#define TASK_RUNNING			0x00000000
#define TASK_INTERRUPTIBLE		0x00000001
#define TASK_UNINTERRUPTIBLE		0x00000002
#define __TASK_STOPPED			0x00000004
#define __TASK_TRACED			0x00000008
/* Used in tsk->exit_state: */
#define EXIT_DEAD			0x00000010
#define EXIT_ZOMBIE			0x00000020
#define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
/* Used in tsk->__state again: */
#define TASK_PARKED			0x00000040
#define TASK_DEAD			0x00000080
#define TASK_WAKEKILL			0x00000100
#define TASK_WAKING			0x00000200
#define TASK_NOLOAD			0x00000400
#define TASK_NEW			0x00000800
#define TASK_RTLOCK_WAIT		0x00001000
#define TASK_FREEZABLE			0x00002000
/* Evaluates to 0 unless CONFIG_LOCKDEP is enabled. */
#define __TASK_FREEZABLE_UNSAFE		(0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
#define TASK_FROZEN			0x00008000
#define TASK_STATE_MAX			0x00010000

/* Mask of every bit below TASK_STATE_MAX. */
#define TASK_ANY			(TASK_STATE_MAX - 1)

/*
 * DO NOT ADD ANY NEW USERS !
 */
#define TASK_FREEZABLE_UNSAFE		(TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE)

/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED			__TASK_TRACED

#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

/* get_task_state(): */
#define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
					 TASK_PARKED)
149
/*
 * Task state predicates.
 *
 * task_is_running() compares ->__state against TASK_RUNNING; the stopped and
 * traced variants test the JOBCTL_STOPPED / JOBCTL_TRACED bits of ->jobctl.
 * Every read goes through READ_ONCE(). @task is parenthesized in each macro
 * so that any pointer-valued expression can be passed.
 */
#define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)

#define task_is_traced(task)		((READ_ONCE((task)->jobctl) & JOBCTL_TRACED) != 0)
#define task_is_stopped(task)		((READ_ONCE((task)->jobctl) & JOBCTL_STOPPED) != 0)
#define task_is_stopped_or_traced(task)	((READ_ONCE((task)->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0)
155
/*
 * Special states are those that do not use the normal wait-loop pattern. See
 * the comment with set_special_state().
 *
 * Evaluates to non-zero when @state has any of the listed bits set.
 */
#define is_special_task_state(state)					\
	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED |	\
		    TASK_DEAD | TASK_FROZEN))
163
/*
 * Debug hooks used by the state-setting macros.
 *
 * With CONFIG_DEBUG_ATOMIC_SLEEP the normal/special variants warn once when
 * the state class does not match the helper being used, and record
 * _THIS_IP_ in current->task_state_change. The rtlock variants save and
 * restore that record through current->saved_state_change. Without the
 * option all four expand to empty statements.
 */
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define debug_normal_state_change(state_value)				\
	do {								\
		WARN_ON_ONCE(is_special_task_state(state_value));	\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

# define debug_special_state_change(state_value)			\
	do {								\
		WARN_ON_ONCE(!is_special_task_state(state_value));	\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

# define debug_rtlock_wait_set_state()					\
	do {								 \
		current->saved_state_change = current->task_state_change;\
		current->task_state_change = _THIS_IP_;			 \
	} while (0)

# define debug_rtlock_wait_restore_state()				\
	do {								 \
		current->task_state_change = current->saved_state_change;\
	} while (0)

#else
# define debug_normal_state_change(state_value)	do { } while (0)
# define debug_special_state_change(state_value)	do { } while (0)
# define debug_rtlock_wait_set_state()		do { } while (0)
# define debug_rtlock_wait_restore_state()	do { } while (0)
#endif
194
/*
 * Call __trace_set_current_state() with @state_value, but only when the
 * sched_set_state_tp tracepoint reports itself enabled.
 */
#define trace_set_current_state(state_value)				\
	do {								\
		if (tracepoint_enabled(sched_set_state_tp))		\
			__trace_set_current_state(state_value);		\
	} while (0)
200
201	/*
202	* set_current_state() includes a barrier so that the write of current->__state
203	* is correctly serialised wrt the caller's subsequent test of whether to
204	* actually sleep:
205	*
206	* for (;;) {
207	* set_current_state(TASK_UNINTERRUPTIBLE);
208	* if (CONDITION)
209	* break;
210	*
211	* schedule();
212	* }
213	* __set_current_state(TASK_RUNNING);
214	*
215	* If the caller does not need such serialisation (because, for instance, the
216	* CONDITION test and condition change and wakeup are under the same lock) then
217	* use __set_current_state().
218	*
219	* The above is typically ordered against the wakeup, which does:
220	*
221	* CONDITION = 1;
222	* wake_up_state(p, TASK_UNINTERRUPTIBLE);
223	*
224	* where wake_up_state()/try_to_wake_up() executes a full memory barrier before
225	* accessing p->__state.
226	*
227	* Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
228	* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
229	* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
230	*
231	* However, with slightly different timing the wakeup TASK_RUNNING store can
232	* also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
233	* a problem either because that will result in one extra go around the loop
234	* and our @cond test will save the day.
235	*
236	* Also see the comments of try_to_wake_up().
237	*/
/*
 * Store @state_value into current->__state with WRITE_ONCE(), after running
 * the normal-state debug check and the state tracepoint hook. No barrier is
 * issued here; see set_current_state() for the variant that includes one.
 */
#define __set_current_state(state_value)				\
	do {								\
		debug_normal_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		WRITE_ONCE(current->__state, (state_value));		\
	} while (0)
244
/*
 * Same as __set_current_state(), except that the store to current->__state
 * is done with smp_store_mb() instead of WRITE_ONCE().
 */
#define set_current_state(state_value)					\
	do {								\
		debug_normal_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		smp_store_mb(current->__state, (state_value));		\
	} while (0)
251
252	/*
253	* set_special_state() should be used for those states when the blocking task
254	* can not use the regular condition based wait-loop. In that case we must
255	* serialize against wakeups such that any possible in-flight TASK_RUNNING
256	* stores will not collide with our state change.
257	*/
/*
 * Store @state_value into current->__state while holding current->pi_lock
 * (taken with raw_spin_lock_irqsave()). The special-state debug check and
 * the state tracepoint hook run inside the locked section as well.
 */
#define set_special_state(state_value)					\
	do {								\
		unsigned long flags; /* may shadow */			\
									\
		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
		debug_special_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		WRITE_ONCE(current->__state, (state_value));		\
		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
	} while (0)
268
269	/*
270	* PREEMPT_RT specific variants for "sleeping" spin/rwlocks
271	*
272	* RT's spin/rwlock substitutions are state preserving. The state of the
273	* task when blocking on the lock is saved in task_struct::saved_state and
274	* restored after the lock has been acquired. These operations are
275	* serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
276	* lock related wakeups while the task is blocked on the lock are
277	* redirected to operate on task_struct::saved_state to ensure that these
278	* are not dropped. On restore task_struct::saved_state is set to
279	* TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
280	*
281	* The lock operation looks like this:
282	*
283	* current_save_and_set_rtlock_wait_state();
284	* for (;;) {
285	* if (try_lock())
286	* break;
287	* raw_spin_unlock_irq(&lock->wait_lock);
288	* schedule_rtlock();
289	* raw_spin_lock_irq(&lock->wait_lock);
290	* set_current_state(TASK_RTLOCK_WAIT);
291	* }
292	* current_restore_rtlock_saved_state();
293	*/
/*
 * Under current->pi_lock: copy current->__state into current->saved_state,
 * then set current->__state to TASK_RTLOCK_WAIT. Asserts (via lockdep) that
 * interrupts are disabled on entry.
 *
 * The expansion deliberately has no trailing semicolon, so the macro behaves
 * as a single statement and is safe in an unbraced if/else.
 */
#define current_save_and_set_rtlock_wait_state()			\
	do {								\
		lockdep_assert_irqs_disabled();				\
		raw_spin_lock(&current->pi_lock);			\
		current->saved_state = current->__state;		\
		debug_rtlock_wait_set_state();				\
		trace_set_current_state(TASK_RTLOCK_WAIT);		\
		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);		\
		raw_spin_unlock(&current->pi_lock);			\
	} while (0)
304
/*
 * Under current->pi_lock: write current->saved_state back into
 * current->__state, then reset current->saved_state to TASK_RUNNING.
 * Asserts (via lockdep) that interrupts are disabled on entry.
 *
 * The expansion deliberately has no trailing semicolon, so the macro behaves
 * as a single statement and is safe in an unbraced if/else.
 */
#define current_restore_rtlock_saved_state()				\
	do {								\
		lockdep_assert_irqs_disabled();				\
		raw_spin_lock(&current->pi_lock);			\
		debug_rtlock_wait_restore_state();			\
		trace_set_current_state(current->saved_state);		\
		WRITE_ONCE(current->__state, current->saved_state);	\
		current->saved_state = TASK_RUNNING;			\
		raw_spin_unlock(&current->pi_lock);			\
	} while (0)
315
/* Read current->__state through READ_ONCE(). */
#define get_current_state()	READ_ONCE(current->__state)
317
/*
 * Define the task command name length as enum, then it can be visible to
 * BPF programs.
 */
enum {
	TASK_COMM_LEN = 16,
};
325
326	extern void sched_tick(void);
327
328	#define MAX_SCHEDULE_TIMEOUT LONG_MAX
329
330	extern long schedule_timeout(long timeout);
331	extern long schedule_timeout_interruptible(long timeout);
332	extern long schedule_timeout_killable(long timeout);
333	extern long schedule_timeout_uninterruptible(long timeout);
334	extern long schedule_timeout_idle(long timeout);
335	asmlinkage void schedule(void);
336	extern void schedule_preempt_disabled(void);
337	asmlinkage void preempt_schedule_irq(void);
338	#ifdef CONFIG_PREEMPT_RT
339	extern void schedule_rtlock(void);
340	#endif
341
342	extern int __must_check io_schedule_prepare(void);
343	extern void io_schedule_finish(int token);
344	extern long io_schedule_timeout(long timeout);
345	extern void io_schedule(void);
346
/* wrapper functions to trace from this header file */
DECLARE_TRACEPOINT(sched_set_state_tp);
extern void __trace_set_current_state(int state_value);
DECLARE_TRACEPOINT(sched_set_need_resched_tp);
/*
 * NOTE(review): the pointer star on @curr was displaced in the text this was
 * recovered from; confirm the signature against the definition.
 */
extern void __trace_set_need_resched(struct task_struct *curr, int tif);
352
353	/**
354	* struct prev_cputime - snapshot of system and user cputime
355	* @utime: time spent in user mode
356	* @stime: time spent in system mode
357	* @lock: protects the above two fields
358	*
359	* Stores previous user/system time values such that we can guarantee
360	* monotonicity.
361	*/
362	struct prev_cputime {
363	#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
364	u64 utime;
365	u64 stime;
366	raw_spinlock_t lock;
367	#endif
368	};
369
/* Values stored in struct vtime::state. */
enum vtime_state {
	/* Task is sleeping or running in a CPU with VTIME inactive: */
	VTIME_INACTIVE = 0,
	/* Task is idle */
	VTIME_IDLE,
	/* Task runs in kernelspace in a CPU with VTIME active: */
	VTIME_SYS,
	/* Task runs in userspace in a CPU with VTIME active: */
	VTIME_USER,
	/* Task runs as guests in a CPU with VTIME active: */
	VTIME_GUEST,
};
382
383	struct vtime {
384	seqcount_t seqcount;
385	unsigned long long starttime;
386	enum vtime_state state;
387	unsigned int cpu;
388	u64 utime;
389	u64 stime;
390	u64 gtime;
391	};
392
/*
 * Utilization clamp constraints.
 * @UCLAMP_MIN:	Minimum utilization
 * @UCLAMP_MAX:	Maximum utilization
 * @UCLAMP_CNT:	Utilization clamp constraints count
 */
enum uclamp_id {
	UCLAMP_MIN = 0,
	UCLAMP_MAX,
	UCLAMP_CNT
};
404
/* Default root domain and the sched-domains mutex with its lock helpers. */
extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
extern void sched_domains_mutex_lock(void);
extern void sched_domains_mutex_unlock(void);
409
/* Scheduling parameters: a single integer priority. */
struct sched_param {
	int sched_priority;
};
413
/*
 * Per-task run/wait accounting. The struct has no members unless
 * CONFIG_SCHED_INFO is enabled.
 */
struct sched_info {
#ifdef CONFIG_SCHED_INFO
	/* Cumulative counters: */

	/* # of times we have run on this CPU: */
	unsigned long			pcount;

	/* Time spent waiting on a runqueue: */
	unsigned long long		run_delay;

	/* Max time spent waiting on a runqueue: */
	unsigned long long		max_run_delay;

	/* Min time spent waiting on a runqueue: */
	unsigned long long		min_run_delay;

	/* Timestamps: */

	/* When did we last run on a CPU? */
	unsigned long long		last_arrival;

	/* When were we last queued to run? */
	unsigned long long		last_queued;

#endif /* CONFIG_SCHED_INFO */
};
440
/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
# define SCHED_FIXEDPOINT_SHIFT		10
# define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)

/* Increase resolution of cpu_capacity calculations */
# define SCHED_CAPACITY_SHIFT		SCHED_FIXEDPOINT_SHIFT
# define SCHED_CAPACITY_SCALE		(1L << SCHED_CAPACITY_SHIFT)
454
455	struct load_weight {
456	unsigned long weight;
457	u32 inv_weight;
458	};
459
460	/*
461	* The load/runnable/util_avg accumulates an infinite geometric series
462	* (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
463	*
464	* [load_avg definition]
465	*
466	* load_avg = runnable% * scale_load_down(load)
467	*
468	* [runnable_avg definition]
469	*
470	* runnable_avg = runnable% * SCHED_CAPACITY_SCALE
471	*
472	* [util_avg definition]
473	*
474	* util_avg = running% * SCHED_CAPACITY_SCALE
475	*
476	* where runnable% is the time ratio that a sched_entity is runnable and
477	* running% the time ratio that a sched_entity is running.
478	*
479	* For cfs_rq, they are the aggregated values of all runnable and blocked
480	* sched_entities.
481	*
482	* The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
483	* capacity scaling. The scaling is done through the rq_clock_pelt that is used
484	* for computing those signals (see update_rq_clock_pelt())
485	*
486	* N.B., the above ratios (runnable% and running%) themselves are in the
487	* range of [0, 1]. To do fixed point arithmetics, we therefore scale them
488	* to as large a range as necessary. This is for example reflected by
489	* util_avg's SCHED_CAPACITY_SCALE.
490	*
491	* [Overflow issue]
492	*
493	* The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
494	* with the highest load (=88761), always runnable on a single cfs_rq,
495	* and should not overflow as the number already hits PID_MAX_LIMIT.
496	*
497	* For all other cases (including 32-bit kernels), struct load_weight's
498	* weight will overflow first before we do, because:
499	*
500	* Max(load_avg) <= Max(load.weight)
501	*
502	* Then it is the load_weight's responsibility to consider overflow
503	* issues.
504	*/
505	struct sched_avg {
506	u64 last_update_time;
507	u64 load_sum;
508	u64 runnable_sum;
509	u32 util_sum;
510	u32 period_contrib;
511	unsigned long load_avg;
512	unsigned long runnable_avg;
513	unsigned long util_avg;
514	unsigned int util_est;
515	} ____cacheline_aligned;
516
/*
 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
 * updates. When a task is dequeued, its util_est should not be updated if its
 * util_avg has not been updated in the meantime.
 * This information is mapped into the MSB bit of util_est at dequeue time.
 * Since max value of util_est for a task is 1024 (PELT util_avg for a task)
 * it is safe to use MSB.
 */
#define UTIL_EST_WEIGHT_SHIFT		2
#define UTIL_AVG_UNCHANGED		0x80000000
527
/*
 * Scheduler statistics counters. The struct has no members unless
 * CONFIG_SCHEDSTATS is enabled; core_forceidle_sum additionally requires
 * CONFIG_SCHED_CORE. The whole struct is cacheline aligned.
 */
struct sched_statistics {
#ifdef CONFIG_SCHEDSTATS
	u64				wait_start;
	u64				wait_max;
	u64				wait_count;
	u64				wait_sum;
	u64				iowait_count;
	u64				iowait_sum;

	u64				sleep_start;
	u64				sleep_max;
	s64				sum_sleep_runtime;

	u64				block_start;
	u64				block_max;
	s64				sum_block_runtime;

	s64				exec_max;
	u64				slice_max;

	u64				nr_migrations_cold;
	u64				nr_failed_migrations_affine;
	u64				nr_failed_migrations_running;
	u64				nr_failed_migrations_hot;
	u64				nr_forced_migrations;

	u64				nr_wakeups;
	u64				nr_wakeups_sync;
	u64				nr_wakeups_migrate;
	u64				nr_wakeups_local;
	u64				nr_wakeups_remote;
	u64				nr_wakeups_affine;
	u64				nr_wakeups_affine_attempts;
	u64				nr_wakeups_passive;
	u64				nr_wakeups_idle;

#ifdef CONFIG_SCHED_CORE
	u64				core_forceidle_sum;
#endif
#endif /* CONFIG_SCHEDSTATS */
} ____cacheline_aligned;
569
570	struct sched_entity {
571	/ For load-balancing: /
572	struct load_weight load;
573	struct rb_node run_node;
574	u64 deadline;
575	u64 min_vruntime;
576	u64 min_slice;
577
578	struct list_head group_node;
579	unsigned char on_rq;
580	unsigned char sched_delayed;
581	unsigned char rel_deadline;
582	unsigned char custom_slice;
583	/ hole /
584
585	u64 exec_start;
586	u64 sum_exec_runtime;
587	u64 prev_sum_exec_runtime;
588	u64 vruntime;
589	union {
590	/*
591	* When !@on_rq this field is vlag.
592	* When cfs_rq->curr == se (which implies @on_rq)
593	* this field is vprot. See protect_slice().
594	*/
595	s64 vlag;
596	u64 vprot;
597	};
598	u64 slice;
599
600	u64 nr_migrations;
601
602	#ifdef CONFIG_FAIR_GROUP_SCHED
603	int depth;
604	struct sched_entity *parent;
605	/ rq on which this entity is (to be) queued: /
606	struct cfs_rq *cfs_rq;
607	/ rq "owned" by this entity/group: /
608	struct cfs_rq *my_q;
609	/ cached value of my_q->h_nr_running /
610	unsigned long runnable_weight;
611	#endif
612
613	/*
614	* Per entity load average tracking.
615	*
616	* Put into separate cache line so it does not
617	* collide with read-mostly values above.
618	*/
619	struct sched_avg avg;
620	};
621
622	struct sched_rt_entity {
623	struct list_head run_list;
624	unsigned long timeout;
625	unsigned long watchdog_stamp;
626	unsigned int time_slice;
627	unsigned short on_rq;
628	unsigned short on_list;
629
630	struct sched_rt_entity *back;
631	#ifdef CONFIG_RT_GROUP_SCHED
632	struct sched_rt_entity *parent;
633	/ rq on which this entity is (to be) queued: /
634	struct rt_rq *rt_rq;
635	/ rq "owned" by this entity/group: /
636	struct rt_rq *my_q;
637	#endif
638	} __randomize_layout;
639
struct rq_flags;

/*
 * Callback type stored in sched_dl_entity::server_pick_task: takes the
 * server's sched_dl_entity and an rq_flags pointer, returns a task pointer.
 *
 * NOTE(review): the pointer stars were displaced in the text this was
 * recovered from; confirm the signature against the callback implementations.
 */
typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *, struct rq_flags *rf);
642
643	struct sched_dl_entity {
644	struct rb_node rb_node;
645
646	/*
647	* Original scheduling parameters. Copied here from sched_attr
648	* during sched_setattr(), they will remain the same until
649	* the next sched_setattr().
650	*/
651	u64 dl_runtime; / Maximum runtime for each instance /
652	u64 dl_deadline; / Relative deadline of each instance /
653	u64 dl_period; / Separation of two instances (period) /
654	u64 dl_bw; / dl_runtime / dl_period /
655	u64 dl_density; / dl_runtime / dl_deadline /
656
657	/*
658	* Actual scheduling parameters. Initialized with the values above,
659	* they are continuously updated during task execution. Note that
660	* the remaining runtime could be < 0 in case we are in overrun.
661	*/
662	s64 runtime; / Remaining runtime for this instance /
663	u64 deadline; / Absolute deadline for this instance /
664	unsigned int flags; / Specifying the scheduler behaviour /
665
666	/*
667	* Some bool flags:
668	*
669	* @dl_throttled tells if we exhausted the runtime. If so, the
670	* task has to wait for a replenishment to be performed at the
671	* next firing of dl_timer.
672	*
673	* @dl_yielded tells if task gave up the CPU before consuming
674	* all its available runtime during the last job.
675	*
676	* @dl_non_contending tells if the task is inactive while still
677	* contributing to the active utilization. In other words, it
678	* indicates if the inactive timer has been armed and its handler
679	* has not been executed yet. This flag is useful to avoid race
680	* conditions between the inactive timer handler and the wakeup
681	* code.
682	*
683	* @dl_overrun tells if the task asked to be informed about runtime
684	* overruns.
685	*
686	* @dl_server tells if this is a server entity.
687	*
688	* @dl_server_active tells if the dlserver is active(started).
689	* dlserver is started on first cfs enqueue on an idle runqueue
690	* and is stopped when a dequeue results in 0 cfs tasks on the
691	* runqueue. In other words, dlserver is active only when cpu's
692	* runqueue has atleast one cfs task.
693	*
694	* @dl_defer tells if this is a deferred or regular server. For
695	* now only defer server exists.
696	*
697	* @dl_defer_armed tells if the deferrable server is waiting
698	* for the replenishment timer to activate it.
699	*
700	* @dl_defer_running tells if the deferrable server is actually
701	* running, skipping the defer phase.
702	*
703	* @dl_defer_idle tracks idle state
704	*/
705	unsigned int dl_throttled : `1`;
706	unsigned int dl_yielded : `1`;
707	unsigned int dl_non_contending : `1`;
708	unsigned int dl_overrun : `1`;
709	unsigned int dl_server : `1`;
710	unsigned int dl_server_active : `1`;
711	unsigned int dl_defer : `1`;
712	unsigned int dl_defer_armed : `1`;
713	unsigned int dl_defer_running : `1`;
714	unsigned int dl_defer_idle : `1`;
715
716	/*
717	* Bandwidth enforcement timer. Each -deadline task has its
718	* own bandwidth to be enforced, thus we need one timer per task.
719	*/
720	struct hrtimer dl_timer;
721
722	/*
723	* Inactive timer, responsible for decreasing the active utilization
724	* at the "0-lag time". When a -deadline task blocks, it contributes
725	* to GRUB's active utilization until the "0-lag time", hence a
726	* timer is needed to decrease the active utilization at the correct
727	* time.
728	*/
729	struct hrtimer inactive_timer;
730
731	/*
732	* Bits for DL-server functionality. Also see the comment near
733	* dl_server_update().
734	*
735	* @rq the runqueue this server is for
736	*/
737	struct rq *rq;
738	dl_server_pick_f server_pick_task;
739
740	#ifdef CONFIG_RT_MUTEXES
741	/*
742	* Priority Inheritance. When a DEADLINE scheduling entity is boosted
743	* pi_se points to the donor, otherwise points to the dl_se it belongs
744	* to (the original one/itself).
745	*/
746	struct sched_dl_entity *pi_se;
747	#endif
748	};
749
#ifdef CONFIG_UCLAMP_TASK
/* Number of utilization clamp buckets (shorter alias) */
#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT

/*
 * Utilization clamp for a scheduling entity
 * @value:		clamp value "assigned" to a se
 * @bucket_id:		bucket index corresponding to the "assigned" value
 * @active:		the se is currently refcounted in a rq's bucket
 * @user_defined:	the requested clamp value comes from user-space
 *
 * The bucket_id is the index of the clamp bucket matching the clamp value
 * which is pre-computed and stored to avoid expensive integer divisions from
 * the fast path.
 *
 * The active bit is set whenever a task has got an "effective" value assigned,
 * which can be different from the clamp value "requested" from user-space.
 * This allows to know a task is refcounted in the rq's bucket corresponding
 * to the "effective" bucket_id.
 *
 * The user_defined bit is set whenever a task has got a task-specific clamp
 * value requested from userspace, i.e. the system defaults apply to this task
 * just as a restriction. This allows to relax default clamps when a less
 * restrictive task-specific value has been requested, thus allowing to
 * implement a "nice" semantic. For example, a task running with a 20%
 * default boost can still drop its own boosting to 0%.
 */
struct uclamp_se {
	unsigned int value		: bits_per(SCHED_CAPACITY_SCALE);
	unsigned int bucket_id		: bits_per(UCLAMP_BUCKETS);
	unsigned int active		: 1;
	unsigned int user_defined	: 1;
};
#endif /* CONFIG_UCLAMP_TASK */
784
785	union rcu_special {
786	struct {
787	u8 blocked;
788	u8 need_qs;
789	u8 exp_hint; / Hint for performance. /
790	u8 need_mb; / Readers need smp_mb(). /
791	} b; / Bits. /
792	u32 s; / Set of bits. /
793	};
794
/*
 * Per-task perf context identifiers. perf_nr_task_contexts is the number of
 * valid contexts; perf_invalid_context is a negative sentinel.
 */
enum perf_event_task_context {
	perf_invalid_context = -1,
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,
};
801
/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4
807
/* Singly linked list node: holds only a pointer to the next node. */
struct wake_q_node {
	struct wake_q_node *next;
};
811
/*
 * Local kmap bookkeeping: an index plus an array of KM_MAX_IDX pte values.
 * The struct has no members unless CONFIG_KMAP_LOCAL is enabled.
 */
struct kmap_ctrl {
#ifdef CONFIG_KMAP_LOCAL
	int				idx;
	pte_t				pteval[KM_MAX_IDX];
#endif
};
818
819	struct task_struct {
820	#ifdef CONFIG_THREAD_INFO_IN_TASK
821	/*
822	* For reasons of header soup (see current_thread_info()), this
823	* must be the first element of task_struct.
824	*/
825	struct thread_info thread_info;
826	#endif
827	unsigned int __state;
828
829	/ saved state for "spinlock sleepers" /
830	unsigned int saved_state;
831
832	/*
833	* This begins the randomizable portion of task_struct. Only
834	* scheduling-critical items should be added above here.
835	*/
836	randomized_struct_fields_start
837
838	void *stack;
839	refcount_t usage;
840	/ Per task flags (PF_), defined further below: /*
841	unsigned int flags;
842	unsigned int ptrace;
843
844	#ifdef CONFIG_MEM_ALLOC_PROFILING
845	struct alloc_tag *alloc_tag;
846	#endif
847
848	int on_cpu;
849	struct __call_single_node wake_entry;
850	unsigned int wakee_flips;
851	unsigned long wakee_flip_decay_ts;
852	struct task_struct *last_wakee;
853
854	/*
855	* recent_used_cpu is initially set as the last CPU used by a task
856	* that wakes affine another task. Waker/wakee relationships can
857	* push tasks around a CPU where each wakeup moves to the next one.
858	* Tracking a recently used CPU allows a quick search for a recently
859	* used CPU that may be idle.
860	*/
861	int recent_used_cpu;
862	int wake_cpu;
863	int on_rq;
864
865	int prio;
866	int static_prio;
867	int normal_prio;
868	unsigned int rt_priority;
869
870	struct sched_entity se;
871	struct sched_rt_entity rt;
872	struct sched_dl_entity dl;
873	struct sched_dl_entity *dl_server;
874	#ifdef CONFIG_SCHED_CLASS_EXT
875	struct sched_ext_entity scx;
876	#endif
877	const struct sched_class *sched_class;
878
879	#ifdef CONFIG_SCHED_CORE
880	struct rb_node core_node;
881	unsigned long core_cookie;
882	unsigned int core_occupation;
883	#endif
884
885	#ifdef CONFIG_CGROUP_SCHED
886	struct task_group *sched_task_group;
887	#ifdef CONFIG_CFS_BANDWIDTH
888	struct callback_head sched_throttle_work;
889	struct list_head throttle_node;
890	bool throttled;
891	#endif
892	#endif
893
894
895	#ifdef CONFIG_UCLAMP_TASK
896	/*
897	* Clamp values requested for a scheduling entity.
898	* Must be updated with task_rq_lock() held.
899	*/
900	struct uclamp_se uclamp_req[UCLAMP_CNT];
901	/*
902	* Effective clamp values used for a scheduling entity.
903	* Must be updated with task_rq_lock() held.
904	*/
905	struct uclamp_se uclamp[UCLAMP_CNT];
906	#endif
907
908	struct sched_statistics stats;
909
910	#ifdef CONFIG_PREEMPT_NOTIFIERS
911	/ List of struct preempt_notifier: /
912	struct hlist_head preempt_notifiers;
913	#endif
914
915	#ifdef CONFIG_BLK_DEV_IO_TRACE
916	unsigned int btrace_seq;
917	#endif
918
919	unsigned int policy;
920	unsigned long max_allowed_capacity;
921	int nr_cpus_allowed;
922	const cpumask_t *cpus_ptr;
923	cpumask_t *user_cpus_ptr;
924	cpumask_t cpus_mask;
925	void *migration_pending;
926	unsigned short migration_disabled;
927	unsigned short migration_flags;
928
929	#ifdef CONFIG_PREEMPT_RCU
930	int rcu_read_lock_nesting;
931	union rcu_special rcu_read_unlock_special;
932	struct list_head rcu_node_entry;
933	struct rcu_node *rcu_blocked_node;
934	#endif /* #ifdef CONFIG_PREEMPT_RCU */
935
936	#ifdef CONFIG_TASKS_RCU
937	unsigned long rcu_tasks_nvcsw;
938	u8 rcu_tasks_holdout;
939	u8 rcu_tasks_idx;
940	int rcu_tasks_idle_cpu;
941	struct list_head rcu_tasks_holdout_list;
942	int rcu_tasks_exit_cpu;
943	struct list_head rcu_tasks_exit_list;
944	#endif /* #ifdef CONFIG_TASKS_RCU */
945
946	#ifdef CONFIG_TASKS_TRACE_RCU
947	int trc_reader_nesting;
948	int trc_ipi_to_cpu;
949	union rcu_special trc_reader_special;
950	struct list_head trc_holdout_list;
951	struct list_head trc_blkd_node;
952	int trc_blkd_cpu;
953	#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
954
955	struct sched_info sched_info;
956
957	struct list_head tasks;
958	struct plist_node pushable_tasks;
959	struct rb_node pushable_dl_tasks;
960
961	struct mm_struct *mm;
962	struct mm_struct *active_mm;
963	struct address_space *faults_disabled_mapping;
964
965	int exit_state;
966	int exit_code;
967	int exit_signal;
968	/ The signal sent when the parent dies: /
969	int pdeath_signal;
970	/ JOBCTL_, siglock protected: /*
971	unsigned long jobctl;
972
973	/ Used for emulating ABI behavior of previous Linux versions: /
974	unsigned int personality;
975
976	/ Scheduler bits, serialized by scheduler locks: /
977	unsigned sched_reset_on_fork:`1`;
978	unsigned sched_contributes_to_load:`1`;
979	unsigned sched_migrated:`1`;
980	unsigned sched_task_hot:`1`;
981
982	/ Force alignment to the next boundary: /
983	unsigned :`0`;
984
985	/ Unserialized, strictly 'current' /
986
987	/*
988	* This field must not be in the scheduler word above due to wakelist
989	* queueing no longer being serialized by p->on_cpu. However:
990	*
991	* p->XXX = X; ttwu()
992	* schedule() if (p->on_rq && ..) // false
993	* smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true
994	* deactivate_task() ttwu_queue_wakelist())
995	* p->on_rq = 0; p->sched_remote_wakeup = Y;
996	*
997	* guarantees all stores of 'current' are visible before
998	* ->sched_remote_wakeup gets used, so it can be in this word.
999	*/
1000	unsigned sched_remote_wakeup:`1`;
1001	#ifdef CONFIG_RT_MUTEXES
1002	unsigned sched_rt_mutex:`1`;
1003	#endif
1004
1005	/ Bit to tell TOMOYO we're in execve(): /
1006	unsigned in_execve:`1`;
1007	unsigned in_iowait:`1`;
1008	#ifndef TIF_RESTORE_SIGMASK
1009	unsigned restore_sigmask:`1`;
1010	#endif
1011	#ifdef CONFIG_MEMCG_V1
1012	unsigned in_user_fault:`1`;
1013	#endif
1014	#ifdef CONFIG_LRU_GEN
1015	/ whether the LRU algorithm may apply to this access /
1016	unsigned in_lru_fault:`1`;
1017	#endif
1018	#ifdef CONFIG_COMPAT_BRK
1019	unsigned brk_randomized:`1`;
1020	#endif
1021	#ifdef CONFIG_CGROUPS
1022	/ disallow userland-initiated cgroup migration /
1023	unsigned no_cgroup_migration:`1`;
1024	/ task is frozen/stopped (used by the cgroup freezer) /
1025	unsigned frozen:`1`;
1026	#endif
1027	#ifdef CONFIG_BLK_CGROUP
1028	unsigned use_memdelay:`1`;
1029	#endif
1030	#ifdef CONFIG_PSI
1031	/ Stalled due to lack of memory /
1032	unsigned in_memstall:`1`;
1033	#endif
1034	#ifdef CONFIG_PAGE_OWNER
1035	/ Used by page_owner=on to detect recursion in page tracking. /
1036	unsigned in_page_owner:`1`;
1037	#endif
1038	#ifdef CONFIG_EVENTFD
1039	/ Recursion prevention for eventfd_signal() /
1040	unsigned in_eventfd:`1`;
1041	#endif
1042	#ifdef CONFIG_ARCH_HAS_CPU_PASID
1043	unsigned pasid_activated:`1`;
1044	#endif
1045	#ifdef CONFIG_X86_BUS_LOCK_DETECT
1046	unsigned reported_split_lock:`1`;
1047	#endif
1048	#ifdef CONFIG_TASK_DELAY_ACCT
1049	/ delay due to memory thrashing /
1050	unsigned in_thrashing:`1`;
1051	#endif
1052	unsigned in_nf_duplicate:`1`;
1053	#ifdef CONFIG_PREEMPT_RT
1054	struct netdev_xmit net_xmit;
1055	#endif
1056	unsigned long atomic_flags; / Flags requiring atomic access. /
1057
1058	struct restart_block restart_block;
1059
1060	pid_t pid;
1061	pid_t tgid;
1062
1063	#ifdef CONFIG_STACKPROTECTOR
1064	/ Canary value for the -fstack-protector GCC feature: /
1065	unsigned long stack_canary;
1066	#endif
1067	/*
1068	* Pointers to the (original) parent process, youngest child, younger sibling,
1069	* older sibling, respectively. (p->father can be replaced with
1070	* p->real_parent->pid)
1071	*/
1072
1073	/ Real parent process: /
1074	struct task_struct __rcu *real_parent;
1075
1076	/ Recipient of SIGCHLD, wait4() reports: /
1077	struct task_struct __rcu *parent;
1078
1079	/*
1080	* Children/sibling form the list of natural children:
1081	*/
1082	struct list_head children;
1083	struct list_head sibling;
1084	struct task_struct *group_leader;
1085
1086	/*
1087	* 'ptraced' is the list of tasks this task is using ptrace() on.
1088	*
1089	* This includes both natural children and PTRACE_ATTACH targets.
1090	* 'ptrace_entry' is this task's link on the p->parent->ptraced list.
1091	*/
1092	struct list_head ptraced;
1093	struct list_head ptrace_entry;
1094
1095	/ PID/PID hash table linkage. /
1096	struct pid *thread_pid;
1097	struct hlist_node pid_links[PIDTYPE_MAX];
1098	struct list_head thread_node;
1099
1100	struct completion *vfork_done;
1101
1102	/ CLONE_CHILD_SETTID: /
1103	int __user *set_child_tid;
1104
1105	/ CLONE_CHILD_CLEARTID: /
1106	int __user *clear_child_tid;
1107
1108	/ PF_KTHREAD \| PF_IO_WORKER /
1109	void *worker_private;
1110
1111	u64 utime;
1112	u64 stime;
1113	#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
1114	u64 utimescaled;
1115	u64 stimescaled;
1116	#endif
1117	u64 gtime;
1118	struct prev_cputime prev_cputime;
1119	#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1120	struct vtime vtime;
1121	#endif
1122
1123	#ifdef CONFIG_NO_HZ_FULL
1124	atomic_t tick_dep_mask;
1125	#endif
1126	/ Context switch counts: /
1127	unsigned long nvcsw;
1128	unsigned long nivcsw;
1129
1130	/ Monotonic time in nsecs: /
1131	u64 start_time;
1132
1133	/ Boot based time in nsecs: /
1134	u64 start_boottime;
1135
1136	/ MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: /
1137	unsigned long min_flt;
1138	unsigned long maj_flt;
1139
1140	/ Empty if CONFIG_POSIX_CPUTIMERS=n /
1141	struct posix_cputimers posix_cputimers;
1142
1143	#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
1144	struct posix_cputimers_work posix_cputimers_work;
1145	#endif
1146
1147	/ Process credentials: /
1148
1149	/ Tracer's credentials at attach: /
1150	const struct cred __rcu *ptracer_cred;
1151
1152	/ Objective and real subjective task credentials (COW): /
1153	const struct cred __rcu *real_cred;
1154
1155	/ Effective (overridable) subjective task credentials (COW): /
1156	const struct cred __rcu *cred;
1157
1158	#ifdef CONFIG_KEYS
1159	/ Cached requested key. /
1160	struct key *cached_requested_key;
1161	#endif
1162
1163	/*
1164	* executable name, excluding path.
1165	*
1166	* - normally initialized begin_new_exec()
1167	* - set it with set_task_comm()
1168	* - strscpy_pad() to ensure it is always NUL-terminated and
1169	* zero-padded
1170	* - task_lock() to ensure the operation is atomic and the name is
1171	* fully updated.
1172	*/
1173	char comm[TASK_COMM_LEN];
1174
1175	struct nameidata *nameidata;
1176
1177	#ifdef CONFIG_SYSVIPC
1178	struct sysv_sem sysvsem;
1179	struct sysv_shm sysvshm;
1180	#endif
1181	#ifdef CONFIG_DETECT_HUNG_TASK
1182	unsigned long last_switch_count;
1183	unsigned long last_switch_time;
1184	#endif
1185	/ Filesystem information: /
1186	struct fs_struct *fs;
1187
1188	/ Open file information: /
1189	struct files_struct *files;
1190
1191	#ifdef CONFIG_IO_URING
1192	struct io_uring_task *io_uring;
1193	#endif
1194
1195	/ Namespaces: /
1196	struct nsproxy *nsproxy;
1197
1198	/ Signal handlers: /
1199	struct signal_struct *signal;
1200	struct sighand_struct __rcu *sighand;
1201	sigset_t blocked;
1202	sigset_t real_blocked;
1203	/ Restored if set_restore_sigmask() was used: /
1204	sigset_t saved_sigmask;
1205	struct sigpending pending;
1206	unsigned long sas_ss_sp;
1207	size_t sas_ss_size;
1208	unsigned int sas_ss_flags;
1209
1210	struct callback_head *task_works;
1211
1212	#ifdef CONFIG_AUDIT
1213	#ifdef CONFIG_AUDITSYSCALL
1214	struct audit_context *audit_context;
1215	#endif
1216	kuid_t loginuid;
1217	unsigned int sessionid;
1218	#endif
1219	struct seccomp seccomp;
1220	struct syscall_user_dispatch syscall_dispatch;
1221
1222	/ Thread group tracking: /
1223	u64 parent_exec_id;
1224	u64 self_exec_id;
1225
1226	/ Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: /
1227	spinlock_t alloc_lock;
1228
1229	/ Protection of the PI data structures: /
1230	raw_spinlock_t pi_lock;
1231
1232	struct wake_q_node wake_q;
1233
1234	#ifdef CONFIG_RT_MUTEXES
1235	/ PI waiters blocked on a rt_mutex held by this task: /
1236	struct rb_root_cached pi_waiters;
1237	/ Updated under owner's pi_lock and rq lock /
1238	struct task_struct *pi_top_task;
1239	/ Deadlock detection and priority inheritance handling: /
1240	struct rt_mutex_waiter *pi_blocked_on;
1241	#endif
1242
1243	struct mutex blocked_on; /* lock we're blocked on /
1244
1245	#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
1246	/*
1247	* Encoded lock address causing task block (lower 2 bits = type from
1248	* <linux/hung_task.h>). Accessed via hung_task_*() helpers.
1249	*/
1250	unsigned long blocker;
1251	#endif
1252
1253	#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1254	int non_block_count;
1255	#endif
1256
1257	#ifdef CONFIG_TRACE_IRQFLAGS
1258	struct irqtrace_events irqtrace;
1259	unsigned int hardirq_threaded;
1260	u64 hardirq_chain_key;
1261	int softirqs_enabled;
1262	int softirq_context;
1263	int irq_config;
1264	#endif
1265	#ifdef CONFIG_PREEMPT_RT
1266	int softirq_disable_cnt;
1267	#endif
1268
1269	#ifdef CONFIG_LOCKDEP
1270	# define MAX_LOCK_DEPTH 48UL
1271	u64 curr_chain_key;
1272	int lockdep_depth;
1273	unsigned int lockdep_recursion;
1274	struct held_lock held_locks[MAX_LOCK_DEPTH];
1275	#endif
1276
1277	#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
1278	unsigned int in_ubsan;
1279	#endif
1280
1281	/ Journalling filesystem info: /
1282	void *journal_info;
1283
1284	/ Stacked block device info: /
1285	struct bio_list *bio_list;
1286
1287	/ Stack plugging: /
1288	struct blk_plug *plug;
1289
1290	/ VM state: /
1291	struct reclaim_state *reclaim_state;
1292
1293	struct io_context *io_context;
1294
1295	#ifdef CONFIG_COMPACTION
1296	struct capture_control *capture_control;
1297	#endif
1298	/ Ptrace state: /
1299	unsigned long ptrace_message;
1300	kernel_siginfo_t *last_siginfo;
1301
1302	struct task_io_accounting ioac;
1303	#ifdef CONFIG_PSI
1304	/ Pressure stall state /
1305	unsigned int psi_flags;
1306	#endif
1307	#ifdef CONFIG_TASK_XACCT
1308	/ Accumulated RSS usage: /
1309	u64 acct_rss_mem1;
1310	/ Accumulated virtual memory usage: /
1311	u64 acct_vm_mem1;
1312	/ stime + utime since last update: /
1313	u64 acct_timexpd;
1314	#endif
1315	#ifdef CONFIG_CPUSETS
1316	/ Protected by ->alloc_lock: /
1317	nodemask_t mems_allowed;
1318	/ Sequence number to catch updates: /
1319	seqcount_spinlock_t mems_allowed_seq;
1320	int cpuset_mem_spread_rotor;
1321	#endif
1322	#ifdef CONFIG_CGROUPS
1323	/ Control Group info protected by css_set_lock: /
1324	struct css_set __rcu *cgroups;
1325	/ cg_list protected by css_set_lock and tsk->alloc_lock: /
1326	struct list_head cg_list;
1327	#ifdef CONFIG_PREEMPT_RT
1328	struct llist_node cg_dead_lnode;
1329	#endif /* CONFIG_PREEMPT_RT */
1330	#endif /* CONFIG_CGROUPS */
1331	#ifdef CONFIG_X86_CPU_RESCTRL
1332	u32 closid;
1333	u32 rmid;
1334	#endif
1335	#ifdef CONFIG_FUTEX
1336	struct robust_list_head __user *robust_list;
1337	#ifdef CONFIG_COMPAT
1338	struct compat_robust_list_head __user *compat_robust_list;
1339	#endif
1340	struct list_head pi_state_list;
1341	struct futex_pi_state *pi_state_cache;
1342	struct mutex futex_exit_mutex;
1343	unsigned int futex_state;
1344	#endif
1345	#ifdef CONFIG_PERF_EVENTS
1346	u8 perf_recursion[PERF_NR_CONTEXTS];
1347	struct perf_event_context *perf_event_ctxp;
1348	struct mutex perf_event_mutex;
1349	struct list_head perf_event_list;
1350	struct perf_ctx_data __rcu *perf_ctx_data;
1351	#endif
1352	#ifdef CONFIG_DEBUG_PREEMPT
1353	unsigned long preempt_disable_ip;
1354	#endif
1355	#ifdef CONFIG_NUMA
1356	/ Protected by alloc_lock: /
1357	struct mempolicy *mempolicy;
1358	short il_prev;
1359	u8 il_weight;
1360	short pref_node_fork;
1361	#endif
1362	#ifdef CONFIG_NUMA_BALANCING
1363	int numa_scan_seq;
1364	unsigned int numa_scan_period;
1365	unsigned int numa_scan_period_max;
1366	int numa_preferred_nid;
1367	unsigned long numa_migrate_retry;
1368	/ Migration stamp: /
1369	u64 node_stamp;
1370	u64 last_task_numa_placement;
1371	u64 last_sum_exec_runtime;
1372	struct callback_head numa_work;
1373
1374	/*
1375	* This pointer is only modified for current in syscall and
1376	* pagefault context (and for tasks being destroyed), so it can be read
1377	* from any of the following contexts:
1378	* - RCU read-side critical section
1379	* - current->numa_group from everywhere
1380	* - task's runqueue locked, task not running
1381	*/
1382	struct numa_group __rcu *numa_group;
1383
1384	/*
1385	* numa_faults is an array split into four regions:
1386	* faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
1387	* in this precise order.
1388	*
1389	* faults_memory: Exponential decaying average of faults on a per-node
1390	* basis. Scheduling placement decisions are made based on these
1391	* counts. The values remain static for the duration of a PTE scan.
1392	* faults_cpu: Track the nodes the process was running on when a NUMA
1393	* hinting fault was incurred.
1394	* faults_memory_buffer and faults_cpu_buffer: Record faults per node
1395	* during the current scan window. When the scan completes, the counts
1396	* in faults_memory and faults_cpu decay and these values are copied.
1397	*/
1398	unsigned long *numa_faults;
1399	unsigned long total_numa_faults;
1400
1401	/*
1402	* numa_faults_locality tracks if faults recorded during the last
1403	* scan window were remote/local or failed to migrate. The task scan
1404	* period is adapted based on the locality of the faults with different
1405	* weights depending on whether they were shared or private faults
1406	*/
1407	unsigned long numa_faults_locality[`3`];
1408
1409	unsigned long numa_pages_migrated;
1410	#endif /* CONFIG_NUMA_BALANCING */
1411
1412	struct rseq_data rseq;
1413	struct sched_mm_cid mm_cid;
1414
1415	struct tlbflush_unmap_batch tlb_ubc;
1416
1417	/ Cache last used pipe for splice(): /
1418	struct pipe_inode_info *splice_pipe;
1419
1420	struct page_frag task_frag;
1421
1422	#ifdef CONFIG_TASK_DELAY_ACCT
1423	struct task_delay_info *delays;
1424	#endif
1425
1426	#ifdef CONFIG_FAULT_INJECTION
1427	int make_it_fail;
1428	unsigned int fail_nth;
1429	#endif
1430	/*
1431	* When (nr_dirtied >= nr_dirtied_pause), it's time to call
1432	* balance_dirty_pages() for a dirty throttling pause:
1433	*/
1434	int nr_dirtied;
1435	int nr_dirtied_pause;
1436	/ Start of a write-and-pause period: /
1437	unsigned long dirty_paused_when;
1438
1439	#ifdef CONFIG_LATENCYTOP
1440	int latency_record_count;
1441	struct latency_record latency_record[LT_SAVECOUNT];
1442	#endif
1443	/*
1444	* Time slack values; these are used to round up poll() and
1445	* select() etc timeout values. These are in nanoseconds.
1446	*/
1447	u64 timer_slack_ns;
1448	u64 default_timer_slack_ns;
1449
1450	#if defined(CONFIG_KASAN_GENERIC) \|\| defined(CONFIG_KASAN_SW_TAGS)
1451	unsigned int kasan_depth;
1452	#endif
1453
1454	#ifdef CONFIG_KCSAN
1455	struct kcsan_ctx kcsan_ctx;
1456	#ifdef CONFIG_TRACE_IRQFLAGS
1457	struct irqtrace_events kcsan_save_irqtrace;
1458	#endif
1459	#ifdef CONFIG_KCSAN_WEAK_MEMORY
1460	int kcsan_stack_depth;
1461	#endif
1462	#endif
1463
1464	#ifdef CONFIG_KMSAN
1465	struct kmsan_ctx kmsan_ctx;
1466	#endif
1467
1468	#if IS_ENABLED(CONFIG_KUNIT)
1469	struct kunit *kunit_test;
1470	#endif
1471
1472	#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1473	/ Index of current stored address in ret_stack: /
1474	int curr_ret_stack;
1475	int curr_ret_depth;
1476
1477	/ Stack of return addresses for return function tracing: /
1478	unsigned long *ret_stack;
1479
1480	/ Timestamp for last schedule: /
1481	unsigned long long ftrace_timestamp;
1482	unsigned long long ftrace_sleeptime;
1483
1484	/*
1485	* Number of functions that haven't been traced
1486	* because of depth overrun:
1487	*/
1488	atomic_t trace_overrun;
1489
1490	/ Pause tracing: /
1491	atomic_t tracing_graph_pause;
1492	#endif
1493
1494	#ifdef CONFIG_TRACING
1495	/ Bitmask and counter of trace recursion: /
1496	unsigned long trace_recursion;
1497	#endif /* CONFIG_TRACING */
1498
1499	#ifdef CONFIG_KCOV
1500	/ See kernel/kcov.c for more details. /
1501
1502	/ Coverage collection mode enabled for this task (0 if disabled): /
1503	unsigned int kcov_mode;
1504
1505	/ Size of the kcov_area: /
1506	unsigned int kcov_size;
1507
1508	/ Buffer for coverage collection: /
1509	void *kcov_area;
1510
1511	/ KCOV descriptor wired with this task or NULL: /
1512	struct kcov *kcov;
1513
1514	/ KCOV common handle for remote coverage collection: /
1515	u64 kcov_handle;
1516
1517	/ KCOV sequence number: /
1518	int kcov_sequence;
1519
1520	/ Collect coverage from softirq context: /
1521	unsigned int kcov_softirq;
1522	#endif
1523
1524	#ifdef CONFIG_MEMCG_V1
1525	struct mem_cgroup *memcg_in_oom;
1526	#endif
1527
1528	#ifdef CONFIG_MEMCG
1529	/ Number of pages to reclaim on returning to userland: /
1530	unsigned int memcg_nr_pages_over_high;
1531
1532	/ Used by memcontrol for targeted memcg charge: /
1533	struct mem_cgroup *active_memcg;
1534
1535	/ Cache for current->cgroups->memcg->objcg lookups: /
1536	struct obj_cgroup *objcg;
1537	#endif
1538
1539	#ifdef CONFIG_BLK_CGROUP
1540	struct gendisk *throttle_disk;
1541	#endif
1542
1543	#ifdef CONFIG_UPROBES
1544	struct uprobe_task *utask;
1545	#endif
1546	#if defined(CONFIG_BCACHE) \|\| defined(CONFIG_BCACHE_MODULE)
1547	unsigned int sequential_io;
1548	unsigned int sequential_io_avg;
1549	#endif
1550	struct kmap_ctrl kmap_ctrl;
1551	#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1552	unsigned long task_state_change;
1553	# ifdef CONFIG_PREEMPT_RT
1554	unsigned long saved_state_change;
1555	# endif
1556	#endif
1557	struct rcu_head rcu;
1558	refcount_t rcu_users;
1559	int pagefault_disabled;
1560	#ifdef CONFIG_MMU
1561	struct task_struct *oom_reaper_list;
1562	struct timer_list oom_reaper_timer;
1563	#endif
1564	#ifdef CONFIG_VMAP_STACK
1565	struct vm_struct *stack_vm_area;
1566	#endif
1567	#ifdef CONFIG_THREAD_INFO_IN_TASK
1568	/ A live task holds one reference: /
1569	refcount_t stack_refcount;
1570	#endif
1571	#ifdef CONFIG_LIVEPATCH
1572	int patch_state;
1573	#endif
1574	#ifdef CONFIG_SECURITY
1575	/ Used by LSM modules for access restriction: /
1576	void *security;
1577	#endif
1578	#ifdef CONFIG_BPF_SYSCALL
1579	/ Used by BPF task local storage /
1580	struct bpf_local_storage __rcu *bpf_storage;
1581	/ Used for BPF run context /
1582	struct bpf_run_ctx *bpf_ctx;
1583	#endif
1584	/ Used by BPF for per-TASK xdp storage /
1585	struct bpf_net_context *bpf_net_context;
1586
1587	#ifdef CONFIG_KSTACK_ERASE
1588	unsigned long lowest_stack;
1589	#endif
1590	#ifdef CONFIG_KSTACK_ERASE_METRICS
1591	unsigned long prev_lowest_stack;
1592	#endif
1593
1594	#ifdef CONFIG_X86_MCE
1595	void __user *mce_vaddr;
1596	__u64 mce_kflags;
1597	u64 mce_addr;
1598	__u64 mce_ripv : `1`,
1599	mce_whole_page : `1`,
1600	__mce_reserved : `62`;
1601	struct callback_head mce_kill_me;
1602	int mce_count;
1603	#endif
1604
1605	#ifdef CONFIG_KRETPROBES
1606	struct llist_head kretprobe_instances;
1607	#endif
1608	#ifdef CONFIG_RETHOOK
1609	struct llist_head rethooks;
1610	#endif
1611
1612	#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
1613	/*
1614	* If L1D flush is supported on mm context switch
1615	* then we use this callback head to queue kill work
1616	* to kill tasks that are not running on SMT disabled
1617	* cores
1618	*/
1619	struct callback_head l1d_flush_kill;
1620	#endif
1621
1622	#ifdef CONFIG_RV
1623	/*
1624	* Per-task RV monitor, fixed in CONFIG_RV_PER_TASK_MONITORS.
1625	* If memory becomes a concern, we can think about a dynamic method.
1626	*/
1627	union rv_task_monitor rv[CONFIG_RV_PER_TASK_MONITORS];
1628	#endif
1629
1630	#ifdef CONFIG_USER_EVENTS
1631	struct user_event_mm *user_event_mm;
1632	#endif
1633
1634	#ifdef CONFIG_UNWIND_USER
1635	struct unwind_task_info unwind_info;
1636	#endif
1637
1638	/ CPU-specific state of this task: /
1639	struct thread_struct thread;
1640
1641	/*
1642	* New fields for task_struct should be added above here, so that
1643	* they are included in the randomized portion of task_struct.
1644	*/
1645	randomized_struct_fields_end
1646	} __attribute__ ((aligned (`64`)));
1647
#ifdef CONFIG_SCHED_PROXY_EXEC
DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
#endif

/*
 * sched_proxy_exec - query the proxy-execution switch.
 *
 * With CONFIG_SCHED_PROXY_EXEC the result is the state of the
 * __sched_proxy_exec static key (declared default-true), evaluated with
 * static_branch_likely(). Without that option this is a constant false.
 */
static inline bool sched_proxy_exec(void)
{
#ifdef CONFIG_SCHED_PROXY_EXEC
	return static_branch_likely(&__sched_proxy_exec);
#else
	return false;
#endif
}
1660
1661	#define TASK_REPORT_IDLE (TASK_REPORT + 1)
1662	#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
1663
1664	static inline unsigned int __task_state_index(unsigned int tsk_state,
1665	unsigned int tsk_exit_state)
1666	{
1667	unsigned int state = (tsk_state \| tsk_exit_state) & TASK_REPORT;
1668
1669	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
1670
1671	if ((tsk_state & TASK_IDLE) == TASK_IDLE)
1672	state = TASK_REPORT_IDLE;
1673
1674	/*
1675	* We're lying here, but rather than expose a completely new task state
1676	* to userspace, we can make this appear as if the task has gone through
1677	* a regular rt_mutex_lock() call.
1678	* Report frozen tasks as uninterruptible.
1679	*/
1680	if ((tsk_state & TASK_RTLOCK_WAIT) \|\| (tsk_state & TASK_FROZEN))
1681	state = TASK_UNINTERRUPTIBLE;
1682
1683	return fls(x: state);
1684	}
1685
1686	static inline unsigned int task_state_index(struct task_struct *tsk)
1687	{
1688	return __task_state_index(READ_ONCE(tsk->__state), tsk_exit_state: tsk->exit_state);
1689	}
1690
1691	static inline char task_index_to_char(unsigned int state)
1692	{
1693	static const char state_char[] = "RSDTtXZPI";
1694
1695	BUILD_BUG_ON(TASK_REPORT_MAX * `2` != `1` << (sizeof(state_char) - `1`));
1696
1697	return state_char[state];
1698	}
1699
/*
 * task_state_to_char - state letter for a task
 * @tsk: task to inspect
 *
 * Return: task_index_to_char() applied to task_state_index(@tsk).
 */
static inline char task_state_to_char(struct task_struct *tsk)
{
	return task_index_to_char(task_state_index(tsk));
}
1704
/* Declared here; the definition lives outside this header. */
extern struct pid *cad_pid;

/*
 * Per process flags
 *
 * One bit each; the PF__HOLE__* names mark bit positions that currently
 * carry no flag.
 */
#define PF_VCPU			0x00000001	/* Task is a virtual CPU */
#define PF_IDLE			0x00000002	/* Task is an IDLE thread */
#define PF_EXITING		0x00000004	/* Task is being shut down */
#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
#define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
#define PF_WQ_WORKER		0x00000020	/* Task is a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but has not exec'ed */
#define PF_MCE_PROCESS		0x00000080	/* Process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Dumped core */
#define PF_SIGNALED		0x00000400	/* Killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Allocating memory to free memory.
						 * See memalloc_noreclaim_save() */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC
						 * was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized
						 * before use */
#define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF_KCOMPACTD		0x00010000	/* Task is kcompactd */
#define PF_KSWAPD		0x00020000	/* Task is kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocations inherit GFP_NOFS.
						 * See memalloc_nofs_save() */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocations inherit GFP_NOIO.
						 * See memalloc_noio_save() */
#define PF_LOCAL_THROTTLE	0x00100000	/* Throttle writes only against the bdi
						 * I write to, I am cleaning dirty pages
						 * from some other bdi. */
#define PF_KTHREAD		0x00200000	/* Task is a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
#define PF__HOLE__00800000	0x00800000
#define PF__HOLE__01000000	0x01000000
#define PF__HOLE__02000000	0x02000000
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle
						 * with cpus_mask */
#define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
#define PF_MEMALLOC_PIN		0x10000000	/* Allocations constrained to zones which
						 * allow long term pinning.
						 * See memalloc_pin_save() */
#define PF_BLOCK_TS		0x20000000	/* plug has ts that needs updating */
#define PF__HOLE__40000000	0x40000000
#define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes()
						 * and should not be frozen */
1744
/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 *
 * All helpers below are plain (non-atomic) read-modify-write operations
 * on ->flags; the *_used_math() variants without a task argument act on
 * 'current'.
 */
#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()			clear_stopped_child_used_math(current)
#define set_used_math()				set_stopped_child_used_math(current)

/* Clear PF_USED_MATH, then set it again iff 'condition' is non-zero. */
#define conditional_stopped_child_used_math(condition, child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)

#define conditional_used_math(condition)	conditional_stopped_child_used_math(condition, current)

/* Make the child's PF_USED_MATH bit equal to current's. */
#define copy_to_stopped_child_used_math(child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
#define used_math()				tsk_used_math(current)
1772
1773	static __always_inline bool is_percpu_thread(void)
1774	{
1775	return (current->flags & PF_NO_SETAFFINITY) &&
1776	(current->nr_cpus_allowed == `1`);
1777	}
1778
1779	static __always_inline bool is_user_task(struct task_struct *task)
1780	{
1781	return task->mm && !(task->flags & (PF_KTHREAD \| PF_USER_WORKER));
1782	}
1783
/*
 * Per-process atomic flags.
 *
 * These are bit numbers (not masks), used with the bit operations on
 * task_struct::atomic_flags.
 */
#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled */
#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC		7	/* Speculative Store Bypass clear on execve() */
1793
1794	#define TASK_PFA_TEST(name, func) \
1795	static inline bool task_##func(struct task_struct *p) \
1796	{ return test_bit(PFA_##name, &p->atomic_flags); }
1797
1798	#define TASK_PFA_SET(name, func) \
1799	static inline void task_set_##func(struct task_struct *p) \
1800	{ set_bit(PFA_##name, &p->atomic_flags); }
1801
1802	#define TASK_PFA_CLEAR(name, func) \
1803	static inline void task_clear_##func(struct task_struct *p) \
1804	{ clear_bit(PFA_##name, &p->atomic_flags); }
1805
1806	TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
1807	TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
1808
1809	TASK_PFA_TEST(SPREAD_PAGE, spread_page)
1810	TASK_PFA_SET(SPREAD_PAGE, spread_page)
1811	TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
1812
1813	TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
1814	TASK_PFA_SET(SPREAD_SLAB, spread_slab)
1815	TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
1816
1817	TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
1818	TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
1819	TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
1820
1821	TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1822	TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1823	TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1824
1825	TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
1826	TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
1827
1828	TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
1829	TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
1830	TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
1831
1832	TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
1833	TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
1834
1835	static inline void
1836	current_restore_flags(unsigned long orig_flags, unsigned long flags)
1837	{
1838	current->flags &= ~flags;
1839	current->flags \|= orig_flags & flags;
1840	}
1841
1842	extern int cpuset_cpumask_can_shrink(const struct cpumask cur, const* struct cpumask *trial);
1843	extern int task_can_attach(struct task_struct *p);
1844	extern int dl_bw_alloc(int cpu, u64 dl_bw);
1845	extern void dl_bw_free(int cpu, u64 dl_bw);
1846
1847	/ set_cpus_allowed_force() - consider using set_cpus_allowed_ptr() instead /
1848	extern void set_cpus_allowed_force(struct task_struct p, const* struct cpumask *new_mask);
1849
1850	/**
1851	* set_cpus_allowed_ptr - set CPU affinity mask of a task
1852	* @p: the task
1853	* @new_mask: CPU affinity mask
1854	*
1855	* Return: zero if successful, or a negative error code
1856	*/
1857	extern int set_cpus_allowed_ptr(struct task_struct p, const* struct cpumask *new_mask);
1858	extern int dup_user_cpus_ptr(struct task_struct dst, struct* task_struct src, int* node);
1859	extern void release_user_cpus_ptr(struct task_struct *p);
1860	extern int dl_task_check_affinity(struct task_struct p, const* struct cpumask *mask);
1861	extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
1862	extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
1863
1864	extern int yield_to(struct task_struct *p, bool preempt);
1865	extern void set_user_nice(struct task_struct p, long* nice);
1866	extern int task_prio(const struct task_struct *p);
1867
1868	/**
1869	* task_nice - return the nice value of a given task.
1870	* @p: the task in question.
1871	*
1872	* Return: The nice value [ -20 ... 0 ... 19 ].
1873	*/
1874	static inline int task_nice(const struct task_struct *p)
1875	{
1876	return PRIO_TO_NICE((p)->static_prio);
1877	}
1878
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern void sched_set_fifo(struct task_struct *p);
extern void sched_set_fifo_low(struct task_struct *p);
extern void sched_set_fifo_secondary(struct task_struct *p);
extern void sched_set_normal(struct task_struct *p, int nice);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);
1891
1892	/**
1893	* is_idle_task - is the specified task an idle task?
1894	* @p: the task in question.
1895	*
1896	* Return: 1 if @p is an idle task. 0 otherwise.
1897	*/
1898	static __always_inline bool is_idle_task(const struct task_struct *p)
1899	{
1900	return !!(p->flags & PF_IDLE);
1901	}
1902
extern struct task_struct *curr_task(int cpu);
extern void ia64_set_curr_task(int cpu, struct task_struct *p);

void yield(void);
1907
1908	union thread_union {
1909	struct task_struct task;
1910	#ifndef CONFIG_THREAD_INFO_IN_TASK
1911	struct thread_info thread_info;
1912	#endif
1913	unsigned long stack[THREAD_SIZE/sizeof(long)];
1914	};
1915
1916	#ifndef CONFIG_THREAD_INFO_IN_TASK
1917	extern struct thread_info init_thread_info;
1918	#endif
1919
1920	extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
1921
1922	#ifdef CONFIG_THREAD_INFO_IN_TASK
1923	# define task_thread_info(task) (&(task)->thread_info)
1924	#else
1925	# define task_thread_info(task) ((struct thread_info *)(task)->stack)
1926	#endif
1927
/*
 * find a task by one of its numerical ids
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */

extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);

/*
 * find a task by its virtual pid and get the task struct
 */
extern struct task_struct *find_get_task_by_vpid(pid_t nr);
1946
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);

extern void kick_process(struct task_struct *tsk);
1952
extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);

/*
 * set_task_comm - set a task's comm from a TASK_COMM_LEN-sized array.
 *
 * The BUILD_BUG_ON() rejects, at compile time, any @from whose sizeof()
 * is not exactly TASK_COMM_LEN (so a plain pointer argument is only
 * accepted if a pointer happens to have that size). The call is forwarded
 * to __set_task_comm() with exec == false.
 */
#define set_task_comm(tsk, from) ({			\
	BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN);	\
	__set_task_comm(tsk, from, false);		\
})
1958
/*
 * get_task_comm - copy (tsk)->comm into the array 'buf' and yield 'buf'.
 *
 * Locking: readers deliberately skip task_lock(). User space may rename
 * a task at any time, so a reader lock buys nothing. Writers probably do
 * need locking, since a write/write race could leave a mixed name behind
 * for a long time. Because __set_task_comm() stores the name with
 * strscpy_pad(), comm is always NUL-terminated and zero-padded, which
 * makes the reader/writer race harmless.
 *
 * Size check: callers never look at a return value, so truncation would
 * go unnoticed; the BUILD_BUG_ON() therefore refuses any buf smaller
 * than TASK_COMM_LEN at compile time.
 */
#define get_task_comm(buf, tsk) ({			\
	BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN);	\
	strscpy_pad(buf, (tsk)->comm);			\
	buf;						\
})
1977
1978	static __always_inline void scheduler_ipi(void)
1979	{
1980	/*
1981	* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
1982	* TIF_NEED_RESCHED remotely (for the first time) will also send
1983	* this IPI.
1984	*/
1985	preempt_fold_need_resched();
1986	}
1987
/* Declared here, defined elsewhere; takes a task and a state to match. */
extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
1989
1990	/*
1991	* Set thread flags in other task's structures.
1992	* See asm/thread_info.h for TIF_xxxx flags available:
1993	*/
/* Set thread flag @flag in @tsk's thread_info. */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	set_ti_thread_flag(task_thread_info(tsk), flag);
}
1998
/* Clear thread flag @flag in @tsk's thread_info. */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	clear_ti_thread_flag(task_thread_info(tsk), flag);
}
2003
2004	static inline void update_tsk_thread_flag(struct task_struct tsk, int* flag,
2005	bool value)
2006	{
2007	update_ti_thread_flag(task_thread_info(tsk), flag, value);
2008	}
2009
/*
 * Test-and-set @flag in @tsk's thread_info; returns the result of
 * test_and_set_ti_thread_flag().
 */
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
}
2014
/*
 * Test-and-clear @flag in @tsk's thread_info; returns the result of
 * test_and_clear_ti_thread_flag().
 */
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
}
2019
/* Test @flag in @tsk's thread_info; returns the result of test_ti_thread_flag(). */
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_ti_thread_flag(task_thread_info(tsk), flag);
}
2024
2025	static inline void set_tsk_need_resched(struct task_struct *tsk)
2026	{
2027	if (tracepoint_enabled(sched_set_need_resched_tp) &&
2028	!test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
2029	__trace_set_need_resched(curr: tsk, TIF_NEED_RESCHED);
2030	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2031	}
2032
2033	static inline void clear_tsk_need_resched(struct task_struct *tsk)
2034	{
2035	atomic_long_andnot(_TIF_NEED_RESCHED \| _TIF_NEED_RESCHED_LAZY,
2036	v: (atomic_long_t *)&task_thread_info(tsk)->flags);
2037	}
2038
2039	static inline int test_tsk_need_resched(struct task_struct *tsk)
2040	{
2041	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2042	}
2043
2044	static inline void set_need_resched_current(void)
2045	{
2046	lockdep_assert_irqs_disabled();
2047	set_tsk_need_resched(current);
2048	set_preempt_need_resched();
2049	}
2050
2051	/*
2052	* cond_resched() and cond_resched_lock(): latency reduction via
2053	* explicit rescheduling in places that are safe. The return
2054	* value indicates whether a reschedule was done in fact.
2055	* cond_resched_lock() will drop the spinlock before scheduling,
2056	*/
/*
 * _cond_resched() is selected by the preemption configuration:
 *  - PREEMPT_DYNAMIC with static calls: static call to cond_resched
 *  - PREEMPT_DYNAMIC with static keys:  dynamic_cond_resched()
 *  - !PREEMPTION:                       __cond_resched()
 *  - PREEMPTION without PREEMPT_DYNAMIC: constant 0
 */
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);

#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

DECLARE_STATIC_CALL(cond_resched, __cond_resched);

static __always_inline int _cond_resched(void)
{
	return static_call_mod(cond_resched)();
}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)

extern int dynamic_cond_resched(void);

static __always_inline int _cond_resched(void)
{
	return dynamic_cond_resched();
}

#else /* !CONFIG_PREEMPTION */

static inline int _cond_resched(void)
{
	return __cond_resched();
}

#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */

static inline int _cond_resched(void)
{
	return 0;
}

#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */
2095
/*
 * cond_resched() - run the __might_resched() check with zero offsets for
 * the call site, then evaluate to the result of _cond_resched().
 */
#define cond_resched() ({			\
	__might_resched(__FILE__, __LINE__, 0);	\
	_cond_resched();			\
})
2100
2101	extern int __cond_resched_lock(spinlock_t *lock);
2102	extern int __cond_resched_rwlock_read(rwlock_t *lock);
2103	extern int __cond_resched_rwlock_write(rwlock_t *lock);
2104
/*
 * Bit position of the RCU part of a __might_resched() offsets value, and
 * the mask covering the bits below it.
 */
#define MIGHT_RESCHED_RCU_SHIFT		8
#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
2107
#ifndef CONFIG_PREEMPT_RT
/*
 * Non RT kernels have an elevated preempt count due to the held lock,
 * but are not allowed to be inside a RCU read side critical section
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
#else
/*
 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 * cond_resched*lock() has to take that into account because it checks for
 * preempt_count() and rcu_preempt_depth().
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	\
	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
#endif
2123
/*
 * Lock-holding variants of cond_resched(): each runs __might_resched()
 * with PREEMPT_LOCK_RESCHED_OFFSETS for the call site and then evaluates
 * to the result of the matching __cond_resched_*() helper.
 */
#define cond_resched_lock(lock) ({						\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_lock(lock);						\
})

#define cond_resched_rwlock_read(lock) ({					\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_read(lock);					\
})

#define cond_resched_rwlock_write(lock) ({					\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_write(lock);					\
})
2138
2139	#ifndef CONFIG_PREEMPT_RT
2140	static inline struct mutex __get_task_blocked_on(struct* task_struct *p)
2141	{
2142	struct mutex *m = p->blocked_on;
2143
2144	if (m)
2145	lockdep_assert_held_once(&m->wait_lock);
2146	return m;
2147	}
2148
2149	static inline void __set_task_blocked_on(struct task_struct p, struct* mutex *m)
2150	{
2151	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
2152
2153	WARN_ON_ONCE(!m);
2154	/ The task should only be setting itself as blocked /
2155	WARN_ON_ONCE(p != current);
2156	/ Currently we serialize blocked_on under the mutex::wait_lock /
2157	lockdep_assert_held_once(&m->wait_lock);
2158	/*
2159	* Check ensure we don't overwrite existing mutex value
2160	* with a different mutex. Note, setting it to the same
2161	* lock repeatedly is ok.
2162	*/
2163	WARN_ON_ONCE(blocked_on && blocked_on != m);
2164	WRITE_ONCE(p->blocked_on, m);
2165	}
2166
2167	static inline void set_task_blocked_on(struct task_struct p, struct* mutex *m)
2168	{
2169	guard(raw_spinlock_irqsave)(l: &m->wait_lock);
2170	__set_task_blocked_on(p, m);
2171	}
2172
2173	static inline void __clear_task_blocked_on(struct task_struct p, struct* mutex *m)
2174	{
2175	if (m) {
2176	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
2177
2178	/ Currently we serialize blocked_on under the mutex::wait_lock /
2179	lockdep_assert_held_once(&m->wait_lock);
2180	/*
2181	* There may be cases where we re-clear already cleared
2182	* blocked_on relationships, but make sure we are not
2183	* clearing the relationship with a different lock.
2184	*/
2185	WARN_ON_ONCE(blocked_on && blocked_on != m);
2186	}
2187	WRITE_ONCE(p->blocked_on, NULL);
2188	}
2189
2190	static inline void clear_task_blocked_on(struct task_struct p, struct* mutex *m)
2191	{
2192	guard(raw_spinlock_irqsave)(l: &m->wait_lock);
2193	__clear_task_blocked_on(p, m);
2194	}
2195	#else
/* CONFIG_PREEMPT_RT: empty stubs that take a struct rt_mutex instead. */
static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}

static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}
2203	#endif /* !CONFIG_PREEMPT_RT */
2204
2205	static __always_inline bool need_resched(void)
2206	{
2207	return unlikely(tif_need_resched());
2208	}
2209
2210	/*
2211	* Wrappers for p->thread_info->cpu access. No-op on UP.
2212	*/
#ifdef CONFIG_SMP

/* Read the cpu field of @p's thread_info with READ_ONCE(). */
static inline unsigned int task_cpu(const struct task_struct *p)
{
	return READ_ONCE(task_thread_info(p)->cpu);
}

extern void set_task_cpu(struct task_struct *p, unsigned int cpu);

#else

/* !CONFIG_SMP: task_cpu() is always 0. */
static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

/* !CONFIG_SMP: nothing to record. */
static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}

#endif /* CONFIG_SMP */
2234
2235	static inline bool task_is_runnable(struct task_struct *p)
2236	{
2237	return p->on_rq && !p->se.sched_delayed;
2238	}
2239
2240	extern bool sched_task_on_rq(struct task_struct *p);
2241	extern unsigned long get_wchan(struct task_struct *p);
/* Declared here, defined elsewhere; returns a task pointer for @cpu. */
extern struct task_struct *cpu_curr_snapshot(int cpu);
2243
2244	/*
2245	* In order to reduce various lock holder preemption latencies provide an
2246	* interface to see if a vCPU is currently running or not.
2247	*
2248	* This allows us to terminate optimistic spin loops and block, analogous to
2249	* the native optimistic spin heuristic of testing if the lock owner task is
2250	* running or not.
2251	*/
#ifndef vcpu_is_preempted
/* Fallback used when no vcpu_is_preempted macro is defined: never preempted. */
static inline bool vcpu_is_preempted(int cpu)
{
	return false;
}
#endif
2258
2259	extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2260	extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2261
/* Default TASK_SIZE_OF(): ignores @tsk and expands to TASK_SIZE. */
#ifndef TASK_SIZE_OF
#define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif
2265
2266	static inline bool owner_on_cpu(struct task_struct *owner)
2267	{
2268	/*
2269	* As lock holder preemption issue, we both skip spinning if
2270	* task is not on cpu or its cpu is preempted
2271	*/
2272	return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(cpu: task_cpu(p: owner));
2273	}
2274
/* Returns effective CPU energy utilization, as seen by the scheduler */
2276	unsigned long sched_cpu_util(int cpu);
2277
/*
 * Core scheduling hooks. Without CONFIG_SCHED_CORE the free/fork hooks
 * are empty and sched_core_idle_cpu() returns idle_cpu().
 */
#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
				unsigned long uaddr);
extern int sched_core_idle_cpu(int cpu);
#else
static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
#endif
2289
2290	extern void sched_set_stop_task(int cpu, struct task_struct *stop);
2291
#ifdef CONFIG_MEM_ALLOC_PROFILING
/*
 * Swap @tag with current->alloc_tag and return the tag that was
 * previously stored there.
 */
static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
	swap(current->alloc_tag, tag);
	return tag;
}

/*
 * Store @old back into current->alloc_tag. With
 * CONFIG_MEM_ALLOC_PROFILING_DEBUG, warn if the stored tag is not @tag.
 */
static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
	WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
#endif
	current->alloc_tag = old;
}
#else
/* Profiling disabled: save evaluates to NULL, restore is a no-op. */
#define alloc_tag_save(_tag)			NULL
#define alloc_tag_restore(_tag, _old)		do {} while (0)
#endif
2310
/* Avoids recursive inclusion hell */
2312	#ifdef CONFIG_SCHED_MM_CID
2313	void sched_mm_cid_before_execve(struct task_struct *t);
2314	void sched_mm_cid_after_execve(struct task_struct *t);
2315	void sched_mm_cid_fork(struct task_struct *t);
2316	void sched_mm_cid_exit(struct task_struct *t);
2317	static __always_inline int task_mm_cid(struct task_struct *t)
2318	{
2319	return t->mm_cid.cid & ~(MM_CID_ONCPU \| MM_CID_TRANSIT);
2320	}
2321	#else
2322	static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
2323	static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
2324	static inline void sched_mm_cid_fork(struct task_struct *t) { }
2325	static inline void sched_mm_cid_exit(struct task_struct *t) { }
2326	static __always_inline int task_mm_cid(struct task_struct *t)
2327	{
2328	/*
2329	* Use the processor id as a fall-back when the mm cid feature is
2330	* disabled. This provides functional per-cpu data structure accesses
2331	* in user-space, althrough it won't provide the memory usage benefits.
2332	*/
2333	return task_cpu(t);
2334	}
2335	#endif
2336
2337	#ifndef MODULE
2338	#ifndef COMPILE_OFFSETS
2339
2340	extern void ___migrate_enable(void);
2341
2342	struct rq;
2343	DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
2344
2345	/*
2346	* The "struct rq" is not available here, so we can't access the
2347	* "runqueues" with this_cpu_ptr(), as the compilation will fail in
2348	* this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
2349	* typeof((ptr) + 0)
2350	*
2351	* So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
2352	*/
/*
 * this_rq_raw():    pointer to this CPU's 'runqueues' entry.
 * this_rq_pinned(): lvalue for the unsigned int located RQ_nr_pinned
 *                   bytes into that entry; the users in this file
 *                   increment and decrement it in place.
 */
#ifdef CONFIG_SMP
#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
#else
#define this_rq_raw() PERCPU_PTR(&runqueues)
#endif
#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))
2359
2360	static inline void __migrate_enable(void)
2361	{
2362	struct task_struct *p = current;
2363
2364	#ifdef CONFIG_DEBUG_PREEMPT
2365	/*
2366	* Check both overflow from migrate_disable() and superfluous
2367	* migrate_enable().
2368	*/
2369	if (WARN_ON_ONCE((s16)p->migration_disabled <= `0`))
2370	return;
2371	#endif
2372
2373	if (p->migration_disabled > `1`) {
2374	p->migration_disabled--;
2375	return;
2376	}
2377
2378	/*
2379	* Ensure stop_task runs either before or after this, and that
2380	* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
2381	*/
2382	guard(preempt)();
2383	if (unlikely(p->cpus_ptr != &p->cpus_mask))
2384	___migrate_enable();
2385	/*
2386	* Mustn't clear migration_disabled() until cpus_ptr points back at the
2387	* regular cpus_mask, otherwise things that race (eg.
2388	* select_fallback_rq) get confused.
2389	*/
2390	barrier();
2391	p->migration_disabled = `0`;
2392	this_rq_pinned()--;
2393	}
2394
2395	static inline void __migrate_disable(void)
2396	{
2397	struct task_struct *p = current;
2398
2399	if (p->migration_disabled) {
2400	#ifdef CONFIG_DEBUG_PREEMPT
2401	/*
2402	*Warn about overflow half-way through the range.
2403	*/
2404	WARN_ON_ONCE((s16)p->migration_disabled < `0`);
2405	#endif
2406	p->migration_disabled++;
2407	return;
2408	}
2409
2410	guard(preempt)();
2411	this_rq_pinned()++;
2412	p->migration_disabled = `1`;
2413	}
2414	#else /* !COMPILE_OFFSETS */
/* COMPILE_OFFSETS build: empty stubs. */
static inline void __migrate_disable(void) { }
static inline void __migrate_enable(void) { }
2417	#endif /* !COMPILE_OFFSETS */
2418
2419	/*
2420	* So that it is possible to not export the runqueues variable, define and
2421	* export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
2422	* them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will
2423	* be defined in kernel/sched/core.c.
2424	*/
2425	#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
2426	static __always_inline void migrate_disable(void)
2427	{
2428	__migrate_disable();
2429	}
2430
2431	static __always_inline void migrate_enable(void)
2432	{
2433	__migrate_enable();
2434	}
2435	#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
2436	extern void migrate_disable(void);
2437	extern void migrate_enable(void);
2438	#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
2439
2440	#else /* MODULE */
2441	extern void migrate_disable(void);
2442	extern void migrate_enable(void);
2443	#endif /* MODULE */
2444
2445	DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
2446
2447	#endif
2448

source code of linux/include/linux/sched.h