/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion,
 * tree variant.
 *
 * Copyright (C) IBM Corporation, 2017
 *
 * Author: Paul McKenney <paulmck@linux.ibm.com>
 */

#ifndef _LINUX_SRCU_TREE_H
#define _LINUX_SRCU_TREE_H

#include <linux/rcu_node_tree.h>
#include <linux/completion.h>

struct srcu_node;
struct srcu_struct;

/* One element of the srcu_data srcu_ctrs array. */
struct srcu_ctr {
	atomic_long_t srcu_locks;	/* Locks per CPU. */
	atomic_long_t srcu_unlocks;	/* Unlocks per CPU. */
};

/*
 * Per-CPU structure feeding into leaf srcu_node, similar in function
 * to rcu_data.
 */
struct srcu_data {
	/* Read-side state. */
	struct srcu_ctr srcu_ctrs[2];		/* Locks and unlocks per CPU. */
	int srcu_reader_flavor;			/* Reader flavor for srcu_struct structure? */
						/* Values: SRCU_READ_FLAVOR_.* */

	/* Update-side state. */
	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
	struct rcu_segcblist srcu_cblist;	/* List of callbacks. */
	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	bool srcu_cblist_invoking;		/* Invoking these CBs? */
	struct timer_list delay_work;		/* Delay for CB invoking. */
	struct work_struct work;		/* Context for CB invoking. */
	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
	struct rcu_head srcu_ec_head;		/* For srcu_expedite_current() use. */
	int srcu_ec_state;			/* State for srcu_expedite_current(). */
	struct srcu_node *mynode;		/* Leaf srcu_node. */
	unsigned long grpmask;			/* Mask for leaf srcu_node */
						/*  ->srcu_data_have_cbs[]. */
	int cpu;
	struct srcu_struct *ssp;
};
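
/*
 * Readers for a given index have drained once the sum of ->srcu_unlocks
 * across all CPUs catches up with the corresponding sum of ->srcu_locks.
 * A simplified sketch of that check follows (hypothetical helper name;
 * the real code in kernel/rcu/srcutree.c adds memory ordering and
 * counter-wrap handling):
 *
 *	static bool srcu_readers_drained(struct srcu_struct *ssp, int idx)
 *	{
 *		unsigned long locks = 0, unlocks = 0;
 *		int cpu;
 *
 *		for_each_possible_cpu(cpu) {
 *			struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
 *
 *			unlocks += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks);
 *		}
 *		smp_mb(); // Order unlock reads before lock reads.
 *		for_each_possible_cpu(cpu) {
 *			struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
 *
 *			locks += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks);
 *		}
 *		return unlocks == locks;
 *	}
 */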

/*
 * Node in SRCU combining tree, similar in function to rcu_node.
 */
struct srcu_node {
	spinlock_t __private lock;
	unsigned long srcu_have_cbs[4];		/* GP seq for children having CBs, but only */
						/*  if greater than ->srcu_gp_seq. */
	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs have CBs for given GP? */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	struct srcu_node *srcu_parent;		/* Next up in tree. */
	int grplo;				/* Least CPU for node. */
	int grphi;				/* Biggest CPU for node. */
};
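
/*
 * A CPU's bit in its leaf's ->srcu_data_have_cbs[] masks is its offset
 * within that leaf's [grplo, grphi] span. A sketch of how such a mask
 * is computed (the actual initialization lives in kernel/rcu/srcutree.c):
 *
 *	sdp->grpmask = 1 << (sdp->cpu - sdp->mynode->grplo);
 */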

/*
 * Per-SRCU-domain structure, update-side data linked from srcu_struct.
 */
struct srcu_usage {
	struct srcu_node *node;			/* Combining tree. */
	struct srcu_node *level[RCU_NUM_LVLS + 1];
						/* First node at each level. */
	int srcu_size_state;			/* Small-to-big transition state. */
	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
	spinlock_t __private lock;		/* Protect counters and size state. */
	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
	unsigned long srcu_size_jiffies;	/* Current contention-measurement interval. */
	unsigned long srcu_n_lock_retries;	/* Contention events in current interval. */
	unsigned long srcu_n_exp_nodelay;	/* # expedited no-delays in current GP phase. */
	bool sda_is_static;			/* May ->sda be passed to free_percpu()? */
	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
	struct completion srcu_barrier_completion;
						/* Awaken barrier rq at end. */
	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
						/*  callback for the barrier */
						/*  operation. */
	unsigned long reschedule_jiffies;
	unsigned long reschedule_count;
	struct delayed_work work;
	struct srcu_struct *srcu_ssp;
};

/*
 * Per-SRCU-domain structure, similar in function to rcu_state.
 */
struct srcu_struct {
	struct srcu_ctr __percpu *srcu_ctrp;
	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
	u8 srcu_reader_flavor;
	struct lockdep_map dep_map;
	struct srcu_usage *srcu_sup;		/* Update-side data. */
};
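
/*
 * ->srcu_ctrp points to the element of ->sda->srcu_ctrs[] that new
 * readers should increment. The grace-period machinery flips it between
 * element 0 and element 1 so that it can wait out pre-existing readers
 * on one element while new readers accumulate on the other.
 */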

// Values for size state variable (->srcu_size_state). Once the state
// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through
// this state machine one step per grace period until the SRCU_SIZE_BIG state
// is reached. Otherwise, the state machine remains in the SRCU_SIZE_SMALL
// state indefinitely.
#define SRCU_SIZE_SMALL		0	// No srcu_node combining tree, ->node == NULL.
#define SRCU_SIZE_ALLOC		1	// An srcu_node tree is being allocated, initialized,
					//  and then referenced by ->node. It will not be used.
#define SRCU_SIZE_WAIT_BARRIER	2	// The srcu_node tree starts being used by everything
					//  except call_srcu(), especially by srcu_barrier().
					//  By the end of this state, all CPUs and threads
					//  are aware of this tree's existence.
#define SRCU_SIZE_WAIT_CALL	3	// The srcu_node tree starts being used by call_srcu().
					//  By the end of this state, all of the call_srcu()
					//  invocations that were running on a non-boot CPU
					//  and using the boot CPU's callback queue will have
					//  completed.
#define SRCU_SIZE_WAIT_CBS1	4	// Don't trust the ->srcu_have_cbs[] grace-period
#define SRCU_SIZE_WAIT_CBS2	5	//  sequence elements or the ->srcu_data_have_cbs[]
#define SRCU_SIZE_WAIT_CBS3	6	//  CPU-bitmask elements until all four elements of
#define SRCU_SIZE_WAIT_CBS4	7	//  each array have been initialized.
#define SRCU_SIZE_BIG		8	// The srcu_node combining tree is fully initialized
					//  and all aspects of it are being put to use.

/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE		0
#define SRCU_STATE_SCAN1	1
#define SRCU_STATE_SCAN2	2
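
/*
 * For example, while the first counter scan of a grace period is in
 * progress, rcu_seq_state(sup->srcu_gp_seq) yields SRCU_STATE_SCAN1
 * (rcu_seq_state() lives in kernel/rcu/rcu.h), and grace-period
 * completion returns the state bits to SRCU_STATE_IDLE.
 */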

/* Values for srcu_expedite_current() state (->srcu_ec_state). */
#define SRCU_EC_IDLE	0
#define SRCU_EC_PENDING	1
#define SRCU_EC_REPOST	2

/*
 * Values for initializing gp sequence fields. Higher values allow wrap
 * arounds to occur earlier. The second value, which includes state, is
 * useful for static initialization of srcu_usage, where srcu_gp_seq_needed
 * is expected to have some state value in its lower bits (or else it will
 * appear to be already initialized within the call to
 * check_init_srcu_struct()).
 */
#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT)
#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1)
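
/*
 * With RCU_SEQ_CTR_SHIFT currently equal to 2, SRCU_GP_SEQ_INITIAL_VAL
 * places the grace-period counter 100 grace periods short of unsigned
 * wraparound, so counter-wrap code paths get exercised early in boot.
 * Subtracting one for ..._WITH_STATE sets all of the low-order state
 * bits, which keeps rcu_seq_state() nonzero so that
 * check_init_srcu_struct() knows initialization is still required.
 */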

#define __SRCU_USAGE_INIT(name)							\
{										\
	.lock = __SPIN_LOCK_UNLOCKED(name.lock),				\
	.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL,					\
	.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE,		\
	.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL,			\
	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),			\
}

#define __SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)			\
	.srcu_sup = &usage_name,						\
	.srcu_reader_flavor = fast,						\
	__SRCU_DEP_MAP_INIT(name)

#define __SRCU_STRUCT_INIT_MODULE(name, usage_name, fast)			\
{										\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)			\
}

#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name, fast)			\
{										\
	.sda = &pcpu_name,							\
	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],					\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)			\
}

/*
 * Define and initialize a srcu struct at build time.
 * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
 *
 * Note that although DEFINE_STATIC_SRCU() hides the name from other
 * files, the per-CPU variable rules nevertheless require that the
 * chosen name be globally unique. These rules also prohibit use of
 * DEFINE_STATIC_SRCU() within a function. If these rules are too
 * restrictive, declare the srcu_struct manually. For example, in
 * each file:
 *
 *	static struct srcu_struct my_srcu;
 *
 * Then, before the first use of each my_srcu, manually initialize it:
 *
 *	init_srcu_struct(&my_srcu);
 *
 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
 *
 * DEFINE_SRCU_FAST() and DEFINE_STATIC_SRCU_FAST() create an srcu_struct
 * and associated structures whose readers must be of the SRCU-fast variety.
 * DEFINE_SRCU_FAST_UPDOWN() and DEFINE_STATIC_SRCU_FAST_UPDOWN() create
 * an srcu_struct and associated structures whose readers must be of the
 * SRCU-fast-updown variety. The key point (aside from error checking) with
 * both varieties is that the grace periods must use synchronize_rcu()
 * instead of smp_mb(), and given that the first (for example)
 * srcu_read_lock_fast() might race with the first synchronize_srcu(),
 * this difference must be specified at initialization time.
 */
#ifdef MODULE
# define __DEFINE_SRCU(name, fast, is_static)						\
	static struct srcu_usage name##_srcu_usage =					\
		__SRCU_USAGE_INIT(name##_srcu_usage);					\
	is_static struct srcu_struct name =						\
		__SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage, fast);		\
	extern struct srcu_struct * const __srcu_struct_##name;			\
	struct srcu_struct * const __srcu_struct_##name					\
		__section("___srcu_struct_ptrs") = &name
#else
# define __DEFINE_SRCU(name, fast, is_static)						\
	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);			\
	static struct srcu_usage name##_srcu_usage =					\
		__SRCU_USAGE_INIT(name##_srcu_usage);					\
	is_static struct srcu_struct name =						\
		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data, fast)
#endif
#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, 0, /* not static */)
#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, 0, static)
#define DEFINE_SRCU_FAST(name)		__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, /* not static */)
#define DEFINE_STATIC_SRCU_FAST(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, static)
#define DEFINE_SRCU_FAST_UPDOWN(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN,	\
						      /* not static */)
#define DEFINE_STATIC_SRCU_FAST_UPDOWN(name)						\
	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, static)
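
/*
 * Example usage (a minimal sketch; my_srcu, gp, newp, oldp, and p are
 * hypothetical):
 *
 *	DEFINE_STATIC_SRCU(my_srcu);
 *
 *	// Reader:
 *	int idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(gp, &my_srcu);
 *	// ... use p ...
 *	srcu_read_unlock(&my_srcu, idx);
 *
 *	// Updater:
 *	rcu_assign_pointer(gp, newp);
 *	synchronize_srcu(&my_srcu);
 *	kfree(oldp);
 */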

int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_expedite_current(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);

// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
// element's index.
static inline int __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
{
	return scpp - &ssp->sda->srcu_ctrs[0];
}

// Converts an integer to a per-CPU pointer to the corresponding
// ->srcu_ctrs[] array element.
static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
{
	return &ssp->sda->srcu_ctrs[idx];
}
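
// These two helpers are inverses of each other: for idx in {0, 1},
// __srcu_ptr_to_ctr(ssp, __srcu_ctr_to_ptr(ssp, idx)) == idx.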

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct. Returns a pointer that must be passed to the matching
 * srcu_read_unlock_fast().
 *
 * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
 * critical sections either because they disable interrupts, because
 * they are a single instruction, or because they are read-modify-write
 * atomic operations, depending on the whims of the architecture.
 * This matters because the SRCU-fast grace-period mechanism uses either
 * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU,
 * *not* SRCU, in order to eliminate the need for the read-side smp_mb()
 * invocations that are used by srcu_read_lock() and srcu_read_unlock().
 * The __srcu_read_unlock_fast() function also relies on this same RCU
 * (again, *not* SRCU) trick to eliminate the need for smp_mb().
 *
 * The key point behind this RCU trick is that if any part of a given
 * RCU reader precedes the beginning of a given RCU grace period, then
 * the entirety of that RCU reader and everything preceding it happens
 * before the end of that same RCU grace period. Similarly, if any part
 * of a given RCU reader follows the end of a given RCU grace period,
 * then the entirety of that RCU reader and everything following it
 * happens after the beginning of that same RCU grace period. Therefore,
 * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z
 * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU
 * read-side critical section from the viewpoint of the SRCU grace period.
 * This is all the ordering that is required, hence no calls to smp_mb().
 *
 * This means that __srcu_read_lock_fast() is not all that fast
 * on architectures that support NMIs but do not supply NMI-safe
 * implementations of this_cpu_inc().
 */
static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp)
{
	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader.
	barrier(); /* Avoid leaking the critical section. */
	return scp;
}

/*
 * Removes the count for the old reader from the appropriate
 * per-CPU element of the srcu_struct. Note that this may well be a
 * different CPU than that which was incremented by the corresponding
 * srcu_read_lock_fast(), but it must be within the same task.
 *
 * Please see the __srcu_read_lock_fast() function's header comment for
 * information on implicit RCU readers and NMI safety.
 */
static inline void notrace
__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
{
	barrier(); /* Avoid leaking the critical section. */
	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader.
}
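
/*
 * Example pairing (a minimal sketch; my_srcu, gp, and p are hypothetical,
 * and the public wrappers for these helpers are srcu_read_lock_fast()
 * and srcu_read_unlock_fast()):
 *
 *	DEFINE_STATIC_SRCU_FAST(my_srcu);
 *
 *	struct srcu_ctr __percpu *scp;
 *
 *	scp = srcu_read_lock_fast(&my_srcu);
 *	p = srcu_dereference(gp, &my_srcu);
 *	// ... use p; sleeping is legal in this critical section ...
 *	srcu_read_unlock_fast(&my_srcu, scp);
 */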

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct. Returns a pointer that must be passed to the matching
 * srcu_read_unlock_fast_updown(). This type of reader is compatible
 * with srcu_down_read_fast() and srcu_up_read_fast().
 *
 * See the __srcu_read_lock_fast() comment for more details.
 */
static inline
struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struct *ssp)
{
	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader.
	barrier(); /* Avoid leaking the critical section. */
	return scp;
}

/*
 * Removes the count for the old reader from the appropriate
 * per-CPU element of the srcu_struct. Note that this may well be a
 * different CPU than that which was incremented by the corresponding
 * srcu_read_lock_fast_updown(), but it must be within the same task.
 *
 * Please see the __srcu_read_lock_fast() function's header comment for
 * information on implicit RCU readers and NMI safety.
 */
static inline void notrace
__srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
{
	barrier(); /* Avoid leaking the critical section. */
	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader.
}

void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);

// Record SRCU-reader usage type only for CONFIG_PROVE_RCU=y kernels.
static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
{
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		__srcu_check_read_flavor(ssp, read_flavor);
}

#endif