| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _ASM_X86_PROCESSOR_H |
| 3 | #define _ASM_X86_PROCESSOR_H |
| 4 | |
| 5 | #include <asm/processor-flags.h> |
| 6 | |
| 7 | /* Forward declaration, a strange C thing */ |
| 8 | struct task_struct; |
| 9 | struct mm_struct; |
| 10 | struct io_bitmap; |
| 11 | struct vm86; |
| 12 | |
| 13 | #include <asm/math_emu.h> |
| 14 | #include <asm/segment.h> |
| 15 | #include <asm/types.h> |
| 16 | #include <uapi/asm/sigcontext.h> |
| 17 | #include <asm/current.h> |
| 18 | #include <asm/cpufeatures.h> |
| 19 | #include <asm/cpuid/api.h> |
| 20 | #include <asm/page.h> |
| 21 | #include <asm/pgtable_types.h> |
| 22 | #include <asm/percpu.h> |
| 23 | #include <asm/desc_defs.h> |
| 24 | #include <asm/nops.h> |
| 25 | #include <asm/special_insns.h> |
| 26 | #include <asm/fpu/types.h> |
| 27 | #include <asm/unwind_hints.h> |
| 28 | #include <asm/vmxfeatures.h> |
| 29 | #include <asm/vdso/processor.h> |
| 30 | #include <asm/shstk.h> |
| 31 | |
| 32 | #include <linux/personality.h> |
| 33 | #include <linux/cache.h> |
| 34 | #include <linux/threads.h> |
| 35 | #include <linux/math64.h> |
| 36 | #include <linux/err.h> |
| 37 | #include <linux/irqflags.h> |
| 38 | #include <linux/mem_encrypt.h> |
| 39 | |
/*
 * We handle most unaligned accesses in hardware.  On the other hand
 * unaligned DMA can be quite expensive on some Nehalem processors.
 *
 * Based on this we disable the IP header alignment in network drivers.
 */
#define NET_IP_ALIGN	0

/* Number of entries in thread_struct.ptrace_bps[]. */
#define HBP_NUM 4

/*
 * These alignment constraints are for performance in the vSMP case,
 * but in the task_struct case we must also meet hardware imposed
 * alignment requirements of the FPU state:
 *
 * - CONFIG_X86_VSMP: both alignments are (1 << INTERNODE_CACHE_SHIFT).
 * - otherwise: the task alignment is that of union fpregs_state and the
 *   mm_struct alignment is 0.
 */
#ifdef CONFIG_X86_VSMP
# define ARCH_MIN_TASKALIGN		(1 << INTERNODE_CACHE_SHIFT)
# define ARCH_MIN_MMSTRUCT_ALIGN	(1 << INTERNODE_CACHE_SHIFT)
#else
# define ARCH_MIN_TASKALIGN		__alignof__(union fpregs_state)
# define ARCH_MIN_MMSTRUCT_ALIGN	0
#endif
| 62 | |
/*
 * Per page-size TLB values, declared here and defined elsewhere.
 * NOTE(review): "lli"/"lld" presumably denote the instruction and data
 * TLB, and the suffix the page size - confirm against the definitions.
 */
extern u16 __read_mostly tlb_lli_4k;
extern u16 __read_mostly tlb_lli_2m;
extern u16 __read_mostly tlb_lli_4m;
extern u16 __read_mostly tlb_lld_4k;
extern u16 __read_mostly tlb_lld_2m;
extern u16 __read_mostly tlb_lld_4m;
extern u16 __read_mostly tlb_lld_1g;
| 70 | |
| 71 | /* |
| 72 | * CPU type and hardware bug flags. Kept separately for each CPU. |
| 73 | */ |
| 74 | |
/*
 * Topology identifiers of one CPU. Embedded in struct cpuinfo_x86 as
 * the 'topo' member.
 */
struct cpuinfo_topology {
	// Real APIC ID read from the local APIC
	u32			apicid;
	// The initial APIC ID provided by CPUID
	u32			initial_apicid;

	// Physical package ID
	u32			pkg_id;

	// Physical die ID on AMD, Relative on Intel
	u32			die_id;

	// Compute unit ID - AMD specific
	u32			cu_id;

	// Core ID relative to the package
	u32			core_id;

	// Logical ID mappings
	u32			logical_pkg_id;
	u32			logical_die_id;
	u32			logical_core_id;

	// AMD Node ID and Nodes per Package info
	u32			amd_node_id;

	// Cache level topology IDs
	u32			llc_id;
	u32			l2c_id;

	// Hardware defined CPU-type.
	// The three union members are views of the same 32 bits: the raw
	// value and one bitfield decoding per vendor (24+8 and 16+8+4+4 bits).
	union {
		u32		cpu_type;
		struct {
			// CPUID.1A.EAX[23-0]
			u32	intel_native_model_id	:24;
			// CPUID.1A.EAX[31-24]
			u32	intel_type		:8;
		};
		struct {
			// CPUID 0x80000026.EBX
			u32	amd_num_processors	:16,
				amd_power_eff_ranking	:8,
				amd_native_model_id	:4,
				amd_type		:4;
		};
	};
};
| 123 | |
/*
 * Identification and capability data of one CPU. Instances declared in
 * this header: boot_cpu_data, new_cpu_data and the per-CPU variable
 * cpu_info (accessed through cpu_data(cpu)).
 */
struct cpuinfo_x86 {
	union {
		/*
		 * The particular ordering (low-to-high) of (vendor,
		 * family, model) is done in case range of models, like
		 * it is usually done on AMD, need to be compared.
		 */
		struct {
			__u8	x86_model;
			/* CPU family */
			__u8	x86;
			/* CPU vendor */
			__u8	x86_vendor;
			__u8	x86_reserved;
		};
		/* combined vendor, family, model; overlays the four bytes above */
		__u32		x86_vfm;
	};
	__u8			x86_stepping;
#ifdef CONFIG_X86_64
	/* Number of 4K pages in DTLB/ITLB combined (in pages): */
	int			x86_tlbsize;
#endif
#ifdef CONFIG_X86_VMX_FEATURE_NAMES
	__u32			vmx_capability[NVMXINTS];
#endif
	__u8			x86_virt_bits;
	__u8			x86_phys_bits;
	/* Max extended CPUID function supported: */
	__u32			extended_cpuid_level;
	/* Maximum supported CPUID level, -1=no CPUID: */
	int			cpuid_level;
	/*
	 * Align to size of unsigned long because the x86_capability array
	 * is passed to bitops which require the alignment. Use unnamed
	 * union to enforce the array is aligned to size of unsigned long.
	 */
	union {
		__u32		x86_capability[NCAPINTS + NBUGINTS];
		/* Never read; exists only to force the alignment. */
		unsigned long	x86_capability_alignment;
	};
	char			x86_vendor_id[16];
	char			x86_model_id[64];
	struct cpuinfo_topology	topo;
	/* in KB - valid for CPUS which support this call: */
	unsigned int		x86_cache_size;
	int			x86_cache_alignment;	/* In bytes */
	/* Cache QoS architectural values, valid only on the BSP: */
	int			x86_cache_max_rmid;	/* max index */
	int			x86_cache_occ_scale;	/* scale to bytes */
	int			x86_cache_mbm_width_offset;
	int			x86_power;
	unsigned long		loops_per_jiffy;
	/* protected processor identification number */
	u64			ppin;
	u16			x86_clflush_size;
	/* number of cores as seen by the OS: */
	u16			booted_cores;
	/* Index into per_cpu list: */
	u16			cpu_index;
	/* Is SMT active on this core? */
	bool			smt_active;
	u32			microcode;
	/* Address space bits used by the cache internally */
	u8			x86_cache_bits;
	unsigned		initialized : 1;
} __randomize_layout;
| 191 | |
/*
 * CPU vendor identifiers.
 * NOTE(review): presumably the values stored in cpuinfo_x86.x86_vendor -
 * confirm against the users.
 *
 * The values 4 and 6 are not assigned in this list. X86_VENDOR_NUM is
 * one more than the highest assigned identifier.
 */
#define X86_VENDOR_INTEL	0
#define X86_VENDOR_CYRIX	1
#define X86_VENDOR_AMD		2
#define X86_VENDOR_UMC		3
#define X86_VENDOR_CENTAUR	5
#define X86_VENDOR_TRANSMETA	7
#define X86_VENDOR_NSC		8
#define X86_VENDOR_HYGON	9
#define X86_VENDOR_ZHAOXIN	10
#define X86_VENDOR_VORTEX	11
#define X86_VENDOR_NUM		12

/* Lies outside the 0..X86_VENDOR_NUM-1 range used by the named vendors. */
#define X86_VENDOR_UNKNOWN	0xff
| 205 | |
/*
 * capabilities of CPUs
 */
extern struct cpuinfo_x86	boot_cpu_data;
extern struct cpuinfo_x86	new_cpu_data;

/* Both arrays have the same dimension as cpuinfo_x86.x86_capability[]. */
extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];

/* One struct cpuinfo_x86 per CPU; cpu_data(cpu) selects the one of @cpu. */
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu)		per_cpu(cpu_info, cpu)

extern const struct seq_operations cpuinfo_op;

/* Reads boot_cpu_data.x86_cache_alignment (documented there as bytes). */
#define cache_line_size()	(boot_cpu_data.x86_cache_alignment)

extern void cpu_detect(struct cpuinfo_x86 *c);
| 223 | |
| 224 | static inline unsigned long long l1tf_pfn_limit(void) |
| 225 | { |
| 226 | return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); |
| 227 | } |
| 228 | |
/*
 * CPU setup and reporting entry points; declared here, defined elsewhere.
 */
void init_cpu_devs(void);
void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_secondary_cpu(unsigned int cpu);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
| 235 | |
| 236 | /* |
| 237 | * Friendlier CR3 helpers. |
| 238 | */ |
| 239 | static inline unsigned long read_cr3_pa(void) |
| 240 | { |
| 241 | return __read_cr3() & CR3_ADDR_MASK; |
| 242 | } |
| 243 | |
| 244 | static inline unsigned long native_read_cr3_pa(void) |
| 245 | { |
| 246 | return __native_read_cr3() & CR3_ADDR_MASK; |
| 247 | } |
| 248 | |
/*
 * Write CR3 with the address of @pgdir as translated by __sme_pa().
 */
static inline void load_cr3(pgd_t *pgdir)
{
	write_cr3(__sme_pa(pgdir));
}
| 253 | |
/*
 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
 * unrelated to the task-switch mechanism:
 *
 * Both variants below are packed; member order and sizes must not be
 * changed.
 */
#ifdef CONFIG_X86_32
/* This is the TSS defined by the hardware. */
struct x86_hw_tss {
	unsigned short		back_link, __blh;
	unsigned long		sp0;
	unsigned short		ss0, __ss0h;
	unsigned long		sp1;

	/*
	 * We don't use ring 1, so ss1 is a convenient scratch space in
	 * the same cacheline as sp0.  We use ss1 to cache the value in
	 * MSR_IA32_SYSENTER_CS.  When we context switch
	 * MSR_IA32_SYSENTER_CS, we first check if the new value being
	 * written matches ss1, and, if it's not, then we wrmsr the new
	 * value and update ss1.
	 *
	 * The only reason we context switch MSR_IA32_SYSENTER_CS is
	 * that we set it to zero in vm86 tasks to avoid corrupting the
	 * stack if we were to go through the sysenter path from vm86
	 * mode.
	 */
	unsigned short		ss1;	/* MSR_IA32_SYSENTER_CS */

	unsigned short		__ss1h;
	unsigned long		sp2;
	unsigned short		ss2, __ss2h;
	unsigned long		__cr3;
	unsigned long		ip;
	unsigned long		flags;
	unsigned long		ax;
	unsigned long		cx;
	unsigned long		dx;
	unsigned long		bx;
	unsigned long		sp;
	unsigned long		bp;
	unsigned long		si;
	unsigned long		di;
	unsigned short		es, __esh;
	unsigned short		cs, __csh;
	unsigned short		ss, __ssh;
	unsigned short		ds, __dsh;
	unsigned short		fs, __fsh;
	unsigned short		gs, __gsh;
	unsigned short		ldt, __ldth;
	unsigned short		trace;
	/* Compared against the TSS limit; see IO_BITMAP_OFFSET_*. */
	unsigned short		io_bitmap_base;

} __attribute__((packed));
#else
/* The TSS layout used when CONFIG_X86_32 is not set. */
struct x86_hw_tss {
	u32			reserved1;
	u64			sp0;
	u64			sp1;

	/*
	 * Since Linux does not use ring 2, the 'sp2' slot is unused by
	 * hardware.  entry_SYSCALL_64 uses it as scratch space to stash
	 * the user RSP value.
	 */
	u64			sp2;

	u64			reserved2;
	u64			ist[7];
	u32			reserved3;
	u32			reserved4;
	u16			reserved5;
	/* Compared against the TSS limit; see IO_BITMAP_OFFSET_*. */
	u16			io_bitmap_base;

} __attribute__((packed));
#endif
| 329 | |
/*
 * IO-bitmap sizes:
 *
 * IO_BITMAP_BYTES and IO_BITMAP_LONGS are IO_BITMAP_BITS expressed in
 * bytes and in longs.
 */
#define IO_BITMAP_BITS			65536
#define IO_BITMAP_BYTES			(IO_BITMAP_BITS / BITS_PER_BYTE)
#define IO_BITMAP_LONGS			(IO_BITMAP_BYTES / sizeof(long))

/* Offset of tss_struct.io_bitmap.bitmap, measured from tss_struct.x86_tss. */
#define IO_BITMAP_OFFSET_VALID_MAP				\
	(offsetof(struct tss_struct, io_bitmap.bitmap) -	\
	 offsetof(struct tss_struct, x86_tss))

/* Offset of tss_struct.io_bitmap.mapall, measured from tss_struct.x86_tss. */
#define IO_BITMAP_OFFSET_VALID_ALL				\
	(offsetof(struct tss_struct, io_bitmap.mapall) -	\
	 offsetof(struct tss_struct, x86_tss))

#ifdef CONFIG_X86_IOPL_IOPERM
/*
 * sizeof(unsigned long) coming from an extra "long" at the end of the
 * iobitmap. The limit is inclusive, i.e. the last valid byte.
 */
# define __KERNEL_TSS_LIMIT	\
	(IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
	 sizeof(unsigned long) - 1)
#else
/* Without I/O bitmap support the limit ends with the hardware TSS. */
# define __KERNEL_TSS_LIMIT	\
	(offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
#endif

/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */
#define IO_BITMAP_OFFSET_INVALID	(__KERNEL_TSS_LIMIT + 1)
| 360 | |
/* A stack of exactly PAGE_SIZE bytes. */
struct entry_stack {
	char	stack[PAGE_SIZE];
};

/* One entry_stack, with the containing object aligned to PAGE_SIZE. */
struct entry_stack_page {
	struct entry_stack stack;
} __aligned(PAGE_SIZE);
| 368 | |
/*
 * All IO bitmap related data stored in the TSS:
 *
 * The positions of 'bitmap' and 'mapall' inside struct tss_struct are what
 * IO_BITMAP_OFFSET_VALID_MAP and IO_BITMAP_OFFSET_VALID_ALL compute.
 */
struct x86_io_bitmap {
	/* The sequence number of the last active bitmap. */
	u64			prev_sequence;

	/*
	 * Store the dirty size of the last io bitmap offender. The next
	 * one will have to do the cleanup as the switch out to a non io
	 * bitmap user will just set x86_tss.io_bitmap_base to a value
	 * outside of the TSS limit. So for sane tasks there is no need to
	 * actually touch the io_bitmap at all.
	 */
	unsigned int		prev_max;

	/*
	 * The extra 1 is there because the CPU will access an
	 * additional byte beyond the end of the IO permission
	 * bitmap. The extra byte must be all 1 bits, and must
	 * be within the limit.
	 */
	unsigned long		bitmap[IO_BITMAP_LONGS + 1];

	/*
	 * Special I/O bitmap to emulate IOPL(3). All bytes zero,
	 * except the additional byte at the end.
	 */
	unsigned long		mapall[IO_BITMAP_LONGS + 1];
};
| 399 | |
/*
 * The hardware TSS followed by the I/O bitmap data. The whole object is
 * aligned to PAGE_SIZE; the per-CPU instance is cpu_tss_rw.
 */
struct tss_struct {
	/*
	 * The fixed hardware portion.  This must not cross a page boundary
	 * at risk of violating the SDM's advice and potentially triggering
	 * errata.
	 */
	struct x86_hw_tss	x86_tss;

	struct x86_io_bitmap	io_bitmap;
} __aligned(PAGE_SIZE);

DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
| 412 | |
/* Per CPU interrupt stacks: IRQ_STACK_SIZE bytes, aligned to their size. */
struct irq_stack {
	char		stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);

DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
#ifdef CONFIG_X86_64
/* 64-bit builds declare an in-use flag for the hardirq stack ... */
DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse);
#else
/* ... all other builds declare a separate softirq stack pointer instead. */
DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr);
#endif

/* Read by current_top_of_stack(). */
DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack);
/* const-qualified alias provided by the linker. */
DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override,
			  const_cpu_current_top_of_stack);
| 429 | |
| 430 | #ifdef CONFIG_X86_64 |
/*
 * Return per_cpu_offset(@cpu) on CONFIG_SMP builds and 0 on all other
 * builds.
 */
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
	unsigned long gs_base = 0;

#ifdef CONFIG_SMP
	gs_base = per_cpu_offset(cpu);
#endif
	return gs_base;
}
| 439 | |
/* Declared here, defined elsewhere. */
extern asmlinkage void entry_SYSCALL32_ignore(void);

/* Save actual FS/GS selectors and bases to current->thread */
void current_save_fsgs(void);
| 444 | #endif /* X86_64 */ |
| 445 | |
/* Only pointers to struct perf_event are stored below. */
struct perf_event;

/*
 * Architecture specific per-thread state. Which members exist depends on
 * CONFIG_X86_32 / CONFIG_X86_64, CONFIG_VM86 and
 * CONFIG_X86_USER_SHADOW_STACK.
 */
struct thread_struct {
	/* Cached TLS descriptors: */
	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
#ifdef CONFIG_X86_32
	unsigned long		sp0;
#endif
	unsigned long		sp;
#ifdef CONFIG_X86_32
	unsigned long		sysenter_cs;
#else
	unsigned short		es;
	unsigned short		ds;
	unsigned short		fsindex;
	unsigned short		gsindex;
#endif

#ifdef CONFIG_X86_64
	unsigned long		fsbase;
	unsigned long		gsbase;
#else
	/*
	 * XXX: this could presumably be unsigned short.  Alternatively,
	 * 32-bit kernels could be taught to use fsindex instead.
	 */
	unsigned long fs;
	unsigned long gs;
#endif

	/* Save middle states of ptrace breakpoints */
	struct perf_event	*ptrace_bps[HBP_NUM];
	/* Debug status used for traps, single steps, etc... */
	unsigned long           virtual_dr6;
	/* Keep track of the exact dr7 value set by the user */
	unsigned long           ptrace_dr7;
	/* Fault info: */
	unsigned long		cr2;
	unsigned long		trap_nr;
	unsigned long		error_code;
#ifdef CONFIG_VM86
	/* Virtual 86 mode info */
	struct vm86		*vm86;
#endif
	/* IO permissions: */
	struct io_bitmap	*io_bitmap;

	/*
	 * IOPL. Privilege level dependent I/O permission which is
	 * emulated via the I/O bitmap to prevent user space from disabling
	 * interrupts.
	 */
	unsigned long		iopl_emul;

	unsigned int		iopl_warn:1;

	/*
	 * Protection Keys Register for Userspace.  Loaded immediately on
	 * context switch. Store it in thread_struct to avoid a lookup in
	 * the task's FPU xstate buffer. This value is only valid when a
	 * task is scheduled out. For 'current' the authoritative source of
	 * PKRU is the hardware itself.
	 */
	u32			pkru;

#ifdef CONFIG_X86_USER_SHADOW_STACK
	unsigned long		features;
	unsigned long		features_locked;

	struct thread_shstk	shstk;
#endif
};
| 518 | |
/*
 * x86_task_fpu(task): pointer to the struct fpu belonging to @task.
 *
 * With CONFIG_X86_DEBUG_FPU this is an out-of-line function. Otherwise
 * it is a macro yielding the address directly behind *task, i.e.
 * (void *)task + sizeof(*task), cast to struct fpu *.
 */
#ifdef CONFIG_X86_DEBUG_FPU
extern struct fpu *x86_task_fpu(struct task_struct *task);
#else
# define x86_task_fpu(task)	((struct fpu *)((void *)(task) + sizeof(*(task))))
#endif

extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
| 526 | |
/*
 * Forward both output pointers unchanged to fpu_thread_struct_whitelist(),
 * which is the only thing that writes *@offset and *@size.
 */
static inline void arch_thread_struct_whitelist(unsigned long *offset,
						unsigned long *size)
{
	fpu_thread_struct_whitelist(offset, size);
}
| 532 | |
/* Store @sp0 in the x86_tss.sp0 member of this CPU's cpu_tss_rw. */
static inline void
native_load_sp0(unsigned long sp0)
{
	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
| 538 | |
/*
 * Execute SWAPGS on CONFIG_X86_64 builds; the function body is empty on
 * all other builds. The asm statement carries a "memory" clobber.
 */
static __always_inline void native_swapgs(void)
{
#ifdef CONFIG_X86_64
	asm volatile("swapgs" ::: "memory");
#endif
}
| 545 | |
| 546 | static __always_inline unsigned long current_top_of_stack(void) |
| 547 | { |
| 548 | /* |
| 549 | * We can't read directly from tss.sp0: sp0 on x86_32 is special in |
| 550 | * and around vm86 mode and sp0 on x86_64 is special because of the |
| 551 | * entry trampoline. |
| 552 | */ |
| 553 | if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) |
| 554 | return this_cpu_read_const(const_cpu_current_top_of_stack); |
| 555 | |
| 556 | return this_cpu_read_stable(cpu_current_top_of_stack); |
| 557 | } |
| 558 | |
| 559 | static __always_inline bool on_thread_stack(void) |
| 560 | { |
| 561 | return (unsigned long)(current_top_of_stack() - |
| 562 | current_stack_pointer) < THREAD_SIZE; |
| 563 | } |
| 564 | |
/*
 * With CONFIG_PARAVIRT_XXL <asm/paravirt.h> is included instead of the
 * definition below (presumably it supplies its own load_sp0() - confirm).
 * Otherwise load_sp0() forwards directly to native_load_sp0().
 */
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else

static inline void load_sp0(unsigned long sp0)
{
	native_load_sp0(sp0);
}

#endif /* CONFIG_PARAVIRT_XXL */
| 575 | |
/* Everything in this run is declared here and defined elsewhere. */
unsigned long __get_wchan(struct task_struct *p);

extern void select_idle_routine(void);
extern void amd_e400_c1e_apic_setup(void);

extern unsigned long		boot_option_idle_override;

/*
 * NOTE(review): presumably the values held by boot_option_idle_override
 * (which is declared as unsigned long, not as this enum) - confirm.
 */
enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
			 IDLE_POLL};

extern void enable_sep_cpu(void);


/* Defined in head.S */
extern struct desc_ptr		early_gdt_descr;

extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void cpu_init(void);
extern void cpu_init_exception_handling(bool boot_cpu);
extern void cpu_init_replace_early_idt(void);
extern void cr4_init(void);

extern void set_task_blockstep(struct task_struct *task, bool on);

/* Boot loader type from the setup header: */
extern int			bootloader_type;
extern int			bootloader_version;

extern char			ignore_fpu_irq;
| 607 | |
#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
#define ARCH_HAS_PREFETCHW

/*
 * BASE_PREFETCH is the default instruction string used by prefetch() and
 * prefetchw(): empty on CONFIG_X86_32, "prefetcht0 %1" otherwise.
 * ARCH_HAS_PREFETCH is defined on CONFIG_X86_32 only.
 */
#ifdef CONFIG_X86_32
# define BASE_PREFETCH		""
# define ARCH_HAS_PREFETCH
#else
# define BASE_PREFETCH		"prefetcht0 %1"
#endif
| 617 | |
/*
 * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
 *
 * It's not worth to care about 3dnow prefetches for the K6
 * because they are microcoded there and very slow.
 *
 * The default instruction is BASE_PREFETCH; "prefetchnta" is the
 * alternative keyed on X86_FEATURE_XMM. The byte at @x is passed as an
 * "m" input operand.
 */
static inline void prefetch(const void *x)
{
	alternative_input(BASE_PREFETCH, "prefetchnta %1",
			  X86_FEATURE_XMM,
			  "m" (*(const char *)x));
}
| 630 | |
/*
 * 3dnow prefetch to get an exclusive cache line.
 * Useful for spinlocks to avoid one state transition in the
 * cache coherency protocol:
 *
 * The default instruction is BASE_PREFETCH; "prefetchw" is the
 * alternative keyed on X86_FEATURE_3DNOWPREFETCH. The byte at @x is
 * passed as an "m" input operand.
 */
static __always_inline void prefetchw(const void *x)
{
	alternative_input(BASE_PREFETCH, "prefetchw %1",
			  X86_FEATURE_3DNOWPREFETCH,
			  "m" (*(const char *)x));
}
| 642 | |
/* End address of init_stack minus TOP_OF_KERNEL_STACK_PADDING. */
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
			   TOP_OF_KERNEL_STACK_PADDING)

/* Address directly behind the pt_regs returned by task_pt_regs(task). */
#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))

/*
 * Pointer to the struct pt_regs that ends THREAD_SIZE -
 * TOP_OF_KERNEL_STACK_PADDING bytes above task_stack_page(task).
 */
#define task_pt_regs(task) \
({									\
	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
	((struct pt_regs *)__ptr) - 1;					\
})
| 654 | |
/*
 * INIT_THREAD: initializer for a struct thread_struct.
 * - CONFIG_X86_32: sets .sp0 and .sysenter_cs.
 * - otherwise: sets only .sp, to the address of __top_init_kernel_stack.
 */
#ifdef CONFIG_X86_32
#define INIT_THREAD  {							  \
	.sp0			= TOP_OF_INIT_STACK,			  \
	.sysenter_cs		= __KERNEL_CS,				  \
}

#else
extern unsigned long __top_init_kernel_stack[];

#define INIT_THREAD {							\
	.sp	= (unsigned long)&__top_init_kernel_stack,		\
}

#endif /* !CONFIG_X86_32 */
| 669 | |
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably prepares @regs so that execution continues at
 * @new_ip with stack pointer @new_sp - confirm against the definition.
 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
					       unsigned long new_sp);
| 672 | |
/*
 * This decides where the kernel will search for a free chunk of vm
 * space during mmap's.
 *
 * __TASK_UNMAPPED_BASE(task_size) is a third of @task_size, passed
 * through PAGE_ALIGN(). The argument is parenthesized so that an
 * expression argument (e.g. "a + b" or "1UL << n") is divided as a whole
 * instead of having only its last operand divided.
 */
#define __TASK_UNMAPPED_BASE(task_size)	(PAGE_ALIGN((task_size) / 3))
#define TASK_UNMAPPED_BASE		__TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
| 679 | |
/* The ip and sp members of the pt_regs returned by task_pt_regs(task). */
#define KSTK_EIP(task)		(task_pt_regs(task)->ip)
#define KSTK_ESP(task)		(task_pt_regs(task)->sp)

/* Get/set a process' ability to use the timestamp counter instruction */
#define GET_TSC_CTL(adr)	get_tsc_mode((adr))
#define SET_TSC_CTL(val)	set_tsc_mode((val))

extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);

DECLARE_PER_CPU(u64, msr_misc_features_shadow);
| 691 | |
| 692 | static inline u32 per_cpu_llc_id(unsigned int cpu) |
| 693 | { |
| 694 | return per_cpu(cpu_info.topo.llc_id, cpu); |
| 695 | } |
| 696 | |
| 697 | static inline u32 per_cpu_l2c_id(unsigned int cpu) |
| 698 | { |
| 699 | return per_cpu(cpu_info.topo.l2c_id, cpu); |
| 700 | } |
| 701 | |
#ifdef CONFIG_CPU_SUP_AMD
/*
 * Issue a DIV 0/1 insn to clear any division data from previous DIV
 * operations.
 *
 * The default instruction is empty; the DIV is the alternative keyed on
 * X86_BUG_DIV0. Operands: 0 in the "a" and "d" registers (the dividend)
 * and 1 in a general register (the divisor, %2).
 */
static __always_inline void amd_clear_divider(void)
{
	asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
		     :: "a" (0), "d" (0), "r" (1));
}

extern void amd_check_microcode(void);
#else
/* Without CONFIG_CPU_SUP_AMD both helpers are empty stubs. */
static inline void amd_clear_divider(void)		{ }
static inline void amd_check_microcode(void)		{ }
#endif
| 718 | |
/* Everything in this run is declared here and defined elsewhere. */
extern unsigned long arch_align_stack(unsigned long sp);
void free_init_pages(const char *what, unsigned long begin, unsigned long end);
extern void free_kernel_image_pages(const char *what, void *begin, void *end);

void default_idle(void);
/*
 * Without CONFIG_XEN the name is an object-like macro expanding to 0, so
 * it cannot be used with call syntax in that configuration.
 */
#ifdef	CONFIG_XEN
bool xen_set_default_idle(void);
#else
#define xen_set_default_idle 0
#endif

void __noreturn stop_this_cpu(void *dummy);
void microcode_check(struct cpuinfo_x86 *prev_info);
void store_cpu_caps(struct cpuinfo_x86 *info);

DECLARE_PER_CPU(bool, cache_state_incoherent);
| 735 | |
/* Possible values of the l1tf_mitigation variable declared below. */
enum l1tf_mitigations {
	L1TF_MITIGATION_OFF,
	L1TF_MITIGATION_AUTO,
	L1TF_MITIGATION_FLUSH_NOWARN,
	L1TF_MITIGATION_FLUSH,
	L1TF_MITIGATION_FLUSH_NOSMT,
	L1TF_MITIGATION_FULL,
	L1TF_MITIGATION_FULL_FORCE
};

extern enum l1tf_mitigations l1tf_mitigation;

/* No variable of this type is declared in this header. */
enum mds_mitigations {
	MDS_MITIGATION_OFF,
	MDS_MITIGATION_AUTO,
	MDS_MITIGATION_FULL,
	MDS_MITIGATION_VMWERV,
};

extern bool gds_ucode_mitigated(void);
| 756 | |
/*
 * Make previous memory operations globally visible before
 * a WRMSR.
 *
 * MFENCE makes writes visible, but only affects load/store
 * instructions.  WRMSR is unfortunately not a load/store
 * instruction and is unaffected by MFENCE.  The LFENCE ensures
 * that the WRMSR is not reordered.
 *
 * Most WRMSRs are full serializing instructions themselves and
 * do not require this barrier.  This is only required for the
 * IA32_TSC_DEADLINE and X2APIC MSRs.
 *
 * The "mfence; lfence" pair is the default instruction sequence; the
 * empty alternative is keyed on ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE).
 */
static inline void weak_wrmsr_fence(void)
{
	alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
}
| 774 | |
| 775 | #endif /* _ASM_X86_PROCESSOR_H */ |
| 776 | |