x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue · bsd-unix/linux@61f01dd · GitHub
[go: up one dir, main page]

Skip to content

Commit 61f01dd

Browse files
amluto authored and
torvalds committed
x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue
AMD CPUs don't reinitialize the SS descriptor on SYSRET, so SYSRET with SS == 0 results in an invalid usermode state in which SS is apparently equal to __USER_DS but causes #SS if used. Work around the issue by setting SS to __KERNEL_DS in __switch_to, thus ensuring that SYSRET never happens with SS set to NULL. This was exposed by a recent vDSO cleanup. Fixes: e7d6eef x86/vdso32/syscall.S: Do not load __USER32_DS to %ss Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Peter Anvin <hpa@zytor.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Denys Vlasenko <vda.linux@googlemail.com> Cc: Brian Gerst <brgerst@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 1190944 commit 61f01dd

File tree

5 files changed

+48
-0
lines changed

5 files changed

+48
-0
lines changed

arch/x86/ia32/ia32entry.S

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,13 @@ sysretl_from_sys_call:
427427
* cs and ss are loaded from MSRs.
428428
* (Note: 32bit->32bit SYSRET is different: since r11
429429
* does not exist, it merely sets eflags.IF=1).
430+
*
431+
* NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
432+
* descriptor is not reinitialized. This means that we must
433+
* avoid SYSRET with SS == NULL, which could happen if we schedule,
434+
* exit the kernel, and re-enter using an interrupt vector. (All
435+
* interrupt entries on x86_64 set SS to NULL.) We prevent that
436+
* from happening by reloading SS in __switch_to.
430437
*/
431438
USERGS_SYSRET32
432439

arch/x86/include/asm/cpufeature.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@
265265
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
266266
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
267267
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
268+
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
268269

269270
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
270271

arch/x86/kernel/cpu/amd.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
720720
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
721721
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
722722
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
723+
724+
/* AMD CPUs don't reset SS attributes on SYSRET */
725+
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
723726
}
724727

725728
#ifdef CONFIG_X86_32

arch/x86/kernel/entry_64.S

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,15 @@ system_call_fastpath:
295295
* rflags from r11 (but RF and VM bits are forced to 0),
296296
* cs and ss are loaded from MSRs.
297297
* Restoration of rflags re-enables interrupts.
298+
*
299+
* NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
300+
* descriptor is not reinitialized. This means that we should
301+
* avoid SYSRET with SS == NULL, which could happen if we schedule,
302+
* exit the kernel, and re-enter using an interrupt vector. (All
303+
* interrupt entries on x86_64 set SS to NULL.) We prevent that
304+
* from happening by reloading SS in __switch_to. (Actually
305+
* detecting the failure in 64-bit userspace is tricky but can be
306+
* done.)
298307
*/
299308
USERGS_SYSRET64
300309

arch/x86/kernel/process_64.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
419419
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
420420
__switch_to_xtra(prev_p, next_p, tss);
421421

422+
if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
423+
/*
424+
* AMD CPUs have a misfeature: SYSRET sets the SS selector but
425+
* does not update the cached descriptor. As a result, if we
426+
* do SYSRET while SS is NULL, we'll end up in user mode with
427+
* SS apparently equal to __USER_DS but actually unusable.
428+
*
429+
* The straightforward workaround would be to fix it up just
430+
* before SYSRET, but that would slow down the system call
431+
* fast paths. Instead, we ensure that SS is never NULL in
432+
* system call context. We do this by replacing NULL SS
433+
* selectors at every context switch. SYSCALL sets up a valid
434+
* SS, so the only way to get NULL is to re-enter the kernel
435+
* from CPL 3 through an interrupt. Since that can't happen
436+
* in the same task as a running syscall, we are guaranteed to
437+
* context switch between every interrupt vector entry and a
438+
* subsequent SYSRET.
439+
*
440+
* We read SS first because SS reads are much faster than
441+
* writes. Out of caution, we force SS to __KERNEL_DS even if
442+
* it previously had a different non-NULL value.
443+
*/
444+
unsigned short ss_sel;
445+
savesegment(ss, ss_sel);
446+
if (ss_sel != __KERNEL_DS)
447+
loadsegment(ss, __KERNEL_DS);
448+
}
449+
422450
return prev_p;
423451
}
424452

0 commit comments

Comments (0)