Merge branches 'pm-cpuidle-fixes' and 'pm-sleep-fixes' · bsd-unix/linux@7fe39a2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7fe39a2

Browse files
committed
Merge branches 'pm-cpuidle-fixes' and 'pm-sleep-fixes'
* pm-cpuidle-fixes:
  cpuidle: Fix last_residency division

* pm-sleep-fixes:
  x86/power/64: Fix kernel text mapping corruption during image restoration
3 parents a99cde4 + dbd1b8e + 65c0554 commit 7fe39a2

File tree

3 files changed

+113
-51
lines changed

3 files changed

+113
-51
lines changed

arch/x86/power/hibernate_64.c

Lines changed: 85 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
#include <asm/mtrr.h>
2020
#include <asm/sections.h>
2121
#include <asm/suspend.h>
22+
#include <asm/tlbflush.h>
2223

2324
/* Defined in hibernate_asm_64.S */
2425
extern asmlinkage __visible int restore_image(void);
@@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void);
2829
* kernel's text (this value is passed in the image header).
2930
*/
3031
unsigned long restore_jump_address __visible;
32+
unsigned long jump_address_phys;
3133

3234
/*
3335
* Value of the cr3 register from before the hibernation (this value is passed
@@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible;
3739

3840
pgd_t *temp_level4_pgt __visible;
3941

40-
void *relocated_restore_code __visible;
42+
unsigned long relocated_restore_code __visible;
43+
44+
static int set_up_temporary_text_mapping(void)
45+
{
46+
pmd_t *pmd;
47+
pud_t *pud;
48+
49+
/*
50+
* The new mapping only has to cover the page containing the image
51+
* kernel's entry point (jump_address_phys), because the switch over to
52+
* it is carried out by relocated code running from a page allocated
53+
* specifically for this purpose and covered by the identity mapping, so
54+
* the temporary kernel text mapping is only needed for the final jump.
55+
* Moreover, in that mapping the virtual address of the image kernel's
56+
* entry point must be the same as its virtual address in the image
57+
* kernel (restore_jump_address), so the image kernel's
58+
* restore_registers() code doesn't find itself in a different area of
59+
* the virtual address space after switching over to the original page
60+
* tables used by the image kernel.
61+
*/
62+
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
63+
if (!pud)
64+
return -ENOMEM;
65+
66+
pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
67+
if (!pmd)
68+
return -ENOMEM;
69+
70+
set_pmd(pmd + pmd_index(restore_jump_address),
71+
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
72+
set_pud(pud + pud_index(restore_jump_address),
73+
__pud(__pa(pmd) | _KERNPG_TABLE));
74+
set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
75+
__pgd(__pa(pud) | _KERNPG_TABLE));
76+
77+
return 0;
78+
}
4179

4280
static void *alloc_pgt_page(void *context)
4381
{
@@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void)
5997
if (!temp_level4_pgt)
6098
return -ENOMEM;
6199

62-
/* It is safe to reuse the original kernel mapping */
63-
set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
64-
init_level4_pgt[pgd_index(__START_KERNEL_map)]);
100+
/* Prepare a temporary mapping for the kernel text */
101+
result = set_up_temporary_text_mapping();
102+
if (result)
103+
return result;
65104

66105
/* Set up the direct mapping from scratch */
67106
for (i = 0; i < nr_pfn_mapped; i++) {
@@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void)
78117
return 0;
79118
}
80119

120+
static int relocate_restore_code(void)
121+
{
122+
pgd_t *pgd;
123+
pud_t *pud;
124+
125+
relocated_restore_code = get_safe_page(GFP_ATOMIC);
126+
if (!relocated_restore_code)
127+
return -ENOMEM;
128+
129+
memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
130+
131+
/* Make the page containing the relocated code executable */
132+
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
133+
pud = pud_offset(pgd, relocated_restore_code);
134+
if (pud_large(*pud)) {
135+
set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
136+
} else {
137+
pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
138+
139+
if (pmd_large(*pmd)) {
140+
set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
141+
} else {
142+
pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
143+
144+
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
145+
}
146+
}
147+
__flush_tlb_all();
148+
149+
return 0;
150+
}
151+
81152
int swsusp_arch_resume(void)
82153
{
83154
int error;
84155

85156
/* We have got enough memory and from now on we cannot recover */
86-
if ((error = set_up_temporary_mappings()))
157+
error = set_up_temporary_mappings();
158+
if (error)
87159
return error;
88160

89-
relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
90-
if (!relocated_restore_code)
91-
return -ENOMEM;
92-
memcpy(relocated_restore_code, &core_restore_code,
93-
&restore_registers - &core_restore_code);
161+
error = relocate_restore_code();
162+
if (error)
163+
return error;
94164

95165
restore_image();
96166
return 0;
@@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn)
109179

110180
struct restore_data_record {
111181
unsigned long jump_address;
182+
unsigned long jump_address_phys;
112183
unsigned long cr3;
113184
unsigned long magic;
114185
};
115186

116-
#define RESTORE_MAGIC 0x0123456789ABCDEFUL
187+
#define RESTORE_MAGIC 0x123456789ABCDEF0UL
117188

118189
/**
119190
* arch_hibernation_header_save - populate the architecture specific part
@@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
126197

127198
if (max_size < sizeof(struct restore_data_record))
128199
return -EOVERFLOW;
129-
rdr->jump_address = restore_jump_address;
200+
rdr->jump_address = (unsigned long)&restore_registers;
201+
rdr->jump_address_phys = __pa_symbol(&restore_registers);
130202
rdr->cr3 = restore_cr3;
131203
rdr->magic = RESTORE_MAGIC;
132204
return 0;
@@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr)
142214
struct restore_data_record *rdr = addr;
143215

144216
restore_jump_address = rdr->jump_address;
217+
jump_address_phys = rdr->jump_address_phys;
145218
restore_cr3 = rdr->cr3;
146219
return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
147220
}

arch/x86/power/hibernate_asm_64.S

Lines changed: 24 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend)
4444
pushfq
4545
popq pt_regs_flags(%rax)
4646

47-
/* save the address of restore_registers */
48-
movq $restore_registers, %rax
49-
movq %rax, restore_jump_address(%rip)
5047
/* save cr3 */
5148
movq %cr3, %rax
5249
movq %rax, restore_cr3(%rip)
@@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend)
5754
ENDPROC(swsusp_arch_suspend)
5855

5956
ENTRY(restore_image)
60-
/* switch to temporary page tables */
61-
movq $__PAGE_OFFSET, %rdx
62-
movq temp_level4_pgt(%rip), %rax
63-
subq %rdx, %rax
64-
movq %rax, %cr3
65-
/* Flush TLB */
66-
movq mmu_cr4_features(%rip), %rax
67-
movq %rax, %rdx
68-
andq $~(X86_CR4_PGE), %rdx
69-
movq %rdx, %cr4; # turn off PGE
70-
movq %cr3, %rcx; # flush TLB
71-
movq %rcx, %cr3;
72-
movq %rax, %cr4; # turn PGE back on
73-
7457
/* prepare to jump to the image kernel */
75-
movq restore_jump_address(%rip), %rax
76-
movq restore_cr3(%rip), %rbx
58+
movq restore_jump_address(%rip), %r8
59+
movq restore_cr3(%rip), %r9
60+
61+
/* prepare to switch to temporary page tables */
62+
movq temp_level4_pgt(%rip), %rax
63+
movq mmu_cr4_features(%rip), %rbx
7764

7865
/* prepare to copy image data to their original locations */
7966
movq restore_pblist(%rip), %rdx
67+
68+
/* jump to relocated restore code */
8069
movq relocated_restore_code(%rip), %rcx
8170
jmpq *%rcx
8271

8372
/* code below has been relocated to a safe page */
8473
ENTRY(core_restore_code)
74+
/* switch to temporary page tables */
75+
movq $__PAGE_OFFSET, %rcx
76+
subq %rcx, %rax
77+
movq %rax, %cr3
78+
/* flush TLB */
79+
movq %rbx, %rcx
80+
andq $~(X86_CR4_PGE), %rcx
81+
movq %rcx, %cr4; # turn off PGE
82+
movq %cr3, %rcx; # flush TLB
83+
movq %rcx, %cr3;
84+
movq %rbx, %cr4; # turn PGE back on
8585
.Lloop:
8686
testq %rdx, %rdx
8787
jz .Ldone
@@ -96,24 +96,17 @@ ENTRY(core_restore_code)
9696
/* progress to the next pbe */
9797
movq pbe_next(%rdx), %rdx
9898
jmp .Lloop
99+
99100
.Ldone:
100101
/* jump to the restore_registers address from the image header */
101-
jmpq *%rax
102-
/*
103-
* NOTE: This assumes that the boot kernel's text mapping covers the
104-
* image kernel's page containing restore_registers and the address of
105-
* this page is the same as in the image kernel's text mapping (it
106-
* should always be true, because the text mapping is linear, starting
107-
* from 0, and is supposed to cover the entire kernel text for every
108-
* kernel).
109-
*
110-
* code below belongs to the image kernel
111-
*/
102+
jmpq *%r8
112103

104+
/* code below belongs to the image kernel */
105+
.align PAGE_SIZE
113106
ENTRY(restore_registers)
114107
FRAME_BEGIN
115108
/* go back to the original page tables */
116-
movq %rbx, %cr3
109+
movq %r9, %cr3
117110

118111
/* Flush TLB, including "global" things (vmalloc) */
119112
movq mmu_cr4_features(%rip), %rax

drivers/cpuidle/cpuidle.c

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
173173

174174
struct cpuidle_state *target_state = &drv->states[index];
175175
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
176-
u64 time_start, time_end;
176+
ktime_t time_start, time_end;
177177
s64 diff;
178178

179179
/*
@@ -195,13 +195,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
195195
sched_idle_set_state(target_state);
196196

197197
trace_cpu_idle_rcuidle(index, dev->cpu);
198-
time_start = local_clock();
198+
time_start = ns_to_ktime(local_clock());
199199

200200
stop_critical_timings();
201201
entered_state = target_state->enter(dev, drv, index);
202202
start_critical_timings();
203203

204-
time_end = local_clock();
204+
time_end = ns_to_ktime(local_clock());
205205
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
206206

207207
/* The cpu is no longer idle or about to enter idle. */
@@ -217,11 +217,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
217217
if (!cpuidle_state_is_coupled(drv, index))
218218
local_irq_enable();
219219

220-
/*
221-
* local_clock() returns the time in nanosecond, let's shift
222-
* by 10 (divide by 1024) to have microsecond based time.
223-
*/
224-
diff = (time_end - time_start) >> 10;
220+
diff = ktime_us_delta(time_end, time_start);
225221
if (diff > INT_MAX)
226222
diff = INT_MAX;
227223

0 commit comments

Comments
 (0)
0