Linux-libre 3.14.34-gnu
[librecmc/linux-libre.git] / arch / x86 / mm / dump_pagetables.c
1 /*
2  * Debug helper to dump the current kernel pagetables of the system
3  * so that we can see what the various memory ranges are set to.
4  *
5  * (C) Copyright 2008 Intel Corporation
6  *
7  * Author: Arjan van de Ven <arjan@linux.intel.com>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; version 2
12  * of the License.
13  */
14
15 #include <linux/debugfs.h>
16 #include <linux/mm.h>
17 #include <linux/module.h>
18 #include <linux/seq_file.h>
19
20 #include <asm/pgtable.h>
21
22 /*
23  * The dumper groups pagetable entries of the same type into one, and for
24  * that it needs to keep some state when walking, and flush this state
25  * when a "break" in the continuity is found.
26  */
27 struct pg_state {
28         int level;
29         pgprot_t current_prot;
30         unsigned long start_address;
31         unsigned long current_address;
32         const struct addr_marker *marker;
33         unsigned long lines;
34 };
35
/* One labelled region of the address space, used to annotate the dump. */
struct addr_marker {
        /* First address that belongs to the region */
        unsigned long start_address;
        /* Label printed as "---[ name ]---" when the region is entered */
        const char *name;
        /* Upper bound on lines printed for the region; 0 means unlimited */
        unsigned long max_lines;
};
41
/*
 * Symbolic indices into address_markers[].  The enumerators must be listed
 * in exactly the same order as the entries of that table.
 */
enum address_markers_idx {
        USER_SPACE_NR = 0,
        KERNEL_SPACE_NR,                /* second entry on both 32 and 64 bit */
#ifdef CONFIG_X86_64
        LOW_KERNEL_NR,
        VMALLOC_START_NR,
        VMEMMAP_START_NR,
        ESPFIX_START_NR,
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
#else
        VMALLOC_START_NR,
        VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
# endif
        FIXADDR_START_NR,
#endif
};
64
65 /* Address space markers hints */
66 static struct addr_marker address_markers[] = {
67         { 0, "User Space" },
68 #ifdef CONFIG_X86_64
69         { 0x8000000000000000UL, "Kernel Space" },
70         { PAGE_OFFSET,          "Low Kernel Mapping" },
71         { VMALLOC_START,        "vmalloc() Area" },
72         { VMEMMAP_START,        "Vmemmap" },
73         { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
74         { __START_KERNEL_map,   "High Kernel Mapping" },
75         { MODULES_VADDR,        "Modules" },
76         { MODULES_END,          "End Modules" },
77 #else
78         { PAGE_OFFSET,          "Kernel Mapping" },
79         { 0/* VMALLOC_START */, "vmalloc() Area" },
80         { 0/*VMALLOC_END*/,     "vmalloc() End" },
81 # ifdef CONFIG_HIGHMEM
82         { 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
83 # endif
84         { 0/*FIXADDR_START*/,   "Fixmap Area" },
85 #endif
86         { -1, NULL }            /* End of list */
87 };
88
/*
 * Amount of address space spanned by a single entry at each pagetable
 * level.  Each level covers the level below it times the number of
 * entries a table of that lower level holds.
 */
#define PTE_LEVEL_MULT  (PAGE_SIZE)
#define PMD_LEVEL_MULT  (PTE_LEVEL_MULT * PTRS_PER_PTE)
#define PUD_LEVEL_MULT  (PMD_LEVEL_MULT * PTRS_PER_PMD)
#define PGD_LEVEL_MULT  (PUD_LEVEL_MULT * PTRS_PER_PUD)
94
95 /*
96  * Print a readable form of a pgprot_t to the seq_file
97  */
98 static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
99 {
100         pgprotval_t pr = pgprot_val(prot);
101         static const char * const level_name[] =
102                 { "cr3", "pgd", "pud", "pmd", "pte" };
103
104         if (!pgprot_val(prot)) {
105                 /* Not present */
106                 seq_printf(m, "                          ");
107         } else {
108                 if (pr & _PAGE_USER)
109                         seq_printf(m, "USR ");
110                 else
111                         seq_printf(m, "    ");
112                 if (pr & _PAGE_RW)
113                         seq_printf(m, "RW ");
114                 else
115                         seq_printf(m, "ro ");
116                 if (pr & _PAGE_PWT)
117                         seq_printf(m, "PWT ");
118                 else
119                         seq_printf(m, "    ");
120                 if (pr & _PAGE_PCD)
121                         seq_printf(m, "PCD ");
122                 else
123                         seq_printf(m, "    ");
124
125                 /* Bit 9 has a different meaning on level 3 vs 4 */
126                 if (level <= 3) {
127                         if (pr & _PAGE_PSE)
128                                 seq_printf(m, "PSE ");
129                         else
130                                 seq_printf(m, "    ");
131                 } else {
132                         if (pr & _PAGE_PAT)
133                                 seq_printf(m, "pat ");
134                         else
135                                 seq_printf(m, "    ");
136                 }
137                 if (pr & _PAGE_GLOBAL)
138                         seq_printf(m, "GLB ");
139                 else
140                         seq_printf(m, "    ");
141                 if (pr & _PAGE_NX)
142                         seq_printf(m, "NX ");
143                 else
144                         seq_printf(m, "x  ");
145         }
146         seq_printf(m, "%s\n", level_name[level]);
147 }
148
/*
 * Turn a walker offset into a printable virtual address.
 *
 * On 64 bit the 48 bit address is sign-extended to 64 bits by shifting
 * the upper 16 bits out and arithmetically shifting them back in.  On
 * 32 bit the value is returned untouched.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
        const int unused_bits = 16;
        signed long shifted = (signed long)(u << unused_bits);

        return shifted >> unused_bits;
#else
        return u;
#endif
}
160
161 /*
162  * This function gets called on a break in a continuous series
163  * of PTE entries; the next one is different so we need to
164  * print what we collected so far.
165  */
166 static void note_page(struct seq_file *m, struct pg_state *st,
167                       pgprot_t new_prot, int level)
168 {
169         pgprotval_t prot, cur;
170         static const char units[] = "BKMGTPE";
171
172         /*
173          * If we have a "break" in the series, we need to flush the state that
174          * we have now. "break" is either changing perms, levels or
175          * address space marker.
176          */
177         prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
178         cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
179
180         if (!st->level) {
181                 /* First entry */
182                 st->current_prot = new_prot;
183                 st->level = level;
184                 st->marker = address_markers;
185                 st->lines = 0;
186                 seq_printf(m, "---[ %s ]---\n", st->marker->name);
187         } else if (prot != cur || level != st->level ||
188                    st->current_address >= st->marker[1].start_address) {
189                 const char *unit = units;
190                 unsigned long delta;
191                 int width = sizeof(unsigned long) * 2;
192
193                 /*
194                  * Now print the actual finished series
195                  */
196                 if (!st->marker->max_lines ||
197                     st->lines < st->marker->max_lines) {
198                         seq_printf(m, "0x%0*lx-0x%0*lx   ",
199                                    width, st->start_address,
200                                    width, st->current_address);
201
202                         delta = (st->current_address - st->start_address) >> 10;
203                         while (!(delta & 1023) && unit[1]) {
204                                 delta >>= 10;
205                                 unit++;
206                         }
207                         seq_printf(m, "%9lu%c ", delta, *unit);
208                         printk_prot(m, st->current_prot, st->level);
209                 }
210                 st->lines++;
211
212                 /*
213                  * We print markers for special areas of address space,
214                  * such as the start of vmalloc space etc.
215                  * This helps in the interpretation.
216                  */
217                 if (st->current_address >= st->marker[1].start_address) {
218                         st->marker++;
219                         seq_printf(m, "---[ %s ]---\n", st->marker->name);
220                 }
221
222                 st->start_address = st->current_address;
223                 st->current_prot = new_prot;
224                 st->level = level;
225         }
226 }
227
228 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
229                                                         unsigned long P)
230 {
231         int i;
232         pte_t *start;
233
234         start = (pte_t *) pmd_page_vaddr(addr);
235         for (i = 0; i < PTRS_PER_PTE; i++) {
236                 pgprot_t prot = pte_pgprot(*start);
237
238                 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
239                 note_page(m, st, prot, 4);
240                 start++;
241         }
242 }
243
#if PTRS_PER_PMD > 1

/*
 * Report every entry of the PMD table that the given PUD entry points to.
 * Empty, large and non-present entries are noted at level 3; any other
 * entry is followed down into its PTE table.
 *
 * @m:    seq_file receiving the output
 * @st:   walker state
 * @addr: PUD entry referencing the PMD table
 * @P:    offset covered by the first entry of that table
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
                                                        unsigned long P)
{
        pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
        int idx;

        for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
                unsigned long offset = P + idx * PMD_LEVEL_MULT;

                st->current_address = normalize_addr(offset);

                if (pmd_none(*pmd)) {
                        note_page(m, st, __pgprot(0), 3);
                        continue;
                }

                if (pmd_large(*pmd) || !pmd_present(*pmd)) {
                        pgprotval_t prot = pmd_val(*pmd) & PTE_FLAGS_MASK;

                        note_page(m, st, __pgprot(prot), 3);
                } else {
                        walk_pte_level(m, st, *pmd, offset);
                }
        }
}

#else
/* With a single-entry PMD level, treat the PUD entry as a PMD entry. */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
274
#if PTRS_PER_PUD > 1

/*
 * Report every entry of the PUD table that the given PGD entry points to.
 * Empty, large and non-present entries are noted at level 2; any other
 * entry is followed down into its PMD table.
 *
 * @m:    seq_file receiving the output
 * @st:   walker state
 * @addr: PGD entry referencing the PUD table
 * @P:    offset covered by the first entry of that table
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
                                                        unsigned long P)
{
        pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
        int idx;

        for (idx = 0; idx < PTRS_PER_PUD; idx++, pud++) {
                unsigned long offset = P + idx * PUD_LEVEL_MULT;

                st->current_address = normalize_addr(offset);

                if (pud_none(*pud)) {
                        note_page(m, st, __pgprot(0), 2);
                        continue;
                }

                if (pud_large(*pud) || !pud_present(*pud)) {
                        pgprotval_t prot = pud_val(*pud) & PTE_FLAGS_MASK;

                        note_page(m, st, __pgprot(prot), 2);
                } else {
                        walk_pmd_level(m, st, *pud, offset);
                }
        }
}

#else
/* With a single-entry PUD level, treat the PGD entry as a PUD entry. */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif
307
308 static void walk_pgd_level(struct seq_file *m)
309 {
310 #ifdef CONFIG_X86_64
311         pgd_t *start = (pgd_t *) &init_level4_pgt;
312 #else
313         pgd_t *start = swapper_pg_dir;
314 #endif
315         int i;
316         struct pg_state st;
317
318         memset(&st, 0, sizeof(st));
319
320         for (i = 0; i < PTRS_PER_PGD; i++) {
321                 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
322                 if (!pgd_none(*start)) {
323                         pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
324
325                         if (pgd_large(*start) || !pgd_present(*start))
326                                 note_page(m, &st, __pgprot(prot), 1);
327                         else
328                                 walk_pud_level(m, &st, *start,
329                                                i * PGD_LEVEL_MULT);
330                 } else
331                         note_page(m, &st, __pgprot(0), 1);
332
333                 start++;
334         }
335
336         /* Flush out the last page */
337         st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
338         note_page(m, &st, __pgprot(0), 0);
339 }
340
/*
 * seq_file show callback: write the whole pagetable dump to the file.
 *
 * @m: seq_file receiving the output
 * @v: unused
 *
 * Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
        walk_pgd_level(m);

        return 0;
}
346
347 static int ptdump_open(struct inode *inode, struct file *filp)
348 {
349         return single_open(filp, ptdump_show, NULL);
350 }
351
352 static const struct file_operations ptdump_fops = {
353         .open           = ptdump_open,
354         .read           = seq_read,
355         .llseek         = seq_lseek,
356         .release        = single_release,
357 };
358
359 static int pt_dump_init(void)
360 {
361         struct dentry *pe;
362
363 #ifdef CONFIG_X86_32
364         /* Not a compile-time constant on x86-32 */
365         address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
366         address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
367 # ifdef CONFIG_HIGHMEM
368         address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
369 # endif
370         address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
371 #endif
372
373         pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
374                                  &ptdump_fops);
375         if (!pe)
376                 return -ENOMEM;
377
378         return 0;
379 }
380
381 __initcall(pt_dump_init);
382 MODULE_LICENSE("GPL");
383 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
384 MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");