
/arch/x86_64/mm/init.c

https://bitbucket.org/evzijst/gittest
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

#ifdef CONFIG_GART_IOMMU
extern int swiotlb;
#endif

extern char _stext[];

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
	int i, total = 0, reserved = 0;
	int shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk("%d pages of RAM\n", total);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
}

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end[];
extern char __init_begin, __init_end;

int after_bootmem;

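/*
 * Allocate a zeroed page for kernel page-table use: from the page allocator
 * once bootmem has been torn down, otherwise from bootmem itself.
 */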
static void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

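/*
 * Install a single kernel PTE mapping vaddr to phys with the given
 * protection, allocating intermediate page-table levels as needed.
 */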
static void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

unsigned long __initdata table_start, table_end;

extern pmd_t temp_boot_pmds[];

static struct temp_map {
	pmd_t *pmd;
	void  *address;
	int    allocated;
} temp_mappings[] __initdata = {
	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
	{}
};

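/*
 * Allocate the next page of the early page-table area (table_start..table_end)
 * and map it through one of the temporary 2MB boot mappings so it can be
 * written before the direct mapping exists.
 */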
static __init void *alloc_low_page(int *index, unsigned long *phys)
{
	struct temp_map *ti;
	int i;
	unsigned long pfn = table_end++, paddr;
	void *adr;

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");
	for (i = 0; temp_mappings[i].allocated; i++) {
		if (!temp_mappings[i].pmd)
			panic("alloc_low_page: ran out of temp mappings");
	}
	ti = &temp_mappings[i];
	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
	ti->allocated = 1;
	__flush_tlb();
	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
	*index = i;
	*phys  = pfn * PAGE_SIZE;
	return adr;
}

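/* Tear down the temporary mapping set up by alloc_low_page(). */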
static __init void unmap_low_page(int i)
{
	struct temp_map *ti = &temp_mappings[i];
	set_pmd(ti->pmd, __pmd(0));
	ti->allocated = 0;
}

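/*
 * Fill one PUD's worth of the kernel direct mapping with 2MB pages for the
 * physical range [address, end), clearing entries for holes that the e820
 * map says are not usable.
 */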
static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
	long i, j;

	i = pud_index(address);
	pud = pud + i;
	for (; i < PTRS_PER_PUD; pud++, i++) {
		int map;
		unsigned long paddr, pmd_phys;
		pmd_t *pmd;

		paddr = address + i*PUD_SIZE;
		if (paddr >= end) {
			for (; i < PTRS_PER_PUD; i++, pud++)
				set_pud(pud, __pud(0));
			break;
		}

		if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		pmd = alloc_low_page(&map, &pmd_phys);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
			unsigned long pe;

			if (paddr >= end) {
				for (; j < PTRS_PER_PMD; j++, pmd++)
					set_pmd(pmd, __pmd(0));
				break;
			}
			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
			pe &= __supported_pte_mask;
			set_pmd(pmd, __pmd(pe));
		}
		unmap_low_page(map);
	}
	__flush_tlb();
}

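/*
 * Estimate how much memory the direct-mapping page tables will need and
 * reserve a suitable e820 area below the kernel text for them.
 */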
static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped.  Unfortunately this is currently done before the nodes are
	 * discovered.
	 */
	find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		int map;
		unsigned long pud_phys;
		pud_t *pud = alloc_low_page(&map, &pud_phys);
		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(map);
	}

	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", end,
	       table_start<<PAGE_SHIFT,
	       table_end<<PAGE_SHIFT);
}

extern struct x8664_pda cpu_pda[NR_CPUS];

/* Assumes all CPUs still execute in init_mm */
void zap_low_mappings(void)
{
	pgd_t *pgd = pgd_offset_k(0UL);
	pgd_clear(pgd);
	flush_tlb_all();
}

#ifndef CONFIG_DISCONTIGMEM
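/*
 * Set up the zone sizes (DMA and NORMAL) and initialize the free lists for
 * the flat, non-NUMA memory model.
 */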
void __init paging_init(void)
{
	{
		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
		unsigned int max_dma;

		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;

		if (end_pfn < max_dma)
			zones_size[ZONE_DMA] = end_pfn;
		else {
			zones_size[ZONE_DMA] = max_dma;
			zones_size[ZONE_NORMAL] = end_pfn - max_dma;
		}
		free_area_init(zones_size);
	}
	return;
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue;
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
	       "clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

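/* Return 1 if the given page frame number lies in a usable e820 RAM region. */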
static inline int page_is_ram (unsigned long pagenr)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long addr, end;

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
		 *	are not. Notably the 640->1Mb area. We need a sanity
		 *	check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}

extern int swiotlb_force;

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

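/*
 * Late memory setup: decide whether the software IO-TLB is needed, hand the
 * bootmem pages to the buddy allocator, count reserved pages, register the
 * /proc/kcore areas and print the memory summary.
 */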
void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;

#ifdef CONFIG_SWIOTLB
	if (swiotlb_force)
		swiotlb = 1;
	if (!iommu_aperture &&
	    (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
		swiotlb = 1;
	if (swiotlb)
		swiotlb_init();
#endif

	/* How many end-of-memory variables you have, grandma! */
	max_low_pfn = end_pfn;
	max_pfn = end_pfn;
	num_physpages = end_pfn;
	high_memory = (void *) __va(end_pfn * PAGE_SIZE);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
	totalram_pages += numa_free_all_bootmem();
	tmp = 0;
	/* should count reserved pages here for all nodes */
#else
	max_mapnr = end_pfn;
	if (!mem_map) BUG();

	totalram_pages += free_all_bootmem();

	for (tmp = 0; tmp < end_pfn; tmp++)
		/*
		 * Only count reserved RAM pages
		 */
		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
			reservedpages++;
#endif

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}

extern char __initdata_begin[], __initdata_end[];

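/*
 * Free the memory occupied by the kernel's __init sections once boot is
 * complete, poisoning it first to help catch late references.
 */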
void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		set_page_count(virt_to_page(addr), 1);
		memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
	memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
	printk("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
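/*
 * Release the initrd pages back to the page allocator (skipped if they
 * overlap the kernel image).
 */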
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start < (unsigned long)&_end)
		return;
	printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(start));
		set_page_count(virt_to_page(start), 1);
		free_page(start);
		totalram_pages++;
	}
}
#endif

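/*
 * Reserve a physical range in the bootmem allocator, picking the owning node
 * on CONFIG_DISCONTIGMEM (NUMA) configurations.
 */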
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
	int nid = phys_to_nid(phys);
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
}

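/*
 * Check whether a kernel virtual address is backed by a valid page-table
 * entry, walking the page tables by hand (2MB large pages included).
 */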
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
	{ 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
	  proc_dointvec },
#ifdef CONFIG_CHECKING
	{ 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
	  proc_dointvec },
#endif
	{ 0, }
};

static ctl_table debug_root_table2[] = {
	{ .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
	  .child = debug_table2 },
	{ 0 },
};

static __init int x8664_sysctl_init(void)
{
	register_sysctl_table(debug_root_table2, 1);
	return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* Pseudo VMAs to allow ptrace access for the vsyscall pages.  x86-64 has two
   different ones: one for 32bit and one for 64bit. Use the appropriate one
   for the target task. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_END,
	.vm_page_prot = PAGE_READONLY
};

static struct vm_area_struct gate32_vma = {
	.vm_start = VSYSCALL32_BASE,
	.vm_end = VSYSCALL32_END,
	.vm_page_prot = PAGE_READONLY
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32)) {
		/* lookup code assumes the pages are present. set them up
		   now */
		if (__map_syscall32(tsk->mm, VSYSCALL32_BASE) < 0)
			return NULL;
		return &gate32_vma;
	}
#endif
	return &gate_vma;
}

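/* Is addr inside the gate (vsyscall) area of the given task? */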
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (((addr >= VSYSCALL_START) && (addr < VSYSCALL_END)) ||
		((addr >= VSYSCALL32_BASE) && (addr < VSYSCALL32_END)));
}