
/arch/x86/entry/vdso/vma.c

https://gitlab.com/CadeLaRen/linux
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

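/*
 * Boot-time setup for a vDSO image: sanity-check that the image size is
 * page-aligned, then patch the image's alternative instructions for the
 * CPU we are running on.
 */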
void __init init_vdso_image(const struct vdso_image *image)
{
        BUG_ON(image->size % PAGE_SIZE != 0);

        apply_alternatives((struct alt_instr *)(image->data + image->alt),
                           (struct alt_instr *)(image->data + image->alt +
                                                image->alt_len));
}

struct linux_binprm;

/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of address space.
 * To save memory make sure it is still in the same PTE as the stack
 * top.  This doesn't give that many random bits.
 *
 * Note that this algorithm is imperfect: the distribution of the vdso
 * start address within a PMD is biased toward the end.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
        return 0;
#else
        unsigned long addr, end;
        unsigned offset;

        /*
         * Round up the start address.  It can start out unaligned as a result
         * of stack start randomization.
         */
        start = PAGE_ALIGN(start);

        /* Round the lowest possible end address up to a PMD boundary. */
        end = (start + len + PMD_SIZE - 1) & PMD_MASK;
        if (end >= TASK_SIZE_MAX)
                end = TASK_SIZE_MAX;
        end -= len;

        if (end > start) {
                offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
                addr = start + (offset << PAGE_SHIFT);
        } else {
                addr = start;
        }

        /*
         * Forcibly align the final address in case we have a hardware
         * issue that requires alignment for performance reasons.
         */
        addr = align_vdso_addr(addr);

        return addr;
#endif
}

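/*
 * Fault handler for the "[vdso]" special mapping: back the faulting
 * offset with the corresponding page of the kernel-side image data.
 */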
static int vdso_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
{
        const struct vdso_image *image = vma->vm_mm->context.vdso_image;

        if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
                return VM_FAULT_SIGBUS;

        vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
        get_page(vmf->page);
        return 0;
}

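/*
 * If a compat task is sitting on the 32-bit vDSO's int80 landing pad
 * while the mapping moves, repoint the saved IP at the pad's new address
 * so the return to userspace lands on valid code (see do_fast_syscall_32).
 */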
static void vdso_fix_landing(const struct vdso_image *image,
                             struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
        if (in_ia32_syscall() && image == &vdso_image_32) {
                struct pt_regs *regs = current_pt_regs();
                unsigned long vdso_land = image->sym_int80_landing_pad;
                unsigned long old_land_addr = vdso_land +
                        (unsigned long)current->mm->context.vdso;

                /* Fixing userspace landing - look at do_fast_syscall_32 */
                if (regs->ip == old_land_addr)
                        regs->ip = new_vma->vm_start + vdso_land;
        }
#endif
}

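/*
 * mremap() hook for the vDSO: userspace (a checkpoint/restore tool, for
 * example) may move the mapping, so keep mm->context.vdso in sync with
 * its new location.  Resizing is not supported.
 */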
static int vdso_mremap(const struct vm_special_mapping *sm,
                       struct vm_area_struct *new_vma)
{
        unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
        const struct vdso_image *image = current->mm->context.vdso_image;

        if (image->size != new_size)
                return -EINVAL;

        if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
                return -EFAULT;

        vdso_fix_landing(image, new_vma);
        current->mm->context.vdso = (void __user *)new_vma->vm_start;

        return 0;
}

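/*
 * Fault handler for the "[vvar]" special mapping.  The mapping is
 * VM_PFNMAP, so faults are satisfied by inserting the pfn of the shared
 * vvar page (or the pvclock page) directly rather than a struct page.
 */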
static int vvar_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
{
        const struct vdso_image *image = vma->vm_mm->context.vdso_image;
        long sym_offset;
        int ret = -EFAULT;

        if (!image)
                return VM_FAULT_SIGBUS;

        sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
                image->sym_vvar_start;

        /*
         * Sanity check: a symbol offset of zero means that the page
         * does not exist for this vdso image, not that the page is at
         * offset zero relative to the text mapping.  This should be
         * impossible here, because sym_offset should only be zero for
         * the page past the end of the vvar mapping.
         */
        if (sym_offset == 0)
                return VM_FAULT_SIGBUS;

        if (sym_offset == image->sym_vvar_page) {
                ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
                                    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
        } else if (sym_offset == image->sym_pvclock_page) {
                struct pvclock_vsyscall_time_info *pvti =
                        pvclock_pvti_cpu0_va();
                if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
                        ret = vm_insert_pfn(
                                vma,
                                (unsigned long)vmf->virtual_address,
                                __pa(pvti) >> PAGE_SHIFT);
                }
        }

        if (ret == 0 || ret == -EBUSY)
                return VM_FAULT_NOPAGE;

        return VM_FAULT_SIGBUS;
}

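/*
 * Map the vDSO and vvar areas into the current process as one contiguous
 * reservation.  sym_vvar_start is negative, so the vvar pages end up
 * immediately below the vDSO text:
 *
 *      addr                      text_start = addr - sym_vvar_start
 *      |<--- -sym_vvar_start --->|<-------- image->size -------->|
 *      [ vvar / pvclock pages    |  vDSO text                    ]
 */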
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr, text_start;
        int ret = 0;

        static const struct vm_special_mapping vdso_mapping = {
                .name = "[vdso]",
                .fault = vdso_fault,
                .mremap = vdso_mremap,
        };
        static const struct vm_special_mapping vvar_mapping = {
                .name = "[vvar]",
                .fault = vvar_fault,
        };

        if (calculate_addr) {
                addr = vdso_addr(current->mm->start_stack,
                                 image->size - image->sym_vvar_start);
        } else {
                addr = 0;
        }

        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;

        addr = get_unmapped_area(NULL, addr,
                                 image->size - image->sym_vvar_start, 0, 0);
        if (IS_ERR_VALUE(addr)) {
                ret = addr;
                goto up_fail;
        }

        text_start = addr - image->sym_vvar_start;
        current->mm->context.vdso = (void __user *)text_start;
        current->mm->context.vdso_image = image;

        /*
         * MAYWRITE to allow gdb to COW and set breakpoints
         */
        vma = _install_special_mapping(mm,
                                       text_start,
                                       image->size,
                                       VM_READ|VM_EXEC|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
                                       &vdso_mapping);

        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto up_fail;
        }

        vma = _install_special_mapping(mm,
                                       addr,
                                       -image->sym_vvar_start,
                                       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
                                       VM_PFNMAP,
                                       &vvar_mapping);

        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto up_fail;
        }

up_fail:
        if (ret)
                current->mm->context.vdso = NULL;

        up_write(&mm->mmap_sem);
        return ret;
}

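/*
 * Entry points called from the ELF loader at exec time: pick the image
 * that matches the new process's ABI (64-bit, x32 or 32-bit) and honor
 * the corresponding vdso enable flags.
 */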
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
        if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
                return 0;

        return map_vdso(&vdso_image_32, false);
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        if (!vdso64_enabled)
                return 0;

        return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
        if (test_thread_flag(TIF_X32)) {
                if (!vdso64_enabled)
                        return 0;

                return map_vdso(&vdso_image_x32, true);
        }
#endif
#ifdef CONFIG_IA32_EMULATION
        return load_vdso32();
#else
        return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        return load_vdso32();
}
#endif

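/*
 * "vdso=" boot parameter: "vdso=0" disables the 64-bit vDSO; any
 * non-zero value leaves it enabled (the default).
 */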
#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
        vdso64_enabled = simple_strtoul(s, NULL, 0);
        return 0;
}
__setup("vdso=", vdso_setup);
#endif

#ifdef CONFIG_X86_64
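/*
 * Publish this CPU's number and NUMA node where userspace can read them
 * without a syscall: in TSC_AUX for RDTSCP, and in the segment limit of
 * a per-CPU GDT descriptor for LSL.  A userspace reader might look
 * roughly like this (illustrative sketch, not the in-tree vDSO code):
 *
 *      unsigned long p;
 *      asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
 *      cpu  = p & 0xfff;                (low 12 bits)
 *      node = (p >> 12) & 0xff;         (next 8 bits)
 */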
static void vgetcpu_cpu_init(void *arg)
{
        int cpu = smp_processor_id();
        struct desc_struct d = { };
        unsigned long node = 0;
#ifdef CONFIG_NUMA
        node = cpu_to_node(cpu);
#endif
        if (static_cpu_has(X86_FEATURE_RDTSCP))
                write_rdtscp_aux((node << 12) | cpu);

        /*
         * Store cpu number in limit so that it can be loaded
         * quickly in user space in vgetcpu. (12 bits for the CPU
         * and 8 bits for the node)
         */
        d.limit0 = cpu | ((node & 0xf) << 12);
        d.limit = node >> 4;
        d.type = 5;             /* RO data, expand down, accessed */
        d.dpl = 3;              /* Visible to user code */
        d.s = 1;                /* Not a system segment */
        d.p = 1;                /* Present */
        d.d = 1;                /* 32-bit */

        write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

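/* CPU-hotplug callback: run vgetcpu_cpu_init() on each CPU as it comes online. */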
static int vgetcpu_online(unsigned int cpu)
{
        return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
}

static int __init init_vdso(void)
{
        init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
        init_vdso_image(&vdso_image_x32);
#endif

        /* notifier priority > KVM */
        return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
                                 "AP_X86_VDSO_VMA_ONLINE",
                                 vgetcpu_online, NULL);
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */