PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/drivers/base/memory.c

https://github.com/dmitriy103/bravo_kernel-2.6.35
C | 575 lines | 405 code | 81 blank | 89 comment | 43 complexity | 132d175410756d0a293063d70f13c429 MD5 | raw file
  1. /*
  2. * drivers/base/memory.c - basic Memory class support
  3. *
  4. * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
  5. * Dave Hansen <haveblue@us.ibm.com>
  6. *
  7. * This file provides the necessary infrastructure to represent
  8. * a SPARSEMEM-memory-model system's physical memory in /sysfs.
  9. * All arch-independent code that assumes MEMORY_HOTPLUG requires
  10. * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
  11. */
  12. #include <linux/sysdev.h>
  13. #include <linux/module.h>
  14. #include <linux/init.h>
  15. #include <linux/topology.h>
  16. #include <linux/capability.h>
  17. #include <linux/device.h>
  18. #include <linux/memory.h>
  19. #include <linux/kobject.h>
  20. #include <linux/memory_hotplug.h>
  21. #include <linux/mm.h>
  22. #include <linux/mutex.h>
  23. #include <linux/stat.h>
  24. #include <linux/slab.h>
  25. #include <asm/atomic.h>
  26. #include <asm/uaccess.h>
  27. #define MEMORY_CLASS_NAME "memory"
/* sysdev class under which every memory-block device is registered;
 * shows up as /sys/devices/system/memory. */
static struct sysdev_class memory_sysdev_class = {
	.name = MEMORY_CLASS_NAME,
};
  31. static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
  32. {
  33. return MEMORY_CLASS_NAME;
  34. }
  35. static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env)
  36. {
  37. int retval = 0;
  38. return retval;
  39. }
/* uevent callbacks wired into the memory class kset at init time. */
static const struct kset_uevent_ops memory_uevent_ops = {
	.name = memory_uevent_name,
	.uevent = memory_uevent,
};
/* Chain notified (may sleep) on memory online/offline transitions. */
static BLOCKING_NOTIFIER_HEAD(memory_chain);

/* Subscribe @nb to memory hotplug (online/offline) notifications. */
int register_memory_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

/* Remove @nb from the memory hotplug notifier chain. */
void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);

/* Chain notified (atomic context) around memory isolation. */
static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);

/* Subscribe @nb to memory isolation notifications. */
int register_memory_isolate_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&memory_isolate_chain, nb);
}
EXPORT_SYMBOL(register_memory_isolate_notifier);

/* Remove @nb from the memory isolation notifier chain. */
void unregister_memory_isolate_notifier(struct notifier_block *nb)
{
	atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_isolate_notifier);
  66. /*
  67. * register_memory - Setup a sysfs device for a memory block
  68. */
  69. static
  70. int register_memory(struct memory_block *memory, struct mem_section *section)
  71. {
  72. int error;
  73. memory->sysdev.cls = &memory_sysdev_class;
  74. memory->sysdev.id = __section_nr(section);
  75. error = sysdev_register(&memory->sysdev);
  76. return error;
  77. }
/* Tear down the sysfs device registered by register_memory(). */
static void
unregister_memory(struct memory_block *memory, struct mem_section *section)
{
	/* Sanity: must have been set up by register_memory() for @section. */
	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
	BUG_ON(memory->sysdev.id != __section_nr(section));

	/* drop the ref. we got in remove_memory_block() */
	kobject_put(&memory->sysdev.kobj);
	sysdev_unregister(&memory->sysdev);
}
  87. /*
  88. * use this as the physical section index that this memsection
  89. * uses.
  90. */
  91. static ssize_t show_mem_phys_index(struct sys_device *dev,
  92. struct sysdev_attribute *attr, char *buf)
  93. {
  94. struct memory_block *mem =
  95. container_of(dev, struct memory_block, sysdev);
  96. return sprintf(buf, "%08lx\n", mem->phys_index);
  97. }
  98. /*
  99. * Show whether the section of memory is likely to be hot-removable
  100. */
  101. static ssize_t show_mem_removable(struct sys_device *dev,
  102. struct sysdev_attribute *attr, char *buf)
  103. {
  104. unsigned long start_pfn;
  105. int ret;
  106. struct memory_block *mem =
  107. container_of(dev, struct memory_block, sysdev);
  108. start_pfn = section_nr_to_pfn(mem->phys_index);
  109. ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
  110. return sprintf(buf, "%d\n", ret);
  111. }
  112. /*
  113. * online, offline, going offline, etc.
  114. */
  115. static ssize_t show_mem_state(struct sys_device *dev,
  116. struct sysdev_attribute *attr, char *buf)
  117. {
  118. struct memory_block *mem =
  119. container_of(dev, struct memory_block, sysdev);
  120. ssize_t len = 0;
  121. /*
  122. * We can probably put these states in a nice little array
  123. * so that they're not open-coded
  124. */
  125. switch (mem->state) {
  126. case MEM_ONLINE:
  127. len = sprintf(buf, "online\n");
  128. break;
  129. case MEM_OFFLINE:
  130. len = sprintf(buf, "offline\n");
  131. break;
  132. case MEM_GOING_OFFLINE:
  133. len = sprintf(buf, "going-offline\n");
  134. break;
  135. default:
  136. len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
  137. mem->state);
  138. WARN_ON(1);
  139. break;
  140. }
  141. return len;
  142. }
/* Fire the blocking memory hotplug chain; returns notifier status. */
int memory_notify(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&memory_chain, val, v);
}

/* Fire the atomic memory isolation chain; returns notifier status. */
int memory_isolate_notify(unsigned long val, void *v)
{
	return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
}
/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
/*
 * Carry out @action (MEM_ONLINE or MEM_OFFLINE) on the section backing
 * @mem.  Returns 0 on success, -EBUSY if a page to be onlined is not
 * reserved, -EINVAL for an unknown action, or the error from
 * online_pages()/remove_memory().
 */
static int
memory_block_action(struct memory_block *mem, unsigned long action)
{
	int i;
	unsigned long psection;
	unsigned long start_pfn, start_paddr;
	struct page *first_page;
	int ret;
	/* saved so the offline path can roll back on failure */
	int old_state = mem->state;

	psection = mem->phys_index;
	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);

	/*
	 * The probe routines leave the pages reserved, just
	 * as the bootmem code does.  Make sure they're still
	 * that way.
	 */
	if (action == MEM_ONLINE) {
		for (i = 0; i < PAGES_PER_SECTION; i++) {
			if (PageReserved(first_page+i))
				continue;

			printk(KERN_WARNING "section number %ld page number %d "
				"not reserved, was it already online? \n",
				psection, i);
			return -EBUSY;
		}
	}

	switch (action) {
	case MEM_ONLINE:
		start_pfn = page_to_pfn(first_page);
		ret = online_pages(start_pfn, PAGES_PER_SECTION);
		break;
	case MEM_OFFLINE:
		/* transient state, visible while remove_memory() runs */
		mem->state = MEM_GOING_OFFLINE;
		start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
		ret = remove_memory(start_paddr,
				    PAGES_PER_SECTION << PAGE_SHIFT);
		if (ret) {
			/* removal failed: restore the previous state */
			mem->state = old_state;
			break;
		}
		break;
	default:
		WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
		     __func__, mem, action, action);
		ret = -EINVAL;
	}

	return ret;
}
  203. static int memory_block_change_state(struct memory_block *mem,
  204. unsigned long to_state, unsigned long from_state_req)
  205. {
  206. int ret = 0;
  207. mutex_lock(&mem->state_mutex);
  208. if (mem->state != from_state_req) {
  209. ret = -EINVAL;
  210. goto out;
  211. }
  212. ret = memory_block_action(mem, to_state);
  213. if (!ret)
  214. mem->state = to_state;
  215. out:
  216. mutex_unlock(&mem->state_mutex);
  217. return ret;
  218. }
  219. static ssize_t
  220. store_mem_state(struct sys_device *dev,
  221. struct sysdev_attribute *attr, const char *buf, size_t count)
  222. {
  223. struct memory_block *mem;
  224. unsigned int phys_section_nr;
  225. int ret = -EINVAL;
  226. mem = container_of(dev, struct memory_block, sysdev);
  227. phys_section_nr = mem->phys_index;
  228. if (!present_section_nr(phys_section_nr))
  229. goto out;
  230. if (!strncmp(buf, "online", min((int)count, 6)))
  231. ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
  232. else if(!strncmp(buf, "offline", min((int)count, 7)))
  233. ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
  234. out:
  235. if (ret)
  236. return ret;
  237. return count;
  238. }
  239. /*
  240. * phys_device is a bad name for this. What I really want
  241. * is a way to differentiate between memory ranges that
  242. * are part of physical devices that constitute
  243. * a complete removable unit or fru.
  244. * i.e. do these ranges belong to the same physical device,
  245. * s.t. if I offline all of these sections I can then
  246. * remove the physical device?
  247. */
  248. static ssize_t show_phys_device(struct sys_device *dev,
  249. struct sysdev_attribute *attr, char *buf)
  250. {
  251. struct memory_block *mem =
  252. container_of(dev, struct memory_block, sysdev);
  253. return sprintf(buf, "%d\n", mem->phys_device);
  254. }
/* Per-block sysfs attributes: phys_index, state, phys_device, removable.
 * Only "state" is writable (root-only). */
static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);

/* Create/remove one of the attributes above, referenced by short name. */
#define mem_create_simple_file(mem, attr_name) \
	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
#define mem_remove_simple_file(mem, attr_name) \
	sysdev_remove_file(&mem->sysdev, &attr_##attr_name)
  263. /*
  264. * Block size attribute stuff
  265. */
  266. static ssize_t
  267. print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
  268. char *buf)
  269. {
  270. return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
  271. }
static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);

/* Create the class-wide block_size_bytes attribute. */
static int block_size_init(void)
{
	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
				 &attr_block_size_bytes.attr);
}
  278. /*
  279. * Some architectures will have custom drivers to do this, and
  280. * will not need to do it from userspace. The fake hot-add code
  281. * as well as ppc64 will do all of their discovery in userspace
  282. * and will require this interface.
  283. */
  284. #ifdef CONFIG_ARCH_MEMORY_PROBE
  285. static ssize_t
  286. memory_probe_store(struct class *class, struct class_attribute *attr,
  287. const char *buf, size_t count)
  288. {
  289. u64 phys_addr;
  290. int nid;
  291. int ret;
  292. phys_addr = simple_strtoull(buf, NULL, 0);
  293. nid = memory_add_physaddr_to_nid(phys_addr);
  294. ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
  295. if (ret)
  296. count = ret;
  297. return count;
  298. }
static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);

/* Expose the write-only "probe" file for arches that discover memory
 * from userspace (fake hot-add, ppc64 — see comment above). */
static int memory_probe_init(void)
{
	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
				 &class_attr_probe.attr);
}
#else
/* No userspace probe interface on this architecture. */
static inline int memory_probe_init(void)
{
	return 0;
}
#endif
  311. #ifdef CONFIG_MEMORY_FAILURE
  312. /*
  313. * Support for offlining pages of memory
  314. */
  315. /* Soft offline a page */
  316. static ssize_t
  317. store_soft_offline_page(struct class *class,
  318. struct class_attribute *attr,
  319. const char *buf, size_t count)
  320. {
  321. int ret;
  322. u64 pfn;
  323. if (!capable(CAP_SYS_ADMIN))
  324. return -EPERM;
  325. if (strict_strtoull(buf, 0, &pfn) < 0)
  326. return -EINVAL;
  327. pfn >>= PAGE_SHIFT;
  328. if (!pfn_valid(pfn))
  329. return -ENXIO;
  330. ret = soft_offline_page(pfn_to_page(pfn), 0);
  331. return ret == 0 ? count : ret;
  332. }
  333. /* Forcibly offline a page, including killing processes. */
  334. static ssize_t
  335. store_hard_offline_page(struct class *class,
  336. struct class_attribute *attr,
  337. const char *buf, size_t count)
  338. {
  339. int ret;
  340. u64 pfn;
  341. if (!capable(CAP_SYS_ADMIN))
  342. return -EPERM;
  343. if (strict_strtoull(buf, 0, &pfn) < 0)
  344. return -EINVAL;
  345. pfn >>= PAGE_SHIFT;
  346. ret = __memory_failure(pfn, 0, 0);
  347. return ret ? ret : count;
  348. }
static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);

/* Create the soft/hard offline control files; stops at the first error. */
static __init int memory_fail_init(void)
{
	int err;

	err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
				&class_attr_soft_offline_page.attr);
	if (!err)
		err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
					&class_attr_hard_offline_page.attr);
	return err;
}
#else
/* CONFIG_MEMORY_FAILURE=n: no page-offline control files. */
static inline int memory_fail_init(void)
{
	return 0;
}
#endif
/*
 * Note that phys_device is optional.  It is here to allow for
 * differentiation between which *physical* devices each
 * section belongs to...
 */
/* Weak default: arches that can map a section to a physical device
 * override this; everyone else reports device 0. */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}
/*
 * Allocate and register the sysfs representation of one memory section.
 * @state is the block's initial state; @context distinguishes boot-time
 * registration (BOOT) from hotplug (HOTPLUG) — only the latter links
 * the block under its node.
 */
static int add_memory_block(int nid, struct mem_section *section,
		unsigned long state, enum mem_add_context context)
{
	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	unsigned long start_pfn;
	int ret = 0;

	if (!mem)
		return -ENOMEM;

	mem->phys_index = __section_nr(section);
	mem->state = state;
	mutex_init(&mem->state_mutex);
	start_pfn = section_nr_to_pfn(mem->phys_index);
	mem->phys_device = arch_get_memory_phys_device(start_pfn);

	/*
	 * NOTE(review): on any failure below, 'mem' is never freed —
	 * a leak on the error paths.  A plain kfree() is only clearly
	 * safe before register_memory() succeeds; verify the required
	 * sysdev/kobject teardown before fixing.
	 */
	ret = register_memory(mem, section);
	if (!ret)
		ret = mem_create_simple_file(mem, phys_index);
	if (!ret)
		ret = mem_create_simple_file(mem, state);
	if (!ret)
		ret = mem_create_simple_file(mem, phys_device);
	if (!ret)
		ret = mem_create_simple_file(mem, removable);
	if (!ret) {
		if (context == HOTPLUG)
			ret = register_mem_sect_under_node(mem, nid);
	}

	return ret;
}
  404. /*
  405. * For now, we have a linear search to go find the appropriate
  406. * memory_block corresponding to a particular phys_index. If
  407. * this gets to be a real problem, we can always use a radix
  408. * tree or something here.
  409. *
  410. * This could be made generic for all sysdev classes.
  411. */
  412. struct memory_block *find_memory_block(struct mem_section *section)
  413. {
  414. struct kobject *kobj;
  415. struct sys_device *sysdev;
  416. struct memory_block *mem;
  417. char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
  418. /*
  419. * This only works because we know that section == sysdev->id
  420. * slightly redundant with sysdev_register()
  421. */
  422. sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
  423. kobj = kset_find_obj(&memory_sysdev_class.kset, name);
  424. if (!kobj)
  425. return NULL;
  426. sysdev = container_of(kobj, struct sys_device, kobj);
  427. mem = container_of(sysdev, struct memory_block, sysdev);
  428. return mem;
  429. }
/*
 * Tear down the sysfs files and device for @section.  @node_id and
 * @phys_device are accepted but unused here.  Always returns 0.
 */
int remove_memory_block(unsigned long node_id, struct mem_section *section,
		int phys_device)
{
	struct memory_block *mem;

	/*
	 * NOTE(review): find_memory_block() can return NULL; the calls
	 * below would then dereference it.  Presumably callers only pass
	 * sections that were previously registered — verify.
	 */
	mem = find_memory_block(section);
	unregister_mem_sect_under_nodes(mem);
	mem_remove_simple_file(mem, phys_index);
	mem_remove_simple_file(mem, state);
	mem_remove_simple_file(mem, phys_device);
	mem_remove_simple_file(mem, removable);
	unregister_memory(mem, section);

	return 0;
}
/*
 * need an interface for the VM to add new memory regions,
 * but without onlining it.
 */
int register_new_memory(int nid, struct mem_section *section)
{
	/* hot-added blocks start offline; onlining happens separately */
	return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
}
/* Remove the sysfs representation of @section; -EINVAL if not present. */
int unregister_memory_section(struct mem_section *section)
{
	if (!present_section(section))
		return -EINVAL;

	return remove_memory_block(0, section, 0);
}
/*
 * Initialize the sysfs support for memory devices...
 */
/*
 * Register the memory sysdev class and a block device for every section
 * already present at boot, then the probe/offline/block-size files.
 * Returns 0, or the first error encountered (later steps still run).
 */
int __init memory_dev_init(void)
{
	unsigned int i;
	int ret;
	int err;

	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
	ret = sysdev_class_register(&memory_sysdev_class);
	if (ret)
		goto out;

	/*
	 * Create entries for memory sections that were found
	 * during boot and have been initialized
	 */
	for (i = 0; i < NR_MEM_SECTIONS; i++) {
		if (!present_section_nr(i))
			continue;
		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
					BOOT);
		/* remember the first failure, but keep registering blocks */
		if (!ret)
			ret = err;
	}

	err = memory_probe_init();
	if (!ret)
		ret = err;
	err = memory_fail_init();
	if (!ret)
		ret = err;
	err = block_size_init();
	if (!ret)
		ret = err;
out:
	if (ret)
		printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
	return ret;
}