/arch/ia64/kernel/perfmon.c



  1. /*
  2. * This file implements the perfmon-2 subsystem which is used
  3. * to program the IA-64 Performance Monitoring Unit (PMU).
  4. *
  5. * The initial version of perfmon.c was written by
  6. * Ganesh Venkitachalam, IBM Corp.
  7. *
  8. * Then it was modified for perfmon-1.x by Stephane Eranian and
  9. * David Mosberger, Hewlett Packard Co.
  10. *
  11. * Version Perfmon-2.x is a rewrite of perfmon-1.x
  12. * by Stephane Eranian, Hewlett Packard Co.
  13. *
  14. * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
  15. * Stephane Eranian <eranian@hpl.hp.com>
  16. * David Mosberger-Tang <davidm@hpl.hp.com>
  17. *
  18. * More information about perfmon available at:
  19. * http://www.hpl.hp.com/research/linux/perfmon
  20. */
  21. #include <linux/config.h>
  22. #include <linux/module.h>
  23. #include <linux/kernel.h>
  24. #include <linux/sched.h>
  25. #include <linux/interrupt.h>
  26. #include <linux/smp_lock.h>
  27. #include <linux/proc_fs.h>
  28. #include <linux/seq_file.h>
  29. #include <linux/init.h>
  30. #include <linux/vmalloc.h>
  31. #include <linux/mm.h>
  32. #include <linux/sysctl.h>
  33. #include <linux/list.h>
  34. #include <linux/file.h>
  35. #include <linux/poll.h>
  36. #include <linux/vfs.h>
  37. #include <linux/pagemap.h>
  38. #include <linux/mount.h>
  39. #include <linux/version.h>
  40. #include <linux/bitops.h>
  41. #include <asm/errno.h>
  42. #include <asm/intrinsics.h>
  43. #include <asm/page.h>
  44. #include <asm/perfmon.h>
  45. #include <asm/processor.h>
  46. #include <asm/signal.h>
  47. #include <asm/system.h>
  48. #include <asm/uaccess.h>
  49. #include <asm/delay.h>
  50. #ifdef CONFIG_PERFMON
  51. /*
  52. * perfmon context state
  53. */
  54. #define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
  55. #define PFM_CTX_LOADED 2 /* context is loaded onto a task */
  56. #define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */
  57. #define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */
  58. #define PFM_INVALID_ACTIVATION (~0UL)
  59. /*
  60. * depth of message queue
  61. */
  62. #define PFM_MAX_MSGS 32
  63. #define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
  64. /*
  65. * type of a PMU register (bitmask).
  66. * bitmask structure:
  67. * bit0 : register implemented
  68. * bit1 : end marker
  69. * bit2-3 : reserved
  70. * bit4 : pmc has pmc.pm
  71. * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter
  72. * bit6-7 : register type
  73. * bit8-31: reserved
  74. */
  75. #define PFM_REG_NOTIMPL 0x0 /* not implemented at all */
  76. #define PFM_REG_IMPL 0x1 /* register implemented */
  77. #define PFM_REG_END 0x2 /* end marker */
  78. #define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
  79. #define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
  80. #define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
  81. #define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
  82. #define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
  83. #define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
  84. #define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
  85. #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
  86. /* i assumed unsigned */
  87. #define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
  88. #define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
  89. /* XXX: these assume that register i is implemented */
  90. #define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
  91. #define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
  92. #define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
  93. #define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)
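/*
 * Note (illustration): the type values nest. PFM_REG_COUNTING includes
 * PFM_REG_MONITOR, which itself includes PFM_REG_IMPL, so the checks above
 * mask with the full pattern and compare for equality; a register that
 * satisfies PMC_IS_COUNTING() therefore also satisfies PMC_IS_MONITOR().
 */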
  94. #define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value
  95. #define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask
  96. #define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0]
  97. #define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0]
  98. #define PFM_NUM_IBRS IA64_NUM_DBG_REGS
  99. #define PFM_NUM_DBRS IA64_NUM_DBG_REGS
  100. #define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
  101. #define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
  102. #define PFM_CTX_TASK(h) (h)->ctx_task
  103. #define PMU_PMC_OI 5 /* position of pmc.oi bit */
  104. /* XXX: does not support more than 64 PMDs */
  105. #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
  106. #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
  107. #define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
  108. #define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
  109. #define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
  110. #define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
  111. #define PFM_CODE_RR 0 /* requesting code range restriction */
  112. #define PFM_DATA_RR 1 /* requesting data range restriction */
  113. #define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v)
  114. #define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v)
  115. #define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info)
  116. #define RDEP(x) (1UL<<(x))
  117. /*
  118. * context protection macros
  119. * in SMP:
  120. * - we need to protect against CPU concurrency (spin_lock)
  121. * - we need to protect against PMU overflow interrupts (local_irq_disable)
  122. * in UP:
  123. * - we need to protect against PMU overflow interrupts (local_irq_disable)
  124. *
  125. * spin_lock_irqsave()/spin_lock_irqrestore():
  126. * in SMP: local_irq_disable + spin_lock
  127. * in UP : local_irq_disable
  128. *
  129. * spin_lock()/spin_unlock():
  130. * in UP : removed automatically
  131. * in SMP: protect against context accesses from other CPU. interrupts
  132. * are not masked. This is useful for the PMU interrupt handler
  133. * because we know we will not get PMU concurrency in that code.
  134. */
  135. #define PROTECT_CTX(c, f) \
  136. do { \
  137. DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
  138. spin_lock_irqsave(&(c)->ctx_lock, f); \
  139. DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
  140. } while(0)
  141. #define UNPROTECT_CTX(c, f) \
  142. do { \
  143. DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
  144. spin_unlock_irqrestore(&(c)->ctx_lock, f); \
  145. } while(0)
  146. #define PROTECT_CTX_NOPRINT(c, f) \
  147. do { \
  148. spin_lock_irqsave(&(c)->ctx_lock, f); \
  149. } while(0)
  150. #define UNPROTECT_CTX_NOPRINT(c, f) \
  151. do { \
  152. spin_unlock_irqrestore(&(c)->ctx_lock, f); \
  153. } while(0)
  154. #define PROTECT_CTX_NOIRQ(c) \
  155. do { \
  156. spin_lock(&(c)->ctx_lock); \
  157. } while(0)
  158. #define UNPROTECT_CTX_NOIRQ(c) \
  159. do { \
  160. spin_unlock(&(c)->ctx_lock); \
  161. } while(0)
  162. #ifdef CONFIG_SMP
  163. #define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)
  164. #define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++
  165. #define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
  166. #else /* !CONFIG_SMP */
  167. #define SET_ACTIVATION(t) do {} while(0)
  168. #define GET_ACTIVATION(t) do {} while(0)
  169. #define INC_ACTIVATION(t) do {} while(0)
  170. #endif /* CONFIG_SMP */
  171. #define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
  172. #define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner)
  173. #define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx)
  174. #define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
  175. #define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
  176. #define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
  177. /*
  178. * cmp0 must be the value of pmc0
  179. */
  180. #define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
  181. #define PFMFS_MAGIC 0xa0b4d889
  182. /*
  183. * debugging
  184. */
  185. #define PFM_DEBUGGING 1
  186. #ifdef PFM_DEBUGGING
  187. #define DPRINT(a) \
  188. do { \
  189. if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
  190. } while (0)
  191. #define DPRINT_ovfl(a) \
  192. do { \
  193. if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
  194. } while (0)
  195. #endif
  196. /*
  197. * 64-bit software counter structure
  198. *
  199. * the next_reset_type is applied to the next call to pfm_reset_regs()
  200. */
  201. typedef struct {
  202. unsigned long val; /* virtual 64bit counter value */
  203. unsigned long lval; /* last reset value */
  204. unsigned long long_reset; /* reset value on sampling overflow */
  205. unsigned long short_reset; /* reset value on overflow */
  206. unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */
  207. unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */
  208. unsigned long seed; /* seed for random-number generator */
  209. unsigned long mask; /* mask for random-number generator */
  210. unsigned int flags; /* notify/do not notify */
  211. unsigned long eventid; /* overflow event identifier */
  212. } pfm_counter_t;
  213. /*
  214. * context flags
  215. */
  216. typedef struct {
  217. unsigned int block:1; /* when 1, task will be blocked on user notifications */
  218. unsigned int system:1; /* do system wide monitoring */
  219. unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
  220. unsigned int is_sampling:1; /* true if using a custom format */
  221. unsigned int excl_idle:1; /* exclude idle task in system wide session */
  222. unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
  223. unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
  224. unsigned int no_msg:1; /* no message sent on overflow */
  225. unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
  226. unsigned int reserved:22;
  227. } pfm_context_flags_t;
  228. #define PFM_TRAP_REASON_NONE 0x0 /* default value */
  229. #define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */
  230. #define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */
  231. /*
  232. * perfmon context: encapsulates all the state of a monitoring session
  233. */
  234. typedef struct pfm_context {
  235. spinlock_t ctx_lock; /* context protection */
  236. pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
  237. unsigned int ctx_state; /* state: active/inactive (no bitfield) */
  238. struct task_struct *ctx_task; /* task to which context is attached */
  239. unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
  240. struct semaphore ctx_restart_sem; /* use for blocking notification mode */
  241. unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
  242. unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */
  243. unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */
  244. unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */
  245. unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
  246. unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
  247. unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */
  248. unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
  249. unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
  250. unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
  251. unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
  252. pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
  253. u64 ctx_saved_psr_up; /* only contains psr.up value */
  254. unsigned long ctx_last_activation; /* context last activation number for last_cpu */
  255. unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
  256. unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
  257. int ctx_fd; /* file descriptor used by this context */
  258. pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
  259. pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
  260. void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
  261. unsigned long ctx_smpl_size; /* size of sampling buffer */
  262. void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
  263. wait_queue_head_t ctx_msgq_wait;
  264. pfm_msg_t ctx_msgq[PFM_MAX_MSGS];
  265. int ctx_msgq_head;
  266. int ctx_msgq_tail;
  267. struct fasync_struct *ctx_async_queue;
  268. wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */
  269. } pfm_context_t;
  270. /*
  271. * magic number used to verify that structure is really
  272. * a perfmon context
  273. */
  274. #define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops)
  275. #define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
  276. #ifdef CONFIG_SMP
  277. #define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v)
  278. #define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu
  279. #else
  280. #define SET_LAST_CPU(ctx, v) do {} while(0)
  281. #define GET_LAST_CPU(ctx) do {} while(0)
  282. #endif
  283. #define ctx_fl_block ctx_flags.block
  284. #define ctx_fl_system ctx_flags.system
  285. #define ctx_fl_using_dbreg ctx_flags.using_dbreg
  286. #define ctx_fl_is_sampling ctx_flags.is_sampling
  287. #define ctx_fl_excl_idle ctx_flags.excl_idle
  288. #define ctx_fl_going_zombie ctx_flags.going_zombie
  289. #define ctx_fl_trap_reason ctx_flags.trap_reason
  290. #define ctx_fl_no_msg ctx_flags.no_msg
  291. #define ctx_fl_can_restart ctx_flags.can_restart
  292. #define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0);
  293. #define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking
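/*
 * Illustrative usage sketch (hypothetical helper, not part of perfmon):
 * the pattern used throughout this file is to take the context lock with
 * interrupts masked via PROTECT_CTX(), inspect or update the context,
 * then release it with UNPROTECT_CTX().
 */
static inline int
pfm_example_ctx_is_loaded(pfm_context_t *ctx)
{
	unsigned long flags;
	int state;

	PROTECT_CTX(ctx, flags);	/* spin_lock_irqsave on ctx_lock */
	state = ctx->ctx_state;
	UNPROTECT_CTX(ctx, flags);	/* spin_unlock_irqrestore */

	return state == PFM_CTX_LOADED;
}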
  294. /*
  295. * global information about all sessions
  296. * mostly used to synchronize between system wide and per-process
  297. */
  298. typedef struct {
  299. spinlock_t pfs_lock; /* lock the structure */
  300. unsigned int pfs_task_sessions; /* number of per task sessions */
  301. unsigned int pfs_sys_sessions; /* number of per system wide sessions */
  302. unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
  303. unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
  304. struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
  305. } pfm_session_t;
  306. /*
  307. * information about a PMC or PMD.
  308. * dep_pmd[]: a bitmask of dependent PMD registers
  309. * dep_pmc[]: a bitmask of dependent PMC registers
  310. */
  311. typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
  312. typedef struct {
  313. unsigned int type;
  314. int pm_pos;
  315. unsigned long default_value; /* power-on default value */
  316. unsigned long reserved_mask; /* bitmask of reserved bits */
  317. pfm_reg_check_t read_check;
  318. pfm_reg_check_t write_check;
  319. unsigned long dep_pmd[4];
  320. unsigned long dep_pmc[4];
  321. } pfm_reg_desc_t;
  322. /* assume cnum is a valid monitor */
  323. #define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
  324. /*
  325. * This structure is initialized at boot time and contains
  326. * a description of the PMU main characteristics.
  327. *
  328. * If the probe function is defined, detection is based
  329. * on its return value:
  330. * - 0 means recognized PMU
  331. * - anything else means not supported
  332. * When the probe function is not defined, then the pmu_family field
  333. * is used and it must match the host CPU family such that:
  334. * - cpu->family & config->pmu_family != 0
  335. */
  336. typedef struct {
  337. unsigned long ovfl_val; /* overflow value for counters */
  338. pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
  339. pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
  340. unsigned int num_pmcs; /* number of PMCS: computed at init time */
  341. unsigned int num_pmds; /* number of PMDS: computed at init time */
  342. unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
  343. unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
  344. char *pmu_name; /* PMU family name */
  345. unsigned int pmu_family; /* cpuid family pattern used to identify pmu */
  346. unsigned int flags; /* pmu specific flags */
  347. unsigned int num_ibrs; /* number of IBRS: computed at init time */
  348. unsigned int num_dbrs; /* number of DBRS: computed at init time */
  349. unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */
  350. int (*probe)(void); /* customized probe routine */
  351. unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */
  352. } pmu_config_t;
  353. /*
  354. * PMU specific flags
  355. */
  356. #define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */
  357. /*
  358. * debug register related type definitions
  359. */
  360. typedef struct {
  361. unsigned long ibr_mask:56;
  362. unsigned long ibr_plm:4;
  363. unsigned long ibr_ig:3;
  364. unsigned long ibr_x:1;
  365. } ibr_mask_reg_t;
  366. typedef struct {
  367. unsigned long dbr_mask:56;
  368. unsigned long dbr_plm:4;
  369. unsigned long dbr_ig:2;
  370. unsigned long dbr_w:1;
  371. unsigned long dbr_r:1;
  372. } dbr_mask_reg_t;
  373. typedef union {
  374. unsigned long val;
  375. ibr_mask_reg_t ibr;
  376. dbr_mask_reg_t dbr;
  377. } dbreg_t;
  378. /*
  379. * perfmon command descriptions
  380. */
  381. typedef struct {
  382. int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
  383. char *cmd_name;
  384. int cmd_flags;
  385. unsigned int cmd_narg;
  386. size_t cmd_argsize;
  387. int (*cmd_getsize)(void *arg, size_t *sz);
  388. } pfm_cmd_desc_t;
  389. #define PFM_CMD_FD 0x01 /* command requires a file descriptor */
  390. #define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */
  391. #define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */
  392. #define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
  393. #define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
  394. #define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
  395. #define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
  396. #define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
  397. #define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
  398. #define PFM_CMD_ARG_MANY -1 /* cannot be zero */
  399. typedef struct {
  400. int debug; /* turn on/off debugging via syslog */
  401. int debug_ovfl; /* turn on/off debug printk in overflow handler */
  402. int fastctxsw; /* turn on/off fast (insecure) ctxsw */
  403. int expert_mode; /* turn on/off value checking */
  404. int debug_pfm_read;
  405. } pfm_sysctl_t;
  406. typedef struct {
  407. unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
  408. unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
  409. unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
  410. unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
  411. unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
  412. unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
  413. unsigned long pfm_smpl_handler_calls;
  414. unsigned long pfm_smpl_handler_cycles;
  415. char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
  416. } pfm_stats_t;
  417. /*
  418. * perfmon internal variables
  419. */
  420. static pfm_stats_t pfm_stats[NR_CPUS];
  421. static pfm_session_t pfm_sessions; /* global sessions information */
  422. static struct proc_dir_entry *perfmon_dir;
  423. static pfm_uuid_t pfm_null_uuid = {0,};
  424. static spinlock_t pfm_buffer_fmt_lock;
  425. static LIST_HEAD(pfm_buffer_fmt_list);
  426. static pmu_config_t *pmu_conf;
  427. /* sysctl() controls */
  428. static pfm_sysctl_t pfm_sysctl;
  429. int pfm_debug_var;
  430. static ctl_table pfm_ctl_table[]={
  431. {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
  432. {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
  433. {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
  434. {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
  435. { 0, },
  436. };
  437. static ctl_table pfm_sysctl_dir[] = {
  438. {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
  439. {0,},
  440. };
  441. static ctl_table pfm_sysctl_root[] = {
  442. {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
  443. {0,},
  444. };
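/*
 * For reference: once registered (see pfm_sysctl_header below), these
 * tables appear as /proc/sys/kernel/perfmon/{debug,debug_ovfl,fastctxsw,
 * expert_mode}; writing a non-zero value to "debug" turns on the DPRINT()
 * output defined above.
 */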
  445. static struct ctl_table_header *pfm_sysctl_header;
  446. static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
  447. static int pfm_flush(struct file *filp);
  448. #define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
  449. #define pfm_get_cpu_data(a,b) per_cpu(a, b)
  450. static inline void
  451. pfm_put_task(struct task_struct *task)
  452. {
  453. if (task != current) put_task_struct(task);
  454. }
  455. static inline void
  456. pfm_set_task_notify(struct task_struct *task)
  457. {
  458. struct thread_info *info;
  459. info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
  460. set_bit(TIF_NOTIFY_RESUME, &info->flags);
  461. }
  462. static inline void
  463. pfm_clear_task_notify(void)
  464. {
  465. clear_thread_flag(TIF_NOTIFY_RESUME);
  466. }
  467. static inline void
  468. pfm_reserve_page(unsigned long a)
  469. {
  470. SetPageReserved(vmalloc_to_page((void *)a));
  471. }
  472. static inline void
  473. pfm_unreserve_page(unsigned long a)
  474. {
  475. ClearPageReserved(vmalloc_to_page((void*)a));
  476. }
  477. static inline unsigned long
  478. pfm_protect_ctx_ctxsw(pfm_context_t *x)
  479. {
  480. spin_lock(&(x)->ctx_lock);
  481. return 0UL;
  482. }
  483. static inline void
  484. pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
  485. {
  486. spin_unlock(&(x)->ctx_lock);
  487. }
  488. static inline unsigned int
  489. pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
  490. {
  491. return do_munmap(mm, addr, len);
  492. }
  493. static inline unsigned long
  494. pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
  495. {
  496. return get_unmapped_area(file, addr, len, pgoff, flags);
  497. }
  498. static struct super_block *
  499. pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
  500. {
  501. return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
  502. }
  503. static struct file_system_type pfm_fs_type = {
  504. .name = "pfmfs",
  505. .get_sb = pfmfs_get_sb,
  506. .kill_sb = kill_anon_super,
  507. };
  508. DEFINE_PER_CPU(unsigned long, pfm_syst_info);
  509. DEFINE_PER_CPU(struct task_struct *, pmu_owner);
  510. DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
  511. DEFINE_PER_CPU(unsigned long, pmu_activation_number);
  512. /* forward declaration */
  513. static struct file_operations pfm_file_ops;
  514. /*
  515. * forward declarations
  516. */
  517. #ifndef CONFIG_SMP
  518. static void pfm_lazy_save_regs (struct task_struct *ta);
  519. #endif
  520. void dump_pmu_state(const char *);
  521. static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
  522. #include "perfmon_itanium.h"
  523. #include "perfmon_mckinley.h"
  524. #include "perfmon_generic.h"
  525. static pmu_config_t *pmu_confs[]={
  526. &pmu_conf_mck,
  527. &pmu_conf_ita,
  528. &pmu_conf_gen, /* must be last */
  529. NULL
  530. };
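/*
 * Illustrative sketch (hypothetical helper) of the detection rule described
 * above pmu_config_t: when a probe() routine is defined it decides (0 means
 * recognized), otherwise the pmu_family pattern is matched against the CPU
 * family.
 */
static pmu_config_t *
pfm_example_select_pmu(unsigned int cpu_family)
{
	pmu_config_t **p;

	for (p = pmu_confs; *p; p++) {
		if ((*p)->probe) {
			if ((*p)->probe() == 0) return *p;
		} else if ((*p)->pmu_family & cpu_family) {
			return *p;
		}
	}
	return NULL;
}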
  531. static int pfm_end_notify_user(pfm_context_t *ctx);
  532. static inline void
  533. pfm_clear_psr_pp(void)
  534. {
  535. ia64_rsm(IA64_PSR_PP);
  536. ia64_srlz_i();
  537. }
  538. static inline void
  539. pfm_set_psr_pp(void)
  540. {
  541. ia64_ssm(IA64_PSR_PP);
  542. ia64_srlz_i();
  543. }
  544. static inline void
  545. pfm_clear_psr_up(void)
  546. {
  547. ia64_rsm(IA64_PSR_UP);
  548. ia64_srlz_i();
  549. }
  550. static inline void
  551. pfm_set_psr_up(void)
  552. {
  553. ia64_ssm(IA64_PSR_UP);
  554. ia64_srlz_i();
  555. }
  556. static inline unsigned long
  557. pfm_get_psr(void)
  558. {
  559. unsigned long tmp;
  560. tmp = ia64_getreg(_IA64_REG_PSR);
  561. ia64_srlz_i();
  562. return tmp;
  563. }
  564. static inline void
  565. pfm_set_psr_l(unsigned long val)
  566. {
  567. ia64_setreg(_IA64_REG_PSR_L, val);
  568. ia64_srlz_i();
  569. }
  570. static inline void
  571. pfm_freeze_pmu(void)
  572. {
  573. ia64_set_pmc(0,1UL);
  574. ia64_srlz_d();
  575. }
  576. static inline void
  577. pfm_unfreeze_pmu(void)
  578. {
  579. ia64_set_pmc(0,0UL);
  580. ia64_srlz_d();
  581. }
  582. static inline void
  583. pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
  584. {
  585. int i;
  586. for (i=0; i < nibrs; i++) {
  587. ia64_set_ibr(i, ibrs[i]);
  588. ia64_dv_serialize_instruction();
  589. }
  590. ia64_srlz_i();
  591. }
  592. static inline void
  593. pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
  594. {
  595. int i;
  596. for (i=0; i < ndbrs; i++) {
  597. ia64_set_dbr(i, dbrs[i]);
  598. ia64_dv_serialize_data();
  599. }
  600. ia64_srlz_d();
  601. }
  602. /*
  603. * PMD[i] must be a counter. no check is made
  604. */
  605. static inline unsigned long
  606. pfm_read_soft_counter(pfm_context_t *ctx, int i)
  607. {
  608. return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
  609. }
  610. /*
  611. * PMD[i] must be a counter. no check is made
  612. */
  613. static inline void
  614. pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
  615. {
  616. unsigned long ovfl_val = pmu_conf->ovfl_val;
  617. ctx->ctx_pmds[i].val = val & ~ovfl_val;
  618. /*
  619. * writing to the unimplemented part is ignored, so we do not need to
  620. * mask off the top part
  621. */
  622. ia64_set_pmd(i, val & ovfl_val);
  623. }
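/*
 * Illustration of the split: with ovfl_val = 2^N - 1 for an N-bit hardware
 * counter, pfm_write_soft_counter() keeps bits N..63 of the 64-bit value
 * in ctx_pmds[i].val and loads bits 0..N-1 into the hardware PMD;
 * pfm_read_soft_counter() simply adds the two parts back together.
 */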
  624. static pfm_msg_t *
  625. pfm_get_new_msg(pfm_context_t *ctx)
  626. {
  627. int idx, next;
  628. next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
  629. DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
  630. if (next == ctx->ctx_msgq_head) return NULL;
  631. idx = ctx->ctx_msgq_tail;
  632. ctx->ctx_msgq_tail = next;
  633. DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
  634. return ctx->ctx_msgq+idx;
  635. }
  636. static pfm_msg_t *
  637. pfm_get_next_msg(pfm_context_t *ctx)
  638. {
  639. pfm_msg_t *msg;
  640. DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
  641. if (PFM_CTXQ_EMPTY(ctx)) return NULL;
  642. /*
  643. * get oldest message
  644. */
  645. msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
  646. /*
  647. * and move forward
  648. */
  649. ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
  650. DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
  651. return msg;
  652. }
  653. static void
  654. pfm_reset_msgq(pfm_context_t *ctx)
  655. {
  656. ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
  657. DPRINT(("ctx=%p msgq reset\n", ctx));
  658. }
  659. static void *
  660. pfm_rvmalloc(unsigned long size)
  661. {
  662. void *mem;
  663. unsigned long addr;
  664. size = PAGE_ALIGN(size);
  665. mem = vmalloc(size);
  666. if (mem) {
  667. //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
  668. memset(mem, 0, size);
  669. addr = (unsigned long)mem;
  670. while (size > 0) {
  671. pfm_reserve_page(addr);
  672. addr+=PAGE_SIZE;
  673. size-=PAGE_SIZE;
  674. }
  675. }
  676. return mem;
  677. }
  678. static void
  679. pfm_rvfree(void *mem, unsigned long size)
  680. {
  681. unsigned long addr;
  682. if (mem) {
  683. DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
  684. addr = (unsigned long) mem;
  685. while ((long) size > 0) {
  686. pfm_unreserve_page(addr);
  687. addr+=PAGE_SIZE;
  688. size-=PAGE_SIZE;
  689. }
  690. vfree(mem);
  691. }
  692. return;
  693. }
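/*
 * Note: the pages of this vmalloc()ed buffer are marked reserved so the
 * buffer can later be mapped into the monitoring task's address space
 * (ctx_smpl_vaddr) without being treated as ordinary pageable memory;
 * pfm_rvfree() clears the reservation again before vfree().
 */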
  694. static pfm_context_t *
  695. pfm_context_alloc(void)
  696. {
  697. pfm_context_t *ctx;
  698. /*
  699. * allocate context descriptor
  700. * must be able to free with interrupts disabled
  701. */
  702. ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
  703. if (ctx) {
  704. memset(ctx, 0, sizeof(pfm_context_t));
  705. DPRINT(("alloc ctx @%p\n", ctx));
  706. }
  707. return ctx;
  708. }
  709. static void
  710. pfm_context_free(pfm_context_t *ctx)
  711. {
  712. if (ctx) {
  713. DPRINT(("free ctx @%p\n", ctx));
  714. kfree(ctx);
  715. }
  716. }
  717. static void
  718. pfm_mask_monitoring(struct task_struct *task)
  719. {
  720. pfm_context_t *ctx = PFM_GET_CTX(task);
  721. struct thread_struct *th = &task->thread;
  722. unsigned long mask, val, ovfl_mask;
  723. int i;
  724. DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
  725. ovfl_mask = pmu_conf->ovfl_val;
  726. /*
  727. * monitoring can only be masked as a result of a valid
  728. * counter overflow. In UP, it means that the PMU still
  729. * has an owner. Note that the owner can be different
  730. * from the current task. However the PMU state belongs
  731. * to the owner.
  732. * In SMP, a valid overflow only happens when task is
  733. * current. Therefore if we come here, we know that
  734. * the PMU state belongs to the current task, therefore
  735. * we can access the live registers.
  736. *
  737. * So in both cases, the live register contains the owner's
  738. * state. We can ONLY touch the PMU registers and NOT the PSR.
  739. *
  740. * As a consequence to this call, the thread->pmds[] array
  741. * contains stale information which must be ignored
  742. * when context is reloaded AND monitoring is active (see
  743. * pfm_restart).
  744. */
  745. mask = ctx->ctx_used_pmds[0];
  746. for (i = 0; mask; i++, mask>>=1) {
  747. /* skip non used pmds */
  748. if ((mask & 0x1) == 0) continue;
  749. val = ia64_get_pmd(i);
  750. if (PMD_IS_COUNTING(i)) {
  751. /*
  752. * we rebuild the full 64 bit value of the counter
  753. */
  754. ctx->ctx_pmds[i].val += (val & ovfl_mask);
  755. } else {
  756. ctx->ctx_pmds[i].val = val;
  757. }
  758. DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
  759. i,
  760. ctx->ctx_pmds[i].val,
  761. val & ovfl_mask));
  762. }
  763. /*
  764. * mask monitoring by setting the privilege level to 0
  765. * we cannot use psr.pp/psr.up for this, it is controlled by
  766. * the user
  767. *
  768. * if task is current, modify actual registers, otherwise modify
  769. * thread save state, i.e., what will be restored in pfm_load_regs()
  770. */
  771. mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
  772. for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
  773. if ((mask & 0x1) == 0UL) continue;
  774. ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
  775. th->pmcs[i] &= ~0xfUL;
  776. DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
  777. }
  778. /*
  779. * make all of this visible
  780. */
  781. ia64_srlz_d();
  782. }
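/*
 * Note: the "& ~0xfUL" above clears the pmc.plm privilege-level mask bits,
 * which is how monitoring is masked without touching psr.pp/psr.up (those
 * bits are under user control, as explained in the comment above).
 */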
  783. /*
  784. * must always be done with task == current
  785. *
  786. * context must be in MASKED state when calling
  787. */
  788. static void
  789. pfm_restore_monitoring(struct task_struct *task)
  790. {
  791. pfm_context_t *ctx = PFM_GET_CTX(task);
  792. struct thread_struct *th = &task->thread;
  793. unsigned long mask, ovfl_mask;
  794. unsigned long psr, val;
  795. int i, is_system;
  796. is_system = ctx->ctx_fl_system;
  797. ovfl_mask = pmu_conf->ovfl_val;
  798. if (task != current) {
  799. printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
  800. return;
  801. }
  802. if (ctx->ctx_state != PFM_CTX_MASKED) {
  803. printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
  804. task->pid, current->pid, ctx->ctx_state);
  805. return;
  806. }
  807. psr = pfm_get_psr();
  808. /*
  809. * monitoring is masked via the PMC.
  810. * As we restore their value, we do not want each counter to
  811. * restart right away. We stop monitoring using the PSR,
  812. * restore the PMC (and PMD) and then re-establish the psr
  813. * as it was. Note that there can be no pending overflow at
  814. * this point, because monitoring was MASKED.
  815. *
  816. * system-wide sessions are pinned and self-monitoring
  817. */
  818. if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
  819. /* disable dcr pp */
  820. ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
  821. pfm_clear_psr_pp();
  822. } else {
  823. pfm_clear_psr_up();
  824. }
  825. /*
  826. * first, we restore the PMD
  827. */
  828. mask = ctx->ctx_used_pmds[0];
  829. for (i = 0; mask; i++, mask>>=1) {
  830. /* skip non used pmds */
  831. if ((mask & 0x1) == 0) continue;
  832. if (PMD_IS_COUNTING(i)) {
  833. /*
  834. * we split the 64bit value according to
  835. * counter width
  836. */
  837. val = ctx->ctx_pmds[i].val & ovfl_mask;
  838. ctx->ctx_pmds[i].val &= ~ovfl_mask;
  839. } else {
  840. val = ctx->ctx_pmds[i].val;
  841. }
  842. ia64_set_pmd(i, val);
  843. DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
  844. i,
  845. ctx->ctx_pmds[i].val,
  846. val));
  847. }
  848. /*
  849. * restore the PMCs
  850. */
  851. mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
  852. for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
  853. if ((mask & 0x1) == 0UL) continue;
  854. th->pmcs[i] = ctx->ctx_pmcs[i];
  855. ia64_set_pmc(i, th->pmcs[i]);
  856. DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
  857. }
  858. ia64_srlz_d();
  859. /*
  860. * must restore DBR/IBR because could be modified while masked
  861. * XXX: need to optimize
  862. */
  863. if (ctx->ctx_fl_using_dbreg) {
  864. pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
  865. pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
  866. }
  867. /*
  868. * now restore PSR
  869. */
  870. if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
  871. /* enable dcr pp */
  872. ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
  873. ia64_srlz_i();
  874. }
  875. pfm_set_psr_l(psr);
  876. }
  877. static inline void
  878. pfm_save_pmds(unsigned long *pmds, unsigned long mask)
  879. {
  880. int i;
  881. ia64_srlz_d();
  882. for (i=0; mask; i++, mask>>=1) {
  883. if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
  884. }
  885. }
  886. /*
  887. * reload from thread state (used for ctxsw only)
  888. */
  889. static inline void
  890. pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
  891. {
  892. int i;
  893. unsigned long val, ovfl_val = pmu_conf->ovfl_val;
  894. for (i=0; mask; i++, mask>>=1) {
  895. if ((mask & 0x1) == 0) continue;
  896. val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
  897. ia64_set_pmd(i, val);
  898. }
  899. ia64_srlz_d();
  900. }
  901. /*
  902. * propagate PMD from context to thread-state
  903. */
  904. static inline void
  905. pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
  906. {
  907. struct thread_struct *thread = &task->thread;
  908. unsigned long ovfl_val = pmu_conf->ovfl_val;
  909. unsigned long mask = ctx->ctx_all_pmds[0];
  910. unsigned long val;
  911. int i;
  912. DPRINT(("mask=0x%lx\n", mask));
  913. for (i=0; mask; i++, mask>>=1) {
  914. val = ctx->ctx_pmds[i].val;
  915. /*
  916. * We break up the 64 bit value into 2 pieces
  917. * the lower bits go to the machine state in the
  918. * thread (will be reloaded on ctxsw in).
  919. * The upper part stays in the soft-counter.
  920. */
  921. if (PMD_IS_COUNTING(i)) {
  922. ctx->ctx_pmds[i].val = val & ~ovfl_val;
  923. val &= ovfl_val;
  924. }
  925. thread->pmds[i] = val;
  926. DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
  927. i,
  928. thread->pmds[i],
  929. ctx->ctx_pmds[i].val));
  930. }
  931. }
  932. /*
  933. * propagate PMC from context to thread-state
  934. */
  935. static inline void
  936. pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
  937. {
  938. struct thread_struct *thread = &task->thread;
  939. unsigned long mask = ctx->ctx_all_pmcs[0];
  940. int i;
  941. DPRINT(("mask=0x%lx\n", mask));
  942. for (i=0; mask; i++, mask>>=1) {
  943. /* masking 0 with ovfl_val yields 0 */
  944. thread->pmcs[i] = ctx->ctx_pmcs[i];
  945. DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
  946. }
  947. }
  948. static inline void
  949. pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
  950. {
  951. int i;
  952. for (i=0; mask; i++, mask>>=1) {
  953. if ((mask & 0x1) == 0) continue;
  954. ia64_set_pmc(i, pmcs[i]);
  955. }
  956. ia64_srlz_d();
  957. }
  958. static inline int
  959. pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
  960. {
  961. return memcmp(a, b, sizeof(pfm_uuid_t));
  962. }
  963. static inline int
  964. pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
  965. {
  966. int ret = 0;
  967. if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
  968. return ret;
  969. }
  970. static inline int
  971. pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
  972. {
  973. int ret = 0;
  974. if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
  975. return ret;
  976. }
  977. static inline int
  978. pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
  979. int cpu, void *arg)
  980. {
  981. int ret = 0;
  982. if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
  983. return ret;
  984. }
  985. static inline int
  986. pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
  987. int cpu, void *arg)
  988. {
  989. int ret = 0;
  990. if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
  991. return ret;
  992. }
  993. static inline int
  994. pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
  995. {
  996. int ret = 0;
  997. if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
  998. return ret;
  999. }
  1000. static inline int
  1001. pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
  1002. {
  1003. int ret = 0;
  1004. if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
  1005. return ret;
  1006. }
  1007. static pfm_buffer_fmt_t *
  1008. __pfm_find_buffer_fmt(pfm_uuid_t uuid)
  1009. {
  1010. struct list_head * pos;
  1011. pfm_buffer_fmt_t * entry;
  1012. list_for_each(pos, &pfm_buffer_fmt_list) {
  1013. entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
  1014. if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
  1015. return entry;
  1016. }
  1017. return NULL;
  1018. }
  1019. /*
  1020. * find a buffer format based on its uuid
  1021. */
  1022. static pfm_buffer_fmt_t *
  1023. pfm_find_buffer_fmt(pfm_uuid_t uuid)
  1024. {
  1025. pfm_buffer_fmt_t * fmt;
  1026. spin_lock(&pfm_buffer_fmt_lock);
  1027. fmt = __pfm_find_buffer_fmt(uuid);
  1028. spin_unlock(&pfm_buffer_fmt_lock);
  1029. return fmt;
  1030. }
  1031. int
  1032. pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
  1033. {
  1034. int ret = 0;
  1035. /* some sanity checks */
  1036. if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
  1037. /* we need at least a handler */
  1038. if (fmt->fmt_handler == NULL) return -EINVAL;
  1039. /*
  1040. * XXX: need check validity of fmt_arg_size
  1041. */
  1042. spin_lock(&pfm_buffer_fmt_lock);
  1043. if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
  1044. printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
  1045. ret = -EBUSY;
  1046. goto out;
  1047. }
  1048. list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
  1049. printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
  1050. out:
  1051. spin_unlock(&pfm_buffer_fmt_lock);
  1052. return ret;
  1053. }
  1054. EXPORT_SYMBOL(pfm_register_buffer_fmt);
  1055. int
  1056. pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
  1057. {
  1058. pfm_buffer_fmt_t *fmt;
  1059. int ret = 0;
  1060. spin_lock(&pfm_buffer_fmt_lock);
  1061. fmt = __pfm_find_buffer_fmt(uuid);
  1062. if (!fmt) {
  1063. printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
  1064. ret = -EINVAL;
  1065. goto out;
  1066. }
  1067. list_del_init(&fmt->fmt_list);
  1068. printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
  1069. out:
  1070. spin_unlock(&pfm_buffer_fmt_lock);
  1071. return ret;
  1072. }
  1073. EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
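/*
 * Registration contract, for reference: a sampling format must supply at
 * least fmt_name, fmt_uuid and fmt_handler; registering a uuid already on
 * pfm_buffer_fmt_list fails with -EBUSY, and unregistering an unknown uuid
 * fails with -EINVAL.
 */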
  1074. static int
  1075. pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
  1076. {
  1077. unsigned long flags;
  1078. /*
  1079. * validity checks on cpu_mask have been done upstream
  1080. */
  1081. LOCK_PFS(flags);
  1082. DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  1083. pfm_sessions.pfs_sys_sessions,
  1084. pfm_sessions.pfs_task_sessions,
  1085. pfm_sessions.pfs_sys_use_dbregs,
  1086. is_syswide,
  1087. cpu));
  1088. if (is_syswide) {
  1089. /*
  1090. * cannot mix system wide and per-task sessions
  1091. */
  1092. if (pfm_sessions.pfs_task_sessions > 0UL) {
  1093. DPRINT(("system wide not possible, %u conflicting task_sessions\n",
  1094. pfm_sessions.pfs_task_sessions));
  1095. goto abort;
  1096. }
  1097. if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
  1098. DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
  1099. pfm_sessions.pfs_sys_session[cpu] = task;
  1100. pfm_sessions.pfs_sys_sessions++ ;
  1101. } else {
  1102. if (pfm_sessions.pfs_sys_sessions) goto abort;
  1103. pfm_sessions.pfs_task_sessions++;
  1104. }
  1105. DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  1106. pfm_sessions.pfs_sys_sessions,
  1107. pfm_sessions.pfs_task_sessions,
  1108. pfm_sessions.pfs_sys_use_dbregs,
  1109. is_syswide,
  1110. cpu));
  1111. UNLOCK_PFS(flags);
  1112. return 0;
  1113. error_conflict:
  1114. DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
  1115. pfm_sessions.pfs_sys_session[cpu]->pid,
  1116. smp_processor_id()));
  1117. abort:
  1118. UNLOCK_PFS(flags);
  1119. return -EBUSY;
  1120. }
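/*
 * Summary of the accounting enforced above: at most one system-wide
 * session per CPU, and system-wide sessions cannot coexist with per-task
 * sessions; any conflict is reported as -EBUSY.
 */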
  1121. static int
  1122. pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
  1123. {
  1124. unsigned long flags;
  1125. /*
  1126. * validity checks on cpu_mask have been done upstream
  1127. */
  1128. LOCK_PFS(flags);
  1129. DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  1130. pfm_sessions.pfs_sys_sessions,
  1131. pfm_sessions.pfs_task_sessions,
  1132. pfm_sessions.pfs_sys_use_dbregs,
  1133. is_syswide,
  1134. cpu));
  1135. if (is_syswide) {
  1136. pfm_sessions.pfs_sys_session[cpu] = NULL;
  1137. /*
  1138. * would not work with perfmon+more than one bit in cpu_mask
  1139. */
  1140. if (ctx && ctx->ctx_fl_using_dbreg) {
  1141. if (pfm_sessions.pfs_sys_use_dbregs == 0) {
  1142. printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
  1143. } else {
  1144. pfm_sessions.pfs_sys_use_dbregs--;
  1145. }
  1146. }
  1147. pfm_sessions.pfs_sys_sessions--;
  1148. } else {
  1149. pfm_sessions.pfs_task_sessions--;
  1150. }
  1151. DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  1152. pfm_sessions.pfs_sys_sessions,
  1153. pfm_sessions.pfs_task_sessions,
  1154. pfm_sessions.pfs_sys_use_dbregs,
  1155. is_syswide,
  1156. cpu));
  1157. UNLOCK_PFS(flags);
  1158. return 0;
  1159. }
  1160. /*
  1161. * removes virtual mapping of the sampling buffer.
  1162. * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
  1163. * a PROTECT_CTX() section.
  1164. */
  1165. static int
  1166. pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
  1167. {
  1168. int r;
  1169. /* sanity checks */
  1170. if (task->mm == NULL || size == 0UL || vaddr == NULL) {
  1171. printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
  1172. return -EINVAL;
  1173. }
  1174. DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
  1175. /*
  1176. * does the actual unmapping
  1177. */
  1178. down_write(&task->mm->mmap_sem);
  1179. DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
  1180. r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
  1181. up_write(&task->mm->mmap_sem);
  1182. if (r !=0) {
  1183. printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
  1184. }
  1185. DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
  1186. return 0;
  1187. }
  1188. /*
  1189. * free actual physical storage used by sampling buffer
  1190. */
  1191. #if 0
  1192. static int
  1193. pfm_free_smpl_buffer(pfm_context_t *ctx)
  1194. {
  1195. pfm_buffer_fmt_t *fmt;
  1196. if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
  1197. /*
  1198. * we won't use the buffer format anymore
  1199. */
  1200. fmt = ctx->ctx_buf_fmt;
  1201. DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
  1202. ctx->ctx_smpl_hdr,
  1203. ctx->ctx_smpl_size,
  1204. ctx->ctx_smpl_vaddr));
  1205. pfm_buf_fmt_exit(fmt, current, NULL, NULL);
  1206. /*
  1207. * free the buffer
  1208. */
  1209. pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
  1210. ctx->ctx_smpl_hdr = NULL;
  1211. ctx->ctx_smpl_size = 0UL;
  1212. return 0;
  1213. invalid_free:
  1214. printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
  1215. return -EINVAL;
  1216. }
  1217. #endif
  1218. static inline void
  1219. pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
  1220. {
  1221. if (fmt == NULL) return;
  1222. pfm_buf_fmt_exit(fmt, current, NULL, NULL);
  1223. }
  1224. /*
  1225. * pfmfs should _never_ be mounted by userland - too much of a security hassle,
  1226. * no real gain from having the whole whorehouse mounted. So we don't need
  1227. * any operations on the root directory. However, we need a non-trivial
  1228. * d_name - pfm: will go nicely and kill the special-casing in procfs.
  1229. */
  1230. static struct vfsmount *pfmfs_mnt;
  1231. static int __init
  1232. init_pfm_fs(void)
  1233. {
  1234. int err = register_filesystem(&pfm_fs_type);
  1235. if (!err) {
  1236. pfmfs_mnt = kern_mount(&pfm_fs_type);
  1237. err = PTR_ERR(pfmfs_mnt);
  1238. if (IS_ERR(pfmfs_mnt))
  1239. unregister_filesystem(&pfm_fs_type);
  1240. else
  1241. err = 0;
  1242. }
  1243. return err;
  1244. }
  1245. static void __exit
  1246. exit_pfm_fs(void)
  1247. {
  1248. unregister_filesystem(&pfm_fs_type);
  1249. mntput(pfmfs_mnt);
  1250. }
  1251. static ssize_t
  1252. pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
  1253. {
  1254. pfm_context_t *ctx;
  1255. pfm_msg_t *msg;
  1256. ssize_t ret;
  1257. unsigned long flags;
  1258. DECLARE_WAITQUEUE(wait, current);
  1259. if (PFM_IS_FILE(filp) == 0) {
  1260. printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", current->pid);
  1261. return -EINVAL;
  1262. }
  1263. ctx = (pfm_context_t *)filp->private_data;
  1264. if (ctx == NULL) {
  1265. printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
  1266. return -EINVAL;
  1267. }
  1268. /*
  1269. * check even when there is no message
  1270. */
  1271. if (size < sizeof(pfm_msg_t)) {
  1272. DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
  1273. return -EINVAL;
  1274. }
  1275. PROTECT_CTX(ctx, flags);
  1276. /*
  1277. * put ourselves on the wait queue
  1278. */
  1279. add_wait_queue(&ctx->ctx_msgq_wait, &wait);
  1280. for(;;) {
  1281. /*
  1282. * check wait queue
  1283. */
  1284. set_current_state(TASK_INTERRUPTIBLE);
  1285. DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
  1286. ret = 0;
  1287. if(PFM_CTXQ_EMPTY(ctx) == 0) break;
  1288. UNPROTECT_CTX(ctx, flags);
  1289. /*
  1290. * check non-blocking read
  1291. */
  1292. ret = -EAGAIN;
  1293. if(filp->f_flags & O_NONBLOCK) break;
  1294. /*
  1295. * check pending signals
  1296. */
  1297. if(signal_pending(current)) {
  1298. ret = -EINTR;
  1299. break;
  1300. }
  1301. /*
  1302. * no message, so wait
  1303. */
  1304. schedule();
  1305. PROTECT_CTX(ctx, flags);
  1306. }
  1307. DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
  1308. set_current_state(TASK_RUNNING);
  1309. remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
  1310. if (ret < 0) goto abort;
  1311. ret = -EINVAL;
  1312. msg = pfm_get_next_msg(ctx);
  1313. if (msg == NULL) {
  1314. printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
  1315. goto abort_locked;
  1316. }
  1317. DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
  1318. ret = -EFAULT;
  1319. if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
  1320. abort_locked:
  1321. UNPROTECT_CTX(ctx, flags);
  1322. abort:
  1323. return ret;
  1324. }
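/*
 * User-level view of pfm_read(), for reference: a read of at least
 * sizeof(pfm_msg_t) bytes on the context file descriptor returns one
 * queued notification message; with O_NONBLOCK an empty queue yields
 * -EAGAIN, otherwise the caller sleeps until a message arrives or a
 * signal is pending (-EINTR).
 */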
  1325. static ssize_t
  1326. pfm_write(struct file *file, const char __user *ubuf,
  1327. size_t size, loff_t *ppos)
  1328. {
  1329. DPRINT(("pfm_write called\n"));
  1330. return -EINVAL;
  1331. }
  1332. static unsigned int
  1333. pfm_poll(struct file *filp, poll_table * wait)
  1334. {
  1335. pfm_context_t *ctx;
  1336. unsigned long flags;
  1337. unsigned int mask = 0;
  1338. if (PFM_IS_FILE(filp) == 0) {
  1339. printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
  1340. return 0;
  1341. }
  1342. ctx = (pfm_context_t *)filp->private_data;
  1343. if (ctx == NULL) {
  1344. printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
  1345. return 0;
  1346. }
  1347. DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
  1348. poll_wait(filp, &ctx->ctx_msgq_wait, wait);
  1349. PROTECT_CTX(ctx, flags);
  1350. if (PFM_CTXQ_EMPTY(ctx) == 0)
  1351. mask = POLLIN | POLLRDNORM;
  1352. UNPROTECT_CTX(ctx, flags);
  1353. DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));
  1354. return mask;
  1355. }
  1356. static int
  1357. pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
  1358. {
  1359. DPRINT(("pfm_ioctl called\n"));
  1360. return -EINVAL;
  1361. }
  1362. /*
  1363. * interrupt cannot be masked when coming here
  1364. */
  1365. static inline int
  1366. pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
  1367. {
  1368. int ret;
  1369. ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
  1370. DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
  1371. current->pid,
  1372. fd,
  1373. on,
  1374. ctx->ctx_async_queue, ret));
  1375. return ret;
  1376. }
  1377. static int
  1378. pfm_fasync(int fd, struct file *filp, int on)
  1379. {
  1380. pfm_context_t *ctx;
  1381. int ret;
  1382. if (PFM_IS_FILE(filp) == 0) {
  1383. printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
  1384. return -EBADF;
  1385. }
  1386. ctx = (pfm_context_t *)filp->private_data;
  1387. if (ctx == NULL) {
  1388. printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
  1389. return -EBADF;
  1390. }
  1391. /*
  1392. * we cannot mask interrupts during this call because this
  1393. * may go to sleep if memory is not readily available.
  1394. *
  1395. * We are protected from the context disappearing by the get_fd()/put_fd()
  1396. * done in caller. Serialization of this function is ensured by caller.
  1397. */
  1398. ret = pfm_do_fasync(fd, filp, ctx, on);
  1399. DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
  1400. fd,
  1401. on,
  1402. ctx->ctx_async_queue, ret));
  1403. return ret;
  1404. }
  1405. #ifdef CONFIG_SMP
  1406. /*
  1407. * this function is exclusively called from pfm_close().
  1408. * The context is not protected at that time, nor are interrupts
  1409. * on the remote CPU. That's necessary to avoid deadlocks.
  1410. */
  1411. static void
  1412. pfm_syswide_force_stop(void *info)
  1413. {
  1414. pfm_context_t *ctx = (pfm_context_t *)info;
  1415. struct pt_regs *regs = ia64_task_regs(current);
  1416. struct task_struct *owner;
  1417. unsigned long flags;
  1418. int ret;
  1419. if (ctx->ctx_cpu != smp_processor_id()) {
  1420. printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n",
  1421. ctx->ctx_cpu,
  1422. smp_processor_id());
  1423. return;
  1424. }
  1425. owner = GET_PMU_OWNER();
  1426. if (owner != ctx->ctx_task) {
  1427. printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
  1428. smp_processor_id(),
  1429. owner->pid, ctx->ctx_task->pid);
  1430. return;
  1431. }
  1432. if (GET_PMU_CTX() != ctx) {
  1433. printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
  1434. smp_processor_id(),
  1435. GET_PMU_CTX(), ctx);
  1436. return;
  1437. }
  1438. DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));
  1439. /*
  1440. * the context is already protected in pfm_close(), we simply
  1441. * need to mask interrupts to avoid a PMU interrupt race on
  1442. * this CPU
  1443. */
  1444. local_irq_save(flags);
  1445. ret = pfm_context_unload(ctx, NULL, 0, regs);
  1446. if (ret) {
  1447. DPRINT(("context_unload returned %d\n", ret));
  1448. }
  1449. /*
  1450. * unmask interrupts, PMU interrupts are now spurious here
  1451. */
  1452. local_irq_restore(flags);
  1453. }
  1454. static void
  1455. pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
  1456. {
  1457. int ret;
  1458. DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
  1459. ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
  1460. DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
  1461. }
  1462. #endif /* CONFIG_SMP */
  1463. /*
  1464. * called for each close(). Partially free resources.
  1465. * When caller is self-monitoring, the context is unloaded.
  1466. */
  1467. static int
  1468. pfm_flush(struct file *filp)
  1469. {
  1470. pfm_context_t *ctx;
  1471. struct task_struct *task;
  1472. struct pt_regs *regs;
  1473. unsigned long flags;
  1474. unsigned long smpl_buf_size = 0UL;
  1475. void *smpl_buf_vaddr = NULL;
  1476. int state, is_system;
  1477. if (PFM_IS_FILE(filp) == 0) {
  1478. DPRINT(("bad magic for\n"));
  1479. return -EBADF;
  1480. }
  1481. ctx = (pfm_context_t *)filp->private_data;
  1482. if (ctx == NULL) {
  1483. printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
  1484. return -EBADF;
  1485. }
  1486. /*
  1487. * remove our file from the async queue, if we use this mode.
  1488. * This can be done without the context being protected. We come
  1489. * here when the context has become unreachable by other tasks.
  1490. *
  1491. * We may still have active monitoring at this point and we may
  1492. * end up in pfm_overflow_handler(). However, fasync_helper()
  1493. * operates with interrupts disabled and it cleans up the
  1494. * queue. If the PMU handler is called prior to entering
  1495. * fasync_helper() then it will send a signal. If it is
  1496. * invoked after, it will find an empty queue and no
  1497. * signal will be sent. In both cases, we are safe
  1498. */
  1499. if (filp->f_flags & FASYNC) {
  1500. DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
  1501. pfm_do_fasync (-1, filp, ctx, 0);
  1502. }
  1503. PROTECT_CTX(ctx, flags);
  1504. state = ctx->ctx_state;
  1505. is_system = ctx->ctx_fl_system;
  1506. task = PFM_CTX_TASK(ctx);
  1507. regs = ia64_task_regs(task);
  1508. DPRINT(("ctx_state=%d is_current=%d\n",
  1509. state,
  1510. task == current ? 1 : 0));
  1511. /*
  1512. * if state == UNLOADED, then task is NULL
  1513. */
  1514. /*
  1515. * we must stop and unload because we are losing access to the context.
  1516. */
  1517. if (task == current) {
  1518. #ifdef CONFIG_SMP
  1519. /*
  1520. * the task IS the owner but it migrated to another CPU: that's bad
  1521. * but we must handle this cleanly. Unfortunately, the kernel does
  1522. * not provide a mechanism to block migration (while the context is loaded).
  1523. *
  1524. * We need to release the resource on the ORIGINAL cpu.
  1525. */
  1526. if (is_system && ctx->ctx_cpu != smp_processor_id()) {
  1527. DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  1528. /*
  1529. * keep context protected but unmask interrupt for IPI
  1530. */
  1531. local_irq_restore(flags);
  1532. pfm_syswide_cleanup_other_cpu(ctx);
  1533. /*
  1534. * restore interrupt masking
  1535. */
  1536. local_irq_save(flags);
  1537. /*
  1538. * context is unloaded at this point
  1539. */
  1540. } else
  1541. #endif /* CONFIG_SMP */
  1542. {
  1543. DPRINT(("forcing unload\n"));
  1544. /*
  1545. * stop and unload, returning with state UNLOADED
  1546. * and session unreserved.
  1547. */
  1548. pfm_context_unload(ctx, NULL, 0, regs);
  1549. DPRINT(("ctx_state=%d\n", ctx->ctx_state));
  1550. }
  1551. }
  1552. /*
  1553. * remove virtual mapping, if any, for the calling task.
  1554. * cannot reset ctx field until last user is calling close().
  1555. *
  1556. * ctx_smpl_vaddr must never be cleared because it is ne…