PageRenderTime 121ms CodeModel.GetById 17ms app.highlight 83ms RepoModel.GetById 1ms app.codeStats 2ms

/arch/ia64/kernel/perfmon.c

https://bitbucket.org/evzijst/gittest
C | 6676 lines | 3710 code | 1108 blank | 1858 comment | 795 complexity | 1d0f29d4a70baa02df7e7151feb78ee8 MD5 | raw file

Large files files are truncated, but you can click here to view the full file

   1/*
   2 * This file implements the perfmon-2 subsystem which is used
   3 * to program the IA-64 Performance Monitoring Unit (PMU).
   4 *
   5 * The initial version of perfmon.c was written by
   6 * Ganesh Venkitachalam, IBM Corp.
   7 *
   8 * Then it was modified for perfmon-1.x by Stephane Eranian and
   9 * David Mosberger, Hewlett Packard Co.
  10 *
  11 * Version Perfmon-2.x is a rewrite of perfmon-1.x
  12 * by Stephane Eranian, Hewlett Packard Co.
  13 *
  14 * Copyright (C) 1999-2003, 2005  Hewlett Packard Co
  15 *               Stephane Eranian <eranian@hpl.hp.com>
  16 *               David Mosberger-Tang <davidm@hpl.hp.com>
  17 *
  18 * More information about perfmon available at:
  19 * 	http://www.hpl.hp.com/research/linux/perfmon
  20 */
  21
  22#include <linux/config.h>
  23#include <linux/module.h>
  24#include <linux/kernel.h>
  25#include <linux/sched.h>
  26#include <linux/interrupt.h>
  27#include <linux/smp_lock.h>
  28#include <linux/proc_fs.h>
  29#include <linux/seq_file.h>
  30#include <linux/init.h>
  31#include <linux/vmalloc.h>
  32#include <linux/mm.h>
  33#include <linux/sysctl.h>
  34#include <linux/list.h>
  35#include <linux/file.h>
  36#include <linux/poll.h>
  37#include <linux/vfs.h>
  38#include <linux/pagemap.h>
  39#include <linux/mount.h>
  40#include <linux/version.h>
  41#include <linux/bitops.h>
  42
  43#include <asm/errno.h>
  44#include <asm/intrinsics.h>
  45#include <asm/page.h>
  46#include <asm/perfmon.h>
  47#include <asm/processor.h>
  48#include <asm/signal.h>
  49#include <asm/system.h>
  50#include <asm/uaccess.h>
  51#include <asm/delay.h>
  52
  53#ifdef CONFIG_PERFMON
  54/*
  55 * perfmon context state
  56 */
  57#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
  58#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
  59#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
  60#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */
  61
  62#define PFM_INVALID_ACTIVATION	(~0UL)
  63
  64/*
  65 * depth of message queue
  66 */
  67#define PFM_MAX_MSGS		32
  68#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
  69
  70/*
  71 * type of a PMU register (bitmask).
  72 * bitmask structure:
  73 * 	bit0   : register implemented
  74 * 	bit1   : end marker
  75 * 	bit2-3 : reserved
  76 * 	bit4   : pmc has pmc.pm
  77 * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
  78 * 	bit6-7 : register type
  79 * 	bit8-31: reserved
  80 */
  81#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
  82#define PFM_REG_IMPL		0x1 /* register implemented */
  83#define PFM_REG_END		0x2 /* end marker */
  84#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
  85#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
  86#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
  87#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
  88#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
  89
  90#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
  91#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)
  92
  93#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
  94
  95/* i assumed unsigned */
  96#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
  97#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
  98
  99/* XXX: these assume that register i is implemented */
 100#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
 101#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
 102#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
 103#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)
 104
 105#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
 106#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
 107#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
 108#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]
 109
 110#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
 111#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS
 112
 113#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
 114#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
 115#define PFM_CTX_TASK(h)		(h)->ctx_task
 116
 117#define PMU_PMC_OI		5 /* position of pmc.oi bit */
 118
 119/* XXX: does not support more than 64 PMDs */
 120#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
 121#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
 122
 123#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
 124
 125#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
 126#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
 127#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
 128#define PFM_CODE_RR	0	/* requesting code range restriction */
 129#define PFM_DATA_RR	1	/* requestion data range restriction */
 130
 131#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
 132#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
 133#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)
 134
 135#define RDEP(x)	(1UL<<(x))
 136
 137/*
 138 * context protection macros
 139 * in SMP:
 140 * 	- we need to protect against CPU concurrency (spin_lock)
 141 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 142 * in UP:
 143 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 144 *
 145 * spin_lock_irqsave()/spin_lock_irqrestore():
 146 * 	in SMP: local_irq_disable + spin_lock
 147 * 	in UP : local_irq_disable
 148 *
 149 * spin_lock()/spin_lock():
 150 * 	in UP : removed automatically
 151 * 	in SMP: protect against context accesses from other CPU. interrupts
 152 * 	        are not masked. This is useful for the PMU interrupt handler
 153 * 	        because we know we will not get PMU concurrency in that code.
 154 */
 155#define PROTECT_CTX(c, f) \
 156	do {  \
 157		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
 158		spin_lock_irqsave(&(c)->ctx_lock, f); \
 159		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
 160	} while(0)
 161
 162#define UNPROTECT_CTX(c, f) \
 163	do { \
 164		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
 165		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
 166	} while(0)
 167
 168#define PROTECT_CTX_NOPRINT(c, f) \
 169	do {  \
 170		spin_lock_irqsave(&(c)->ctx_lock, f); \
 171	} while(0)
 172
 173
 174#define UNPROTECT_CTX_NOPRINT(c, f) \
 175	do { \
 176		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
 177	} while(0)
 178
 179
 180#define PROTECT_CTX_NOIRQ(c) \
 181	do {  \
 182		spin_lock(&(c)->ctx_lock); \
 183	} while(0)
 184
 185#define UNPROTECT_CTX_NOIRQ(c) \
 186	do { \
 187		spin_unlock(&(c)->ctx_lock); \
 188	} while(0)
 189
 190
 191#ifdef CONFIG_SMP
 192
 193#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
 194#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
 195#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()
 196
 197#else /* !CONFIG_SMP */
 198#define SET_ACTIVATION(t) 	do {} while(0)
 199#define GET_ACTIVATION(t) 	do {} while(0)
 200#define INC_ACTIVATION(t) 	do {} while(0)
 201#endif /* CONFIG_SMP */
 202
 203#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
 204#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
 205#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)
 206
 207#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
 208#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
 209
 210#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
 211
 212/*
 213 * cmp0 must be the value of pmc0
 214 */
 215#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
 216
 217#define PFMFS_MAGIC 0xa0b4d889
 218
 219/*
 220 * debugging
 221 */
 222#define PFM_DEBUGGING 1
 223#ifdef PFM_DEBUGGING
 224#define DPRINT(a) \
 225	do { \
 226		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
 227	} while (0)
 228
 229#define DPRINT_ovfl(a) \
 230	do { \
 231		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
 232	} while (0)
 233#endif
 234
 235/*
 236 * 64-bit software counter structure
 237 *
 238 * the next_reset_type is applied to the next call to pfm_reset_regs()
 239 */
 240typedef struct {
 241	unsigned long	val;		/* virtual 64bit counter value */
 242	unsigned long	lval;		/* last reset value */
 243	unsigned long	long_reset;	/* reset value on sampling overflow */
 244	unsigned long	short_reset;    /* reset value on overflow */
 245	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
 246	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflow */
 247	unsigned long	seed;		/* seed for random-number generator */
 248	unsigned long	mask;		/* mask for random-number generator */
 249	unsigned int 	flags;		/* notify/do not notify */
 250	unsigned long	eventid;	/* overflow event identifier */
 251} pfm_counter_t;
 252
 253/*
 254 * context flags
 255 */
 256typedef struct {
 257	unsigned int block:1;		/* when 1, task will blocked on user notifications */
 258	unsigned int system:1;		/* do system wide monitoring */
 259	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
 260	unsigned int is_sampling:1;	/* true if using a custom format */
 261	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
 262	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
 263	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
 264	unsigned int no_msg:1;		/* no message sent on overflow */
 265	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
 266	unsigned int reserved:22;
 267} pfm_context_flags_t;
 268
 269#define PFM_TRAP_REASON_NONE		0x0	/* default value */
 270#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
 271#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */
 272
 273
 274/*
 275 * perfmon context: encapsulates all the state of a monitoring session
 276 */
 277
 278typedef struct pfm_context {
 279	spinlock_t		ctx_lock;		/* context protection */
 280
 281	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
 282	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */
 283
 284	struct task_struct 	*ctx_task;		/* task to which context is attached */
 285
 286	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
 287
 288	struct semaphore	ctx_restart_sem;   	/* use for blocking notification mode */
 289
 290	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
 291	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
 292	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */
 293
 294	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
 295	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
 296	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
 297
 298	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */
 299
 300	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
 301	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
 302	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
 303	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
 304
 305	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
 306
 307	u64			ctx_saved_psr_up;	/* only contains psr.up value */
 308
 309	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
 310	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
 311	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
 312
 313	int			ctx_fd;			/* file descriptor used my this context */
 314	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */
 315
 316	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
 317	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
 318	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
 319	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */
 320
 321	wait_queue_head_t 	ctx_msgq_wait;
 322	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
 323	int			ctx_msgq_head;
 324	int			ctx_msgq_tail;
 325	struct fasync_struct	*ctx_async_queue;
 326
 327	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
 328} pfm_context_t;
 329
 330/*
 331 * magic number used to verify that structure is really
 332 * a perfmon context
 333 */
 334#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)
 335
 336#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)
 337
 338#ifdef CONFIG_SMP
 339#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
 340#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
 341#else
 342#define SET_LAST_CPU(ctx, v)	do {} while(0)
 343#define GET_LAST_CPU(ctx)	do {} while(0)
 344#endif
 345
 346
 347#define ctx_fl_block		ctx_flags.block
 348#define ctx_fl_system		ctx_flags.system
 349#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
 350#define ctx_fl_is_sampling	ctx_flags.is_sampling
 351#define ctx_fl_excl_idle	ctx_flags.excl_idle
 352#define ctx_fl_going_zombie	ctx_flags.going_zombie
 353#define ctx_fl_trap_reason	ctx_flags.trap_reason
 354#define ctx_fl_no_msg		ctx_flags.no_msg
 355#define ctx_fl_can_restart	ctx_flags.can_restart
 356
 357#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
 358#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking
 359
 360/*
 361 * global information about all sessions
 362 * mostly used to synchronize between system wide and per-process
 363 */
 364typedef struct {
 365	spinlock_t		pfs_lock;		   /* lock the structure */
 366
 367	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
 368	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
 369	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
 370	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
 371	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
 372} pfm_session_t;
 373
 374/*
 375 * information about a PMC or PMD.
 376 * dep_pmd[]: a bitmask of dependent PMD registers
 377 * dep_pmc[]: a bitmask of dependent PMC registers
 378 */
 379typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
 380typedef struct {
 381	unsigned int		type;
 382	int			pm_pos;
 383	unsigned long		default_value;	/* power-on default value */
 384	unsigned long		reserved_mask;	/* bitmask of reserved bits */
 385	pfm_reg_check_t		read_check;
 386	pfm_reg_check_t		write_check;
 387	unsigned long		dep_pmd[4];
 388	unsigned long		dep_pmc[4];
 389} pfm_reg_desc_t;
 390
 391/* assume cnum is a valid monitor */
 392#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
 393
 394/*
 395 * This structure is initialized at boot time and contains
 396 * a description of the PMU main characteristics.
 397 *
 398 * If the probe function is defined, detection is based
 399 * on its return value: 
 400 * 	- 0 means recognized PMU
 401 * 	- anything else means not supported
 402 * When the probe function is not defined, then the pmu_family field
 403 * is used and it must match the host CPU family such that:
 404 * 	- cpu->family & config->pmu_family != 0
 405 */
 406typedef struct {
 407	unsigned long  ovfl_val;	/* overflow value for counters */
 408
 409	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
 410	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
 411
 412	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
 413	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
 414	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
 415	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */
 416
 417	char	      *pmu_name;	/* PMU family name */
 418	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
 419	unsigned int  flags;		/* pmu specific flags */
 420	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
 421	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
 422	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
 423	int           (*probe)(void);   /* customized probe routine */
 424	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
 425} pmu_config_t;
 426/*
 427 * PMU specific flags
 428 */
 429#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */
 430
 431/*
 432 * debug register related type definitions
 433 */
 434typedef struct {
 435	unsigned long ibr_mask:56;
 436	unsigned long ibr_plm:4;
 437	unsigned long ibr_ig:3;
 438	unsigned long ibr_x:1;
 439} ibr_mask_reg_t;
 440
 441typedef struct {
 442	unsigned long dbr_mask:56;
 443	unsigned long dbr_plm:4;
 444	unsigned long dbr_ig:2;
 445	unsigned long dbr_w:1;
 446	unsigned long dbr_r:1;
 447} dbr_mask_reg_t;
 448
 449typedef union {
 450	unsigned long  val;
 451	ibr_mask_reg_t ibr;
 452	dbr_mask_reg_t dbr;
 453} dbreg_t;
 454
 455
 456/*
 457 * perfmon command descriptions
 458 */
 459typedef struct {
 460	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
 461	char		*cmd_name;
 462	int		cmd_flags;
 463	unsigned int	cmd_narg;
 464	size_t		cmd_argsize;
 465	int		(*cmd_getsize)(void *arg, size_t *sz);
 466} pfm_cmd_desc_t;
 467
 468#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
 469#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
 470#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
 471#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */
 472
 473
 474#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
 475#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
 476#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
 477#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
 478#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
 479
 480#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
 481
 482typedef struct {
 483	int	debug;		/* turn on/off debugging via syslog */
 484	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
 485	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
 486	int	expert_mode;	/* turn on/off value checking */
 487	int 	debug_pfm_read;
 488} pfm_sysctl_t;
 489
 490typedef struct {
 491	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
 492	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
 493	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
 494	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
 495	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
 496	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
 497	unsigned long pfm_smpl_handler_calls;
 498	unsigned long pfm_smpl_handler_cycles;
 499	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
 500} pfm_stats_t;
 501
 502/*
 503 * perfmon internal variables
 504 */
 505static pfm_stats_t		pfm_stats[NR_CPUS];
 506static pfm_session_t		pfm_sessions;	/* global sessions information */
 507
 508static struct proc_dir_entry 	*perfmon_dir;
 509static pfm_uuid_t		pfm_null_uuid = {0,};
 510
 511static spinlock_t		pfm_buffer_fmt_lock;
 512static LIST_HEAD(pfm_buffer_fmt_list);
 513
 514static pmu_config_t		*pmu_conf;
 515
 516/* sysctl() controls */
 517static pfm_sysctl_t pfm_sysctl;
 518int pfm_debug_var;
 519
 520static ctl_table pfm_ctl_table[]={
 521	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
 522	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
 523	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
 524	{4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
 525	{ 0, },
 526};
 527static ctl_table pfm_sysctl_dir[] = {
 528	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
 529 	{0,},
 530};
 531static ctl_table pfm_sysctl_root[] = {
 532	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
 533 	{0,},
 534};
 535static struct ctl_table_header *pfm_sysctl_header;
 536
 537static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
 538static int pfm_flush(struct file *filp);
 539
 540#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
 541#define pfm_get_cpu_data(a,b)		per_cpu(a, b)
 542
 543static inline void
 544pfm_put_task(struct task_struct *task)
 545{
 546	if (task != current) put_task_struct(task);
 547}
 548
 549static inline void
 550pfm_set_task_notify(struct task_struct *task)
 551{
 552	struct thread_info *info;
 553
 554	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
 555	set_bit(TIF_NOTIFY_RESUME, &info->flags);
 556}
 557
 558static inline void
 559pfm_clear_task_notify(void)
 560{
 561	clear_thread_flag(TIF_NOTIFY_RESUME);
 562}
 563
 564static inline void
 565pfm_reserve_page(unsigned long a)
 566{
 567	SetPageReserved(vmalloc_to_page((void *)a));
 568}
 569static inline void
 570pfm_unreserve_page(unsigned long a)
 571{
 572	ClearPageReserved(vmalloc_to_page((void*)a));
 573}
 574
 575static inline unsigned long
 576pfm_protect_ctx_ctxsw(pfm_context_t *x)
 577{
 578	spin_lock(&(x)->ctx_lock);
 579	return 0UL;
 580}
 581
 582static inline unsigned long
 583pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
 584{
 585	spin_unlock(&(x)->ctx_lock);
 586}
 587
 588static inline unsigned int
 589pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
 590{
 591	return do_munmap(mm, addr, len);
 592}
 593
 594static inline unsigned long 
 595pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
 596{
 597	return get_unmapped_area(file, addr, len, pgoff, flags);
 598}
 599
 600
 601static struct super_block *
 602pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
 603{
 604	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
 605}
 606
 607static struct file_system_type pfm_fs_type = {
 608	.name     = "pfmfs",
 609	.get_sb   = pfmfs_get_sb,
 610	.kill_sb  = kill_anon_super,
 611};
 612
 613DEFINE_PER_CPU(unsigned long, pfm_syst_info);
 614DEFINE_PER_CPU(struct task_struct *, pmu_owner);
 615DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
 616DEFINE_PER_CPU(unsigned long, pmu_activation_number);
 617
 618
 619/* forward declaration */
 620static struct file_operations pfm_file_ops;
 621
 622/*
 623 * forward declarations
 624 */
 625#ifndef CONFIG_SMP
 626static void pfm_lazy_save_regs (struct task_struct *ta);
 627#endif
 628
 629void dump_pmu_state(const char *);
 630static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
 631
 632#include "perfmon_itanium.h"
 633#include "perfmon_mckinley.h"
 634#include "perfmon_generic.h"
 635
 636static pmu_config_t *pmu_confs[]={
 637	&pmu_conf_mck,
 638	&pmu_conf_ita,
 639	&pmu_conf_gen, /* must be last */
 640	NULL
 641};
 642
 643
 644static int pfm_end_notify_user(pfm_context_t *ctx);
 645
 646static inline void
 647pfm_clear_psr_pp(void)
 648{
 649	ia64_rsm(IA64_PSR_PP);
 650	ia64_srlz_i();
 651}
 652
 653static inline void
 654pfm_set_psr_pp(void)
 655{
 656	ia64_ssm(IA64_PSR_PP);
 657	ia64_srlz_i();
 658}
 659
 660static inline void
 661pfm_clear_psr_up(void)
 662{
 663	ia64_rsm(IA64_PSR_UP);
 664	ia64_srlz_i();
 665}
 666
 667static inline void
 668pfm_set_psr_up(void)
 669{
 670	ia64_ssm(IA64_PSR_UP);
 671	ia64_srlz_i();
 672}
 673
 674static inline unsigned long
 675pfm_get_psr(void)
 676{
 677	unsigned long tmp;
 678	tmp = ia64_getreg(_IA64_REG_PSR);
 679	ia64_srlz_i();
 680	return tmp;
 681}
 682
 683static inline void
 684pfm_set_psr_l(unsigned long val)
 685{
 686	ia64_setreg(_IA64_REG_PSR_L, val);
 687	ia64_srlz_i();
 688}
 689
 690static inline void
 691pfm_freeze_pmu(void)
 692{
 693	ia64_set_pmc(0,1UL);
 694	ia64_srlz_d();
 695}
 696
 697static inline void
 698pfm_unfreeze_pmu(void)
 699{
 700	ia64_set_pmc(0,0UL);
 701	ia64_srlz_d();
 702}
 703
 704static inline void
 705pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
 706{
 707	int i;
 708
 709	for (i=0; i < nibrs; i++) {
 710		ia64_set_ibr(i, ibrs[i]);
 711		ia64_dv_serialize_instruction();
 712	}
 713	ia64_srlz_i();
 714}
 715
 716static inline void
 717pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
 718{
 719	int i;
 720
 721	for (i=0; i < ndbrs; i++) {
 722		ia64_set_dbr(i, dbrs[i]);
 723		ia64_dv_serialize_data();
 724	}
 725	ia64_srlz_d();
 726}
 727
 728/*
 729 * PMD[i] must be a counter. no check is made
 730 */
 731static inline unsigned long
 732pfm_read_soft_counter(pfm_context_t *ctx, int i)
 733{
 734	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
 735}
 736
 737/*
 738 * PMD[i] must be a counter. no check is made
 739 */
 740static inline void
 741pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
 742{
 743	unsigned long ovfl_val = pmu_conf->ovfl_val;
 744
 745	ctx->ctx_pmds[i].val = val  & ~ovfl_val;
 746	/*
 747	 * writing to unimplemented part is ignore, so we do not need to
 748	 * mask off top part
 749	 */
 750	ia64_set_pmd(i, val & ovfl_val);
 751}
 752
 753static pfm_msg_t *
 754pfm_get_new_msg(pfm_context_t *ctx)
 755{
 756	int idx, next;
 757
 758	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
 759
 760	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
 761	if (next == ctx->ctx_msgq_head) return NULL;
 762
 763 	idx = 	ctx->ctx_msgq_tail;
 764	ctx->ctx_msgq_tail = next;
 765
 766	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
 767
 768	return ctx->ctx_msgq+idx;
 769}
 770
 771static pfm_msg_t *
 772pfm_get_next_msg(pfm_context_t *ctx)
 773{
 774	pfm_msg_t *msg;
 775
 776	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
 777
 778	if (PFM_CTXQ_EMPTY(ctx)) return NULL;
 779
 780	/*
 781	 * get oldest message
 782	 */
 783	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
 784
 785	/*
 786	 * and move forward
 787	 */
 788	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
 789
 790	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
 791
 792	return msg;
 793}
 794
 795static void
 796pfm_reset_msgq(pfm_context_t *ctx)
 797{
 798	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
 799	DPRINT(("ctx=%p msgq reset\n", ctx));
 800}
 801
 802static void *
 803pfm_rvmalloc(unsigned long size)
 804{
 805	void *mem;
 806	unsigned long addr;
 807
 808	size = PAGE_ALIGN(size);
 809	mem  = vmalloc(size);
 810	if (mem) {
 811		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
 812		memset(mem, 0, size);
 813		addr = (unsigned long)mem;
 814		while (size > 0) {
 815			pfm_reserve_page(addr);
 816			addr+=PAGE_SIZE;
 817			size-=PAGE_SIZE;
 818		}
 819	}
 820	return mem;
 821}
 822
 823static void
 824pfm_rvfree(void *mem, unsigned long size)
 825{
 826	unsigned long addr;
 827
 828	if (mem) {
 829		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
 830		addr = (unsigned long) mem;
 831		while ((long) size > 0) {
 832			pfm_unreserve_page(addr);
 833			addr+=PAGE_SIZE;
 834			size-=PAGE_SIZE;
 835		}
 836		vfree(mem);
 837	}
 838	return;
 839}
 840
 841static pfm_context_t *
 842pfm_context_alloc(void)
 843{
 844	pfm_context_t *ctx;
 845
 846	/* 
 847	 * allocate context descriptor 
 848	 * must be able to free with interrupts disabled
 849	 */
 850	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
 851	if (ctx) {
 852		memset(ctx, 0, sizeof(pfm_context_t));
 853		DPRINT(("alloc ctx @%p\n", ctx));
 854	}
 855	return ctx;
 856}
 857
 858static void
 859pfm_context_free(pfm_context_t *ctx)
 860{
 861	if (ctx) {
 862		DPRINT(("free ctx @%p\n", ctx));
 863		kfree(ctx);
 864	}
 865}
 866
 867static void
 868pfm_mask_monitoring(struct task_struct *task)
 869{
 870	pfm_context_t *ctx = PFM_GET_CTX(task);
 871	struct thread_struct *th = &task->thread;
 872	unsigned long mask, val, ovfl_mask;
 873	int i;
 874
 875	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
 876
 877	ovfl_mask = pmu_conf->ovfl_val;
 878	/*
 879	 * monitoring can only be masked as a result of a valid
 880	 * counter overflow. In UP, it means that the PMU still
 881	 * has an owner. Note that the owner can be different
 882	 * from the current task. However the PMU state belongs
 883	 * to the owner.
 884	 * In SMP, a valid overflow only happens when task is
 885	 * current. Therefore if we come here, we know that
 886	 * the PMU state belongs to the current task, therefore
 887	 * we can access the live registers.
 888	 *
 889	 * So in both cases, the live register contains the owner's
 890	 * state. We can ONLY touch the PMU registers and NOT the PSR.
 891	 *
 892	 * As a consequence to this call, the thread->pmds[] array
 893	 * contains stale information which must be ignored
 894	 * when context is reloaded AND monitoring is active (see
 895	 * pfm_restart).
 896	 */
 897	mask = ctx->ctx_used_pmds[0];
 898	for (i = 0; mask; i++, mask>>=1) {
 899		/* skip non used pmds */
 900		if ((mask & 0x1) == 0) continue;
 901		val = ia64_get_pmd(i);
 902
 903		if (PMD_IS_COUNTING(i)) {
 904			/*
 905		 	 * we rebuild the full 64 bit value of the counter
 906		 	 */
 907			ctx->ctx_pmds[i].val += (val & ovfl_mask);
 908		} else {
 909			ctx->ctx_pmds[i].val = val;
 910		}
 911		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
 912			i,
 913			ctx->ctx_pmds[i].val,
 914			val & ovfl_mask));
 915	}
 916	/*
 917	 * mask monitoring by setting the privilege level to 0
 918	 * we cannot use psr.pp/psr.up for this, it is controlled by
 919	 * the user
 920	 *
 921	 * if task is current, modify actual registers, otherwise modify
 922	 * thread save state, i.e., what will be restored in pfm_load_regs()
 923	 */
 924	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 925	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 926		if ((mask & 0x1) == 0UL) continue;
 927		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
 928		th->pmcs[i] &= ~0xfUL;
 929		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
 930	}
 931	/*
 932	 * make all of this visible
 933	 */
 934	ia64_srlz_d();
 935}
 936
 937/*
 938 * must always be done with task == current
 939 *
 940 * context must be in MASKED state when calling
 941 */
 942static void
 943pfm_restore_monitoring(struct task_struct *task)
 944{
 945	pfm_context_t *ctx = PFM_GET_CTX(task);
 946	struct thread_struct *th = &task->thread;
 947	unsigned long mask, ovfl_mask;
 948	unsigned long psr, val;
 949	int i, is_system;
 950
 951	is_system = ctx->ctx_fl_system;
 952	ovfl_mask = pmu_conf->ovfl_val;
 953
 954	if (task != current) {
 955		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
 956		return;
 957	}
 958	if (ctx->ctx_state != PFM_CTX_MASKED) {
 959		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
 960			task->pid, current->pid, ctx->ctx_state);
 961		return;
 962	}
 963	psr = pfm_get_psr();
 964	/*
 965	 * monitoring is masked via the PMC.
 966	 * As we restore their value, we do not want each counter to
 967	 * restart right away. We stop monitoring using the PSR,
 968	 * restore the PMC (and PMD) and then re-establish the psr
 969	 * as it was. Note that there can be no pending overflow at
 970	 * this point, because monitoring was MASKED.
 971	 *
 972	 * system-wide session are pinned and self-monitoring
 973	 */
 974	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
 975		/* disable dcr pp */
 976		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
 977		pfm_clear_psr_pp();
 978	} else {
 979		pfm_clear_psr_up();
 980	}
 981	/*
 982	 * first, we restore the PMD
 983	 */
 984	mask = ctx->ctx_used_pmds[0];
 985	for (i = 0; mask; i++, mask>>=1) {
 986		/* skip non used pmds */
 987		if ((mask & 0x1) == 0) continue;
 988
 989		if (PMD_IS_COUNTING(i)) {
 990			/*
 991			 * we split the 64bit value according to
 992			 * counter width
 993			 */
 994			val = ctx->ctx_pmds[i].val & ovfl_mask;
 995			ctx->ctx_pmds[i].val &= ~ovfl_mask;
 996		} else {
 997			val = ctx->ctx_pmds[i].val;
 998		}
 999		ia64_set_pmd(i, val);
1000
1001		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
1002			i,
1003			ctx->ctx_pmds[i].val,
1004			val));
1005	}
1006	/*
1007	 * restore the PMCs
1008	 */
1009	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
1010	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
1011		if ((mask & 0x1) == 0UL) continue;
1012		th->pmcs[i] = ctx->ctx_pmcs[i];
1013		ia64_set_pmc(i, th->pmcs[i]);
1014		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
1015	}
1016	ia64_srlz_d();
1017
1018	/*
1019	 * must restore DBR/IBR because could be modified while masked
1020	 * XXX: need to optimize 
1021	 */
1022	if (ctx->ctx_fl_using_dbreg) {
1023		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
1024		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
1025	}
1026
1027	/*
1028	 * now restore PSR
1029	 */
1030	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
1031		/* enable dcr pp */
1032		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
1033		ia64_srlz_i();
1034	}
1035	pfm_set_psr_l(psr);
1036}
1037
1038static inline void
1039pfm_save_pmds(unsigned long *pmds, unsigned long mask)
1040{
1041	int i;
1042
1043	ia64_srlz_d();
1044
1045	for (i=0; mask; i++, mask>>=1) {
1046		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
1047	}
1048}
1049
1050/*
1051 * reload from thread state (used for ctxw only)
1052 */
1053static inline void
1054pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
1055{
1056	int i;
1057	unsigned long val, ovfl_val = pmu_conf->ovfl_val;
1058
1059	for (i=0; mask; i++, mask>>=1) {
1060		if ((mask & 0x1) == 0) continue;
1061		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
1062		ia64_set_pmd(i, val);
1063	}
1064	ia64_srlz_d();
1065}
1066
1067/*
1068 * propagate PMD from context to thread-state
1069 */
1070static inline void
1071pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
1072{
1073	struct thread_struct *thread = &task->thread;
1074	unsigned long ovfl_val = pmu_conf->ovfl_val;
1075	unsigned long mask = ctx->ctx_all_pmds[0];
1076	unsigned long val;
1077	int i;
1078
1079	DPRINT(("mask=0x%lx\n", mask));
1080
1081	for (i=0; mask; i++, mask>>=1) {
1082
1083		val = ctx->ctx_pmds[i].val;
1084
1085		/*
1086		 * We break up the 64 bit value into 2 pieces
1087		 * the lower bits go to the machine state in the
1088		 * thread (will be reloaded on ctxsw in).
1089		 * The upper part stays in the soft-counter.
1090		 */
1091		if (PMD_IS_COUNTING(i)) {
1092			ctx->ctx_pmds[i].val = val & ~ovfl_val;
1093			 val &= ovfl_val;
1094		}
1095		thread->pmds[i] = val;
1096
1097		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
1098			i,
1099			thread->pmds[i],
1100			ctx->ctx_pmds[i].val));
1101	}
1102}
1103
1104/*
1105 * propagate PMC from context to thread-state
1106 */
1107static inline void
1108pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
1109{
1110	struct thread_struct *thread = &task->thread;
1111	unsigned long mask = ctx->ctx_all_pmcs[0];
1112	int i;
1113
1114	DPRINT(("mask=0x%lx\n", mask));
1115
1116	for (i=0; mask; i++, mask>>=1) {
1117		/* masking 0 with ovfl_val yields 0 */
1118		thread->pmcs[i] = ctx->ctx_pmcs[i];
1119		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
1120	}
1121}
1122
1123
1124
1125static inline void
1126pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
1127{
1128	int i;
1129
1130	for (i=0; mask; i++, mask>>=1) {
1131		if ((mask & 0x1) == 0) continue;
1132		ia64_set_pmc(i, pmcs[i]);
1133	}
1134	ia64_srlz_d();
1135}
1136
1137static inline int
1138pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
1139{
1140	return memcmp(a, b, sizeof(pfm_uuid_t));
1141}
1142
1143static inline int
1144pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
1145{
1146	int ret = 0;
1147	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
1148	return ret;
1149}
1150
1151static inline int
1152pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
1153{
1154	int ret = 0;
1155	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
1156	return ret;
1157}
1158
1159
1160static inline int
1161pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
1162		     int cpu, void *arg)
1163{
1164	int ret = 0;
1165	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
1166	return ret;
1167}
1168
1169static inline int
1170pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
1171		     int cpu, void *arg)
1172{
1173	int ret = 0;
1174	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
1175	return ret;
1176}
1177
1178static inline int
1179pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
1180{
1181	int ret = 0;
1182	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
1183	return ret;
1184}
1185
1186static inline int
1187pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
1188{
1189	int ret = 0;
1190	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
1191	return ret;
1192}
1193
1194static pfm_buffer_fmt_t *
1195__pfm_find_buffer_fmt(pfm_uuid_t uuid)
1196{
1197	struct list_head * pos;
1198	pfm_buffer_fmt_t * entry;
1199
1200	list_for_each(pos, &pfm_buffer_fmt_list) {
1201		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
1202		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
1203			return entry;
1204	}
1205	return NULL;
1206}
1207 
1208/*
1209 * find a buffer format based on its uuid
1210 */
1211static pfm_buffer_fmt_t *
1212pfm_find_buffer_fmt(pfm_uuid_t uuid)
1213{
1214	pfm_buffer_fmt_t * fmt;
1215	spin_lock(&pfm_buffer_fmt_lock);
1216	fmt = __pfm_find_buffer_fmt(uuid);
1217	spin_unlock(&pfm_buffer_fmt_lock);
1218	return fmt;
1219}
1220 
1221int
1222pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
1223{
1224	int ret = 0;
1225
1226	/* some sanity checks */
1227	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
1228
1229	/* we need at least a handler */
1230	if (fmt->fmt_handler == NULL) return -EINVAL;
1231
1232	/*
1233	 * XXX: need check validity of fmt_arg_size
1234	 */
1235
1236	spin_lock(&pfm_buffer_fmt_lock);
1237
1238	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
1239		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
1240		ret = -EBUSY;
1241		goto out;
1242	} 
1243	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
1244	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
1245
1246out:
1247	spin_unlock(&pfm_buffer_fmt_lock);
1248 	return ret;
1249}
1250EXPORT_SYMBOL(pfm_register_buffer_fmt);
1251
1252int
1253pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
1254{
1255	pfm_buffer_fmt_t *fmt;
1256	int ret = 0;
1257
1258	spin_lock(&pfm_buffer_fmt_lock);
1259
1260	fmt = __pfm_find_buffer_fmt(uuid);
1261	if (!fmt) {
1262		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
1263		ret = -EINVAL;
1264		goto out;
1265	}
1266	list_del_init(&fmt->fmt_list);
1267	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
1268
1269out:
1270	spin_unlock(&pfm_buffer_fmt_lock);
1271	return ret;
1272
1273}
1274EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
1275
1276static int
1277pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
1278{
1279	unsigned long flags;
1280	/*
1281	 * validy checks on cpu_mask have been done upstream
1282	 */
1283	LOCK_PFS(flags);
1284
1285	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1286		pfm_sessions.pfs_sys_sessions,
1287		pfm_sessions.pfs_task_sessions,
1288		pfm_sessions.pfs_sys_use_dbregs,
1289		is_syswide,
1290		cpu));
1291
1292	if (is_syswide) {
1293		/*
1294		 * cannot mix system wide and per-task sessions
1295		 */
1296		if (pfm_sessions.pfs_task_sessions > 0UL) {
1297			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
1298			  	pfm_sessions.pfs_task_sessions));
1299			goto abort;
1300		}
1301
1302		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
1303
1304		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
1305
1306		pfm_sessions.pfs_sys_session[cpu] = task;
1307
1308		pfm_sessions.pfs_sys_sessions++ ;
1309
1310	} else {
1311		if (pfm_sessions.pfs_sys_sessions) goto abort;
1312		pfm_sessions.pfs_task_sessions++;
1313	}
1314
1315	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1316		pfm_sessions.pfs_sys_sessions,
1317		pfm_sessions.pfs_task_sessions,
1318		pfm_sessions.pfs_sys_use_dbregs,
1319		is_syswide,
1320		cpu));
1321
1322	UNLOCK_PFS(flags);
1323
1324	return 0;
1325
1326error_conflict:
1327	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
1328  		pfm_sessions.pfs_sys_session[cpu]->pid,
1329		smp_processor_id()));
1330abort:
1331	UNLOCK_PFS(flags);
1332
1333	return -EBUSY;
1334
1335}
1336
1337static int
1338pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
1339{
1340	unsigned long flags;
1341	/*
1342	 * validy checks on cpu_mask have been done upstream
1343	 */
1344	LOCK_PFS(flags);
1345
1346	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1347		pfm_sessions.pfs_sys_sessions,
1348		pfm_sessions.pfs_task_sessions,
1349		pfm_sessions.pfs_sys_use_dbregs,
1350		is_syswide,
1351		cpu));
1352
1353
1354	if (is_syswide) {
1355		pfm_sessions.pfs_sys_session[cpu] = NULL;
1356		/*
1357		 * would not work with perfmon+more than one bit in cpu_mask
1358		 */
1359		if (ctx && ctx->ctx_fl_using_dbreg) {
1360			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
1361				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
1362			} else {
1363				pfm_sessions.pfs_sys_use_dbregs--;
1364			}
1365		}
1366		pfm_sessions.pfs_sys_sessions--;
1367	} else {
1368		pfm_sessions.pfs_task_sessions--;
1369	}
1370	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1371		pfm_sessions.pfs_sys_sessions,
1372		pfm_sessions.pfs_task_sessions,
1373		pfm_sessions.pfs_sys_use_dbregs,
1374		is_syswide,
1375		cpu));
1376
1377	UNLOCK_PFS(flags);
1378
1379	return 0;
1380}
1381
1382/*
1383 * removes virtual mapping of the sampling buffer.
1384 * IMPORTANT: cannot be called with interrupts disable, e.g. inside
1385 * a PROTECT_CTX() section.
1386 */
1387static int
1388pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
1389{
1390	int r;
1391
1392	/* sanity checks */
1393	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
1394		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
1395		return -EINVAL;
1396	}
1397
1398	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
1399
1400	/*
1401	 * does the actual unmapping
1402	 */
1403	down_write(&task->mm->mmap_sem);
1404
1405	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
1406
1407	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
1408
1409	up_write(&task->mm->mmap_sem);
1410	if (r !=0) {
1411		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
1412	}
1413
1414	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
1415
1416	return 0;
1417}
1418
1419/*
1420 * free actual physical storage used by sampling buffer
1421 */
1422#if 0
1423static int
1424pfm_free_smpl_buffer(pfm_context_t *ctx)
1425{
1426	pfm_buffer_fmt_t *fmt;
1427
1428	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
1429
1430	/*
1431	 * we won't use the buffer format anymore
1432	 */
1433	fmt = ctx->ctx_buf_fmt;
1434
1435	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
1436		ctx->ctx_smpl_hdr,
1437		ctx->ctx_smpl_size,
1438		ctx->ctx_smpl_vaddr));
1439
1440	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
1441
1442	/*
1443	 * free the buffer
1444	 */
1445	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
1446
1447	ctx->ctx_smpl_hdr  = NULL;
1448	ctx->ctx_smpl_size = 0UL;
1449
1450	return 0;
1451
1452invalid_free:
1453	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
1454	return -EINVAL;
1455}
1456#endif
1457
1458static inline void
1459pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
1460{
1461	if (fmt == NULL) return;
1462
1463	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
1464
1465}
1466
1467/*
1468 * pfmfs should _never_ be mounted by userland - too much of security hassle,
1469 * no real gain from having the whole whorehouse mounted. So we don't need
1470 * any operations on the root directory. However, we need a non-trivial
1471 * d_name - pfm: will go nicely and kill the special-casing in procfs.
1472 */
1473static struct vfsmount *pfmfs_mnt;
1474
1475static int __init
1476init_pfm_fs(void)
1477{
1478	int err = register_filesystem(&pfm_fs_type);
1479	if (!err) {
1480		pfmfs_mnt = kern_mount(&pfm_fs_type);
1481		err = PTR_ERR(pfmfs_mnt);
1482		if (IS_ERR(pfmfs_mnt))
1483			unregister_filesystem(&pfm_fs_type);
1484		else
1485			err = 0;
1486	}
1487	return err;
1488}
1489
1490static void __exit
1491exit_pfm_fs(void)
1492{
1493	unregister_filesystem(&pfm_fs_type);
1494	mntput(pfmfs_mnt);
1495}
1496
1497static ssize_t
1498pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
1499{
1500	pfm_context_t *ctx;
1501	pfm_msg_t *msg;
1502	ssize_t ret;
1503	unsigned long flags;
1504  	DECLARE_WAITQUEUE(wait, current);
1505	if (PFM_IS_FILE(filp) == 0) {
1506		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
1507		return -EINVAL;
1508	}
1509
1510	ctx = (pfm_context_t *)filp->private_data;
1511	if (ctx == NULL) {
1512		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
1513		return -EINVAL;
1514	}
1515
1516	/*
1517	 * check even when there is no message
1518	 */
1519	if (size < sizeof(pfm_msg_t)) {
1520		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
1521		return -EINVAL;
1522	}
1523
1524	PROTECT_CTX(ctx, flags);
1525
1526  	/*
1527	 * put ourselves on the wait queue
1528	 */
1529  	add_wait_queue(&ctx->ctx_msgq_wait, &wait);
1530
1531
1532  	for(;;) {
1533		/*
1534		 * check wait queue
1535		 */
1536
1537  		set_current_state(TASK_INTERRUPTIBLE);
1538
1539		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
1540
1541		ret = 0;
1542		if(PFM_CTXQ_EMPTY(ctx) == 0) break;
1543
1544		UNPROTECT_CTX(ctx, flags);
1545
1546		/*
1547		 * check non-blocking read
1548		 */
1549      		ret = -EAGAIN;
1550		if(filp->f_flags & O_NONBLOCK) break;
1551
1552		/*
1553		 * check pending signals
1554		 */
1555		if(signal_pending(current)) {
1556			ret = -EINTR;
1557			break;
1558		}
1559      		/*
1560		 * no message, so wait
1561		 */
1562      		schedule();
1563
1564		PROTECT_CTX(ctx, flags);
1565	}
1566	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
1567  	set_current_state(TASK_RUNNING);
1568	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
1569
1570	if (ret < 0) goto abort;
1571
1572	ret = -EINVAL;
1573	msg = pfm_get_next_msg(ctx);
1574	if (msg == NULL) {
1575		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
1576		goto abort_locked;
1577	}
1578
1579	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
1580
1581	ret = -EFAULT;
1582  	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
1583
1584abort_locked:
1585	UNPROTECT_CTX(ctx, flags);
1586abort:
1587	return ret;
1588}
1589
1590static ssize_t
1591pfm_write(struct file *file, const char __user *ubuf,
1592			  size_t size, loff_t *ppos)
1593{
1594	DPRINT(("pfm_write called\n"));
1595	return -EINVAL;
1596}
1597
1598static unsigned int
1599pfm_poll(struct file *filp, poll_table * wait)
1600{
1601	pfm_context_t *ctx;
1602	unsigned long flags;
1603	unsigned int mask = 0;
1604
1605	if (PFM_IS_FILE(filp) == 0) {
1606		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
1607		return 0;
1608	}
1609
1610	ctx = (pfm_context_t *)filp->private_data;
1611	if (ctx == NULL) {
1612		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
1613		return 0;
1614	}
1615
1616
1617	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
1618
1619	poll_wait(filp, &ctx->ctx_msgq_wait, wait);
1620
1621	PROTECT_CTX(ctx, flags);
1622
1623	if (PFM_CTXQ_EMPTY(ctx) == 0)
1624		mask =  POLLIN | POLLRDNORM;
1625
1626	UNPROTECT_CTX(ctx, flags);
1627
1628	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));
1629
1630	return mask;
1631}
1632
1633static int
1634pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
1635{
1636	DPRINT(("pfm_ioctl called\n"));
1637	return -EINVAL;
1638}
1639
1640/*
1641 * interrupt cannot be masked when coming here
1642 */
1643static inline int
1644pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
1645{
1646	int ret;
1647
1648	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
1649
1650	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
1651		current->pid,
1652		fd,
1653		on,
1654		ctx->ctx_async_queue, ret));
1655
1656	return ret;
1657}
1658
1659static int
1660pfm_fasync(int fd, struct file *filp, int on)
1661{
1662	pfm_context_t *ctx;
1663	int ret;
1664
1665	if (PFM_IS_FILE(filp) == 0) {
1666		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
1667		return -EBADF;
1668	}
1669
1670	ctx = (pfm_context_t *)filp->private_data;
1671	if (ctx == NULL) {
1672		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
1673		return -EBADF;
1674	}
1675	/*
1676	 * we cannot mask interrupts during this call because this may
1677	 * may go to sleep if memory is not readily avalaible.
1678	 *
1679	 * We are protected from the conetxt disappearing by the get_fd()/put_fd()
1680	 * done in caller. Serialization of this function is ensured by caller.
1681	 */
1682	ret = pfm_do_fasync(fd, filp, ctx, on);
1683
1684
1685	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
1686		fd,
1687		on,
1688		ctx->ctx_async_queue, ret));
1689
1690	return ret;
1691}
1692
1693#ifdef CONFIG_SMP
1694/*
1695 * this function is exclusively called from pfm_close().
1696 * The context is not protected at that time, nor are interrupts
1697 * on the remote CPU. That's necessary to avoid deadlocks.
1698 */
1699static void
1700pfm_syswide_force_stop(void *info)
1701{
1702	pfm_context_t   *ctx = (pfm_context_t *)info;
1703	struct pt_regs *regs = ia64_task_regs(current);
1704	struct task_struct *owner;
1705	unsigned long flags;
1706	int ret;
1707
1708	if (ctx->ctx_cpu != smp_processor_id()) {
1709		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d  but on CPU%d\n",
1710			ctx->ctx_cpu,
1711			smp_processor_id());
1712		return;
1713	}
1714	owner = GET_PMU_OWNER();
1715	if (owner != ctx->ctx_task) {
1716		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
1717			smp_processor_id(),
1718			owner->pid, ctx->ctx_task->pid);
1719		return;
1720	}
1721	if (GET_PMU_CTX() != ctx) {
1722		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
1723			smp_processor_id(),
1724			GET_PMU_CTX(), ctx);
1725		return;
1726	}
1727
1728	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
1729	/*
1730	 * the context is already protected in pfm_close(), we simply
1731	 * need to mask interrupts to avoid a PMU interrupt race on
1732	 * this CPU
1733	 */
1734	local_irq_save(flags);
1735
1736	ret = pfm_context_unload(ctx, NULL, 0, regs);
1737	if (ret) {
1738		DPRINT(("context_unload returned %d\n", ret));
1739	}
1740
1741	/*
1742	 * unmask interrupts, PMU interrupts are now spurious here
1743	 */
1744	local_irq_restore(flags);
1745}
1746
1747static void
1748pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
1749{
1750	int ret;
1751
1752	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
1753	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
1754	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
1755}
1756#endif /* CONFIG_SMP */
1757
1758/*
1759 * called for each close(). Partially free resources.
1760 * When caller is self-monitoring, the context is unloaded.
1761 */
1762static int
1763pfm_flush(struct file *filp)
1764{
1765	pfm_context_t *ctx;
1766	struct task_struct *task;
1767	struct pt_regs *regs;
1768	unsigned long flags;
1769	unsigned long smpl_buf_size = 0UL;
1770	void *smpl_buf_vaddr = NULL;
1771	int state, is_system;
1772
1773	if (PFM_IS_FILE(filp) == 0) {
1774		DPRINT(("bad magic for\n"));
1775		return -EBADF;
1776	}
1777
1778	ctx = (pfm_context_t *)filp->private_data;
1779	if (ctx == NULL) {
1780		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
1781		return -EBADF;
1782	}
1783
1784	/*
1785	 * remove our file from the async queue, if we use this mode.
1786	 * This can be done without the context being protected. We come
1787	 * here when the context has become unreacheable by other tasks.
1788	 *
1789	 * We may still have active monitoring at this point and we may
1790	 * end up in pfm_overflow_handler(). However, fasync_helper()
1791	 * operates with interrupts disabled and it cleans up the
1792	 * queue. If the PMU handler is called prior to entering
1793	 * fasync_helper() then it will send a signal. If it is
1794	 * invoked after, it will find an empty queue and no
1795	 * signal will be sent. In both case, we are safe
1796	 */
1797	if (filp->f_flags & FASYNC) {
1798		DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
1799		pfm_do_fasync (-1, filp, ctx, 0);
1800	}
1801
1802	PROTECT_CTX(ctx, flags);
1803
1804	state     = ctx->ctx_state;
1805	is_system = ctx->ctx_fl_system;
1806
1807	task = PFM_CTX_TASK(ctx);
1808	regs = ia64_task_regs(task);
1809
1810	DPRINT(("ctx_state=%d is_current=%d\n",
1811		state,
1812		task == current ? 1 : 0));
1813
1814	/*
1815	 * if state == UNLOADED, then task is NULL
1816	 */
1817
1818	/*
1819	 * we must stop and unload because we are losing access to the context.
1820	 */
1821	if (task == current) {
1822#ifdef CONFIG_SMP
1823		/*
1824		 * the task IS the owner but it migrated to another CPU: that's bad
1825		 * but we must handle this cleanly. Unfortunately, the kernel does
1826		 * not provide a mechanism to block migration (while the context is loaded).
1827		 *
1828		 * We need to release the resource on the ORIGINAL cpu.
1829		 */
1830		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
1831
1832			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
1833			/*
1834			 * keep context protected but unmask interrupt for IPI
1835			 */
1836			local_irq_restore(flags);
1837
1838			pfm_syswide_cleanup_other_cpu(ctx);
1839
1840			/*
1841			 * restore interrupt masking
1842			 */
1843			local_irq_save(flags);
1844
1845			/*
1846			 * context is unloaded at this point
1847			 */
1848		} else
1849#endif /* CONFIG_SMP */
1850		{
1851
1852			DPRINT(("forcing unload\n"));
1853			/*
1854		 	* stop and unload, returning with state UNLOADED
1855		 	* and session unreserved.
1856		 	*/
1857			pfm_context_unload(ctx, NULL, 0, regs);
1858
1859			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
1860		}
1861	}
1862
1863	/*
1864	 * remove virtual mapping, if any, for the calling task.
1865	 * cannot reset ctx field until last user is calling close().
1866	 *
1867	 * ctx_smpl_vaddr must never be cleared because it is ne…

Large files files are truncated, but you can click here to view the full file