PageRenderTime 37ms CodeModel.GetById 39ms RepoModel.GetById 0ms app.codeStats 0ms

/papi-4.4.0/src/libpfm-3.y/examples_v3.x/task_smpl_user.c

#
C | 534 lines | 309 code | 82 blank | 143 comment | 50 complexity | 69b134c66746fc321684d89bfa102871 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /*
  2. * task_smpl_user.c - example of a task collecting a profile from user level
  3. *
  4. * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
  5. * Contributed by Stephane Eranian <eranian@hpl.hp.com>
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  11. * of the Software, and to permit persons to whom the Software is furnished to do so,
  12. * subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in all
  15. * copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  18. * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  19. * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  20. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  21. * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
  22. * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. */
  24. #include <sys/types.h>
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <stdarg.h>
  28. #include <errno.h>
  29. #include <unistd.h>
  30. #include <string.h>
  31. #include <signal.h>
  32. #include <stdarg.h>
  33. #include <getopt.h>
  34. #include <time.h>
  35. #include <sys/ptrace.h>
  36. #include <sys/wait.h>
  37. #include <sys/time.h>
  38. #include <syscall.h>
  39. #include <perfmon/perfmon.h>
  40. #include <perfmon/pfmlib.h>
  41. #include "detect_pmcs.h"
  /*
   * Value the sampling counter is armed with (negated by the user of this
   * macro), i.e. the number of events between two overflow notifications.
   */
  #define SAMPLING_PERIOD 100000

  /* Capacity of the local PMC/PMD argument arrays: the library maxima. */
  #define NUM_PMCS PFMLIB_MAX_PMCS
  #define NUM_PMDS PFMLIB_MAX_PMDS

  /*
   * Command line switches. Each field is set to 1 by getopt_long() through
   * the flag pointers stored in the long option table.
   */
  typedef struct {
      int opt_no_show;    /* --no-show: count samples without printing them */
      int opt_block;      /* --ovfl-block: request blocking overflow notification */
      int opt_sys;        /* --system-wide: monitor CPU0 rather than a single task */
  } options_t;
  50. static uint64_t collected_samples;
  51. static pfarg_pmd_attr_t pd[NUM_PMDS];
  52. static unsigned int num_pmds;
  53. static options_t options;
  54. static volatile int terminate;
  55. static struct option the_options[]={
  56. { "help", 0, 0, 1},
  57. { "ovfl-block", 0, &options.opt_block, 1},
  58. { "no-show", 0, &options.opt_no_show, 1},
  59. { "system-wide", 0, &options.opt_sys, 1},
  60. { 0, 0, 0, 0}
  61. };
  62. static void fatal_error(char *fmt,...) __attribute__((noreturn));
  /*
   * Bit-vector helpers. A vector is an array of uint64_t words; bit number
   * rnum is stored in word (rnum / 64) at bit position (rnum % 64).
   */
  #define BPL (sizeof(uint64_t )<<3)  /* bits per word */
  #define LBPL 6                      /* log2(BPL) */

  /*
   * Set bit rnum in bv.
   */
  static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum)
  {
      /*
       * Shift a 64-bit one: with 1UL the shift is undefined for positions
       * >= 32 wherever unsigned long is only 32 bits wide.
       */
      bv[rnum >> LBPL] |= (uint64_t)1 << (rnum & (BPL - 1));
  }

  /*
   * Return 1 when bit rnum is set in bv, 0 otherwise.
   */
  static inline int pfm_bv_isset(const uint64_t *bv, uint16_t rnum)
  {
      return (int)((bv[rnum >> LBPL] >> (rnum & (BPL - 1))) & 1);
  }

  /*
   * Copy the first n bits of vector j into vector d, in whole words.
   *
   * The word count is rounded up so that a trailing partial word is copied
   * too (n = 65 copies two words). As before, at least one word is always
   * copied, even when n is 0.
   */
  static inline void pfm_bv_copy(uint64_t *d, const uint64_t *j, uint16_t n)
  {
      size_t nwords = ((size_t)n + BPL - 1) >> LBPL;

      if (nwords <= 1)
          *d = *j;
      else
          memcpy(d, j, nwords * sizeof(uint64_t));
  }
  /*
   * pin task to CPU
   */
  #ifndef __NR_sched_setaffinity
  #error "you need to define __NR_sched_setaffinity"
  #endif

  #define MAX_CPUS 2048
  /*
   * NOTE(review): this is MAX_CPUS / 8, i.e. a byte count, but it is used
   * below as a number of 64-bit words. The mask is therefore larger than
   * MAX_CPUS bits; the size is left untouched here.
   */
  #define NR_CPU_BITS (MAX_CPUS>>3)

  /*
   * Restrict task pid to the single CPU number cpu.
   *
   * pid: task to pin, passed unchanged to sched_setaffinity.
   * cpu: CPU number; the program exits via fatal_error() when it is not
   *      below MAX_CPUS.
   *
   * Returns the result of the sched_setaffinity system call.
   */
  int
  pin_cpu(pid_t pid, unsigned int cpu)
  {
      uint64_t my_mask[NR_CPU_BITS];

      if (cpu >= MAX_CPUS)
          fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS);

      /*
       * Clear the whole mask first: it lives on the stack, and without this
       * every word but the one assigned below would hold garbage, i.e. a
       * random set of additional CPUs.
       */
      memset(my_mask, 0, sizeof(my_mask));
      my_mask[cpu >> 6] = 1ULL << (cpu & 63);

      return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask);
  }
  /*
   * Print a printf-style diagnostic on stderr and return to the caller.
   *
   * fmt: printf format string, followed by its arguments.
   */
  static void
  warning(char *fmt, ...)
  {
      va_list args;

      va_start(args, fmt);
      (void)vfprintf(stderr, fmt, args);
      va_end(args);
  }
  /*
   * Print a printf-style diagnostic on stderr, then terminate the process
   * with exit status 1. Never returns.
   *
   * fmt: printf format string, followed by its arguments.
   */
  static void
  fatal_error(char *fmt, ...)
  {
      va_list args;

      va_start(args, fmt);
      (void)vfprintf(stderr, fmt, args);
      va_end(args);

      exit(1);
  }
  115. int
  116. child(char **arg)
  117. {
  118. if (options.opt_sys) {
  119. printf("child pinned on CPU0\n");
  120. pin_cpu(getpid(), 0);
  121. }
  122. /*
  123. * force the task to stop before executing the first
  124. * user level instruction
  125. */
  126. ptrace(PTRACE_TRACEME, 0, NULL, NULL);
  127. execvp(arg[0], arg);
  128. /* not reached */
  129. exit(1);
  130. }
  /*
   * Print one line with the elapsed wall-clock time between start and end,
   * followed by the user and system CPU times found in ru. Each duration is
   * shown as hours, minutes, seconds and milliseconds.
   *
   * start, end: wall-clock timestamps; end is expected not to precede start.
   * ru:         resource usage whose ru_utime and ru_stime are reported.
   */
  void
  show_task_rusage(const struct timeval *start, const struct timeval *end, const struct rusage *ru)
  {
      long secs, suseconds, end_usec;
      /*
       * tv_sec is a time_t, which need not be a long. Convert once so that
       * every argument below really matches its %ld conversion.
       */
      long user_secs = (long)ru->ru_utime.tv_sec;
      long sys_secs = (long)ru->ru_stime.tv_sec;

      secs = (long)(end->tv_sec - start->tv_sec);
      end_usec = (long)end->tv_usec;

      /* borrow one second when the microsecond part would go negative */
      if (end_usec < (long)start->tv_usec) {
          end_usec += 1000000;
          secs--;
      }
      suseconds = end_usec - (long)start->tv_usec;

      printf ("real %ldh%02ldm%02ld.%03lds user %ldh%02ldm%02ld.%03lds sys %ldh%02ldm%02ld.%03lds\n",
          secs / 3600,
          (secs % 3600) / 60,
          secs % 60,
          suseconds / 1000,

          user_secs / 3600,
          (user_secs % 3600) / 60,
          user_secs % 60,
          (long)(ru->ru_utime.tv_usec / 1000),

          sys_secs / 3600,
          (sys_secs % 3600) / 60,
          sys_secs % 60,
          (long)(ru->ru_stime.tv_usec / 1000)
      );
  }
  157. static void
  158. process_sample(int fd, unsigned long ip, pid_t pid, pid_t tid, uint16_t cpu)
  159. {
  160. unsigned int j;
  161. if (pfm_read(fd, 0, PFM_RW_PMD_ATTR, pd, num_pmds * sizeof(*pd)))
  162. fatal_error("pfm_read(PMD) error errno %d\n",errno);
  163. if (options.opt_no_show) goto done;
  164. printf("entry %"PRIu64" PID:%d TID: %d CPU:%u LAST_VAL: %"PRIu64" IIP:0x%lx\n",
  165. collected_samples,
  166. pid,
  167. tid,
  168. cpu,
  169. - pd[0].reg_last_value,
  170. ip);
  171. for(j=1; j < num_pmds; j++) {
  172. printf("PMD%-2d = %"PRIu64"\n", pd[j].reg_num, pd[j].reg_value);
  173. }
  174. done:
  175. collected_samples++;
  176. }
  177. static void
  178. cld_handler(int n)
  179. {
  180. terminate = 1;
  181. }
  182. int
  183. mainloop(char **arg)
  184. {
  185. pfmlib_input_param_t inp;
  186. pfmlib_output_param_t outp;
  187. pfarg_pmr_t pc[NUM_PMCS];
  188. pfarg_sinfo_t sif;
  189. struct timeval start_time, end_time;
  190. struct rusage rusage;
  191. pfarg_msg_t msg;
  192. uint64_t ovfl_count = 0;
  193. uint32_t ctx_flags = 0;
  194. pid_t pid;
  195. int status, ret, fd;
  196. unsigned int i, num_counters;
  197. /*
  198. * intialize all locals
  199. */
  200. memset(&inp,0, sizeof(inp));
  201. memset(&outp,0, sizeof(outp));
  202. memset(pc, 0, sizeof(pc));
  203. memset(&sif,0, sizeof(sif));
  204. pfm_get_num_counters(&num_counters);
  205. /*
  206. * locate events
  207. */
  208. if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
  209. fatal_error("cannot find cycle event\n");
  210. if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
  211. fatal_error("cannot find inst retired event\n");
  212. i = 2;
  213. /*
  214. * set the privilege mode:
  215. * PFM_PLM3 : user level
  216. * PFM_PLM0 : kernel level
  217. */
  218. inp.pfp_dfl_plm = PFM_PLM3;
  219. printf("measuring at plm=0x%x\n", inp.pfp_dfl_plm);
  220. if (i > num_counters) {
  221. i = num_counters;
  222. printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
  223. }
  224. /*
  225. * how many counters we use
  226. */
  227. inp.pfp_event_count = i;
  228. inp.pfp_flags = options.opt_sys ? PFMLIB_PFP_SYSTEMWIDE : 0;
  229. /*
  230. * build the pfp_unavail_pmcs bitmask by looking
  231. * at what perfmon has available. It is not always
  232. * the case that all PMU registers are actually available
  233. * to applications. For instance, on IA-32 platforms, some
  234. * registers may be reserved for the NMI watchdog timer.
  235. *
  236. * With this bitmap, the library knows which registers NOT to
  237. * use. Of source, it is possible that no valid assignement may
  238. * be possible if certina PMU registers are not available.
  239. */
  240. get_sif(options.opt_sys? PFM_FL_SYSTEM_WIDE:0, &sif);
  241. detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL);
  242. /*
  243. * let the library figure out the values for the PMCS
  244. */
  245. if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS)
  246. fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
  247. /*
  248. * Now prepare the argument to initialize the PMDs and PMCS.
  249. * We use pfp_pmc_count to determine the number of PMC to intialize.
  250. * We use pfp_pmd_count to determine the number of PMD to initialize.
  251. * Some events/features may cause extra PMCs to be used, leading to:
  252. * - pfp_pmc_count may be >= pfp_event_count
  253. * - pfp_pmd_count may be >= pfp_event_count
  254. */
  255. for (i=0; i < outp.pfp_pmc_count; i++) {
  256. pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
  257. pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
  258. }
  259. for (i=0; i < outp.pfp_pmd_count; i++) {
  260. pd[i].reg_num = outp.pfp_pmds[i].reg_num;
  261. /*
  262. * we also want to reset the other PMDs on
  263. * every overflow. If we do not set
  264. * this, the non-overflowed counters
  265. * will be untouched.
  266. */
  267. if (i)
  268. pfm_bv_set(pd[0].reg_reset_pmds, pd[i].reg_num);
  269. }
  270. /*
  271. * we our sampling counter overflow, we want to be notified.
  272. * The notification will come ONLY when the sampling buffer
  273. * becomes full.
  274. *
  275. * We also activate randomization of the sampling period.
  276. */
  277. pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM;
  278. pd[0].reg_value = - SAMPLING_PERIOD;
  279. pd[0].reg_short_reset = - SAMPLING_PERIOD;
  280. pd[0].reg_long_reset = - SAMPLING_PERIOD;
  281. /*
  282. * setup randomization parameters, we allow a range of up to +256 here.
  283. */
  284. pd[0].reg_random_mask = 0xff;
  285. printf("programming %u PMCS and %u PMDS\n", outp.pfp_pmc_count, inp.pfp_event_count);
  286. /*
  287. * prepare session flags
  288. */
  289. if (options.opt_sys) {
  290. if (options.opt_block)
  291. fatal_error("blocking mode not supported in system-wide\n");
  292. printf("system-wide monitoring on CPU0\n");
  293. pin_cpu(getpid(), 0);
  294. ctx_flags |= PFM_FL_SYSTEM_WIDE;
  295. }
  296. if (options.opt_block)
  297. ctx_flags |= PFM_FL_NOTIFY_BLOCK;
  298. /*
  299. * now create perfmon session
  300. */
  301. fd = pfm_create(ctx_flags, NULL);
  302. if (fd == -1) {
  303. if (errno == ENOSYS) {
  304. fatal_error("Your kernel does not have performance monitoring support!\n");
  305. }
  306. fatal_error("cannot create session %s\n", strerror(errno));
  307. }
  308. /*
  309. * Now program the registers
  310. */
  311. if (pfm_write(fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)))
  312. fatal_error("pfm_write error errno %d\n",errno);
  313. /*
  314. * initialize the PMDs
  315. * To be read, each PMD must be either written or declared
  316. * as being part of a sample (reg_smpl_pmds)
  317. */
  318. if (pfm_write(fd, 0, PFM_RW_PMD_ATTR, pd, outp.pfp_pmd_count * sizeof(*pd)))
  319. fatal_error("pfm_write(PMD) error errno %d\n",errno);
  320. num_pmds = outp.pfp_pmd_count;
  321. signal(SIGCHLD, SIG_IGN);
  322. /*
  323. * Create the child task
  324. */
  325. if ((pid=fork()) == -1)
  326. fatal_error("Cannot fork process\n");
  327. /*
  328. * In order to get the PFM_END_MSG message, it is important
  329. * to ensure that the child task does not inherit the file
  330. * descriptor of the session. By default, file descriptor
  331. * are inherited during exec(). We explicitely close it
  332. * here. We could have set it up through fcntl(FD_CLOEXEC)
  333. * to achieve the same thing.
  334. */
  335. if (pid == 0) {
  336. close(fd);
  337. child(arg);
  338. }
  339. /*
  340. * wait for the child to exec
  341. */
  342. waitpid(pid, &status, WUNTRACED);
  343. /*
  344. * process is stopped at this point
  345. */
  346. if (WIFEXITED(status)) {
  347. warning("task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
  348. goto terminate_session;
  349. }
  350. /*
  351. * attach to either pid or CPU0
  352. */
  353. if (pfm_attach(fd, 0, options.opt_sys ? 0 : pid))
  354. fatal_error("pfm_attach error errno %d\n",errno);
  355. /*
  356. * activate monitoring for stopped task.
  357. * (nothing will be measured at this point
  358. */
  359. if (pfm_set_state(fd, 0, PFM_ST_START))
  360. fatal_error("pfm_set_state(start) error errno %d\n",errno);
  361. if (options.opt_sys)
  362. signal(SIGCHLD, cld_handler);
  363. /*
  364. * detach child. Side effect includes
  365. * activation of monitoring.
  366. */
  367. ptrace(PTRACE_DETACH, pid, NULL, 0);
  368. gettimeofday(&start_time, NULL);
  369. /*
  370. * core loop
  371. */
  372. while(terminate == 0) {
  373. /*
  374. * wait for overflow/end notification messages
  375. */
  376. ret = read(fd, &msg, sizeof(msg));
  377. if (ret == -1) {
  378. if (errno != EINTR) fatal_error("cannot read perfmon msg: %s\n", strerror(errno));
  379. continue;
  380. }
  381. switch(msg.type) {
  382. case PFM_MSG_OVFL: /* one sample to process */
  383. process_sample(fd, msg.pfm_ovfl_msg.msg_ovfl_ip,
  384. msg.pfm_ovfl_msg.msg_ovfl_pid,
  385. msg.pfm_ovfl_msg.msg_ovfl_tid,
  386. msg.pfm_ovfl_msg.msg_ovfl_cpu);
  387. ovfl_count++;
  388. if (pfm_set_state(fd, 0, PFM_ST_RESTART) == -1) {
  389. if (errno != EBUSY)
  390. fatal_error("pfm_set_state(restart) error errno %d\n",errno);
  391. }
  392. break;
  393. case PFM_MSG_END: /* monitored task terminated (not for system-wide) */
  394. printf("task terminated\n");
  395. terminate = 1;
  396. break;
  397. default: fatal_error("unknown message type %d\n", msg.type);
  398. }
  399. }
  400. terminate_session:
  401. /*
  402. * cleanup child
  403. */
  404. wait4(pid, &status, 0, &rusage);
  405. gettimeofday(&end_time, NULL);
  406. /*
  407. * destroy perfmon session
  408. */
  409. close(fd);
  410. printf("%"PRIu64" samples collected in %"PRIu64" buffer overflows\n", collected_samples, ovfl_count);
  411. show_task_rusage(&start_time, &end_time, &rusage);
  412. return 0;
  413. }
  /*
   * Print the command line synopsis on stdout. It lists every switch found
   * in the long option table, --system-wide included.
   */
  static void
  usage(void)
  {
      printf("usage: task_smpl [-h] [--help] [--no-show] [--ovfl-block] [--system-wide] cmd\n");
  }
  419. int
  420. main(int argc, char **argv)
  421. {
  422. pfmlib_options_t pfmlib_options;
  423. int c;
  424. while ((c=getopt_long(argc, argv,"h", the_options, 0)) != -1) {
  425. switch(c) {
  426. case 0: continue;
  427. case 1:
  428. case 'h':
  429. usage();
  430. exit(0);
  431. default:
  432. fatal_error("");
  433. }
  434. }
  435. if (argv[optind] == NULL) {
  436. fatal_error("You must specify a command to execute\n");
  437. }
  438. /*
  439. * pass options to library (optional)
  440. */
  441. memset(&pfmlib_options, 0, sizeof(pfmlib_options));
  442. pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
  443. pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */
  444. pfm_set_options(&pfmlib_options);
  445. /*
  446. * Initialize pfm library (required before we can use it)
  447. */
  448. if (pfm_initialize() != PFMLIB_SUCCESS) {
  449. fatal_error("Can't initialize library\n");
  450. }
  451. return mainloop(argv+optind);
  452. }