PageRenderTime 43ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/cddl/contrib/dtracetoolkit/dexplorer

https://bitbucket.org/freebsd/freebsd-head/
Korn Shell | 547 lines | 408 code | 39 blank | 100 comment | 22 complexity | bc8c8bbceac569786aa810f8a83ad9e5 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, LGPL-2.0, LGPL-2.1, BSD-2-Clause, 0BSD, JSON, AGPL-1.0, GPL-2.0
  1. #!/usr/bin/ksh
  2. #
  3. # dexplorer - DTrace system explorer, runs a collection of scripts.
  4. # Written using DTrace (Solaris 10 3/05).
  5. #
  6. # This program automatically runs a collection of DTrace scripts to examine
  7. # many areas of the system, and places the output in a meaningful directory
  8. # structure that is tar'd and gzip'd.
  9. #
  10. # $Id: dexplorer 3 2007-08-01 10:50:08Z brendan $
  11. #
  12. # USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
  13. #
  14. # -q # quiet mode
  15. # -y # "yes", don't prompt for confirmation
  16. # -D # don't delete output dir
  17. # -T # don't create output tar.gz
  18. # -d outputdir # output directory
  19. # -i interval # interval for each sample
  20. # eg,
  21. # dexplorer # default is 5 second samples
  22. # dexplorer -y -i30 # no prompting, with 30 second samples
  23. #
  24. # SEE ALSO: DTraceToolkit
  25. #
  26. # THANKS: David Visser, et al. for the idea and encouragement.
  27. #
  28. # COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
  29. #
  30. # CDDL HEADER START
  31. #
  32. # The contents of this file are subject to the terms of the
  33. # Common Development and Distribution License, Version 1.0 only
  34. # (the "License"). You may not use this file except in compliance
  35. # with the License.
  36. #
  37. # You can obtain a copy of the license at Docs/cddl1.txt
  38. # or http://www.opensolaris.org/os/licensing.
  39. # See the License for the specific language governing permissions
  40. # and limitations under the License.
  41. #
  42. # CDDL HEADER END
  43. #
  44. # CODE:
  45. #
  46. # This is currently a monolithic script, and while it contains only
  47. # a few dozen straightforward DTrace scripts I think it's desirable to
  48. # keep it that way. The scripts themselves have been designed to be very
  49. # generic (eg, switching on all sdt:::), and are aggregations to keep a
  50. # limit on the size of the output.
  51. #
  52. # Author: Brendan Gregg [Sydney, Australia]
  53. #
  54. # 23-Jun-2005 Brendan Gregg Created this.
  55. # 28-Jun-2005 " " Last update.
  56. #
  57. # Default variables
  58. #
  59. PATH=/usr/bin:/usr/sbin # restrict command lookup to the system dirs
  60. dtrace=/usr/sbin/dtrace # dtrace binary used for every test
  61. interval=5 # seconds per sample, see -i
  62. samples=20 # number of tests, used for progress and run estimate
  63. current=0 # tests started so far
  64. verbose=1 # 1 prints progress, -q sets 0
  65. prompt=1 # 1 asks to confirm the output dir, -y sets 0
  66. tar=1 # 1 builds the tar.gz, -T sets 0
  67. delete=1 # 1 removes the output dir after archiving, -D sets 0
  68. root=. # parent directory for the output, see -d
  69. dir="de_$(uname -n)_$(date +%Y%m%d%H%M)" # output dir and archive base name
  70. #
  71. # Process options
  72. #
  73. while getopts d:hi:qyDT opt # opt receives each parsed flag letter
  74. do
  75. case "$opt" in
  76. d) root="$OPTARG" ;;
  77. i) interval="$OPTARG" ;;
  78. q) verbose=0 ;;
  79. y) prompt=0 ;;
  80. D) delete=0 ;;
  81. T) tar=0 ;;
  82. *) cat <<-END >&2
  83. USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
  84. -q # quiet mode
  85. -y # "yes", don't prompt for confirmation
  86. -D # don't delete output dir
  87. -T # don't create output tar.gz
  88. -d outputdir # output directory
  89. -i interval # interval for each sample
  90. eg,
  91. dexplorer # default is 5 second samples
  92. dexplorer -y -i30 # no prompting, with 30 second samples
  93. END
  94. exit 1
  95. esac
  96. done
  97. shift $(( OPTIND - 1 ))
  98. #
  99. # Confirm path
  100. #
  101. if [[ "$prompt" == "1" ]] ; then # interactive confirmation, skipped with -y
  102. if [[ "$root" == "." ]]; then
  103. print "Output dir will be the current dir ($PWD)."
  104. else
  105. print "Output dir will be $root"
  106. fi
  107. print -n "Hit enter for yes, or type path: "
  108. read ans junk # first word is the answer, anything after it is discarded
  109. if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
  110. print "WARNING: I didn't ask for \"$ans\"!"
  111. print "\tI was asking for the path or just enter."
  112. print "\tignoring \"$ans\"..."
  113. # really ignore it: $root keeps its value instead of becoming "y" or "yes"
  114. elif [[ "$ans" != "" ]]; then
  115. root=$ans
  116. print "Output is now $root."
  117. fi
  118. fi
  119. #
  120. # Sanity checks
  121. #
  122. if [[ "$interval" == *[!0-9]* ]]; then # reject any non-digit: letters, sign, dot, blanks
  123. print "ERROR2: Invalid interval $interval.\n"
  124. print "Please use a number of seconds."
  125. exit 2
  126. fi
  127. if (( ${#interval} < 1 )) || [[ "$interval" == +(0) ]]; then # empty, or zero seconds
  128. print "ERROR3: Length of interval $interval too short.\n"
  129. print "Minimum 1 second."
  130. exit 3
  131. fi
  132. if [[ ! -d "$root" ]]; then # output parent must already exist
  133. print "ERROR4: Output directory \"$root\" does not exist.\n"
  134. print "Perhaps try a mkdir first?"
  135. print "or use an existing dir, eg \"/tmp\""
  136. exit 4
  137. fi
  138. if [[ ! -w "$root" ]]; then # and must be writable by this user
  139. print "ERROR5: Can't write to output directory \"$root\".\n"
  140. print "Are you logged in as root?"
  141. print "Perhaps try another directory, eg \"/tmp\""
  142. exit 5
  143. fi
  144. if [[ `$dtrace -b1k -qn 'BEGIN { trace(pid); exit(0); }'` == "" ]]; then
  145. print "ERROR6: Unable to run dtrace!\n"
  146. print "Perhaps this is a permission problem? Try running as root."
  147. exit 6
  148. fi
  149. # work out the estimated run length for the start-up message:
  150. # reported in whole minutes once it exceeds 180 seconds
  151. total=$(( interval * samples ))
  152. if (( total <= 180 )); then
  153. total="$total seconds"
  154. else
  155. total="$(( total / 60 )) minutes"
  156. fi
  157. #
  158. # Common Functions
  159. #
  160. function decho { # print the arguments unless quiet mode (-q) cleared $verbose
  161. (( verbose == 0 )) || print "$*"
  162. }
  163. clean="sed /^\$/d" # filter command used after every dtrace run: sed deletes empty lines
  164. header='dtrace:::BEGIN {
  165. printf("%Y, ", walltimestamp);
  166. printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
  167. `utsname.release, `utsname.version, `utsname.machine);
  168. printf("%d secs\n",'$interval');
  169. }
  170. profile:::tick-'$interval'sec { exit(0); }
  171. ' # D prologue passed in front of every script below: prints time, utsname fields and interval, then exits after $interval seconds
  172. function dstatus { # usage: dstatus "description"; prints progress percent and text
  173. if (( verbose )); then
  174. (( percent = current * 100 / samples ))
  175. printf "%3d%% %s\n" $percent "$*" # text is data, so a % in it cannot corrupt the format
  176. fi
  177. (( current = current + 1 )) # count the test even when quiet, keeping the counter accurate
  178. }
  179. ########################################
  180. # START #
  181. ########################################
  182. #
  183. # Make dirs
  184. #
  185. err=0 # accumulates the exit status of the three steps below
  186. cd "$root" # quoted so a -d path containing spaces survives word splitting
  187. (( err = err + $? ))
  188. mkdir "$dir"
  189. (( err = err + $? ))
  190. cd "./$dir" # explicit ./ stops CDPATH from redirecting the cd elsewhere
  191. (( err = err + $? ))
  192. base1=${PWD##*/} # last component of where we actually ended up
  193. base2=${dir##*/} # last component of where we meant to be
  194. if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
  195. print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
  196. print "Check directories before rerunning."
  197. exit 7
  198. fi
  199. mkdir Cpu # one sub-directory per report category
  200. mkdir Disk
  201. mkdir Mem
  202. mkdir Net
  203. mkdir Proc
  204. mkdir Info
  205. #
  206. # Create Log
  207. #
  208. decho "Starting dexplorer ver 0.76."
  209. decho "Sample interval is $interval seconds. Total run is > $total."
  210. { print "dexplorer ver 0.76\n------------------"
  211. print -n "System: "
  212. uname -a
  213. print -n "Start: "
  214. date; } > log
  215. #
  216. # Capture Standard Info
  217. #
  218. args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice'
  219. args="$args,class,vsz,rss,time,pcpu,pmem,args"
  220. uname -a > Info/uname-a # system identification
  221. psrinfo -v > Info/psrinfo-v # processor details
  222. prtconf > Info/prtconf # memory and device tree
  223. df -k > Info/df-k # filesystem usage
  224. ifconfig -a > Info/ifconfig-a # network interfaces
  225. ps -eo $args > Info/ps-o # process listing with the columns built above
  226. uptime > Info/uptime # load averages
  227. #
  228. # Cpu Tests, DTrace
  229. #
  230. dstatus "Interrupts by CPU..." # counts sdt interrupt-start firings, keyed by cpu
  231. $dtrace -qn "$header"'
  232. sdt:::interrupt-start { @num[cpu] = count(); }
  233. dtrace:::END
  234. {
  235. printf("%-16s %16s\n", "CPU", "INTERRUPTS");
  236. printa("%-16d %@16d\n", @num);
  237. }
  238. ' | $clean > Cpu/interrupt_by_cpu # empty lines are stripped by $clean before saving
  239. dstatus "Interrupt times..." # sums vtimestamp deltas from interrupt-start to interrupt-complete, keyed by driver name and instance
  240. $dtrace -qn "$header"'
  241. sdt:::interrupt-start { self->ts = vtimestamp; }
  242. sdt:::interrupt-complete
  243. /self->ts && arg0 != 0/
  244. {
  245. this->devi = (struct dev_info *)arg0;
  246. self->name = this->devi != 0 ?
  247. stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
  248. this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
  249. @num[self->name, this->inst] = sum(vtimestamp - self->ts);
  250. self->name = 0;
  251. }
  252. sdt:::interrupt-complete { self->ts = 0; }
  253. dtrace:::END
  254. {
  255. printf("%11s %16s\n", "DEVICE", "TIME (ns)");
  256. printa("%10s%-3d %@16d\n", @num);
  257. }
  258. ' | $clean > Cpu/interrupt_time
  259. dstatus "Dispatcher queue length by CPU..." # samples disp_nrunnable at profile-1000 into a linear histogram per cpu
  260. $dtrace -qn "$header"'
  261. profile:::profile-1000
  262. {
  263. this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
  264. @length[cpu] = lquantize(this->num, 0, 100, 1);
  265. }
  266. dtrace:::END { printa(" CPU %d%@d\n", @length); }
  267. ' | $clean > Cpu/dispqlen_by_cpu
  268. dstatus "Sdt counts..." # counts every sdt probe firing, keyed by probefunc and probename
  269. $dtrace -qn "$header"'
  270. sdt:::{ @num[probefunc, probename] = count(); }
  271. dtrace:::END
  272. {
  273. printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
  274. printa("%-32s %-32s %@10d\n", @num);
  275. }
  276. ' | $clean > Cpu/sdt_count
  277. #
  278. # Disk Tests, DTrace
  279. #
  280. dstatus "Pages paged in by process..." # sums arg0 of vminfo pgpgin, keyed by pid and execname
  281. $dtrace -qn "$header"'
  282. vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
  283. dtrace:::END
  284. {
  285. printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
  286. printa("%6d %-16s %@16d\n", @pg);
  287. }
  288. ' | $clean > Disk/pgpgin_by_process
  289. dstatus "Files opened successfully count..." # records the path at open entry, counts it on return when arg0 is not -1
  290. $dtrace -qn "$header"'
  291. syscall::open*:entry { self->file = copyinstr(arg0); self->ok = 1; }
  292. syscall::open*:return /self->ok && arg0 != -1/
  293. {
  294. @num[self->file] = count();
  295. }
  296. syscall::open*:return /self->ok/ { self->file = 0; self->ok = 0; }
  297. dtrace:::END
  298. {
  299. printf("%-64s %8s\n", "FILE", "COUNT");
  300. printa("%-64s %@8d\n", @num);
  301. }
  302. ' | $clean > Disk/fileopen_count
  303. dstatus "Disk I/O size distribution by process..." # quantize of b_bcount at io start; no END clause is defined in this script
  304. $dtrace -qn "$header"'
  305. io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
  306. ' | $clean > Disk/sizedist_by_process
  307. #
  308. # Mem Tests, DTrace
  309. #
  310. dstatus "Minor faults by process..." # sums arg0 of vminfo as_fault, keyed by pid and execname
  311. $dtrace -qn "$header"'
  312. vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
  313. dtrace:::END
  314. {
  315. printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
  316. printa("%6d %-16s %@16d\n", @mem);
  317. }
  318. ' | $clean > Mem/minf_by_process
  319. dstatus "Vminfo data by process..." # sums arg0 of every vminfo probe, keyed by pid, execname and probename
  320. $dtrace -qn "$header"'
  321. vminfo::: { @data[pid, execname, probename] = sum(arg0); }
  322. dtrace:::END
  323. {
  324. printf("%6s %-16s %-16s %16s\n",
  325. "PID", "CMD", "STATISTIC", "VALUE");
  326. printa("%6d %-16s %-16s %@16d\n", @data);
  327. }
  328. ' | $clean > Mem/vminfo_by_process
  329. #
  330. # Net Tests, DTrace
  331. #
  332. dstatus "Mib data by mib statistic..." # sums arg0 of every mib probe, keyed by probename
  333. $dtrace -qn "$header"'
  334. mib::: { @data[probename] = sum(arg0); }
  335. dtrace:::END
  336. {
  337. printf("%-32s %16s\n", "STATISTIC", "VALUE");
  338. printa("%-32s %@16d\n", @data);
  339. }
  340. ' | $clean > Net/mib_data
  341. dstatus "TCP write bytes by process..." # sums msgdsize of args[1] at fbt tcp_output entry, keyed by pid and execname
  342. $dtrace -qn "$header"'
  343. fbt:ip:tcp_output:entry
  344. {
  345. this->size = msgdsize(args[1]);
  346. @size[pid, execname] = sum(this->size);
  347. }
  348. dtrace:::END
  349. {
  350. printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
  351. printa("%6d %-16s %@12d\n", @size);
  352. }
  353. ' | $clean > Net/tcpw_by_process
  354. #
  355. # Proc Tests, DTrace
  356. #
  357. dstatus "Sample process @ 1000 Hz..." # counts profile-1000 samples, keyed by pid and pr_psargs
  358. $dtrace -qn "$header"'
  359. profile:::profile-1000
  360. {
  361. @num[pid, curpsinfo->pr_psargs] = count();
  362. }
  363. dtrace:::END
  364. {
  365. printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
  366. printa("%6d %@12d %S\n", @num);
  367. }
  368. ' | $clean > Proc/sample_process
  369. dstatus "Syscall count by process..." # counts syscall entries, keyed by pid, execname and probefunc
  370. $dtrace -qn "$header"'
  371. syscall:::entry { @num[pid, execname, probefunc] = count(); }
  372. dtrace:::END
  373. {
  374. printf("%6s %-24s %-24s %8s\n",
  375. "PID", "CMD", "SYSCALL", "COUNT");
  376. printa("%6d %-24s %-24s %@8d\n", @num);
  377. }
  378. ' | $clean > Proc/syscall_by_process
  379. dstatus "Syscall count by syscall..." # counts syscall entries, keyed by probefunc only
  380. $dtrace -qn "$header"'
  381. syscall:::entry { @num[probefunc] = count(); }
  382. dtrace:::END
  383. {
  384. printf("%-32s %16s\n", "SYSCALL", "COUNT");
  385. printa("%-32s %@16d\n", @num);
  386. }
  387. ' | $clean > Proc/syscall_count
  388. dstatus "Read bytes by process..." # sums arg0 of sysinfo readch, keyed by pid and execname
  389. $dtrace -qn "$header"'
  390. sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
  391. dtrace:::END
  392. {
  393. printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
  394. printa("%6d %-16s %@16d\n", @bytes);
  395. }
  396. ' | $clean > Proc/readb_by_process
  397. dstatus "Write bytes by process..." # sums arg0 of sysinfo writech, keyed by pid and execname
  398. $dtrace -qn "$header"'
  399. sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
  400. dtrace:::END
  401. {
  402. printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
  403. printa("%6d %-16s %@16d\n", @bytes);
  404. }
  405. ' | $clean > Proc/writeb_by_process
  406. dstatus "Sysinfo counts by process..." # sums arg0 of every sysinfo probe, keyed by pid, execname and probename
  407. $dtrace -qn "$header"'
  408. sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
  409. dtrace:::END
  410. {
  411. printf("%6s %-16s %-16s %16s\n",
  412. "PID", "CMD", "STATISTIC", "COUNT");
  413. printa("%6d %-16s %-16s %@16d\n", @num);
  414. }
  415. ' | $clean > Proc/sysinfo_by_process
  416. dstatus "New process counts with arguments..." # counts proc exec-success, keyed by pid, ppid and pr_psargs
  417. $dtrace -qn "$header"'
  418. proc:::exec-success
  419. {
  420. @num[pid, ppid, curpsinfo->pr_psargs] = count();
  421. }
  422. dtrace:::END
  423. {
  424. printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
  425. printa("%6d %6d %@8d %S\n", @num);
  426. }
  427. ' | $clean > Proc/newprocess_count
  428. dstatus "Signal counts..." # counts proc signal-send, keyed by sender execname, args[2] and the pr_fname of args[1]
  429. $dtrace -qn "$header"'
  430. proc:::signal-send {
  431. @num[execname,args[2],stringof(args[1]->pr_fname)] = count();
  432. }
  433. dtrace:::END
  434. {
  435. printf("%-16s %-8s %-16s %8s\n",
  436. "FROM", "SIG", "TO", "COUNT");
  437. printa("%-16s %-8d %-16s %@8d\n", @num);
  438. }
  439. ' | $clean > Proc/signal_count
  440. dstatus "Syscall error counts..." # counts syscall returns whose arg0 is -1, keyed by pid, execname, probefunc and errno
  441. $dtrace -qn "$header"'
  442. syscall:::return /(int)arg0 == -1/
  443. {
  444. @num[pid, execname, probefunc, errno] = count();
  445. }
  446. dtrace:::END
  447. {
  448. printf("%6s %-16s %-32s %-6s %8s\n",
  449. "PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
  450. printa("%6d %-16s %-32s %-6d %@8d\n", @num);
  451. }
  452. ' | $clean > Proc/syscall_errors
  453. ###########
  454. # Done
  455. #
  456. ( print -n "End: "
  457. date ) >> log
  458. decho "100% Done."
  459. archived=0 # becomes 1 only when both tar and gzip succeed
  460. if (( tar )); then
  461. cd ..
  462. tar cf "$dir.tar" "$dir" && gzip "$dir.tar" && archived=1
  463. if (( archived )); then decho "File is $dir.tar.gz"; else print "WARNING: could not create $dir.tar.gz, keeping $dir." >&2; fi
  464. fi
  465. if (( delete && archived )) && cd "$dir"; then # never delete data that was not archived
  466. # this could be all an "rm -r $dir", but since it will be run
  467. # as root on production servers - let's be overly cautious and
  468. # remove only the files and directories this script created,
  469. rm Cpu/interrupt_by_cpu
  470. rm Cpu/interrupt_time
  471. rm Cpu/dispqlen_by_cpu
  472. rm Cpu/sdt_count
  473. rm Disk/pgpgin_by_process
  474. rm Disk/fileopen_count
  475. rm Disk/sizedist_by_process
  476. rm Mem/minf_by_process
  477. rm Mem/vminfo_by_process
  478. rm Net/mib_data
  479. rm Net/tcpw_by_process
  480. rm Proc/sample_process
  481. rm Proc/syscall_by_process
  482. rm Proc/syscall_count
  483. rm Proc/readb_by_process
  484. rm Proc/writeb_by_process
  485. rm Proc/sysinfo_by_process
  486. rm Proc/newprocess_count
  487. rm Proc/signal_count
  488. rm Proc/syscall_errors
  489. rmdir Cpu
  490. rmdir Disk
  491. rmdir Mem
  492. rmdir Net
  493. rmdir Proc
  494. rm Info/uname-a
  495. rm Info/psrinfo-v
  496. rm Info/prtconf
  497. rm Info/df-k
  498. rm Info/ifconfig-a
  499. rm Info/ps-o
  500. rm Info/uptime
  501. rmdir Info
  502. rm log
  503. cd ..
  504. rmdir "$dir"
  505. else
  506. decho "Directory is $dir"
  507. fi