PageRenderTime 70ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/mprime-phc-setup

https://bitbucket.org/stqn/shell-tools
#! | 441 lines | 366 code | 75 blank | 0 comment | 0 complexity | 61bed70cd3376c293204c3e391f38cf8 MD5 | raw file
  #!/bin/bash
  # Search for the lowest PHC vids at which mprime still reports no errors.
  # The procedure is not supposed to crash the computer, but it might.
  #####################################
  # Parameters.

  # Duration of each quick test, in seconds. Should be between 15 and 60;
  # a longer duration helps to avoid crashing during the test.
  short_test_length=20

  # Duration of each stress test, in seconds. Should be between 60 and 7200
  # or more. Bigger values are safer, but make the whole run longer.
  long_test_length=320

  # Safety margin applied to the vid that was found. Should be between 1 and 4;
  # bigger values are safer.
  # Suggestions:
  # - use 4 if long_test_length < 60
  # - use 3 if long_test_length >= 60 and < 240
  # - use 2 if long_test_length >= 240
  # - use 1 only if long_test_length >= 3600
  safety_vid_delta=2

  debug=0
  amd_max_vid=124

  # Tolerance for inaccurate frequencies during the test (in percent)
  # Added because of https://bbs.archlinux.org/viewtopic.php?pid=1301126#p1301126
  # NOTE: actually it seems that the problem is different: some CPUs have a bogus 1st
  # frequency that is 1 MHz higher than the real top frequency (the 2nd one).
  wrong_freq_tolerance=0
  #####################################
  # Raise any setting that is below its accepted minimum.
  (( short_test_length >= 15 )) || {
    echo "Forcing short_test_length to 15 seconds."
    short_test_length=15
  }
  (( long_test_length >= 30 )) || {
    echo "Forcing long_test_length to 30 seconds."
    long_test_length=30
  }
  (( safety_vid_delta >= 1 )) || {
    echo "Forcing safety vid delta to 1."
    safety_vid_delta=1
  }
  # Need root privileges to change the vids.
  # The effective UID is tested instead of the user name, so a UID-0 account
  # that is not called "root" is accepted as well.
  if (( EUID != 0 )); then
    echo "Run me as root."
    exit 1
  fi
  # Check that mprime is available. "command -v" is a shell builtin, so this
  # does not depend on the external "which" program being installed.
  if command -v mprime &>/dev/null; then
    mp="mprime -t"
  else
    echo "mprime is not in the path."
    if [[ ! -e ./mprime ]]; then
      echo "No mprime in the current directory either... Aborting."
      exit 1
    fi
    # A file that cannot be executed would never write "FATAL" to the log,
    # and every vid would then wrongly look stable.
    if [[ ! -x ./mprime ]]; then
      echo "./mprime is not executable... Aborting."
      exit 1
    fi
    echo "Using mprime from the current directory."
    mp="./mprime -t"
  fi
  # Check that PHC is active
  cpuf=/sys/devices/system/cpu/cpu0/cpufreq
  if [[ ! -e $cpuf/phc_default_vids ]]; then
    echo "The PHC module doesn't seem to be loaded."
    exit 1
  fi
  # The search direction depends on the loaded PHC module: when phc_k8 (AMD)
  # shows up in the lsmod output the vid is stepped upwards to amd_max_vid,
  # otherwise it is stepped downwards to 0.
  case "$(lsmod)" in
    *phc_k8*)
      vid_delta=1
      vid_limit=$amd_max_vid
      ;;
    *)
      vid_delta=-1
      vid_limit=0
      ;;
  esac
  # Tell the user what may happen, then wait for RETURN before going on.
  printf '%s\n' \
    "" \
    "Warning: this might crash your computer or applications." \
    "Please save all your work and don't do anything while the test is running." \
    "You can stop the test at any time with CTRL-C." \
    "Press RETURN to go on or CTRL-C to cancel."
  read
  # Write one value to the same cpufreq attribute of every CPU.
  # Arguments:
  #   $1 = name of the attribute file inside each cpu*/cpufreq/ directory
  #   $2 = value to write
  #   $3 = (optional) directory containing the cpu* entries;
  #        defaults to /sys/devices/system/cpu
  # The loop variable is local so that the caller's variables are left alone.
  function set_sys_val
  {
    local cpu_root=${3:-/sys/devices/system/cpu}
    local attr_file
    #echo Writing $2 to $1
    for attr_file in "$cpu_root"/cpu*/cpufreq/"$1"; do
      echo "$2" > "$attr_file"
    done
  }
  # Print "<path relative to /sys/devices/system/cpu/> = <content>" for the
  # governor, the current frequencies and the vids of every CPU.
  # The loop variable is local and the prefix is removed with a parameter
  # expansion, so no sed process is started per file.
  function debug_info
  {
    local attr_path
    for attr_path in /sys/devices/system/cpu/cpu*/cpufreq/{scaling_governor,cpuinfo_cur_freq,scaling_cur_freq,phc_vids}; do
      printf '%s = %s\n' "${attr_path#/sys/devices/system/cpu/}" "$(cat "$attr_path")"
    done
  }
  # File to save the state in order to continue after a crash
  crash_state="/var/tmp/$(basename "$0").state"
  # Log file for mprime. It is created with mktemp because this script runs as
  # root: a predictable name in /tmp could be pre-created or symlinked by
  # another user. This is done before anything is changed on the system, so a
  # failure here leaves nothing to restore.
  if ! mp_log=$(mktemp "/tmp/$(basename "$0").mp.XXXXXX"); then
    echo "Cannot create a log file for mprime."
    exit 1
  fi
  # process ID of mprime
  mp_pid=-1
  # Store stuff to be able to cleanup later
  backup_governor=$(cat "$cpuf/scaling_governor")
  backup_phc_vids=$(cat "$cpuf/phc_vids")
  # The governor is switched unconditionally (the check below stays disabled).
  #if [[ $backup_governor != userspace ]]; then
  echo "Switching to the userspace governor. $backup_governor will be restored later."
  modprobe cpufreq_userspace
  set_sys_val scaling_governor userspace
  #fi
  # Start mprime in the background and remember its PID in mp_pid.
  # Reads:  mp (command line, split into words on purpose), mp_log (log path)
  # Writes: mp_pid
  # The log path is quoted: unquoted, a path containing whitespace makes the
  # redirection fail ("ambiguous redirect") and mprime is never started.
  function launch_mprime
  {
    # shellcheck disable=SC2086 -- $mp holds the command and its option
    $mp &>"$mp_log" &
    mp_pid=$!
  }
  # Stop the background mprime, if one was started, and reset mp_pid to -1.
  # Nothing is done when mp_pid does not hold a PID greater than 1.
  function kill_mprime
  {
    if (( mp_pid > 1 )); then
      kill -9 $mp_pid
      # Reaping the process here keeps bash from printing its "killed" message.
      wait $mp_pid &>/dev/null
      mp_pid=-1
    fi
  }
  # Undo everything the script changed: stop mprime, write back the saved vids
  # and governor, and remove the mprime log and the crash-state file.
  # Reads: backup_phc_vids, backup_governor, mp_log, crash_state
  # The paths given to rm are quoted and preceded by "--", so a path that
  # contains whitespace or glob characters cannot remove something else.
  function cleanup
  {
    echo ""
    echo "Restoring state..."
    kill_mprime
    # Restore vids
    set_sys_val phc_vids "$backup_phc_vids"
    # Restore governor
    set_sys_val scaling_governor "$backup_governor"
    # Delete log
    if [[ -e "$mp_log" ]]; then
      rm -- "$mp_log"
    fi
    # Delete state file
    if [[ -e "$crash_state" ]]; then
      rm -- "$crash_state"
    fi
  }
  # Restore original state whenever the script exits
  trap cleanup EXIT
  # List all vids and frequencies of cpu0 (read through $cpuf, like the other
  # cpu0 reads in this script).
  freqs=$(cat "$cpuf/scaling_available_frequencies")
  default_vids=$(cat "$cpuf/phc_default_vids")
  # Both files hold whitespace-separated numbers; splitting them into words
  # is what fills the arrays.
  # shellcheck disable=SC2206
  freq=($freqs)
  # shellcheck disable=SC2206
  vid=($default_vids)
  nb_freqs=${#freq[@]}
  nb_vids=${#vid[@]}
  # Without any frequency the search below would do nothing and still report
  # an empty vid list as its result, so stop here instead.
  if (( nb_freqs == 0 )); then
    echo "Error: no frequencies found in $cpuf/scaling_available_frequencies!"
    exit 1
  fi
  if (( nb_freqs != nb_vids )); then
    echo "Error: number of vids and number of frequencies differ!"
    exit 1
  fi
  # Check that writing to scaling_max_freq works (I had this problem).
  # This check is currently disabled:
  #set_sys_val scaling_max_freq ${freq[1]}
  #if [[ $backup_scaling_max_freq == $(cat $cpuf/scaling_max_freq) ]]; then
  # echo "Error: cannot write to scaling_max_freq!"
  # echo "Try updating your kernel, rebooting and/or reinstalling PHC."
  # exit 1
  #fi
  # Print the number of short tests used for the time estimate.
  # Arguments: $1 = default vid of the first frequency
  #            $2 = vid at which the search stops (vid_limit)
  #            $3 = search direction (vid_delta: -1 or 1)
  # With a downward search to 0 this is "$1 - 2"; with an upward search the
  # distance to the limit is used the same way. Never negative.
  function estimate_short_steps
  {
    local start=$1 limit=$2 direction=$3
    local steps=$(( (limit - start) * direction - 2 ))
    if (( steps < 0 )); then
      steps=0
    fi
    echo "$steps"
  }
  # Estimate length of test
  short_steps=$(estimate_short_steps "${vid[0]}" "$vid_limit" "$vid_delta")
  estimate_min=$((short_test_length * short_steps + long_test_length))
  estimate_max=$((short_test_length * short_steps + nb_freqs * long_test_length * 3 / 2))
  # Print a duration in readable form, without a trailing newline.
  # input: $1 = number of seconds (a number, or the name of a variable that
  #        holds one, since the value is only used in arithmetic)
  # output: "xx d yy h" from one day on, "xx h yy min" from one hour on,
  #         otherwise "yy min"
  function print_time
  {
    local seconds=$1
    local -a parts=()
    local whole_days=$(( seconds / 3600 / 24 ))
    local left=$(( seconds - whole_days * 3600 * 24 ))
    local whole_hours=$(( left / 3600 ))
    local whole_minutes=$(( (left - whole_hours * 3600) / 60 ))
    if (( whole_days > 0 )); then
      parts+=("$whole_days d")
    fi
    if (( whole_hours > 0 || whole_days > 0 )); then
      parts+=("$whole_hours h")
    fi
    # Minutes are only shown for durations shorter than a day.
    if (( whole_days == 0 )); then
      parts+=("$whole_minutes min")
    fi
    local IFS=' '
    printf '%s' "${parts[*]}"
  }
  # Show the estimated duration range computed above on a single line.
  printf 'Estimated time to completion: between %s and %s\n' \
    "$(print_time estimate_min)" \
    "$(print_time estimate_max)"
  # For each available frequency, try to lower the voltage as much as possible.
  # 1st pass: step the vid towards vid_limit, testing mprime for a short time at each step.
  # If an error is detected, go back one step and continue with pass 2.
  # If vid_limit is reached, continue with pass 2.
  # 2nd pass: test cur_vid for a long time.
  # If there is an error, go back one more step and loop.
  # Stop when there is no error or cur_vid is within safety_vid_delta steps of the default vid.
  # Final step: best vid = cur_vid moved back by safety_vid_delta steps (never past the default vid).
  # set_vid index vid
  # Write a phc_vids list in which entry <index> is <vid> and every other
  # entry is its default vid, to all CPUs.
  function set_vid
  {
    local -a entries=()
    local n
    for (( n = 0; n < nb_freqs; n++ )); do
      if (( n == $1 )); then
        entries+=("$2")
      else
        entries+=("${vid[n]}")
      fi
    done
    # "${entries[*]}" joins the entries with single spaces.
    local vid_list="${entries[*]}"
    set_sys_val phc_vids "$vid_list"
  }
  # Save progress in order to be able to continue after a crash.
  # The state file holds three lines:
  # - current frequency index, between 0 and nb_freqs - 1
  # - current VID
  # - current list of best VIDs found
  function save_state
  {
    # Go back to the default vids first, to avoid a possible crash while
    # the file is being written to disk.
    set_sys_val phc_vids "$default_vids"
    printf '%s\n' "$f" "$cur_vid" "$final_vids" >"$crash_state"
    sync
    sleep 1
  }
  # Load the saved progress from the state file:
  # f gets the first line, cur_vid the second line, final_vids the last line.
  function read_state
  {
    local line line_no=0
    f=""
    cur_vid=""
    final_vids=""
    while IFS= read -r line || [[ -n $line ]]; do
      (( ++line_no ))
      if (( line_no == 1 )); then
        f=$line
      elif (( line_no == 2 )); then
        cur_vid=$line
      fi
      # Overwritten on every line, so it ends up holding the last one.
      final_vids=$line
    done < "$crash_state"
    #echo f=$f
    #echo cur_vid=$cur_vid
    #echo final_vids="-"$final_vids"-"
  }
  # A leftover state file probably means the computer crashed during a
  # previous run; without one the search starts from the first frequency.
  if [[ ! -e "$crash_state" ]]; then
    f=0
    cur_vid=${vid[0]}
    pass2=0
  else
    read_state
    printf '%s\n' \
      "" \
      "State file $crash_state detected." \
      "Frequency: $f" \
      "Current VID: $cur_vid" \
      "Current best vids: $final_vids" \
      "Press return to continue from this point after a crash, or CTRL-C to delete the state file."
    read
    # There was a crash at $cur_vid, so move two steps back (the main loop
    # takes one step forward again) and go directly to pass 2.
    cur_vid=$(( cur_vid - 2 * vid_delta ))
    pass2=1
  fi
  # Search loop: one iteration per frequency, starting at index f.
  for (( ; f < nb_freqs; f++ )); do
    echo ""
    echo "Testing frequency $f (${freq[f]})..."
    echo "Default vid: ${vid[f]}"

    # Based on comments found on this page:
    # http://openmindedbrain.info/09/05/2010/undervolting-in-ubuntu-10-04-lucid-lts/
    # it appears that some processors have a bogus 1st frequency that will always go
    # down to VID 0 and is in fact unused.
    if (( f == 0 && ${freq[0]} == ${freq[1]} + 1000 )); then
      echo "Looks like the first frequency is bogus; ignoring it."
      final_vids="${vid[0]} "
      continue
    fi

    # Pass 1: stepping the vid quickly until there is an error.
    # Never start further back than the default vid of this frequency.
    if (( cur_vid*vid_delta < ${vid[f]}*vid_delta && pass2 == 0 )); then
      #echo "forcing cur vid to max"
      cur_vid=${vid[f]}
    fi
    cur_vid=$(( cur_vid + vid_delta ))
    for (( ; cur_vid*vid_delta < vid_limit*vid_delta && pass2 == 0; cur_vid += vid_delta )); do
      count=$short_test_length
      echo "Trying vid $cur_vid for $count seconds"
      save_state
      set_vid "$f" "$cur_vid"
      # It looks like changing the VID also resets the frequency, so set frequency here
      set_sys_val scaling_setspeed "${freq[f]}"
      launch_mprime
      if (( debug )); then
        sleep 1
        debug_info
      fi
      for (( ; count > 0; count-- )); do
        sleep 1
        echo -n "."
        if grep FATAL "$mp_log" &>/dev/null; then
          kill_mprime
          echo ""
          echo "Hardware failure detected."
          # Go back to the last vid that passed and leave pass 1 entirely.
          cur_vid=$(( cur_vid - vid_delta ))
          break 2
        fi
        cur_freq=$(cat "$cpuf/scaling_cur_freq")
        freq_floor=$(( ${freq[f]} * (100 - wrong_freq_tolerance) / 100 ))
        freq_ceiling=$(( ${freq[f]} * (100 + wrong_freq_tolerance) / 100 ))
        if (( cur_freq < freq_floor || cur_freq > freq_ceiling )); then
          echo ""
          echo "ERROR: Wrong frequency! (${cur_freq} instead of ${freq[f]})"
          debug_info
          exit 1
        fi
      done
      echo ""
      kill_mprime
    done

    # Pass 2: stress testing for a longer time and stepping back in case of an error.
    for (( ; cur_vid*vid_delta <= vid_limit && cur_vid*vid_delta > (${vid[f]}+safety_vid_delta*vid_delta)*vid_delta; cur_vid -= vid_delta )); do
      count=$long_test_length
      echo "Trying vid $cur_vid for $count seconds"
      save_state
      set_vid "$f" "$cur_vid"
      set_sys_val scaling_setspeed "${freq[f]}"
      launch_mprime
      if (( debug )); then
        debug_info
      fi
      for (( ; count > 0; count-- )); do
        sleep 1
        echo -n "."
        if grep FATAL "$mp_log" &>/dev/null; then
          echo ""
          echo "Hardware failure detected."
          break
        fi
      done
      kill_mprime
      # count only reaches 0 when the whole test ran without an error.
      if (( count == 0 )); then
        break
      fi
    done

    echo ""
    echo "Found working vid. Adding $safety_vid_delta for safety."
    if (( cur_vid*vid_delta > vid_limit*vid_delta )); then
      cur_vid=$vid_limit
    fi
    # Keep the default vid when the safety margin would go past it.
    if (( (cur_vid - safety_vid_delta*vid_delta)*vid_delta < ${vid[f]}*vid_delta )); then
      final_vids+=${vid[f]}
    else
      final_vids+=$(( cur_vid - safety_vid_delta*vid_delta ))
    fi
    if (( f < nb_freqs-1 )); then
      final_vids+=" "
    fi
    echo "Current results: $final_vids"
    pass2=0
  done
  # Print the result, then tell the user where the final vids can be stored.
  printf '%s\n' \
    "" \
    "All done." \
    "Default vids: $default_vids" \
    "Final vids: $final_vids" \
    ""
  # Known configuration files, in order of preference:
  # - /etc/default/phc-intel: newest version of the AUR phc-intel package
  # - /etc/phc-intel.conf: old version of the AUR phc-intel package
  # - /etc/conf.d/phc-intel: old version of the AUR phc-intel package, or
  #   current dkms-phc-intel package
  # - /etc/default/phc-k8: newest version of the AUR phc-k8 package
  conf_file=""
  for candidate in \
    /etc/default/phc-intel \
    /etc/phc-intel.conf \
    /etc/conf.d/phc-intel \
    /etc/default/phc-k8; do
    if [[ -e $candidate ]]; then
      conf_file=$candidate
      break
    fi
  done
  if [[ -n $conf_file ]]; then
    echo "Edit $conf_file to add your final vids."
  else
    echo "If your system has a working rc.local (probably not the case with systemd), you can add the following 3 lines to /etc/rc.local, before the final \"exit 0\":"
    echo ""
    echo "for i in /sys/devices/system/cpu/cpu*/cpufreq/phc_vids; do"
    echo " echo \"$final_vids\" > \$i"
    echo "done"
  fi