/ec2/hadoop-aws/etc/hadoop-ec2-init-remote.sh

https://github.com/andy327/bixo

#!/usr/bin/env bash
################################################################################
# Script that is run on each EC2 instance on boot. It is passed in the EC2 user
# data, so should not exceed 16K in size.
################################################################################

################################################################################
# Initialize variables
################################################################################

# Slaves are started after the master, and are told its address by sending a
# modified copy of this file which sets the MASTER_HOST variable.
# A node knows if it is the master or not by inspecting the security group
# name. If it is the master then it retrieves its address using instance data.
MASTER_HOST=%MASTER_HOST% # Interpolated before being sent to EC2 node
SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups`
IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'`
if [ "$IS_MASTER" == "true" ]; then
  MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname`
fi
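# A minimal sanity check (sketch, not part of the original flow): if either wget
# against the metadata service above fails, MASTER_HOST can end up empty and every
# config file generated below would point at a bad address. The log path here is
# an assumption, so the check is left commented out:
# if [ -z "$MASTER_HOST" ]; then
#   echo "WARNING: MASTER_HOST could not be determined" >> /mnt/hadoop-init.log
# fi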
# These values get filled in by launch-hadoop-master and launch-hadoop-slaves
# locally to create the version of this file that is sent to the remote instances.
# When the script with the filled-in values is run on the remote instance,
# these variables (and those that depend on them) customize the startup of the
# Hadoop system and related processes.
AWS_ACCESS_KEY_ID=%AWS_ACCESS_KEY_ID%
AWS_SECRET_ACCESS_KEY=%AWS_SECRET_ACCESS_KEY%
INSTANCE_TYPE=%INSTANCE_TYPE%
EXTRA_CORE_SITE_PROPERTIES="%EXTRA_CORE_SITE_PROPERTIES%"
EXTRA_HDFS_SITE_PROPERTIES="%EXTRA_HDFS_SITE_PROPERTIES%"
EXTRA_MAPRED_SITE_PROPERTIES="%EXTRA_MAPRED_SITE_PROPERTIES%"
USER="%USER%"
EXTRA_INIT_CMDS="%EXTRA_INIT_CMDS%"

# USER is not set on boot, so export it explicitly.
export USER

HADOOP_HOME=`ls -d /usr/local/hadoop-*`

# For m1.small slaves, we only get one virtual core (1 EC2 Compute Unit).
#
# For m1.large slaves, we get two virtual cores (2 EC2 Compute Units each) plus
# 7.5GB of RAM, and we also get a second drive which can share the HDFS load.
# Most of the time you're either mapping or reducing, so it's ideal to use
# two map slots and two reduce slots on each slave.
#
# WARNING: Sometimes there is an overlap between map and reduce tasks on a slave,
# and four hot JVMs can work m1.large instances pretty hard.
# You must also be careful with your child JVM heap sizes. With 1GB each for the
# tasktracker and datanode, you've only got about 1.3GB left for each child JVM,
# and you'll want an AMI with swap space to protect you from exec JVMs, etc.
#
# For m2.2xlarge slaves, we get 4 virtual cores (with 3.25 EC2 Compute Units each)
# and 34.2GB of RAM, plus the second drive, so we should be able to run 4 maps
# and 2 reducers simultaneously.
# For m2.4xlarge slaves, we get 8 virtual cores (with 3.25 EC2 Compute Units each)
# and 68.4GB of RAM, plus the second drive, so we should easily be able to run 8 maps
# and 4 reducers simultaneously.
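# Worked sizing sketch (assumed round numbers, not read from the instance): on an
# m1.large with ~7.5GB of RAM, reserving ~1GB each for the tasktracker and datanode
# daemons leaves roughly 5.5GB; split across 4 concurrent child JVMs (2 map + 2
# reduce slots) that is ~1.3GB apiece, which is where -Xmx1312m below comes from.
# TOTAL_RAM_MB=7500; DAEMON_RESERVE_MB=2000; CHILD_SLOTS=4
# echo "per-child heap ~$(( (TOTAL_RAM_MB - DAEMON_RESERVE_MB) / CHILD_SLOTS ))MB"  # ~1375MB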
if [ "$INSTANCE_TYPE" == "m1.small" ]; then
  JOBTRACKER_HEAPSIZE=1000
  MAPRED_CHILD_JAVA_OPTS="-server -Xmx512m"
  NUM_SLAVE_CORES=1
  MAP_TASKS_PER_SLAVE=1
  REDUCE_TASKS_PER_SLAVE=1
  HDFS_DATA_DIR="/mnt/hadoop/dfs/data"
  MAPRED_LOCAL_DIR="/mnt/hadoop/mapred/local"
  IO_SORT_MB=100
  IO_SORT_FACTOR=10
elif [ "$INSTANCE_TYPE" == "m2.2xlarge" ]; then
  JOBTRACKER_HEAPSIZE=4000
  MAPRED_CHILD_JAVA_OPTS="-server -Xmx4g"
  NUM_SLAVE_CORES=4
  MAP_TASKS_PER_SLAVE=4
  REDUCE_TASKS_PER_SLAVE=2
  HDFS_DATA_DIR="/mnt/hadoop/dfs/data,/mnt2/hadoop/dfs/data"
  MAPRED_LOCAL_DIR="/mnt/hadoop/mapred/local"
  mkdir -p /mnt2/hadoop
  IO_SORT_MB=1000
  IO_SORT_FACTOR=100
elif [ "$INSTANCE_TYPE" == "m2.4xlarge" ]; then
  JOBTRACKER_HEAPSIZE=4000
  MAPRED_CHILD_JAVA_OPTS="-server -Xmx4g"
  NUM_SLAVE_CORES=8
  MAP_TASKS_PER_SLAVE=8
  REDUCE_TASKS_PER_SLAVE=4
  HDFS_DATA_DIR="/mnt/hadoop/dfs/data,/mnt2/hadoop/dfs/data"
  MAPRED_LOCAL_DIR="/mnt/hadoop/mapred/local"
  mkdir -p /mnt2/hadoop
  IO_SORT_MB=1000
  IO_SORT_FACTOR=100
else # m1.large, etc.
  JOBTRACKER_HEAPSIZE=4000
  MAPRED_CHILD_JAVA_OPTS="-server -Xmx1312m"
  NUM_SLAVE_CORES=2
  MAP_TASKS_PER_SLAVE=2
  REDUCE_TASKS_PER_SLAVE=2
  HDFS_DATA_DIR="/mnt/hadoop/dfs/data,/mnt2/hadoop/dfs/data"
  MAPRED_LOCAL_DIR="/mnt/hadoop/mapred/local,/mnt2/hadoop/mapred/local"
  mkdir -p /mnt2/hadoop
  IO_SORT_MB=1000
  IO_SORT_FACTOR=100
fi
################################################################################
# Hadoop configuration
# Modify this section to customize your Hadoop cluster.
################################################################################

cat > $HADOOP_HOME/conf/core-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!--- global properties -->
<property>
  <name>hadoop.tmp.dir</name>
  <value>/mnt/hadoop</value>
  <description>A base for other temporary directories.
  AWS EC2 instances have a large /mnt drive for log files, etc.
  </description>
</property>
<!-- i/o properties -->
<property>
  <name>io.file.buffer.size</name>
  <value>65536</value>
  <description>The size of buffer for use in sequence files.
  The size of this buffer should probably be a multiple of hardware
  page size (4096 on Intel x86), and it determines how much data is
  buffered during read and write operations.
  InfoChimps, Cloudera and Datameer all suggest increasing this to 64K.
  </description>
</property>
<property>
  <name>io.sort.mb</name>
  <value>$IO_SORT_MB</value>
</property>
<property>
  <name>io.sort.factor</name>
  <value>$IO_SORT_FACTOR</value>
</property>
<!-- file system properties -->
<property>
  <name>fs.default.name</name>
  <value>hdfs://$MASTER_HOST:50001</value>
  <description>The name of the default file system. A URI whose
  scheme and authority determine the FileSystem implementation. The
  uri's scheme determines the config property (fs.SCHEME.impl) naming
  the FileSystem implementation class. The uri's authority is used to
  determine the host, port, etc. for a filesystem.
  While running in EC2, we want to default to HDFS.
  </description>
</property>
<!-- AWS EC2-specific properties -->
<property>
  <name>fs.s3n.awsAccessKeyId</name>
  <value>$AWS_ACCESS_KEY_ID</value>
</property>
<property>
  <name>fs.s3n.awsSecretAccessKey</name>
  <value>$AWS_SECRET_ACCESS_KEY</value>
</property>
<property>
  <name>fs.sqs.awsAccessKeyId</name>
  <value>$AWS_ACCESS_KEY_ID</value>
</property>
<property>
  <name>fs.sqs.awsSecretAccessKey</name>
  <value>$AWS_SECRET_ACCESS_KEY</value>
</property>
$EXTRA_CORE_SITE_PROPERTIES
</configuration>
EOF
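# Optional well-formedness check (sketch): the heredoc above interpolates shell
# variables straight into XML, so a stray character in EXTRA_CORE_SITE_PROPERTIES
# would silently corrupt the file. Assuming the AMI ships xmllint (not guaranteed):
# xmllint --noout $HADOOP_HOME/conf/core-site.xml || echo "core-site.xml is not well-formed"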
cat > $HADOOP_HOME/conf/hdfs-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
  <name>dfs.permissions</name>
  <value>false</value>
  <description>
  If "true", enable permission checking in HDFS.
  If "false", permission checking is turned off,
  but all other behavior is unchanged.
  Switching from one parameter value to the other does not change the mode,
  owner or group of files or directories.
  TODO CSc Figure out why AWS wants this set to false.
  </description>
</property>
<property>
  <name>dfs.data.dir</name>
  <value>$HDFS_DATA_DIR</value>
  <description>Determines where on the local filesystem a DFS data node
  should store its blocks. If this is a comma-delimited
  list of directories, then data will be stored in all named
  directories, typically on different devices.
  Directories that do not exist are ignored.
  The m1.large instances have two drives, so we set this up above dependent on $INSTANCE_TYPE.
  </description>
</property>
$EXTRA_HDFS_SITE_PROPERTIES
</configuration>
EOF
# TODO CSc Should we install and use an LZO compression codec?
#
cat > $HADOOP_HOME/conf/mapred-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- i/o properties -->
<property>
  <name>mapred.job.tracker</name>
  <value>hdfs://$MASTER_HOST:50002</value>
  <description>The host and port that the MapReduce job tracker runs
  at. If "local", then jobs are run in-process as a single map
  and reduce task.
  Here we specify the AWS EC2 job tracker host and port.
  </description>
</property>
<property>
  <name>mapred.local.dir</name>
  <value>$MAPRED_LOCAL_DIR</value>
  <description>The local directory where MapReduce stores intermediate
  data files. May be a comma-separated list of
  directories on different devices in order to spread disk i/o.
  Directories that do not exist are ignored.
  The m1.large instances have two drives, so we set this up above dependent on $INSTANCE_TYPE.
  </description>
</property>
<property>
  <name>mapred.task.timeout</name>
  <value>1200000</value>
  <description>The number of milliseconds before a task will be
  terminated if it neither reads an input, writes an output, nor
  updates its status string.
  Bump to 20 minutes (was 10 minutes, or 600K ms).
  </description>
</property>
<property>
  <name>mapred.tasktracker.map.tasks.maximum</name>
  <value>$MAP_TASKS_PER_SLAVE</value>
  <description>The maximum number of map tasks that will be run
  simultaneously by a task tracker.
  For m1.small slaves, we only get one virtual core (1 EC2 Compute Unit),
  so we shouldn't be running two map tasks on the same slave simultaneously.
  </description>
</property>
<property>
  <name>mapred.tasktracker.reduce.tasks.maximum</name>
  <value>$REDUCE_TASKS_PER_SLAVE</value>
  <description>The maximum number of reduce tasks that will be run
  simultaneously by a task tracker.
  For m1.small slaves, we only get one virtual core (1 EC2 Compute Unit),
  so we shouldn't be running two reduce tasks on the same slave simultaneously.
  </description>
</property>
<property>
  <name>tasktracker.http.threads</name>
  <value>80</value>
  <description>The number of worker threads for the http server. This is
  used for map output fetching.
  Double the default number of threads, per AWS recommendation.
  </description>
</property>
<property>
  <name>mapred.output.compress</name>
  <value>true</value>
  <description>Should the job outputs be compressed?
  Compress them by default, though jobs can override this.
  </description>
</property>
<property>
  <name>mapred.output.compression.type</name>
  <value>BLOCK</value>
  <description>If the job outputs are to be compressed as SequenceFiles, how should
  they be compressed? Should be one of NONE, RECORD or BLOCK.
  Compressing on a BLOCK basis seems better for both text and sequence files.
  </description>
</property>
<property>
  <name>mapred.compress.map.output</name>
  <value>true</value>
  <description>Should the outputs of the maps be compressed before being
  sent across the network? Uses SequenceFile compression.
  The maps often generate huge files outside DFS, so it's better to compress them.
  </description>
</property>
<property>
  <name>mapred.child.java.opts</name>
  <value>$MAPRED_CHILD_JAVA_OPTS</value>
  <description>Java opts for the task tracker child processes.</description>
</property>
$EXTRA_MAPRED_SITE_PROPERTIES
</configuration>
EOF
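# As the descriptions above note, individual jobs can override these defaults at
# submit time. A hedged illustration (jar, class, and paths are placeholders, and
# it assumes the job uses ToolRunner/GenericOptionsParser to honor -D options):
# "$HADOOP_HOME"/bin/hadoop jar my-job.jar com.example.MyJob \
#   -D mapred.output.compress=false -D mapred.reduce.tasks=4 input/ output/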
# Configure Hadoop for Ganglia
# (overwrite hadoop-metrics.properties)
cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
# Ganglia
# we push to the master gmond so hostnames show up properly
dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
dfs.period=10
dfs.servers=$MASTER_HOST:8649
mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
mapred.period=10
mapred.servers=$MASTER_HOST:8649
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
jvm.period=10
jvm.servers=$MASTER_HOST:8649
EOF
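# Verification sketch: gmond serves its collected state as XML on TCP port 8649, so
# once the daemons are started below, metrics reaching the master can be spot-checked
# with something like the following (nc availability on the AMI is an assumption):
# nc $MASTER_HOST 8649 | grep -c "<HOST NAME"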
################################################################################
# s3cmd configuration
################################################################################

# Note: bash does not tilde-expand "~$USER", so resolve the home directory first.
USER_HOME=`eval echo ~$USER`
cat > $USER_HOME/.s3cfg <<EOF
[default]
access_key = $AWS_ACCESS_KEY_ID
acl_public = False
bucket_location = US
debug_syncmatch = False
default_mime_type = binary/octet-stream
delete_removed = False
dry_run = False
encrypt = False
force = False
gpg_command = /usr/bin/gpg
gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_passphrase =
guess_mime_type = False
host_base = s3.amazonaws.com
host_bucket = %(bucket)s.s3.amazonaws.com
human_readable_sizes = False
preserve_attrs = True
proxy_host =
proxy_port = 0
recv_chunk = 4096
secret_key = $AWS_SECRET_ACCESS_KEY
send_chunk = 4096
simpledb_host = sdb.amazonaws.com
use_https = False
verbosity = WARNING
EOF
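# Smoke-test sketch: with the file above in place, s3cmd run as $USER should pick up
# its credentials automatically; listing the account's buckets is a cheap check
# (left commented out since it needs working credentials and network access):
# su - $USER -c "s3cmd ls"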
################################################################################
# Start services
################################################################################

[ ! -f /etc/hosts ] && echo "127.0.0.1 localhost" > /etc/hosts

mkdir -p /mnt/hadoop/logs

# Increase the default ulimit -n for the root user.
#
# NOTE: This limits.conf content will only take effect for subsequent shells
# (after Hadoop gets running), and therefore would only affect things spawned by the user
# (e.g., 'hadoop fs' operations on the command line) or when the user restarts
# the jobtracker/namenode/tasktracker/datanode by hand.
# I'm not sure about shell commands spawned by Hadoop, but I would imagine
# the limit in effect when the Hadoop process was launched would apply to them.
#
# TODO CSc This could be removed (at least the nofile entries) if our AMI already
# had this in its limits.conf file.
if [ "$IS_MASTER" == "true" ]; then
  cat > /etc/security/limits.conf <<EOF
root soft nofile 32768
root hard nofile 65536
EOF
else
  # We also want to constrain the kernel stack size used for slaves, since we
  # fire up a bunch of threads. We won't need this once Hadoop supports specifying
  # stack size as part of the ulimit settings in the job conf. But for now this
  # needs to be set up before Hadoop starts running.
  cat > /etc/security/limits.conf <<EOF
root soft nofile 32768
root hard nofile 65536
root soft stack 256
root hard stack 256
EOF
fi
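# Since limits.conf only applies to later PAM sessions (see the note above), the new
# nofile/stack limits can be confirmed from a fresh login shell rather than this
# boot-time shell; a sketch, assuming pam_limits is enabled for su on this AMI:
# su - root -c "ulimit -n; ulimit -s"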
if [ "$IS_MASTER" == "true" ]; then
  # MASTER
  # Prep Ganglia
  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
         -e "s|\( *bind *=.*\)|#\1|" \
         -e "s|\( *mute *=.*\)| mute = yes|" \
         -e "s|\( *location *=.*\)| location = \"master-node\"|" \
         /etc/gmond.conf
  mkdir -p /mnt/ganglia/rrds
  chown -R ganglia:ganglia /mnt/ganglia/rrds
  rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd
  service gmond start
  service gmetad start
  apachectl start

  # Hadoop master processes
  # Set the file descriptor limit for the namenode and jobtracker processes.
  # The system-wide file descriptor limit (761408) should be plenty for several processes with
  # this many files open.
  ulimit -n 32768

  # only format on first boot
  [ ! -e /mnt/hadoop/dfs ] && "$HADOOP_HOME"/bin/hadoop namenode -format
  "$HADOOP_HOME"/bin/hadoop-daemon.sh start namenode

  # Temporarily set $HADOOP_HEAPSIZE to the value calculated above while we launch
  # the JobTracker.
  #
  SAVE_HADOOP_HEAPSIZE=$HADOOP_HEAPSIZE
  export HADOOP_HEAPSIZE=$JOBTRACKER_HEAPSIZE
  "$HADOOP_HOME"/bin/hadoop-daemon.sh start jobtracker
  export HADOOP_HEAPSIZE=$SAVE_HADOOP_HEAPSIZE
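  # The save/override/restore dance above could be wrapped in a small helper if more
  # daemons ever needed their own heap size; a sketch (the function name is made up):
  # start_daemon_with_heap() {  # usage: start_daemon_with_heap <heap-mb> <daemon>
  #   local SAVED=$HADOOP_HEAPSIZE
  #   export HADOOP_HEAPSIZE=$1
  #   "$HADOOP_HOME"/bin/hadoop-daemon.sh start "$2"
  #   export HADOOP_HEAPSIZE=$SAVED
  # }
  # start_daemon_with_heap $JOBTRACKER_HEAPSIZE jobtracker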
else
  # SLAVE
  # Prep Ganglia
  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
         -e "s|\( *bind *=.*\)|#\1|" \
         -e "s|\(udp_send_channel {\)|\1\n host=$MASTER_HOST|" \
         /etc/gmond.conf
  service gmond start

  # Hadoop slave processes
  # Set the file descriptor limit for the datanode and tasktracker processes.
  # The system-wide file descriptor limit (761408) should be plenty for several processes with
  # this many files open.
  ulimit -n 32768
  "$HADOOP_HOME"/bin/hadoop-daemon.sh start datanode

  # Reduce the stack size for the tasktracker, since crawling jobs can launch a large number
  # of threads. Note that this ends up getting applied to the NSCD launch immediately below
  # as well, though it's hard to imagine NSCD needing a ton of stack space.
  ulimit -s 256
  "$HADOOP_HOME"/bin/hadoop-daemon.sh start tasktracker
fi
# Values for nscd are:
#   positive-time-to-live  hosts  86400      (1 day)
#   negative-time-to-live  hosts  3600       (1 hour)
#   suggested-size         hosts  331        (prime # a bit bigger than 211)
#   max-db-size            hosts  134217728  (128MB)
if [ "$IS_MASTER" == "false" ]; then
  # NSCD
  sed -r -i -e "s|(^\t*positive-time-to-live\t*hosts\t*)[0-9]+.*$|\186400|" \
            -e "s|(^\t*negative-time-to-live\t*hosts\t*)[0-9]+.*$|\13600|" \
            -e "s|(^\t*suggested-size\t*hosts\t*)[0-9]+.*$|\1331|" \
            -e "s|(^\t*max-db-size\t*hosts\t*)[0-9]+.*$|\1134217728|" \
            /etc/nscd.conf
  service nscd start
fi
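# nscd's effective settings and cache statistics can be inspected after boot with
# "nscd -g" (a post-boot check on a slave; left here as a commented reminder):
# nscd -g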
eval $EXTRA_INIT_CMDS

# Run this script again on next boot by removing the ec2-run-user-data marker files.
rm -f /var/ec2/ec2-run-user-data.*