/boto-2.5.2/tests/unit/emr/test_emr_responses.py
Python | 373 lines | 345 code | 6 blank | 22 comment | 0 complexity | be83a998fbfcdefa6fc2fd55dccd4172 MD5 | raw file
- # Copyright (c) 2010 Jeremy Thurgood <firxen+boto@gmail.com>
- #
- # Permission is hereby granted, free of charge, to any person obtaining a
- # copy of this software and associated documentation files (the
- # "Software"), to deal in the Software without restriction, including
- # without limitation the rights to use, copy, modify, merge, publish, dis-
- # tribute, sublicense, and/or sell copies of the Software, and to permit
- # persons to whom the Software is furnished to do so, subject to the fol-
- # lowing conditions:
- #
- # The above copyright notice and this permission notice shall be included
- # in all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
- # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
- # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- # IN THE SOFTWARE.
- # NOTE: These tests only cover the very simple cases I needed to test
- # for the InstanceGroup fix.
- import xml.sax
- import unittest
- from boto import handler
- from boto.emr import emrobject
- from boto.resultset import ResultSet
# Canned DescribeJobFlows response (2009-01-15 namespace) describing a single
# job flow in the STARTING state with one PENDING step and no InstanceGroups
# element.  Parsed by test_JobFlows_example.
- JOB_FLOW_EXAMPLE = """
- <DescribeJobFlowsResponse
- xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-01-15">
- <DescribeJobFlowsResult>
- <JobFlows>
- <member>
- <ExecutionStatusDetail>
- <CreationDateTime>2009-01-28T21:49:16Z</CreationDateTime>
- <StartDateTime>2009-01-28T21:49:16Z</StartDateTime>
- <State>STARTING</State>
- </ExecutionStatusDetail>
- <Name>MyJobFlowName</Name>
- <LogUri>mybucket/subdir/</LogUri>
- <Steps>
- <member>
- <ExecutionStatusDetail>
- <CreationDateTime>2009-01-28T21:49:16Z</CreationDateTime>
- <State>PENDING</State>
- </ExecutionStatusDetail>
- <StepConfig>
- <HadoopJarStep>
- <Jar>MyJarFile</Jar>
- <MainClass>MyMailClass</MainClass>
- <Args>
- <member>arg1</member>
- <member>arg2</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>MyStepName</Name>
- <ActionOnFailure>CONTINUE</ActionOnFailure>
- </StepConfig>
- </member>
- </Steps>
- <JobFlowId>j-3UN6WX5RRO2AG</JobFlowId>
- <Instances>
- <Placement>
- <AvailabilityZone>us-east-1a</AvailabilityZone>
- </Placement>
- <SlaveInstanceType>m1.small</SlaveInstanceType>
- <MasterInstanceType>m1.small</MasterInstanceType>
- <Ec2KeyName>myec2keyname</Ec2KeyName>
- <InstanceCount>4</InstanceCount>
- <KeepJobFlowAliveWhenNoSteps>true</KeepJobFlowAliveWhenNoSteps>
- </Instances>
- </member>
- </JobFlows>
- </DescribeJobFlowsResult>
- <ResponseMetadata>
- <RequestId>9cea3229-ed85-11dd-9877-6fad448a8419</RequestId>
- </ResponseMetadata>
- </DescribeJobFlowsResponse>
- """
# Canned DescribeJobFlows response (2009-03-31 namespace) describing a single
# COMPLETED job flow.  It contains six <Steps> members and two
# <InstanceGroups> members (roles MASTER and CORE); test_JobFlows_completed
# parses it and asserts on exactly those counts.
- JOB_FLOW_COMPLETED = """
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
- <DescribeJobFlowsResult>
- <JobFlows>
- <member>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <LastStateChangeReason>Steps completed</LastStateChangeReason>
- <StartDateTime>2010-10-21T01:03:59Z</StartDateTime>
- <ReadyDateTime>2010-10-21T01:03:59Z</ReadyDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:44:18Z</EndDateTime>
- </ExecutionStatusDetail>
- <BootstrapActions/>
- <Name>RealJobFlowName</Name>
- <LogUri>s3n://example.emrtest.scripts/jobflow_logs/</LogUri>
- <Steps>
- <member>
- <StepConfig>
- <HadoopJarStep>
- <Jar>s3n://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar</Jar>
- <Args>
- <member>s3n://us-east-1.elasticmapreduce/libs/state-pusher/0.1/fetch</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>Setup Hadoop Debugging</Name>
- <ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
- </StepConfig>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <StartDateTime>2010-10-21T01:03:59Z</StartDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:04:22Z</EndDateTime>
- </ExecutionStatusDetail>
- </member>
- <member>
- <StepConfig>
- <HadoopJarStep>
- <Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
- <Args>
- <member>-mapper</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-InitialMapper.py</member>
- <member>-reducer</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-InitialReducer.py</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/20/*</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/19/*</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/18/*</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/17/*</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/16/*</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/15/*</member>
- <member>-input</member>
- <member>s3://example.emrtest.data/raw/2010/10/14/*</member>
- <member>-output</member>
- <member>s3://example.emrtest.crunched/</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>testjob_Initial</Name>
- <ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
- </StepConfig>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <StartDateTime>2010-10-21T01:04:22Z</StartDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:36:18Z</EndDateTime>
- </ExecutionStatusDetail>
- </member>
- <member>
- <StepConfig>
- <HadoopJarStep>
- <Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
- <Args>
- <member>-mapper</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step1Mapper.py</member>
- <member>-reducer</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step1Reducer.py</member>
- <member>-input</member>
- <member>s3://example.emrtest.crunched/*</member>
- <member>-output</member>
- <member>s3://example.emrtest.step1/</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>testjob_step1</Name>
- <ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
- </StepConfig>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <StartDateTime>2010-10-21T01:36:18Z</StartDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:37:51Z</EndDateTime>
- </ExecutionStatusDetail>
- </member>
- <member>
- <StepConfig>
- <HadoopJarStep>
- <Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
- <Args>
- <member>-mapper</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step2Mapper.py</member>
- <member>-reducer</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step2Reducer.py</member>
- <member>-input</member>
- <member>s3://example.emrtest.crunched/*</member>
- <member>-output</member>
- <member>s3://example.emrtest.step2/</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>testjob_step2</Name>
- <ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
- </StepConfig>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <StartDateTime>2010-10-21T01:37:51Z</StartDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:39:32Z</EndDateTime>
- </ExecutionStatusDetail>
- </member>
- <member>
- <StepConfig>
- <HadoopJarStep>
- <Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
- <Args>
- <member>-mapper</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step3Mapper.py</member>
- <member>-reducer</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step3Reducer.py</member>
- <member>-input</member>
- <member>s3://example.emrtest.step1/*</member>
- <member>-output</member>
- <member>s3://example.emrtest.step3/</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>testjob_step3</Name>
- <ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
- </StepConfig>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <StartDateTime>2010-10-21T01:39:32Z</StartDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:41:22Z</EndDateTime>
- </ExecutionStatusDetail>
- </member>
- <member>
- <StepConfig>
- <HadoopJarStep>
- <Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
- <Args>
- <member>-mapper</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step4Mapper.py</member>
- <member>-reducer</member>
- <member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step4Reducer.py</member>
- <member>-input</member>
- <member>s3://example.emrtest.step1/*</member>
- <member>-output</member>
- <member>s3://example.emrtest.step4/</member>
- </Args>
- <Properties/>
- </HadoopJarStep>
- <Name>testjob_step4</Name>
- <ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
- </StepConfig>
- <ExecutionStatusDetail>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <StartDateTime>2010-10-21T01:41:22Z</StartDateTime>
- <State>COMPLETED</State>
- <EndDateTime>2010-10-21T01:43:03Z</EndDateTime>
- </ExecutionStatusDetail>
- </member>
- </Steps>
- <JobFlowId>j-3H3Q13JPFLU22</JobFlowId>
- <Instances>
- <SlaveInstanceType>m1.large</SlaveInstanceType>
- <MasterInstanceId>i-64c21609</MasterInstanceId>
- <Placement>
- <AvailabilityZone>us-east-1b</AvailabilityZone>
- </Placement>
- <InstanceGroups>
- <member>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <InstanceRunningCount>0</InstanceRunningCount>
- <StartDateTime>2010-10-21T01:02:09Z</StartDateTime>
- <ReadyDateTime>2010-10-21T01:03:03Z</ReadyDateTime>
- <State>ENDED</State>
- <EndDateTime>2010-10-21T01:44:18Z</EndDateTime>
- <InstanceRequestCount>1</InstanceRequestCount>
- <InstanceType>m1.large</InstanceType>
- <Market>ON_DEMAND</Market>
- <LastStateChangeReason>Job flow terminated</LastStateChangeReason>
- <InstanceRole>MASTER</InstanceRole>
- <InstanceGroupId>ig-EVMHOZJ2SCO8</InstanceGroupId>
- <Name>master</Name>
- </member>
- <member>
- <CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
- <InstanceRunningCount>0</InstanceRunningCount>
- <StartDateTime>2010-10-21T01:03:59Z</StartDateTime>
- <ReadyDateTime>2010-10-21T01:03:59Z</ReadyDateTime>
- <State>ENDED</State>
- <EndDateTime>2010-10-21T01:44:18Z</EndDateTime>
- <InstanceRequestCount>9</InstanceRequestCount>
- <InstanceType>m1.large</InstanceType>
- <Market>ON_DEMAND</Market>
- <LastStateChangeReason>Job flow terminated</LastStateChangeReason>
- <InstanceRole>CORE</InstanceRole>
- <InstanceGroupId>ig-YZHDYVITVHKB</InstanceGroupId>
- <Name>slave</Name>
- </member>
- </InstanceGroups>
- <NormalizedInstanceHours>40</NormalizedInstanceHours>
- <HadoopVersion>0.20</HadoopVersion>
- <MasterInstanceType>m1.large</MasterInstanceType>
- <MasterPublicDnsName>ec2-184-72-153-139.compute-1.amazonaws.com</MasterPublicDnsName>
- <Ec2KeyName>myubersecurekey</Ec2KeyName>
- <InstanceCount>10</InstanceCount>
- <KeepJobFlowAliveWhenNoSteps>false</KeepJobFlowAliveWhenNoSteps>
- </Instances>
- </member>
- </JobFlows>
- </DescribeJobFlowsResult>
- <ResponseMetadata>
- <RequestId>c31e701d-dcb4-11df-b5d9-337fc7fe4773</RequestId>
- </ResponseMetadata>
- </DescribeJobFlowsResponse>
- """
class TestEMRResponses(unittest.TestCase):
    """Parse canned EMR ``DescribeJobFlows`` XML and check the resulting objects."""

    def _parse_xml(self, body, markers):
        """Run *body* through boto's SAX handler and return the filled ResultSet.

        :param body: XML document text to parse.
        :param markers: passed unchanged to ``ResultSet``; the tests in this
            class supply a list of ``(element_name, class)`` pairs.
        :return: the ``ResultSet`` instance that was handed to the handler.
        """
        rs = ResultSet(markers)
        # NOTE(review): the second XmlHandler argument is presumably the
        # connection; None is enough for parsing a canned string -- confirm.
        h = handler.XmlHandler(rs, None)
        xml.sax.parseString(body, h)
        return rs

    def _assert_fields(self, response, **fields):
        """Assert that each named attribute of *response* has the expected value.

        :param response: object whose attributes are read with ``getattr``.
        :param fields: mapping of attribute name to expected value.
        :raises AssertionError: at the first attribute whose value differs.
        :raises AttributeError: if *response* lacks one of the attributes.
        """
        for field, expected in fields.items():
            actual = getattr(response, field)
            # assertEqual rather than the deprecated assertEquals alias,
            # which no longer exists on Python 3.12+.
            self.assertEqual(expected, actual,
                             "Field %s: %r != %r" % (field, expected, actual))

    def test_JobFlows_example(self):
        """The minimal example response yields one JobFlow with the expected fields."""
        # Single-element unpacking also asserts that exactly one job flow
        # was parsed.
        [jobflow] = self._parse_xml(JOB_FLOW_EXAMPLE,
                                    [('member', emrobject.JobFlow)])
        self._assert_fields(jobflow,
                            creationdatetime='2009-01-28T21:49:16Z',
                            startdatetime='2009-01-28T21:49:16Z',
                            state='STARTING',
                            instancecount='4',
                            jobflowid='j-3UN6WX5RRO2AG',
                            loguri='mybucket/subdir/',
                            name='MyJobFlowName',
                            availabilityzone='us-east-1a',
                            slaveinstancetype='m1.small',
                            masterinstancetype='m1.small',
                            ec2keyname='myec2keyname',
                            keepjobflowalivewhennosteps='true')

    def test_JobFlows_completed(self):
        """The completed response yields one JobFlow with six steps and two instance groups."""
        # Single-element unpacking also asserts that exactly one job flow
        # was parsed.
        [jobflow] = self._parse_xml(JOB_FLOW_COMPLETED,
                                    [('member', emrobject.JobFlow)])
        self._assert_fields(jobflow,
                            creationdatetime='2010-10-21T01:00:25Z',
                            startdatetime='2010-10-21T01:03:59Z',
                            enddatetime='2010-10-21T01:44:18Z',
                            state='COMPLETED',
                            instancecount='10',
                            jobflowid='j-3H3Q13JPFLU22',
                            loguri='s3n://example.emrtest.scripts/jobflow_logs/',
                            name='RealJobFlowName',
                            availabilityzone='us-east-1b',
                            slaveinstancetype='m1.large',
                            masterinstancetype='m1.large',
                            ec2keyname='myubersecurekey',
                            keepjobflowalivewhennosteps='false')
        # Nested <Steps> and <InstanceGroups> members must land on the job
        # flow itself rather than being counted as extra top-level results.
        self.assertEqual(6, len(jobflow.steps))
        self.assertEqual(2, len(jobflow.instancegroups))