- get the Whirr tar file
wget http://www.eu.apache.org/dist/whirr/stable/whirr-0.8.2.tar.gz
- untar the Whirr tar file
tar -vxf whirr-0.8.2.tar.gz
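- create the credentials file (run these from inside the extracted whirr-0.8.2 directory, since conf/ and bin/ are relative to it)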
mkdir ~/.whirr
cp conf/credentials.sample ~/.whirr/credentials
- add the following content to the credentials file (~/.whirr/credentials)
# Set cloud provider connection details
PROVIDER=aws-ec2
IDENTITY=<AWS Access Key ID>
CREDENTIAL=<AWS Secret Access Key>
- generate an RSA key pair with an empty passphrase
ssh-keygen -t rsa -P ''
- create a hadoop.properties file and add the following content
whirr.cluster-name=whirrhadoopcluster
whirr.instance-templates=1 hadoop-jobtracker+hadoop-namenode,2 hadoop-datanode+hadoop-tasktracker
whirr.provider=aws-ec2
whirr.private-key-file=${sys:user.home}/.ssh/id_rsa
whirr.public-key-file=${sys:user.home}/.ssh/id_rsa.pub
whirr.hadoop.version=1.0.2
whirr.aws-ec2-spot-price=0.08
- launch hadoop
bin/whirr launch-cluster --config hadoop.properties
- launch proxy
cd ~/.whirr/whirrhadoopcluster/
./hadoop-proxy.sh
- add rules to iptables allowing access to the Hadoop web UI ports (format: source CIDR, port)
0.0.0.0/0 50030
0.0.0.0/0 50070
- check the web UI in the browser (port 50030, opened in the previous step)
http://<aws-public-dns>:50030
- add to /etc/profile
export HADOOP_CONF_DIR=~/.whirr/whirrhadoopcluster/
- check that Hadoop works by listing the root of the cluster file system
hadoop fs -ls /