This script will
- create a new key pair if one doesn't exist
- create a security group if one doesn't exist
- request a new spot instance
- wait for the spot request to be fulfilled
- wait for the instance to boot
- connect to it via SSH and run a script of your choice
Install
First, install the dependencies. This is for Ubuntu Server 14.04.
sudo apt-get install -y python python-pip python-dev libffi-dev libssl-dev
sudo pip install boto paramiko
Create a new file (e.g. launch-spot.py) and give it exec permissions (chmod +x launch-spot.py).
#!/usr/bin/python2.7 -u
# pip install boto paramiko
import argparse
import boto, boto.ec2, boto.ec2.blockdevicemapping, boto.manage
import paramiko
import os, sys, time
#boto.set_stream_logger('boto')
def launch_spot_instance(id, profile, spot_wait_sleep=5, instance_wait_sleep=3):
    """Request (or resume) a spot instance named `id` using the given profile.

    Creates the key pair 'KP-<id>' and security group 'SG-<id>' on demand
    (unless the profile already names them), reuses an open/active spot
    request for the same security group if one exists, then blocks until
    the request is fulfilled and the instance reaches 'running'.

    Arguments:
        id                  -- logical name; used for key pair, security group
                               and the 'Name' tags.
        profile             -- dict of launch settings (region, price, type,
                               image_id, disk_size, firewall rules, ...).
                               Mutated in place: 'key_pair' and
                               'security_group' entries are filled in.
        spot_wait_sleep     -- seconds between spot-request polls.
        instance_wait_sleep -- seconds between instance-state polls.

    Returns the boto Instance object in the 'running' state.
    Raises on spot-request failure, instance termination, or any
    unexpected EC2 error.
    """
    ec2 = boto.ec2.connect_to_region(profile['region'])

    if 'key_pair' not in profile:
        profile['key_pair'] = ('KP-' + id, 'KP-' + id + '.pem')
        try:
            print >> sys.stderr, 'Creating key pair...',
            keypair = ec2.create_key_pair('KP-' + id)
            keypair.save('.')  # writes KP-<id>.pem into the current directory
            print >> sys.stderr, 'created'
        except boto.exception.EC2ResponseError as e:
            if e.code == 'InvalidKeyPair.Duplicate':
                # Key pair already exists from a previous run; reuse it.
                print >> sys.stderr, 'already exists'
            else:
                raise  # bare raise preserves the original traceback (was: raise e)

    if 'security_group' not in profile:
        try:
            print >> sys.stderr, 'Creating security group...',
            sc = ec2.create_security_group('SG-' + id, 'Security Group for ' + id)
            for proto, fromport, toport, ip in profile['firewall']:
                sc.authorize(proto, fromport, toport, ip)
            profile['security_group'] = (sc.id, sc.name)
            print >> sys.stderr, 'created'
        except boto.exception.EC2ResponseError as e:
            if e.code == 'InvalidGroup.Duplicate':
                # Group exists from a previous run; look it up by name.
                # NOTE(review): existing rules are NOT reconciled with
                # profile['firewall'] in this case.
                print >> sys.stderr, 'already exists'
                sc = ec2.get_all_security_groups(groupnames=['SG-' + id])[0]
                profile['security_group'] = (sc.id, sc.name)
            else:
                raise  # preserve original traceback

    # Resume an existing open/active request for this group, if any,
    # so re-running the script with the same name is idempotent.
    existing_requests = ec2.get_all_spot_instance_requests(
        filters={'launch.group-id': profile['security_group'][0], 'state': ['open', 'active']})
    if existing_requests:
        if len(existing_requests) > 1:
            raise Exception('Too many existing spot requests')
        print >> sys.stderr, 'Reusing existing spot request'
        spot_req_id = existing_requests[0].id
    else:
        # Root EBS volume plus the instance-store (ephemeral) disk as /dev/sdb.
        bdm = boto.ec2.blockdevicemapping.BlockDeviceMapping()
        bdm['/dev/sda1'] = boto.ec2.blockdevicemapping.BlockDeviceType(
            volume_type='gp2', size=profile['disk_size'],
            delete_on_termination=profile['disk_delete_on_termination'])
        bdm['/dev/sdb'] = boto.ec2.blockdevicemapping.BlockDeviceType(ephemeral_name='ephemeral0')
        print >> sys.stderr, 'Requesting spot instance'
        spot_reqs = ec2.request_spot_instances(
            price=profile['price'], image_id=profile['image_id'], instance_type=profile['type'],
            placement=profile['region'] + profile['availability_zone'],
            security_groups=[profile['security_group'][1]], key_name=profile['key_pair'][0],
            block_device_map=bdm)
        spot_req_id = spot_reqs[0].id

    # Poll the spot request until it is fulfilled (or fails).
    print >> sys.stderr, 'Waiting for launch',
    instance_id = None
    spot_tag_added = False
    while not instance_id:
        spot_req = ec2.get_all_spot_instance_requests(request_ids=[spot_req_id])[0]
        if not spot_tag_added:
            # Tag once; re-tagging on every poll would be redundant API calls.
            spot_req.add_tag('Name', id)
            spot_tag_added = True
        if spot_req.state == 'failed':
            raise Exception('Spot request failed')
        instance_id = spot_req.instance_id
        if not instance_id:
            print >> sys.stderr, '.',
            time.sleep(spot_wait_sleep)
    print >> sys.stderr

    print >> sys.stderr, 'Retrieving instance by id'
    reservations = ec2.get_all_instances(instance_ids=[instance_id])
    instance = reservations[0].instances[0]
    instance.add_tag('Name', id)
    print >> sys.stderr, 'Got instance: ' + str(instance.id) + ' [' + instance.state + ']'

    # Poll until the instance settles into a terminal or running state.
    print >> sys.stderr, 'Waiting for instance to boot',
    while not instance.state in ['running', 'terminated', 'shutting-down']:
        print >> sys.stderr, '.',
        time.sleep(instance_wait_sleep)
        instance.update()
    print >> sys.stderr
    if instance.state != 'running':
        raise Exception('Instance was terminated')
    return instance
def connect_to_instance(ip, username, key_filename, timeout=10):
    """Open an SSH connection to `ip`, retrying up to 30 times.

    Arguments:
        ip           -- public IP address of the instance.
        username     -- SSH login name (e.g. 'ubuntu').
        key_filename -- path to the private key (.pem) file.
        timeout      -- per-attempt TCP timeout in seconds.

    Returns the (auto-add host key policy) paramiko SSHClient.
    NOTE(review): if all 30 attempts fail the client is returned
    unconnected and the caller's get_transport() will return None.
    """
    print >> sys.stderr, 'Connecting to SSH [' + ip + '] ',
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    retries = 0
    while retries < 30:
        try:
            print >> sys.stderr, '.',
            client.connect(ip, username=username, key_filename=key_filename, timeout=timeout)
            break
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit and made Ctrl-C impossible here. A refused connection
            # returns immediately (no timeout), so pause briefly to avoid
            # burning all 30 attempts before sshd has started.
            retries += 1
            time.sleep(1)
    print >> sys.stderr
    return client
def setup_instance(id, instance, file, user_name, key_name):
script = open(file, 'r').read().replace('\r', '')
client = connect_to_instance(instance.ip_address, user_name, key_name)
session = client.get_transport().open_session()
session.set_combine_stderr(True)
print >> sys.stderr, 'Running script: ' + os.path.relpath(file, os.getcwd())
session.exec_command(script)
stdout = session.makefile()
try:
for line in stdout:
print line.rstrip()
except (KeyboardInterrupt, SystemExit):
print >> sys.stderr, 'Ctrl-C, stopping'
client.close()
exit_code = session.recv_exit_status()
print >> sys.stderr, 'Exit code: ' + str(exit_code)
return exit_code == 0
if __name__ == '__main__':
profiles = {
'15G': {
'region': 'eu-west-1',
'availability_zone': 'a',
'price': '0.05',
'type': 'r3.large',
'image_id': 'ami-ed82e39e',
'username': 'ubuntu',
#'key_pair': ('AWS-EU', 'eu-key.pem'),
'disk_size': 20,
'disk_delete_on_termination': True,
'scripts': [],
'firewall': [ ('tcp', 22, 22, '0.0.0.0/0') ]
}
}
parser = argparse.ArgumentParser(description='Launch spot instance')
parser.add_argument('-n', '--name', help='Name', required=True)
parser.add_argument('-p', '--profile', help='Profile', default=profiles.keys()[0], choices=profiles.keys())
parser.add_argument('-s', '--script', help='Script path', action='append', default=[])
parser.add_argument('-i', '--interactive', help='Connect to SSH', action='store_true')
args = parser.parse_args()
profile = profiles[args.profile]
try:
instance = launch_spot_instance(args.name, profile)
except boto.exception.NoAuthHandlerFound:
print >> sys.stderr, 'Error: No credentials found, try setting the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables'
sys.exit(1)
for script in profile['scripts'] + args.script:
if not setup_instance(id=args.name, instance=instance, file=script, user_name=profile['username'], key_name=profile['key_pair'][1]):
break
if args.interactive:
print 'ssh ' + profile['username'] + '@' + instance.ip_address + ' -i ' + profile['key_pair'][1] + ' -oStrictHostKeyChecking=no'
The script is less than 200 lines long and should be readable from top to bottom.
Use
Set your Amazon AWS access keys as environment variables.
Or you can read the boto documentation to find out how to store the keys in configuration files.
export AWS_ACCESS_KEY_ID="XXXXXXXXXXXXXXXXXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXXXXXXXXXXXXXXXXXXXXXXX"
Then launch a new spot instance like this
$ ./launch-spot.py -n test -p 15G
Creating key pair... created
Creating security group... created
Requesting spot instance
Waiting for launch . . . . . .
Retrieving instance by id
Got instance: i-15fc09d5 [pending]
Waiting for instance to boot . . . .
-n
stands for --name
and it is how you can identify this spot instance.
-p
stands for --profile
and is a collection of settings for launching this instance
(region, availability zone, instance type, max spot price, AMI, SSH username, disk size, security group rules, etc.)
The profiles are hard coded in the script. Feel free to modify the script to load them from an external configuration file (in JSON perhaps).
Currently, there is just one profile, named 15G:
profiles = {
'15G': {
'region': 'eu-west-1',
'availability_zone': 'a',
'price': '0.05',
'type': 'r3.large',
'image_id': 'ami-ed82e39e',
'username': 'ubuntu',
#'key_pair': ('AWS-EU', 'eu-key.pem'),
'disk_size': 20,
'disk_delete_on_termination': True,
'scripts': [],
'firewall': [ ('tcp', 22, 22, '0.0.0.0/0') ]
}
}
You can run the script again and as long as you use the same name it will resume.
$ ./launch-spot.py -n test -p 15G
Creating key pair... already exists
Creating security group... already exists
Reusing existing spot request
Waiting for launch
Retrieving instance by id
Got instance: i-15fc09d5 [running]
Waiting for instance to boot
To run a script on this instance via SSH use one or more -s
arguments.
For example, if you have test.sh
#!/bin/bash
touch /tmp/i-was-here
ls -l /tmp/i-was-here
this will be the output of the script
$ ./launch-spot.py -n test -p 15G -s test.sh
Creating key pair... already exists
Creating security group... already exists
Reusing existing spot request
Waiting for launch
Retrieving instance by id
Got instance: i-15fc09d5 [running]
Waiting for instance to boot
Connecting to SSH [54.74.149.116] .
Running script: test.sh
-rw-rw-r-- 1 ubuntu ubuntu 0 Oct 10 11:53 /tmp/i-was-here
Exit code: 0
If you want to connect to this instance manually,
you can use the -i
flag:
$ ./launch-spot.py -n test -p 15G -i
Creating key pair... already exists
Creating security group... already exists
Reusing existing spot request
Waiting for launch
Retrieving instance by id
Got instance: i-15fc09d5 [running]
Waiting for instance to boot
ssh [email protected] -i KP-test.pem -oStrictHostKeyChecking=no
You can copy & paste the last line in your terminal and connect to the instance.
As you can see, if you haven't specified key_pair
in the profile,
a new key pair will be created with the name KP-test
and it will be saved as KP-test.pem
.
If there's no security_group
in the profile, a new one will be created with the name SG-test
and the rules from the firewall
profile setting will be applied.
At a minimum, SSH from your IP should be allowed.
If you want to stop the spot instance, simply run sudo halt
on the instance.
The instance, the instance request and disks (if auto terminate is on) will automatically be shut down and terminated.
Scripting
If you use a simple bash script, I suggest using one like this.
set -e
will stop the execution if an error occurs.
If the script finished successfully, it will not be run again.
#!/bin/bash
set -e
if [ ! -f /var/setup.done ]; then
...
sudo touch /var/setup.done
fi
If you share a provisioning script with Vagrant and AWS, you can use this line to check if you are running on AWS EC2 or locally on Vagrant.
export AWS=`curl -s -m 1 http://169.254.169.254/latest/meta-data/instance-id 2> /dev/null`
export VAGRANT=`ls /vagrant 2> /dev/null | head -n 1`
Amazon Ubuntu mirrors and apt-get
can be slow and flaky.
Especially if you hit CTRL+C
while the script is executing apt-get
and then run it again.
# this may or may not prevent "hash sum mismatch" errors
sudo rm -rf /var/lib/apt/lists/*
sudo apt-get clean
# amazon mirrors are very slow
sudo sed -i 's/us-east-1.ec2.archive.ubuntu.com/us.archive.ubuntu.com/g' /etc/apt/sources.list
sudo sed -i 's/eu-west-1.ec2.archive.ubuntu.com/ie.archive.ubuntu.com/g' /etc/apt/sources.list
# stay up to date
sudo apt-get update
# in case previous apt-get was interrupted
sudo dpkg --configure -a
Create a file system on the instance disk and move /home
, /tmp
and /swapfile
to it.
This can be useful on large SSD backed instance disks.
export AWS=`curl -s -m 1 http://169.254.169.254/latest/meta-data/instance-id 2> /dev/null`
export AWS_INSTANCE_STORE=1
if [ "$AWS" ] && [ "$AWS_INSTANCE_STORE" -eq "1" ] && [ ! -d /mnt/instance ]; then
# aws instance store
sudo mkfs -t ext4 /dev/xvdb
sudo mkdir -p /mnt/instance
sudo mount /dev/xvdb /mnt/instance
sudo chown -R $USER:$USER /mnt/instance
# don't use root disk
[ ! -d /mnt/instance/home ] && sudo mv /home/ubuntu /mnt/instance/home && sudo ln -s /mnt/instance/home /home/ubuntu
[ ! -d /mnt/instance/tmp ] && sudo mv /tmp/ /mnt/instance/ && sudo ln -s /mnt/instance/tmp /tmp
[ ! -f /mnt/instance/swapfile ] && sudo touch /mnt/instance/swapfile && sudo ln -s /mnt/instance/swapfile /swapfile
fi
Add swap.
export SWAP="4G"
if [ "$SWAP" ]; then
sudo fallocate -l $SWAP /swapfile && sudo mkswap /swapfile && sudo swapon /swapfile
sudo sysctl vm.swappiness=1 && sudo sysctl vm.vfs_cache_pressure=50
fi
Keep /tmp
in RAM.
export TMPFS=0
export TMPFS_SWAP="5G"
if [ "$TMPFS" ] && [ "$TMPFS" -eq "1" ]; then
sudo mount -o defaults,noatime,nosuid,nodev,noexec,mode=1777,size=500G -t tmpfs tmpfs /tmp
[ "$TMPFS_SWAP" ] && sudo fallocate -l $TMPFS_SWAP /swapfile.tmpfs && sudo mkswap /swapfile.tmpfs && sudo swapon /swapfile.tmpfs
fi
To upload/download files to S3 on AWS and use local files in /vagrant
when running in Vagrant, you could do something like this.
export AWS=`curl -s -m 1 http://169.254.169.254/latest/meta-data/instance-id 2> /dev/null`
export AWS_ACCESS_KEY_ID="XXXXXXXXXXXXXXXXXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXXXXXXXXXXXXXXXXXXXXXXX"
export VAGRANT=`ls /vagrant 2> /dev/null | head -n 1`
sudo apt-get install -y python-pip
sudo pip install awscli
[ "$AWS_ACCESS_KEY_ID" ] && [ "$AWS_SECRET_ACCESS_KEY" ] && echo -e "$AWS_ACCESS_KEY_ID\n$AWS_SECRET_ACCESS_KEY\n\n" | aws configure
if [ "$VAGRANT" ]; then
cp /vagrant/file.tar.gz file.tar.gz
elif [ "$AWS" ]; then
aws s3 cp s3://bucket/file.tar.gz file.tar.gz
fi
...
if [ "$VAGRANT" ]; then
cp result.tar.gz /vagrant/result.tar.gz
elif [ "$AWS" ]; then
aws s3 cp result.tar.gz s3://bucket/result.tar.gz --storage-class REDUCED_REDUNDANCY
fi