I have a Django app that users can submit video through to be processed via a python script running OpenCV on a separate EC2 instance. As this is a moderately expensive server to run (p2.Xlarge ~ $3.00/h) it is only spun up when the video is submitted and I want to ensure that it doesn't continue to run if there is some hiccup in the processing. If the program works fine the instance is properly shut down.
The problem is sometimes the python script gets hung up (I can't seem to replicate this on it's own which is a separate problem) when the script doesn't fully execute the server continues to run indefinitely. I have tried the solution provided here for self terminating an AWS EC2 instance. The solution works if the server is idle but doesn't seem to work if the server is busy trying to process the video.
Is there a better way to make sure the server doesn't run longer than x minutes and stop it, even if the server is in the middle of a process?
The code I'm currently using:
import paramiko
import boto3
import sys
from botocore.exceptions import ClientError
import json
from time import sleep
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--username', required=False)
parser.add_argument('--date', required=False)
args = parser.parse_args()
uName = args.username
theDate = args.date
ec2 = boto3.client('ec2', region_name= 'us-east-1', aws_access_key_id=accessKey, aws_secret_access_key=secretKey, )
ec2_2 = boto3.resource('ec2', region_name= 'us-east-1', aws_access_key_id=accessKey, aws_secret_access_key=secretKey, )
client = boto3.client('ses',region_name= 'us-east-1', aws_access_key_id=accessKey, aws_secret_access_key=secretKey,)
s3_resource = boto3.client('s3', region_name= 'us-east-1', aws_access_key_id=accessKey, aws_secret_access_key=secretKey, )
s3_instance = boto3.resource('s3', region_name= 'us-east-1', aws_access_key_id=accessKey, aws_secret_access_key=secretKey, )
obj = s3_instance.Object('my_bucket', 'data/instances.txt')#load file of instances
body=obj.get()['Body'].read().decode('utf-8')
instance_ids.index(body.split()[-1:][0])#get index of last run instance
if instance_ids.index(body.split()[-1:][0]) != 4: #if it isn't the 5th instance run the next instance
instance_id=instance_ids[instance_ids.index(body.split()[-1:][0])+1]
else:
instance_id=instance_ids[0]#if it is the last instance then run the first instance
body+='\n'+instance_id #add the instance run to the end of the file
obj.put(Body=body) #write the file back to S3
while True:
try:
ec2.start_instances(InstanceIds=[instance_id], DryRun=True)
except ClientError as e:
if 'DryRunOperation' not in str(e):
raise
try:
ec2.start_instances(InstanceIds=[instance_id], DryRun=False)
break
except:
continue
#except 'ClientError' as e:
# print(e)
print('instance started')
while True:
if not ec2_2.Instance(instance_id).state['Code']== 16:
print(ec2_2.Instance(instance_id).state)
sleep(2.5)
continue
else:
print('state == running')
break
while True:
try:
instance = ec2_2.Instance(instance_id).public_ip_address
ip_add=instance
break
except:
continue
prevent_bankruptcy = 'echo "sudo halt" | at now + 15 minutes'
move_frome_s3 = 'aws s3 cp s3://my-bucket/media/{0}/Sessions/{1}/Uploads/{2} ./python-scripts/data/'.format(uName,theDate, file)
move_about_file = 'aws s3 cp s3://my-bucket/media/{}/about.txt ./python-scripts/data/results/result-dicts/'.format(uName)
move_assessment_file = 'aws s3 cp s3://my-bucket/media/{}/ranges.txt ./python-scripts/data/results/result-dicts/'.format(uName)
convert_file= 'cd python-scripts && python3 convert_codec.py --username {0} --date {1}'.format(uName, theDate)
key_location = "/my/key/folder/MyKey.pem"
k = paramiko.RSAKey.from_private_key_file(key_location)
c = paramiko.SSHClient()
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
while True:
try:
c.connect( hostname = ip_add, username = "ubuntu", pkey = k, banner_timeout=60)
break
except:
sleep(1.5)
commands = [prevent_bankruptcy, make_dir, move_frome_s3, move_about_file, convert_file, move_assessment_file, create_folder]
for command in commands:
print ("Executing {}".format( command ))
stdin , stdout, stderr = c.exec_command(command)
errList.append(stderr.read())
print (stdout.read())
print( "Errors")
print ("***",stderr.read())
c.close()
try:
ec2.stop_instances(InstanceIds=[instance_id], DryRun=False)
except ClientError as e:
if 'DryRunOperation' not in str(e):
raise
try:
ec2.stop_instances(InstanceIds=[instance_id], DryRun=False)
except 'ClientError' as e:
print(e)
If I edit commands to only run prevent_bankruptcy which calls 'sudo echo halt' and let the server sit idle for 15 minutes it will automatically shut down. However if something goes wrong with convert_file then it will continue to run indefinitely which can lead to a surprise come billing time.