39 lines
1.4 KiB
Python
39 lines
1.4 KiB
Python
import os, json, boto3
|
|
from typing import List
|
|
|
|
|
|
def launch_cluster(name, region, ec2_attributes, bootstrap_actions, instance_groups, configurations):
|
|
def log_uri(name, region):
|
|
return f's3n://{name}-{region}/elasticmapreduce/'
|
|
|
|
os.system(f"""
|
|
aws emr create-cluster \
|
|
--applications Name=Hadoop Name=Hive Name=Spark \
|
|
--ec2-attributes '{json.dumps(ec2_attributes)}' \
|
|
--release-label emr-5.26.0 \
|
|
--log-uri '{str(log_uri(name, region))}' \
|
|
--instance-groups '{json.dumps(instance_groups)}' \
|
|
--configurations '{json.dumps(configurations)}' \
|
|
--auto-scaling-role EMR_AutoScaling_DefaultRole \
|
|
--bootstrap-actions '{json.dumps(bootstrap_actions)}' \
|
|
--ebs-root-volume-size 10 \
|
|
--service-role EMR_DefaultRole \
|
|
--enable-debugging \
|
|
--name '{name}' \
|
|
--scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
|
|
--region {region}
|
|
""")
|
|
|
|
|
|
# def benchmark(names: List[str], region, ec2_attributes, bootstrap_actions, instance_groups, configurations):
|
|
# current_dir = os.path.dirname(__file__)
|
|
# s3 = boto3.client('s3')
|
|
# bucket = 'insightde'
|
|
#
|
|
# file = 'distroduce.sh'
|
|
# abs_path = os.path.join(current_dir, file)
|
|
# key = f'emr/bootstraps/{file}'
|
|
#
|
|
# s3.upload_file(abs_path, bucket, key)
|
|
# for name in names:
|
|
# launch_cluster(name, region, ec2_attributes, bootstrap_actions, instance_groups, configurations) |