added cluster launch
This commit is contained in:
parent
2f0316e5f0
commit
2a8c1d5e8f
|
|
@ -1,9 +0,0 @@
|
||||||
#!/bin/bash
# Creates the 'test_cluster12d' EMR cluster (release emr-5.26.0, region
# us-east-1) with Hadoop, Hive and Spark installed.
#
# The JSON payloads are kept in variables so that the aws invocation below
# reads as one option per line.

# Script run on the nodes as the "bootstrap" bootstrap action.
bootstrap_actions='[{"Path":"s3://insightde/emr/bootstraps/bootstrap.sh","Name":"bootstrap"}]'

# Key pair, instance profile, subnet and security groups for the instances.
ec2_attributes='{"KeyName":"joshua-IAM-keypair","InstanceProfile":"EMR_EC2_DefaultRole","SubnetId":"subnet-0034e615b047fd112","EmrManagedSlaveSecurityGroup":"sg-08e546ae27d86d6a3","EmrManagedMasterSecurityGroup":"sg-08e546ae27d86d6a3"}'

# One m4.xlarge master and five m4.xlarge core nodes, each with two 32 GB gp2 volumes.
instance_groups='[{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"MASTER","InstanceType":"m4.xlarge","Name":"Master - 1"},{"InstanceCount":5,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"CORE","InstanceType":"m4.xlarge","Name":"Core - 2"}]'

# Spark classifications; only maximizeResourceAllocation carries a value here.
configurations='[{"Classification":"spark-env","Properties":{},"Configurations":[{"Classification":"export","Properties":{}}]},{"Classification":"spark-defaults","Properties":{}},{"Classification":"spark","Properties":{"maximizeResourceAllocation":"true"}}]'

aws emr create-cluster \
  --auto-scaling-role EMR_AutoScaling_DefaultRole \
  --applications Name=Hadoop Name=Hive Name=Spark \
  --bootstrap-actions "$bootstrap_actions" \
  --ebs-root-volume-size 10 \
  --ec2-attributes "$ec2_attributes" \
  --service-role EMR_DefaultRole \
  --release-label emr-5.26.0 \
  --name 'test_cluster12d' \
  --instance-groups "$instance_groups" \
  --configurations "$configurations" \
  --scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
  --region us-east-1
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"Classification": "spark-env",
|
|
||||||
"Configurations": [
|
|
||||||
{
|
|
||||||
"Classification": "export",
|
|
||||||
"ConfigurationProperties": {
|
|
||||||
"PYSPARK_PYTHON": "/usr/bin/python3",
|
|
||||||
"PYSPARK_DRIVER_PYTHON": "/usr/bin/python3"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Classification": "spark-defaults",
|
|
||||||
"ConfigurationProperties": {
|
|
||||||
"spark.sql.execution.arrow.enabled": "true"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Classification": "spark",
|
|
||||||
"Properties": {
|
|
||||||
"maximizeResourceAllocation": "true"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
@ -5,7 +5,6 @@ yes | sudo python3 -m pip install pathos kafka-python
|
||||||
wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/dist/cadCAD-0.0.2-py3-none-any.whl
|
wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/dist/cadCAD-0.0.2-py3-none-any.whl
|
||||||
yes | sudo python3 -m pip install cadCAD-0.0.2-py3-none-any.whl
|
yes | sudo python3 -m pip install cadCAD-0.0.2-py3-none-any.whl
|
||||||
|
|
||||||
|
|
||||||
# check for master node
|
# check for master node
|
||||||
IS_MASTER=false
|
IS_MASTER=false
|
||||||
if grep -i isMaster /mnt/var/lib/info/instance.json | grep -i true;
|
if grep -i isMaster /mnt/var/lib/info/instance.json | grep -i true;
|
||||||
|
|
@ -0,0 +1,125 @@
|
||||||
|
import os, json, boto3
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
# EC2 settings for the cluster instances: SSH key pair, instance profile,
# subnet, and the EMR-managed security groups. The same security group id is
# used for both the master and the slave entries.
ec2_attributes = dict(
    KeyName="joshua-IAM-keypair",
    InstanceProfile="EMR_EC2_DefaultRole",
    SubnetId="subnet-0034e615b047fd112",
    EmrManagedSlaveSecurityGroup="sg-08e546ae27d86d6a3",
    EmrManagedMasterSecurityGroup="sg-08e546ae27d86d6a3",
)
|
||||||
|
|
||||||
|
# Single bootstrap action named "bootstrap" that points at the distroduce.sh
# script stored under emr/bootstraps/ in the insightde bucket.
bootstrap_actions = [
    dict(
        Path="s3://insightde/emr/bootstraps/distroduce.sh",
        Name="bootstrap",
    ),
]
|
||||||
|
|
||||||
|
# Instance groups for the cluster, CORE first and MASTER second. Both groups
# use m4.xlarge instances with two 32 GB gp2 EBS volumes each; they differ
# only in instance count, group type and display name, so the entries are
# generated from a small (count, type, name) table. Key order matches the
# hand-written literals so the serialized JSON is unchanged.
instance_groups = [
    {
        "InstanceCount": group_size,
        "EbsConfiguration": {
            "EbsBlockDeviceConfigs": [
                {
                    "VolumeSpecification": {"SizeInGB": 32, "VolumeType": "gp2"},
                    "VolumesPerInstance": 2,
                }
            ]
        },
        "InstanceGroupType": group_type,
        "InstanceType": "m4.xlarge",
        "Name": group_label,
    }
    for group_size, group_type, group_label in (
        (5, "CORE", "Core - 2"),
        (1, "MASTER", "Master - 1"),
    )
]
|
||||||
|
|
||||||
|
# Application configuration for the cluster, assembled from one dict per
# classification.

# spark-env: exports that make both the PySpark workers and the driver use
# /usr/bin/python3.
_spark_env = {
    "Classification": "spark-env",
    "Properties": {},
    "Configurations": [
        {
            "Classification": "export",
            "Properties": {
                "PYSPARK_PYTHON": "/usr/bin/python3",
                "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3",
            },
        }
    ],
}

# spark-defaults: turns on the Arrow execution setting for Spark SQL.
_spark_defaults = {
    "Classification": "spark-defaults",
    "Properties": {"spark.sql.execution.arrow.enabled": "true"},
}

# spark: EMR-level switch maximizeResourceAllocation.
_spark = {
    "Classification": "spark",
    "Properties": {"maximizeResourceAllocation": "true"},
}

configurations = [_spark_env, _spark_defaults, _spark]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_distroduce_cluster(name, region, ec2_attributes, bootstrap_actions, instance_groups, configurations):
    """Launch an EMR cluster by running ``aws emr create-cluster`` in a shell.

    The cluster uses release emr-5.26.0 with Hadoop, Hive and Spark, a 10 GB
    EBS root volume, debugging enabled, and the default EMR service and
    auto-scaling roles.

    Args:
        name: Cluster name; also used to derive the log bucket (see below).
        region: AWS region passed to ``--region``; also part of the log URI.
        ec2_attributes: JSON-serializable value for ``--ec2-attributes``.
        bootstrap_actions: JSON-serializable value for ``--bootstrap-actions``.
        instance_groups: JSON-serializable value for ``--instance-groups``.
        configurations: JSON-serializable value for ``--configurations``.

    Returns:
        The value returned by ``os.system`` for the command (previously this
        status was discarded), so callers can detect a failed launch.

    NOTE(review): logs are sent to ``s3n://<name>-<region>/elasticmapreduce/``.
    S3 bucket names may not contain underscores, so a cluster name containing
    one will not map to a valid bucket -- confirm the intended log bucket.
    """
    # Local import keeps this block self-contained; the file's import line
    # does not bring in shlex.
    import shlex

    def log_uri(name, region):
        # Log location derived from the cluster name and region.
        return f's3n://{name}-{region}/elasticmapreduce/'

    argv = [
        'aws', 'emr', 'create-cluster',
        '--applications', 'Name=Hadoop', 'Name=Hive', 'Name=Spark',
        '--ec2-attributes', json.dumps(ec2_attributes),
        '--release-label', 'emr-5.26.0',
        '--log-uri', log_uri(name, region),
        '--instance-groups', json.dumps(instance_groups),
        '--configurations', json.dumps(configurations),
        '--auto-scaling-role', 'EMR_AutoScaling_DefaultRole',
        '--bootstrap-actions', json.dumps(bootstrap_actions),
        '--ebs-root-volume-size', '10',
        '--service-role', 'EMR_DefaultRole',
        '--enable-debugging',
        '--name', name,
        '--scale-down-behavior', 'TERMINATE_AT_TASK_COMPLETION',
        '--region', region,
    ]

    # Quote every argument individually: the values used to be pasted between
    # hand-written single quotes, so an apostrophe in the name or in any JSON
    # string would terminate the quoting and corrupt (or inject into) the
    # shell command.
    command = ' '.join(shlex.quote(str(arg)) for arg in argv)
    return os.system(command)
|
||||||
|
|
||||||
|
|
||||||
|
def benchmark(names: List[str], region, ec2_attributes, bootstrap_actions, instance_groups, configurations):
    """Upload the bootstrap script to S3, then launch one cluster per name.

    ``distroduce.sh`` is read from the directory containing this module and
    uploaded to the ``insightde`` bucket under ``emr/bootstraps/``. After the
    upload, ``create_distroduce_cluster`` is called once for every entry in
    ``names``, with the remaining arguments forwarded unchanged.
    """
    script_name = 'distroduce.sh'
    local_script = os.path.join(os.path.dirname(__file__), script_name)

    # The script must be in S3 before any cluster is created.
    boto3.client('s3').upload_file(local_script, 'insightde', f'emr/bootstraps/{script_name}')

    for cluster_name in names:
        create_distroduce_cluster(
            cluster_name,
            region,
            ec2_attributes,
            bootstrap_actions,
            instance_groups,
            configurations,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level driver: uploads the bootstrap script and launches a single
# cluster in us-east-1. There is no __main__ guard, so this runs whenever the
# module is executed or imported.
# NOTE(review): 'distibuted' looks like a typo for 'distributed'; the value is
# kept as-is because it is used as the cluster name and in the log URI.
name = 'distibuted_produce'
region = 'us-east-1'
benchmark(
    names=[name],
    region=region,
    ec2_attributes=ec2_attributes,
    bootstrap_actions=bootstrap_actions,
    instance_groups=instance_groups,
    configurations=configurations,
)
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
#!/bin/bash
# Creates the 'distibuted_produce' EMR cluster (release emr-5.26.0, region
# us-east-1) with Hadoop, Hive and Spark, debugging enabled, and
# distroduce.sh as its bootstrap action.
#
# Instance layout: five m4.xlarge CORE nodes and one m4.xlarge MASTER node,
# each with two 32 GB gp2 volumes.

# Options are collected in an array, one option per line, and expanded as
# separate words in the single aws call below.
create_cluster_args=(
  --applications Name=Hadoop Name=Hive Name=Spark
  --ec2-attributes '{"KeyName":"joshua-IAM-keypair","InstanceProfile":"EMR_EC2_DefaultRole","SubnetId":"subnet-0034e615b047fd112","EmrManagedSlaveSecurityGroup":"sg-08e546ae27d86d6a3","EmrManagedMasterSecurityGroup":"sg-08e546ae27d86d6a3"}'
  --release-label emr-5.26.0
  --log-uri 's3n://aws-logs-251682129355-us-east-1/elasticmapreduce/'
  --instance-groups '[{"InstanceCount":5,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"CORE","InstanceType":"m4.xlarge","Name":"Core - 2"},{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"MASTER","InstanceType":"m4.xlarge","Name":"Master - 1"}]'
  --configurations '[{"Classification":"spark-env","Properties":{},"Configurations":[{"Classification":"export","Properties":{}}]},{"Classification":"spark-defaults","Properties":{}},{"Classification":"spark","Properties":{"maximizeResourceAllocation":"true"}}]'
  --auto-scaling-role EMR_AutoScaling_DefaultRole
  --bootstrap-actions '[{"Path":"s3://insightde/emr/bootstraps/distroduce.sh","Name":"bootstrap"}]'
  --ebs-root-volume-size 10
  --service-role EMR_DefaultRole
  --enable-debugging
  --name 'distibuted_produce'
  --scale-down-behavior TERMINATE_AT_TASK_COMPLETION
  --region us-east-1
)

aws emr create-cluster "${create_cluster_args[@]}"
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
#!/bin/bash
# Submits messaging_sim.py to YARN with distroduce.zip on the Python path,
# passing this host's address(es) as trailing arguments.

# `hostname -I | xargs` collapses the address list onto one line with single
# spaces and no trailing whitespace. $(...) replaces the legacy backtick form.
PRIVATE_IP=$(hostname -I | xargs)

# PRIVATE_IP is intentionally left unquoted: if the host reports several
# addresses, each one is passed as its own argument, exactly as before.
# shellcheck disable=SC2086
spark-submit --master yarn --py-files distroduce.zip messaging_sim.py $PRIVATE_IP
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
#!/bin/bash
# Copies the local event_bench.py into /home/hadoop/ on the remote EC2 host,
# authenticating as user "hadoop" with the joshua-IAM-keypair private key.

# Plain (unquoted) assignments so the leading ~ is still expanded to $HOME.
ssh_key=~/.ssh/joshua-IAM-keypair.pem
local_script=~/Projects/event-bench/event_bench.py
remote_target='hadoop@ec2-3-230-158-62.compute-1.amazonaws.com:/home/hadoop/'

scp -i "$ssh_key" "$local_script" "$remote_target"
|
||||||
|
|
||||||
Loading…
Reference in New Issue