diff --git a/dist/cadCAD-0.0.2-py3-none-any.whl b/dist/cadCAD-0.0.2-py3-none-any.whl index 8a48f7c..758253f 100644 Binary files a/dist/cadCAD-0.0.2-py3-none-any.whl and b/dist/cadCAD-0.0.2-py3-none-any.whl differ diff --git a/dist/cadCAD-0.0.2.tar.gz b/dist/cadCAD-0.0.2.tar.gz index 83d3518..86a900d 100644 Binary files a/dist/cadCAD-0.0.2.tar.gz and b/dist/cadCAD-0.0.2.tar.gz differ diff --git a/distroduce/bash/bootstrap.sh b/distroduce/bash/bootstrap.sh index a6b78a8..6d900d6 100644 --- a/distroduce/bash/bootstrap.sh +++ b/distroduce/bash/bootstrap.sh @@ -1,81 +1,24 @@ #!/bin/bash -# SSH into all machines -ssh -i ~/.ssh/joshua-IAM-keypair.pem hadoop@ -sudo python3 -m pip install --upgrade pip -sudo python3 -m pip install pathos kafka-python - -# SetUp Window: head node cd ~ -#sudo python3 -m pip install --upgrade pip -#sudo python3 -m pip install pathos kafka-python -sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh -wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz -tar -xzf kafka_2.12-2.3.0.tgz -cd kafka_2.12-2.3.0 -bin/zookeeper-server-start.sh config/zookeeper.properties & -bin/kafka-server-start.sh config/server.properties & -# get ip -bin/kafka-topics.sh --create --bootstrap-server 10.0.0.9:9092 --replication-factor 1 --partitions 1 --topic test -# bin/kafka-topics.sh --list --bootstrap-server 10.0.0.9:9092 - -# Consume (Window): head node -kafka_2.12-2.3.0/bin/kafka-console-consumer.sh --bootstrap-server 10.0.0.9:9092 --topic test --from-beginning -# DELETE -# bin/kafka-topics.sh --bootstrap-server localhost:9092 --delete --topic test - - -# local -python3 setup.py sdist bdist_wheel -pip3 install dist/*.whl -# SCP: all nodes -scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/ -scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-239-171-181.compute-1.amazonaws.com:/home/hadoop/ -scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl 
hadoop@ec2-3-230-154-170.compute-1.amazonaws.com:/home/hadoop/ -scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-232-52-219.compute-1.amazonaws.com:/home/hadoop/ -scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-70-210.compute-1.amazonaws.com:/home/hadoop/ -scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-243-101.compute-1.amazonaws.com:/home/hadoop/ - - -sudo python3 -m pip install *.whl - - -# SCP head node -# ToDo: zip build for cadCAD OR give py library after whl install -scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/dist/distroduce.zip hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/ -scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/messaging_sim.py hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/ -#scp -i ~/.ssh/joshua-IAM-keypair.pem dist/distroduce.zip hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/ -#scp -i ~/.ssh/joshua-IAM-keypair.pem examples/event_bench/main.py hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/ - - -# Run Window: Head Node -spark-submit --master yarn messaging_app.py -# spark-submit --master yarn --py-files distroduce.zip main.py - -# Cluster Config -#[ -# { -# "Classification": "spark-env", -# "Configurations": [ -# { -# "Classification": "export", -# "ConfigurationProperties": { -# "PYSPARK_PYTHON": "/usr/bin/python3", -# "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3" -# } -# } -# ] -# }, -# { -# "Classification": "spark-defaults", -# "ConfigurationProperties": { -# "spark.sql.execution.arrow.enabled": "true" -# } -# }, -# { -# "Classification": "spark", -# "Properties": { -# "maximizeResourceAllocation": "true" -# } -# } -#] +yes | sudo python3 -m pip install --upgrade pip +yes | sudo python3 -m pip install pathos kafka-python +wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/dist/cadCAD-0.0.2-py3-none-any.whl +yes | sudo python3 -m pip install cadCAD-0.0.2-py3-none-any.whl +wget 
https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/dist/distroduce.zip +wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/messaging_sim.py +# check for master node +PRIVATE_IP=localhost +IS_MASTER=false +if grep -i isMaster /mnt/var/lib/info/instance.json | grep -i true; +then + IS_MASTER=true + sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh + wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz + tar -xzf kafka_2.12-2.3.0.tgz + kafka_2.12-2.3.0/bin/zookeeper-server-start.sh config/zookeeper.properties & + kafka_2.12-2.3.0/bin/kafka-server-start.sh config/server.properties & + PRIVATE_IP=`hostname -I | xargs` + kafka_2.12-2.3.0/bin/kafka-topics.sh --create --bootstrap-server ${PRIVATE_IP}:9092 \ + --replication-factor 1 --partitions 1 --topic test +fi diff --git a/distroduce/bash/gen_bootstrap.sh b/distroduce/bash/gen_bootstrap.sh new file mode 100644 index 0000000..03471d7 --- /dev/null +++ b/distroduce/bash/gen_bootstrap.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SSH into all machines +ssh -i ~/.ssh/joshua-IAM-keypair.pem hadoop@ +sudo python3 -m pip install --upgrade pip +sudo python3 -m pip install pathos kafka-python + +# SetUp Window: head node +cd ~ +#sudo python3 -m pip install --upgrade pip +#sudo python3 -m pip install pathos kafka-python +sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh +wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz +tar -xzf kafka_2.12-2.3.0.tgz +cd kafka_2.12-2.3.0 +bin/zookeeper-server-start.sh config/zookeeper.properties & +bin/kafka-server-start.sh config/server.properties & +# get ip +bin/kafka-topics.sh --create --bootstrap-server 10.0.0.9:9092 --replication-factor 1 --partitions 1 --topic test +# bin/kafka-topics.sh --list --bootstrap-server 10.0.0.9:9092 + +# Consume (Window): head node +kafka_2.12-2.3.0/bin/kafka-console-consumer.sh --bootstrap-server 10.0.0.9:9092 --topic 
test --from-beginning +# DELETE +# bin/kafka-topics.sh --bootstrap-server localhost:9092 --delete --topic test + + +# local +python3 setup.py sdist bdist_wheel +pip3 install dist/*.whl +# SCP: all nodes +# S3 duuhhhh +# git clone specific file +scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/ +scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-239-171-181.compute-1.amazonaws.com:/home/hadoop/ +scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-3-230-154-170.compute-1.amazonaws.com:/home/hadoop/ +scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-232-52-219.compute-1.amazonaws.com:/home/hadoop/ +scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-70-210.compute-1.amazonaws.com:/home/hadoop/ +scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-243-101.compute-1.amazonaws.com:/home/hadoop/ + + +sudo python3 -m pip install *.whl + + +# SCP head node +# ToDo: zip build for cadCAD OR give py library after whl install +scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/dist/distroduce.zip hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/ +scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/messaging_sim.py hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/ +#scp -i ~/.ssh/joshua-IAM-keypair.pem dist/distroduce.zip hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/ +#scp -i ~/.ssh/joshua-IAM-keypair.pem examples/event_bench/main.py hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/ + + +# Run Window: Head Node +#spark-submit --master yarn messaging_app.py +spark-submit --master yarn --py-files distroduce.zip main.py + +# Cluster Config +#[ +# { +# "Classification": "spark-env", +# "Configurations": [ +# { +# "Classification": "export", +# "ConfigurationProperties": { +# "PYSPARK_PYTHON": "/usr/bin/python3", +# "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3" +# } +# } +# ] +# }, +# { +# "Classification": 
"spark-defaults", +# "ConfigurationProperties": { +# "spark.sql.execution.arrow.enabled": "true" +# } +# }, +# { +# "Classification": "spark", +# "Properties": { +# "maximizeResourceAllocation": "true" +# } +# } +#] + diff --git a/distroduce/bash/launch_cluster.sh b/distroduce/bash/launch_cluster.sh new file mode 100644 index 0000000..c49de60 --- /dev/null +++ b/distroduce/bash/launch_cluster.sh @@ -0,0 +1,9 @@ +#!/bin/bash +aws emr create-cluster --auto-scaling-role EMR_AutoScaling_DefaultRole \ +--applications Name=Hadoop Name=Hive Name=Spark \ +--bootstrap-actions '[{"Path":"s3://insightde/emr/bootstraps/bootstrap.sh","Name":"bootstrap"}]' \ +--ebs-root-volume-size 10 \ +--ec2-attributes '{"KeyName":"joshua-IAM-keypair","InstanceProfile":"EMR_EC2_DefaultRole","SubnetId":"subnet-0034e615b047fd112","EmrManagedSlaveSecurityGroup":"sg-08e546ae27d86d6a3","EmrManagedMasterSecurityGroup":"sg-08e546ae27d86d6a3"}' \ +--service-role EMR_DefaultRole --release-label emr-5.26.0 --name 'test_cluster12d' \ +--instance-groups '[{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"MASTER","InstanceType":"m4.xlarge","Name":"Master - 1"},{"InstanceCount":5,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"CORE","InstanceType":"m4.xlarge","Name":"Core - 2"}]' --configurations '[{"Classification":"spark-env","Properties":{},"Configurations":[{"Classification":"export","Properties":{}}]},{"Classification":"spark-defaults","Properties":{}},{"Classification":"spark","Properties":{"maximizeResourceAllocation":"true"}}]' \ +--scale-down-behavior TERMINATE_AT_TASK_COMPLETION --region us-east-1 \ No newline at end of file diff --git a/distroduce/bash/spark_submit.sh b/distroduce/bash/spark_submit.sh new file mode 100644 index 0000000..0dbe39c --- /dev/null +++ 
b/distroduce/bash/spark_submit.sh @@ -0,0 +1,2 @@ +#!/bin/bash +spark-submit --master yarn --py-files distroduce.zip messaging_sim.py \ No newline at end of file diff --git a/distroduce/dist/distroduce.zip b/distroduce/dist/distroduce.zip index 0e42bec..866365d 100644 Binary files a/distroduce/dist/distroduce.zip and b/distroduce/dist/distroduce.zip differ diff --git a/distroduce/executor/spark/jobs/__init__.py b/distroduce/executor/spark/jobs/__init__.py index 5b442f9..9c78941 100644 --- a/distroduce/executor/spark/jobs/__init__.py +++ b/distroduce/executor/spark/jobs/__init__.py @@ -39,6 +39,6 @@ def distributed_produce( {'var_dict': t[4], 'states_lists': t[5], 'Ts': t[6], 'Ns': t[7]} ) for t in val_params ] - results_rdd = spark_context.parallelize(val_params_kv).coalesce(35) + results_rdd = spark_context.parallelize(val_params_kv).coalesce(1) return list(results_rdd.map(lambda x: simulate(*x)).collect()) diff --git a/distroduce/messaging_sim.py b/distroduce/messaging_sim.py index b982000..4134a8e 100644 --- a/distroduce/messaging_sim.py +++ b/distroduce/messaging_sim.py @@ -13,7 +13,7 @@ from distroduce.executor.spark import distributed_produce from distroduce.action_policies import enter_action, message_actions, exit_action from distroduce.state_updates import send_message, count_messages, add_send_time, current_time -# State Update +# State Updates variables = { 'client_a': send_message('client_a'), 'client_b': send_message('client_b'), @@ -76,6 +76,7 @@ if __name__ == "__main__": policy_ops=[lambda a, b: a + b] ) + # parameterize localhost kafkaConfig = {'send_topic': 'test', 'producer_config': {'bootstrap_servers': 'localhost:9092', 'acks': 'all'}} dist_proc_ctx = ExecutionContext(context=exec_mode.dist_proc, method=distributed_produce, kafka_config=kafkaConfig) run = Executor(exec_context=dist_proc_ctx, configs=configs, spark_context=sc)