MI5
commit 6c235f9914
parent cdfc4b474f
Binary file not shown.
Binary file not shown.
@@ -1,81 +1,24 @@
#!/bin/bash
# SSH into all machines
ssh -i ~/.ssh/joshua-IAM-keypair.pem hadoop@
sudo python3 -m pip install --upgrade pip
sudo python3 -m pip install pathos kafka-python
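# Sanity-check that the installs took (sketch):
python3 -c 'import pathos, kafka; print("deps ok")'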

# SetUp Window: head node
cd ~
#sudo python3 -m pip install --upgrade pip
#sudo python3 -m pip install pathos kafka-python
sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh
wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz
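# If that mirror goes away, old releases remain on the Apache archive:
# wget https://archive.apache.org/dist/kafka/2.3.0/kafka_2.12-2.3.0.tgz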
tar -xzf kafka_2.12-2.3.0.tgz
cd kafka_2.12-2.3.0
bin/zookeeper-server-start.sh config/zookeeper.properties &
bin/kafka-server-start.sh config/server.properties &
# get ip
bin/kafka-topics.sh --create --bootstrap-server 10.0.0.9:9092 --replication-factor 1 --partitions 1 --topic test
# bin/kafka-topics.sh --list --bootstrap-server 10.0.0.9:9092

# Consume (Window): head node
kafka_2.12-2.3.0/bin/kafka-console-consumer.sh --bootstrap-server 10.0.0.9:9092 --topic test --from-beginning
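# To verify end to end, produce a few test messages from another window
# (sketch; assumes the same broker address as above):
kafka_2.12-2.3.0/bin/kafka-console-producer.sh --broker-list 10.0.0.9:9092 --topic test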
# DELETE
# bin/kafka-topics.sh --bootstrap-server localhost:9092 --delete --topic test


# local
python3 setup.py sdist bdist_wheel
pip3 install dist/*.whl
# SCP: all nodes
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-239-171-181.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-3-230-154-170.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-232-52-219.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-70-210.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-243-101.compute-1.amazonaws.com:/home/hadoop/


sudo python3 -m pip install *.whl


# SCP head node
# ToDo: zip build for cadCAD OR give py library after whl install
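# One way to produce that zip (sketch; the package layout is an assumption):
# (cd distroduce && zip -r dist/distroduce.zip . -x 'dist/*')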
scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/dist/distroduce.zip hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/messaging_sim.py hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/
#scp -i ~/.ssh/joshua-IAM-keypair.pem dist/distroduce.zip hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/
#scp -i ~/.ssh/joshua-IAM-keypair.pem examples/event_bench/main.py hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/


# Run Window: Head Node
spark-submit --master yarn messaging_app.py
# spark-submit --master yarn --py-files distroduce.zip main.py

# Cluster Config
#[
#  {
#    "Classification": "spark-env",
#    "Configurations": [
#      {
#        "Classification": "export",
#        "Properties": {
#          "PYSPARK_PYTHON": "/usr/bin/python3",
#          "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3"
#        }
#      }
#    ]
#  },
#  {
#    "Classification": "spark-defaults",
#    "Properties": {
#      "spark.sql.execution.arrow.enabled": "true"
#    }
#  },
#  {
#    "Classification": "spark",
#    "Properties": {
#      "maximizeResourceAllocation": "true"
#    }
#  }
#]
yes | sudo python3 -m pip install --upgrade pip
yes | sudo python3 -m pip install pathos kafka-python
wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/dist/cadCAD-0.0.2-py3-none-any.whl
yes | sudo python3 -m pip install cadCAD-0.0.2-py3-none-any.whl
wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/dist/distroduce.zip
wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/messaging_sim.py

# check for master node
PRIVATE_IP=localhost
IS_MASTER=false
if grep -i isMaster /mnt/var/lib/info/instance.json | grep -i true;
then
    IS_MASTER=true
    sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh
    wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz
    tar -xzf kafka_2.12-2.3.0.tgz
    # config paths must be prefixed here, since this script never cd's into
    # the Kafka directory
    kafka_2.12-2.3.0/bin/zookeeper-server-start.sh kafka_2.12-2.3.0/config/zookeeper.properties &
    kafka_2.12-2.3.0/bin/kafka-server-start.sh kafka_2.12-2.3.0/config/server.properties &
    PRIVATE_IP=$(hostname -I | xargs)
    kafka_2.12-2.3.0/bin/kafka-topics.sh --create --bootstrap-server ${PRIVATE_IP}:9092 \
        --replication-factor 1 --partitions 1 --topic test
fi
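# Equivalent master-node check with jq, if it is installed (sketch):
# [ "$(jq -r .isMaster /mnt/var/lib/info/instance.json)" = "true" ] && IS_MASTER=true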

@@ -0,0 +1,83 @@
#!/bin/bash
# SSH into all machines
ssh -i ~/.ssh/joshua-IAM-keypair.pem hadoop@
sudo python3 -m pip install --upgrade pip
sudo python3 -m pip install pathos kafka-python

# SetUp Window: head node
cd ~
#sudo python3 -m pip install --upgrade pip
#sudo python3 -m pip install pathos kafka-python
sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh
wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz
tar -xzf kafka_2.12-2.3.0.tgz
cd kafka_2.12-2.3.0
bin/zookeeper-server-start.sh config/zookeeper.properties &
bin/kafka-server-start.sh config/server.properties &
# get ip
bin/kafka-topics.sh --create --bootstrap-server 10.0.0.9:9092 --replication-factor 1 --partitions 1 --topic test
# bin/kafka-topics.sh --list --bootstrap-server 10.0.0.9:9092

# Consume (Window): head node
kafka_2.12-2.3.0/bin/kafka-console-consumer.sh --bootstrap-server 10.0.0.9:9092 --topic test --from-beginning
# DELETE
# bin/kafka-topics.sh --bootstrap-server localhost:9092 --delete --topic test


# local
python3 setup.py sdist bdist_wheel
pip3 install dist/*.whl
# SCP: all nodes
# ToDo: push these artifacts to S3 instead of scp-ing to every node
# (or pull just the needed files from the git repo)
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-239-171-181.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-3-230-154-170.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-18-232-52-219.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-70-210.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem dist/*.whl hadoop@ec2-34-231-243-101.compute-1.amazonaws.com:/home/hadoop/


sudo python3 -m pip install *.whl


# SCP head node
# ToDo: zip build for cadCAD OR give py library after whl install
scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/dist/distroduce.zip hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/
scp -i ~/.ssh/joshua-IAM-keypair.pem distroduce/messaging_sim.py hadoop@ec2-18-206-12-181.compute-1.amazonaws.com:/home/hadoop/
#scp -i ~/.ssh/joshua-IAM-keypair.pem dist/distroduce.zip hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/
#scp -i ~/.ssh/joshua-IAM-keypair.pem examples/event_bench/main.py hadoop@ec2-18-232-54-233.compute-1.amazonaws.com:/home/hadoop/


# Run Window: Head Node
#spark-submit --master yarn messaging_app.py
spark-submit --master yarn --py-files distroduce.zip main.py
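# --py-files ships distroduce.zip to every executor and adds it to the
# executors' PYTHONPATH, so `import distroduce` resolves cluster-wide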

# Cluster Config
#[
#  {
#    "Classification": "spark-env",
#    "Configurations": [
#      {
#        "Classification": "export",
#        "Properties": {
#          "PYSPARK_PYTHON": "/usr/bin/python3",
#          "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3"
#        }
#      }
#    ]
#  },
#  {
#    "Classification": "spark-defaults",
#    "Properties": {
#      "spark.sql.execution.arrow.enabled": "true"
#    }
#  },
#  {
#    "Classification": "spark",
#    "Properties": {
#      "maximizeResourceAllocation": "true"
#    }
#  }
#]


@@ -0,0 +1,9 @@
#!/bin/bash
aws emr create-cluster --auto-scaling-role EMR_AutoScaling_DefaultRole \
    --applications Name=Hadoop Name=Hive Name=Spark \
    --bootstrap-actions '[{"Path":"s3://insightde/emr/bootstraps/bootstrap.sh","Name":"bootstrap"}]' \
    --ebs-root-volume-size 10 \
    --ec2-attributes '{"KeyName":"joshua-IAM-keypair","InstanceProfile":"EMR_EC2_DefaultRole","SubnetId":"subnet-0034e615b047fd112","EmrManagedSlaveSecurityGroup":"sg-08e546ae27d86d6a3","EmrManagedMasterSecurityGroup":"sg-08e546ae27d86d6a3"}' \
    --service-role EMR_DefaultRole --release-label emr-5.26.0 --name 'test_cluster12d' \
    --instance-groups '[{"InstanceCount":1,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"MASTER","InstanceType":"m4.xlarge","Name":"Master - 1"},{"InstanceCount":5,"EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"SizeInGB":32,"VolumeType":"gp2"},"VolumesPerInstance":2}]},"InstanceGroupType":"CORE","InstanceType":"m4.xlarge","Name":"Core - 2"}]' \
    --configurations '[{"Classification":"spark-env","Properties":{},"Configurations":[{"Classification":"export","Properties":{}}]},{"Classification":"spark-defaults","Properties":{}},{"Classification":"spark","Properties":{"maximizeResourceAllocation":"true"}}]' \
    --scale-down-behavior TERMINATE_AT_TASK_COMPLETION --region us-east-1
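# After submission, poll until the cluster reaches WAITING (sketch; the
# cluster id comes from the create-cluster output):
# aws emr describe-cluster --cluster-id <ClusterId> --query 'Cluster.Status.State'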

@@ -0,0 +1,2 @@
#!/bin/bash
spark-submit --master yarn --py-files distroduce.zip messaging_sim.py
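# If the job fails, the aggregated YARN logs are the quickest diagnosis
# (sketch; spark-submit prints the application id):
# yarn logs -applicationId <applicationId>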
Binary file not shown.
@@ -39,6 +39,6 @@ def distributed_produce(
{'var_dict': t[4], 'states_lists': t[5], 'Ts': t[6], 'Ns': t[7]}
) for t in val_params
]
-results_rdd = spark_context.parallelize(val_params_kv).coalesce(35)
+results_rdd = spark_context.parallelize(val_params_kv).coalesce(1)

return list(results_rdd.map(lambda x: simulate(*x)).collect())

@@ -13,7 +13,7 @@ from distroduce.executor.spark import distributed_produce
from distroduce.action_policies import enter_action, message_actions, exit_action
from distroduce.state_updates import send_message, count_messages, add_send_time, current_time

-# State Update
+# State Updates
variables = {
    'client_a': send_message('client_a'),
    'client_b': send_message('client_b'),
@@ -76,6 +76,7 @@ if __name__ == "__main__":
    policy_ops=[lambda a, b: a + b]
)

+# parameterize localhost
kafkaConfig = {'send_topic': 'test', 'producer_config': {'bootstrap_servers': 'localhost:9092', 'acks': 'all'}}
dist_proc_ctx = ExecutionContext(context=exec_mode.dist_proc, method=distributed_produce, kafka_config=kafkaConfig)
run = Executor(exec_context=dist_proc_ctx, configs=configs, spark_context=sc)
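# Note (assumption): producer_config appears to be passed straight through to
# kafka-python, i.e. KafkaProducer(bootstrap_servers='localhost:9092', acks='all')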