diff --git a/dist/cadCAD-0.0.2-py3-none-any.whl b/dist/cadCAD-0.0.2-py3-none-any.whl index 758253f..1ae1e7d 100644 Binary files a/dist/cadCAD-0.0.2-py3-none-any.whl and b/dist/cadCAD-0.0.2-py3-none-any.whl differ diff --git a/dist/cadCAD-0.0.2.tar.gz b/dist/cadCAD-0.0.2.tar.gz index 86a900d..a66ed02 100644 Binary files a/dist/cadCAD-0.0.2.tar.gz and b/dist/cadCAD-0.0.2.tar.gz differ diff --git a/distroduce/bash/bootstrap.sh b/distroduce/bash/distroduce.sh similarity index 51% rename from distroduce/bash/bootstrap.sh rename to distroduce/bash/distroduce.sh index 6d900d6..50929af 100644 --- a/distroduce/bash/bootstrap.sh +++ b/distroduce/bash/distroduce.sh @@ -4,8 +4,7 @@ yes | sudo python3 -m pip install --upgrade pip yes | sudo python3 -m pip install pathos kafka-python wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/dist/cadCAD-0.0.2-py3-none-any.whl yes | sudo python3 -m pip install cadCAD-0.0.2-py3-none-any.whl -wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/dist/distroduce.zip -wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/messaging_sim.py + # check for master node PRIVATE_IP=localhost @@ -13,12 +12,15 @@ IS_MASTER=false if grep -i isMaster /mnt/var/lib/info/instance.json | grep -i true; then IS_MASTER=true + wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/dist/distroduce.zip + wget https://raw.githubusercontent.com/JEJodesty/cadCAD/dev/distroduce/messaging_sim.py sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh wget http://apache.spinellicreations.com/kafka/2.3.0/kafka_2.12-2.3.0.tgz tar -xzf kafka_2.12-2.3.0.tgz - kafka_2.12-2.3.0/bin/zookeeper-server-start.sh config/zookeeper.properties & - kafka_2.12-2.3.0/bin/kafka-server-start.sh config/server.properties & - PRIVATE_IP=`hostname -I | xargs` - kafka_2.12-2.3.0/bin/kafka-topics.sh --create --bootstrap-server ${PRIVATE_IP}:9092 \ - --replication-factor 1 --partitions 1 --topic test + cd kafka_2.12-2.3.0 + bin/zookeeper-server-start.sh config/zookeeper.properties & + bin/kafka-server-start.sh config/server.properties & + bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic test +# PRIVATE_IP=`hostname -I | xargs` +# bin/kafka-topics.sh --create --bootstrap-server ${PRIVATE_IP}:9092 --replication-factor 1 --partitions 1 --topic test fi diff --git a/distroduce/bash/gen_bootstrap.sh b/distroduce/bash/gen_bootstrap.sh index 03471d7..5a0a9cd 100644 --- a/distroduce/bash/gen_bootstrap.sh +++ b/distroduce/bash/gen_bootstrap.sh @@ -19,7 +19,7 @@ bin/kafka-topics.sh --create --bootstrap-server 10.0.0.9:9092 --replication-fact # bin/kafka-topics.sh --list --bootstrap-server 10.0.0.9:9092 # Consume (Window): head node -kafka_2.12-2.3.0/bin/kafka-console-consumer.sh --bootstrap-server 10.0.0.9:9092 --topic test --from-beginning +bin/kafka-console-consumer.sh --bootstrap-server 10.0.0.9:9092 --topic test --from-beginning # DELETE # bin/kafka-topics.sh --bootstrap-server localhost:9092 --delete --topic test diff --git a/distroduce/bash/spark.json b/distroduce/bash/spark.json new file mode 100644 index 0000000..affb901 --- /dev/null +++ b/distroduce/bash/spark.json @@ -0,0 +1,26 @@ +[ + { + "Classification": "spark-env", + "Configurations": [ + { + "Classification": "export", + "ConfigurationProperties": { + "PYSPARK_PYTHON": "/usr/bin/python3", + "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3" + } + } + ] + }, + { + "Classification": "spark-defaults", + "ConfigurationProperties": { + "spark.sql.execution.arrow.enabled": "true" + } + }, + { + "Classification": "spark", + "Properties": { + "maximizeResourceAllocation": "true" + } + } +] diff --git a/distroduce/bash/spark_submit.sh b/distroduce/bash/spark_submit.sh index 0dbe39c..b9bd344 100644 --- a/distroduce/bash/spark_submit.sh +++ b/distroduce/bash/spark_submit.sh @@ -1,2 +1,3 @@ #!/bin/bash +sudo sed -i -e '$a\export PYSPARK_PYTHON=/usr/bin/python3' /etc/spark/conf/spark-env.sh spark-submit --master yarn --py-files distroduce.zip messaging_sim.py \ No newline at end of file diff --git a/distroduce/messaging_sim.py b/distroduce/messaging_sim.py index 4134a8e..68ad8bc 100644 --- a/distroduce/messaging_sim.py +++ b/distroduce/messaging_sim.py @@ -76,8 +76,8 @@ if __name__ == "__main__": policy_ops=[lambda a, b: a + b] ) - # parmeterize localhost - kafkaConfig = {'send_topic': 'test', 'producer_config': {'bootstrap_servers': 'localhost:9092', 'acks': 'all'}} + # parmeterize localhost, PRIVATE_IP=`hostname -I | xargs` + kafkaConfig = {'send_topic': 'test', 'producer_config': {'bootstrap_servers': '10.0.0.7:9092', 'acks': 'all'}} dist_proc_ctx = ExecutionContext(context=exec_mode.dist_proc, method=distributed_produce, kafka_config=kafkaConfig) run = Executor(exec_context=dist_proc_ctx, configs=configs, spark_context=sc)