in prog
This commit is contained in:
parent 662046f34f
commit 71c334d5f6
README.md | 39 changed lines
@@ -10,17 +10,32 @@ by BlockScience
 through simulation, with support for Monte Carlo methods, A/B testing and parameter sweeping.
 
 # Getting Started
 
-## 1. Install cadCAD
-
-cadCAD requires [Python 3](https://www.python.org/downloads/)
-
-cadCAD can be installed using Python's package manager, [pip](https://pypi.org/project/cadCAD/)
-
-```bash
-pip install cadCAD
-```
-
-## 2. Learn the basics
-
-Check out our tutorials (available both as [Jupyter Notebooks](tutorials) and
-[videos](https://www.youtube.com/watch?v=uJEiYHRWA9g&list=PLmWm8ksQq4YKtdRV-SoinhV6LbQMgX1we)) to familiarize yourself
-with some system modelling concepts and cadCAD terminology. Alternatively, go straight to the
-[documentation](documentation).
-
-## 3. Connect
+## 1. Installation
+
+Requires [Python 3](https://www.python.org/downloads/)
+
+**Option A: Install Using [pip](https://pypi.org/project/cadCAD/)**
+```bash
+pip3 install cadCAD
+```
+
+**Option B: Build From Source**
+```bash
+pip3 install -r requirements.txt
+python3 setup.py sdist bdist_wheel
+pip3 install dist/*.whl
+```
+
+## 2. Learn the basics
+
+**Tutorials:** available both as [Jupyter Notebooks](tutorials) and [videos](https://www.youtube.com/watch?v=uJEiYHRWA9g&list=PLmWm8ksQq4YKtdRV-SoinhV6LbQMgX1we)
+
+Familiarize yourself with some system modelling concepts and cadCAD terminology.
+
+## 3. Documentation
+
+* [System Model Configuration](documentation/Simulation_Configuration.md)
+* [System Simulation Execution](documentation/Simulation_Execution.md)
+* [Policy Aggregation](documentation/Policy_Aggregation.md)
+* [System Model Parameter Sweep](documentation/System_Model_Parameter_Sweep.md)
+
+## 4. Connect
 
 Find other cadCAD users at our [Discourse](https://community.cadcad.org/). We are a small but rapidly growing community.
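A quick sanity check after either install path (a minimal sketch, not part of this commit; it relies only on the `name` and `configs` attributes visible in the package `__init__.py` hunk below, which also makes importing the package print the new banner):

```python
# Hypothetical post-install smoke test.
import cadCAD  # printing the ASCII banner is a side effect of this import

print(cadCAD.name)     # "cadCAD"
print(cadCAD.configs)  # [] until simulation Configurations are appended
```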
@@ -1,2 +1,11 @@
 name = "cadCAD"
 configs = []
+
+print(r'''
+                   __________   ____
+  ________ __ _____/ ____/ |  / __ \
+ / ___/ __` / __  / / / /| | / / / /
+/ /__/ /_/ / /_/ / /___/ ___ |/ /_/ /
+\___/\__,_/\__,_/\____/_/ |_/_____/
+by BlockScience
+''')
@@ -3,6 +3,7 @@ from typing import Dict, Callable, List, Tuple
 from functools import reduce
 import pandas as pd
 from pandas.core.frame import DataFrame
+# import cloudpickle, pickle
 
 from cadCAD import configs
 from cadCAD.utils import key_filter
@@ -101,7 +102,10 @@ class Identity:
     def state_identity(self, k: str) -> Callable:
         return lambda var_dict, sub_step, sL, s, _input: (k, s[k])
 
+    # state_identity = cloudpickle.dumps(state_identity)
+
     def apply_identity_funcs(self, identity: Callable, df: DataFrame, cols: List[str]) -> List[DataFrame]:
+        # identity = pickle.loads(identity)
         def fillna_with_id_func(identity, df, col):
             return df[[col]].fillna(value=identity(col))
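The commented-out `cloudpickle`/`pickle` lines relate to the pickling errors quoted in the engine hunks further down: the standard library pickles functions by qualified name, so the lambda returned by `state_identity` cannot cross process or executor boundaries, while cloudpickle serializes it by value. A minimal sketch of the difference (illustrative only, not part of this commit):

```python
import pickle
import cloudpickle

# Same shape as the lambda returned by Identity.state_identity
state_identity = lambda var_dict, sub_step, sL, s, _input: ('x', s['x'])

try:
    pickle.dumps(state_identity)  # fails: attribute lookup of '<lambda>' by name
except Exception as e:
    print(type(e).__name__)       # PicklingError

payload = cloudpickle.dumps(state_identity)  # serialized by value instead
restored = pickle.loads(payload)
print(restored({}, 0, [], {'x': 1}, {}))     # ('x', 1)
```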
@@ -12,7 +12,6 @@ ascii_art = r'''
 by Joshua E. Jodesty
 '''
 
-
 def distributed_produce(
         sc: SparkContext,
         spark_run,
@@ -2,6 +2,7 @@ from pprint import pprint
 from typing import Callable, Dict, List, Any, Tuple
 from pathos.multiprocessing import ProcessingPool as PPool
 from pandas.core.frame import DataFrame
+from pyspark.context import SparkContext
 
 from cadCAD.utils import flatten
 from cadCAD.configuration import Configuration, Processor
@@ -17,6 +18,7 @@ EnvProcessesType = Dict[str, Callable]
 class ExecutionMode:
     single_proc = 'single_proc'
     multi_proc = 'multi_proc'
+    dist_proc = 'dist_proc'
 
 
 def single_proc_exec(
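With the new constant, all three execution modes can be selected through `ExecutionContext` (a usage sketch mirroring the test script added later in this commit):

```python
from cadCAD.engine import ExecutionMode, ExecutionContext

exec_mode = ExecutionMode()
single_ctx = ExecutionContext(context=exec_mode.single_proc)  # -> single_proc_exec
multi_ctx = ExecutionContext(context=exec_mode.multi_proc)    # -> parallelize_simulations
dist_ctx = ExecutionContext(context=exec_mode.dist_proc)      # -> distributed_simulations (new)
```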
@@ -52,7 +54,7 @@ def parallelize_simulations(
         userIDs,
         sessionIDs,
         simulationIDs,
-        runIDs: List[int],
+        runIDs: List[int]
 ):
     params = list(zip(simulation_execs, var_dict_list, states_lists, configs_structs, env_processes_list, Ts, Ns,
                       userIDs, sessionIDs, simulationIDs, runIDs))
@@ -60,6 +62,38 @@ def parallelize_simulations(
         results = p.map(lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10]), params)
     return results
 
+
+def distributed_simulations(
+        sc: SparkContext,
+        simulation_execs: List[Callable],
+        var_dict_list: List[VarDictType],
+        states_lists: List[StatesListsType],
+        configs_structs: List[ConfigsType],
+        env_processes_list: List[EnvProcessesType],
+        Ts: List[range],
+        Ns: List[int],
+        userIDs,
+        sessionIDs,
+        simulationIDs,
+        runIDs: List[int]
+):
+    params = list(zip(simulation_execs, var_dict_list, states_lists, configs_structs, env_processes_list, Ts, Ns,
+                      userIDs, sessionIDs, simulationIDs, runIDs))
+    pprint(runIDs)
+    with PPool(len(configs_structs)) as p:
+        results = p.map(lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10]), params)
+    return results
+
+# _pickle.PicklingError: Can't pickle <function <lambda> at 0x1115149e0>: attribute lookup <lambda> on
+# simulations.regression_tests.config1 failed
+# simulation_execs, env_processes_list
+# Configuration Layer: configs_structs
+# AttributeError: Can't pickle local object 'Identity.state_identity.<locals>.<lambda>'
+# pprint(configs_structs)
+# sc.parallelize([configs_structs])
+# exit()
+# result = sc.parallelize(params) \
+#     .map(lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10])) \
+#     .collect()
+
 class ExecutionContext:
     def __init__(self, context: str = ExecutionMode.multi_proc) -> None:
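Note that `distributed_simulations` currently still runs on a local `pathos` pool; the commented-out block records the intended Spark path and the pickling errors that blocked it. Assuming the configuration lambdas were first made serializable (PySpark itself ships closures to executors with cloudpickle), the intended form would look roughly like this sketch (hypothetical, not part of this commit):

```python
from pyspark.context import SparkContext

def spark_distributed_simulations(sc: SparkContext, params):
    # params: tuples of (simulation_exec, var_dict, states_list, config,
    # env_processes, T, N, userID, sessionID, simulationID, runID)
    return sc.parallelize(params) \
             .map(lambda t: t[0](*t[1:])) \
             .collect()
```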
@@ -70,10 +104,13 @@ class ExecutionContext:
             self.method = single_proc_exec
         elif context == 'multi_proc':
             self.method = parallelize_simulations
+        elif context == 'dist_proc':
+            self.method = distributed_simulations
 
 
 class Executor:
-    def __init__(self, exec_context: ExecutionContext, configs: List[Configuration]) -> None:
+    def __init__(self, exec_context: ExecutionContext, configs: List[Configuration], spark_context: SparkContext = None) -> None:
+        self.sc = spark_context
         self.SimExecutor = SimExecutor
         self.exec_method = exec_context.method
         self.exec_context = exec_context.name
@@ -83,14 +120,6 @@ class Executor:
         config_proc = Processor()
         create_tensor_field = TensorFieldReport(config_proc).create_tensor_field
 
-        print(r'''
-                   __________   ____
-  ________ __ _____/ ____/ |  / __ \
- / ___/ __` / __  / / / /| | / / / /
-/ /__/ /_/ / /_/ / /___/ ___ |/ /_/ /
-\___/\__,_/\__,_/\____/_/ |_/_____/
-by BlockScience
-''')
         print(f'Execution Mode: {self.exec_context + ": " + str(self.configs)}')
         print(f'Configurations: {self.configs}')
@@ -141,5 +170,16 @@ class Executor:
                 results.append((flatten(result), create_tensor_field(partial_state_updates, ep)))
 
             final_result = results
+        elif self.exec_context == ExecutionMode.dist_proc:
+            # if len(self.configs) > 1:
+            simulations = self.exec_method(
+                self.sc,
+                simulation_execs, var_dict_list, states_lists, configs_structs, env_processes_list, Ts, Ns,
+                userIDs, sessionIDs, simulationIDs, runIDs
+            )
+            results = []
+            for result, partial_state_updates, ep in list(zip(simulations, partial_state_updates, eps)):
+                results.append((flatten(result), create_tensor_field(partial_state_updates, ep)))
+            final_result = None
 
         return final_result
@@ -5,4 +5,5 @@ fn
 tabulate
 funcy
 pyspark
 kafka-python
+cloudpickle
@@ -0,0 +1,31 @@
+from pprint import pprint
+
+import pandas as pd
+from tabulate import tabulate
+
+from simulations.test_executions.spark.session import spark_context as sc
+from simulations.regression_tests import config1, config2
+
+from cadCAD.engine import ExecutionMode, ExecutionContext, Executor
+from cadCAD.utils import arrange_cols
+from cadCAD import configs
+
+exec_mode = ExecutionMode()
+
+print("Simulation Execution: Distributed Execution")
+dist_proc_ctx = ExecutionContext(context=exec_mode.dist_proc)
+run = Executor(exec_context=dist_proc_ctx, configs=configs, spark_context=sc)
+# pprint(dist_proc_ctx)
+
+# print(configs)
+i = 0
+config_names = ['config1', 'config2']
+for raw_result, tensor_field in run.execute():
+    result = arrange_cols(pd.DataFrame(raw_result), False)
+    print()
+    # print(f"Tensor Field: {config_names[i]}")
+    print(tabulate(tensor_field, headers='keys', tablefmt='psql'))
+    print("Output:")
+    print(tabulate(result, headers='keys', tablefmt='psql'))
+    print()
+    i += 1
@@ -0,0 +1,15 @@
+from pyspark.sql import SparkSession
+from pyspark.context import SparkContext
+import os
+
+os.environ['PYSPARK_PYTHON'] = '/usr/local/bin/python3'
+os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/local/bin/python3'
+
+spark = SparkSession\
+    .builder\
+    .appName("distroduce")\
+    .getOrCreate()
+
+spark_context: SparkContext = spark.sparkContext
+print(f"Spark UI: {spark_context.uiWebUrl}")
+print()
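Pinning the worker and driver interpreters through `os.environ` must happen before the JVM gateway starts; an alternative (an assumption, not used by this commit) is to pass the equivalent Spark properties directly to the session builder:

```python
from pyspark.sql import SparkSession

# Equivalent interpreter pinning via Spark conf rather than os.environ.
spark = SparkSession.builder \
    .appName("distroduce") \
    .config("spark.pyspark.python", "/usr/local/bin/python3") \
    .config("spark.pyspark.driver.python", "/usr/local/bin/python3") \
    .getOrCreate()
```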