distroduce integration pt 1 in prog
This commit is contained in:
parent 71c334d5f6
commit db2bc8c8b8

@@ -1,8 +1,11 @@
from pprint import pprint
from typing import Callable, Dict, List, Any, Tuple
from pathos.multiprocessing import ProcessingPool as PPool
from pathos.multiprocessing import ThreadPool as TPool
from pandas.core.frame import DataFrame
from pyspark.context import SparkContext
from pyspark import cloudpickle
import pickle

from cadCAD.utils import flatten
from cadCAD.configuration import Configuration, Processor
@@ -63,7 +66,6 @@ def parallelize_simulations(
return results

def distributed_simulations(
sc: SparkContext,
simulation_execs: List[Callable],
var_dict_list: List[VarDictType],
states_lists: List[StatesListsType],
@@ -74,13 +76,99 @@ def distributed_simulations(
userIDs,
sessionIDs,
simulationIDs,
runIDs: List[int]
runIDs: List[int],
sc: SparkContext = None
):
params = list(zip(simulation_execs, var_dict_list, states_lists, configs_structs, env_processes_list, Ts, Ns,
userIDs, sessionIDs, simulationIDs, runIDs))
pprint(runIDs)
with PPool(len(configs_structs)) as p:
results = p.map(lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10]), params)
# params = list(zip(simulation_execs, var_dict_list, states_lists, configs_structs, env_processes_list, Ts, Ns,
# userIDs, sessionIDs, simulationIDs, runIDs))
# simulation_execs, configs_structs, env_processes_list

func_params = list(zip(userIDs, sessionIDs, simulationIDs, runIDs, simulation_execs, configs_structs, env_processes_list))
# val_params = list(zip(userIDs, sessionIDs, simulationIDs, runIDs, var_dict_list, states_lists, Ts, Ns))
# vals_rdd = sc.parallelize(val_params).map(
# lambda t: {'user_id': t[0], 'session_id': t[1], 'simulation_id': t[2], 'run_id': t[3],
# 'var_dict': t[4], 'states_lists': t[5], 'Ts': t[6], 'Ns': t[7]}
# )
# vals_rdd.foreach(print)

# func_params_dicts = dict(map(
# lambda t: {'user_id': t[0], 'session_id': t[1], 'simulation_id': t[2], 'run_id': t[3],
# 'executions': {'sim_exec': t[4], 'configs': t[5], 'env_proc': t[6]}},
# func_params
# ))


# func_params_dicts = dict(list(map(
# lambda t: {'key': (t[0], t[1], t[2], t[3]),
# 'executions': {'sim_exec': t[4], 'configs': t[5], 'env_proc': t[6]}},
# func_params
# )))

func_keys = [(t[0], t[1], t[2], t[3]) for t in func_params]
func_values = [(t[4], t[5], t[6]) for t in func_params]

# map (user_id, session_id, simulation_id, run_id) -> (sim_exec, configs, env_proc)
func_params_dicts = dict(zip(func_keys, func_values))

# func_params_dicts = dict([{(t[0], t[1], t[2], t[3]): {'sim_exec': t[4], 'configs': t[5], 'env_proc': t[6]}} for t in func_params])

pprint(func_params_dicts)

# val_params = list(zip(userIDs, sessionIDs, simulationIDs, runIDs, var_dict_list, states_lists, Ts, Ns))
# vals_rdd = sc.parallelize(val_params).map(
# lambda t: {'key': (t[0], t[1], t[2], t[3]),
# 'var_dict': t[4], 'states_lists': t[5], 'Ts': t[6], 'Ns': t[7]}
# )


# val_params_dicts = list(map(
# lambda t: {'user_id': t[0], 'session_id': t[1], 'simulation_id': t[2], 'run_id': t[3],
# 'var_dict': t[4], 'states_lists': t[5], 'Ts': t[6], 'Ns': t[7]},
# val_params
# ))
# pprint(val_params_dicts)
# print("Execution: simulation_execs")
# print("Configuration: configs_structs, env_processes_list")
# print()

exit()

# Pickle send to worker
# atom = params[0]
# # pprint(atom)
# sim_exec = atom[0]
# configs_struct = atom[3]
# env_processes = atom[4]
# pickled_sim_exec = cloudpickle.dumps(sim_exec)
# pickled_configs = cloudpickle.dumps(configs_struct)
# pickled_env_procs = cloudpickle.dumps(env_processes)
#
# def cucumber(pickled):
# unpickled = pickle.loads(pickled)
# return unpickled
#
# sc.parallelize([pickled_sim_exec]).map(cucumber).collect()


# configs_structs, simulation_execs, env_processes_list
results = [t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10], sc) for t in params]

# def pickle_magic(cucumber):
# cucumber[]
# cloudpickle.dump()
# cucumber = pickle.loads(pickled)


# Serialize each parameter tuple explicitly with cloudpickle, then unpickle on
# the workers before invoking the simulation executor.
pickled_params = [cloudpickle.dumps(t) for t in params]
results = sc.parallelize(pickled_params).map(pickle.loads).map(
lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10])
).collect()
# with PPool(len(configs_structs)) as p:
# results = p.map(lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10], sc), params)
return results

# _pickle.PicklingError: Can't pickle <function <lambda> at 0x1115149e0>: attribute lookup <lambda> on
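
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this commit): a minimal cloudpickle round
# trip through Spark, showing the pattern the commented-out "cucumber"
# experiment above is reaching for -- serialize a callable on the driver, ship
# the bytes as ordinary RDD data, then unpickle and call it on the workers.
# `square` and `payloads` are illustrative names, not cadCAD API.
from pyspark import SparkContext, cloudpickle
import pickle

sc = SparkContext.getOrCreate()

def square(x):
    return x * x

pickled_fn = cloudpickle.dumps(square)          # bytes travel like any other value
payloads = [(pickled_fn, i) for i in range(4)]  # one element per task input
results = sc.parallelize(payloads).map(
    lambda t: pickle.loads(t[0])(t[1])          # unpickle on the worker, then call
).collect()
print(results)  # [0, 1, 4, 9]
# ---------------------------------------------------------------------------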
@@ -168,18 +256,16 @@ class Executor:
results = []
for result, partial_state_updates, ep in list(zip(simulations, partial_state_updates, eps)):
results.append((flatten(result), create_tensor_field(partial_state_updates, ep)))

final_result = results
elif self.exec_context == ExecutionMode.dist_proc:
# if len(self.configs) > 1:
simulations = self.exec_method(
self.sc,
simulation_execs, var_dict_list, states_lists, configs_structs, env_processes_list, Ts, Ns,
userIDs, sessionIDs, simulationIDs, runIDs
userIDs, sessionIDs, simulationIDs, runIDs, self.sc
)
results = []
for result, partial_state_updates, ep in list(zip(simulations, partial_state_updates, eps)):
results.append((flatten(result), create_tensor_field(partial_state_updates, ep)))
final_result = None
final_result = results

return final_result
@@ -1,8 +1,11 @@
from pprint import pprint
from typing import Any, Callable, Dict, List, Tuple
from pathos.pools import ThreadPool as TPool
from copy import deepcopy
from functools import reduce

from pyspark import SparkContext

from cadCAD.engine.utils import engine_exception
from cadCAD.utils import flatten
@@ -206,7 +209,8 @@ class Executor:
user_id,
session_id,
simulation_id,
run_id
run_id,
sc: SparkContext = None
) -> List[List[Dict[str, Any]]]:

def execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run) -> List[Dict[str, Any]]:
@@ -226,16 +230,33 @@ class Executor:
del states_list_copy

return first_timestep_per_run
#
# pprint()
#
# exit()

pipe_run = flatten(
[execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run) for run in range(runs)]
)

return pipe_run

# ******** Only has one run
# pipe_run: List[List[Dict[str, Any]]] = flatten(
# sc.parallelize(list(range(runs))).map(
# lambda run: execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run)
# ).collect()
# )

# print(type(run_id))
# print(runs)
tp = TPool(runs)
pipe_run: List[List[Dict[str, Any]]] = flatten(
tp.map(
lambda run: execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run),
list(range(runs))
)
)

tp.clear()
return pipe_run
# tp = TPool(runs)
# pipe_run: List[List[Dict[str, Any]]] = flatten(
# tp.map(
# lambda run: execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run),
# list(range(runs))
# )
# )
#
# tp.clear()
# return pipe_run
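
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this commit): the Spark variant that the
# commented-out block above attempts, assuming each run can be expressed as a
# self-contained, picklable callable. `simulate_one_run` is an illustrative
# stand-in for execute_run, not cadCAD API.
from pyspark import SparkContext

sc = SparkContext.getOrCreate()
runs = 4

def simulate_one_run(run):
    # placeholder for one independent simulation run
    return [{"run": run, "timestep": 0}, {"run": run, "timestep": 1}]

# one RDD element (and one partition) per run; flatMap flattens the per-run
# record lists into a single result list, like flatten() above
pipe_run = sc.parallelize(range(runs), numSlices=runs).flatMap(simulate_one_run).collect()
print(len(pipe_run))  # 8 records: two timesteps for each of four runs
# ---------------------------------------------------------------------------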
@@ -3,7 +3,7 @@ from pprint import pprint
import pandas as pd
from tabulate import tabulate

from simulations.test_executions.spark.session import spark_context as sc
from simulations.distributed.spark.session import spark_context as sc
from simulations.regression_tests import config1, config2

from cadCAD.engine import ExecutionMode, ExecutionContext, Executor
@@ -0,0 +1,36 @@
import pandas as pd
from tabulate import tabulate
# The following imports NEED to be in this exact order
from cadCAD.engine import ExecutionMode, ExecutionContext, Executor
from cadCAD.utils import arrange_cols
from simulations.regression_tests import config1, config2
from cadCAD import configs

from simulations.distributed.spark.session import spark

exec_mode = ExecutionMode()

print("Simulation Execution: Concurrent Execution")
multi_proc_ctx = ExecutionContext(context=exec_mode.multi_proc)
run = Executor(exec_context=multi_proc_ctx, configs=configs)

# print(configs)
tf = None
i = 0
config_names = ['config1', 'config2']
for raw_result, tensor_field in run.execute():
    result = arrange_cols(pd.DataFrame(raw_result), False)
    print()
    # print(f"Tensor Field: {config_names[i]}")
    tf = tensor_field
    # print(tabulate(tensor_field, headers='keys', tablefmt='psql'))
    print("Output:")
    # print(tabulate(result, headers='keys', tablefmt='psql'))
    print()
    i += 1

spark.conf.set("spark.sql.execution.arrow.enabled", "true")

df = spark.createDataFrame(tf)

df.show()
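
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this commit): the pandas -> Spark DataFrame
# conversion used above, in isolation. "spark.sql.execution.arrow.enabled" is
# the Spark 2.x flag name; if pyarrow is unavailable, Spark either falls back
# to the slower row-by-row path or raises, depending on version. Column names
# here are illustrative only.
import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("arrow_demo").getOrCreate()
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

pdf = pd.DataFrame({"run": [1, 1, 2], "timestep": [0, 1, 0], "x": [0.0, 0.5, 0.0]})
sdf = spark.createDataFrame(pdf)  # Arrow-accelerated when pyarrow is installed
sdf.show()
# ---------------------------------------------------------------------------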