From 74346ad6e27e2d44c9233cdf70bad549f2e9faba Mon Sep 17 00:00:00 2001 From: Jeffrey Wigger <jeffrey.wigger@epfl.ch> Date: Wed, 15 Jun 2022 00:22:33 +0200 Subject: [PATCH] dynamic jwins 30 --- eval/run_xtimes_cifar.sh | 2 +- eval/run_xtimes_reddit_rws.sh | 4 +- ...nfig_cifar_sharing_dynamicGraphJwins30.ini | 47 +++++++++++++++++++ ...fig_reddit_sharing_dynamicGraphJwins30.ini | 45 ++++++++++++++++++ 4 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini create mode 100644 eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini diff --git a/eval/run_xtimes_cifar.sh b/eval/run_xtimes_cifar.sh index 42ebb1a..616572b 100755 --- a/eval/run_xtimes_cifar.sh +++ b/eval/run_xtimes_cifar.sh @@ -52,7 +52,7 @@ m=`cat $ip_machines | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print export PYTHONFAULTHANDLER=1 # Base configs for which the gird search is done -tests=("step_configs/config_cifar_sharing_dynamicGraphJwins.ini") # ("step_configs/config_cifar_sharing.ini" "step_configs/config_cifar_dpsgdWithRWAsync1.ini" "step_configs/config_cifar_dpsgdWithRWAsync2.ini" "step_configs/config_cifar_dpsgdWithRWAsync4.ini") #"step_configs/config_cifar_partialmodel.ini" "step_configs/config_cifar_topkacc.ini" "step_configs/config_cifar_subsampling.ini" "step_configs/config_cifar_wavelet.ini") +tests=("step_configs/config_cifar_sharing_dynamicGraphJwins30.ini") # ("step_configs/config_cifar_sharing.ini" "step_configs/config_cifar_dpsgdWithRWAsync1.ini" "step_configs/config_cifar_dpsgdWithRWAsync2.ini" "step_configs/config_cifar_dpsgdWithRWAsync4.ini") #"step_configs/config_cifar_partialmodel.ini" "step_configs/config_cifar_topkacc.ini" "step_configs/config_cifar_subsampling.ini" "step_configs/config_cifar_wavelet.ini") # Learning rates lr="0.01" # Batch size diff --git a/eval/run_xtimes_reddit_rws.sh b/eval/run_xtimes_reddit_rws.sh index 2df3462..7b75e8e 100755 --- a/eval/run_xtimes_reddit_rws.sh +++ b/eval/run_xtimes_reddit_rws.sh @@ -42,7 +42,7 @@ graph=96_regular.edges config_file=~/tmp/config.ini procs_per_machine=16 machines=6 -global_epochs=160 +global_epochs=80 eval_file=testing.py log_level=DEBUG @@ -54,7 +54,7 @@ export PYTHONFAULTHANDLER=1 # Base configs for which the gird search is done # tests=("step_configs/config_reddit_sharing_topKdynamicGraph.ini") # tests=("step_configs/config_reddit_sharing_topKsharingasyncrw.ini" "step_configs/config_reddit_sharing_topKdpsgdrwasync.ini" "step_configs/config_reddit_sharing_topKdpsgdrw.ini") -tests=("step_configs/config_reddit_sharing_dynamicGraphJwins.ini") # ("step_configs/config_reddit_sharing_dpsgdrwasync0.ini") +tests=("step_configs/config_reddit_sharing_dynamicGraphJwins30.ini") # ("step_configs/config_reddit_sharing_dpsgdrwasync0.ini") # tests=("step_configs/config_reddit_sharing_dpsgdrw.ini" "step_configs/config_reddit_sharing_dpsgdrwasync.ini" "step_configs/config_reddit_sharing_sharingasyncrw.ini" "step_configs/config_reddit_sharing_sharingrw.ini") # Learning rates lr="1" diff --git a/eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini b/eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini new file mode 100644 index 0000000..fba0881 --- /dev/null +++ b/eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini @@ -0,0 +1,47 @@ +[DATASET] +dataset_package = decentralizepy.datasets.CIFAR10 +dataset_class = CIFAR10 +model_class = LeNet +train_dir = /mnt/nfs/shared/CIFAR +test_dir = /mnt/nfs/shared/CIFAR +; python list of fractions below +sizes = +random_seed = 99 +partition_niid = True +shards = 1 + +[OPTIMIZER_PARAMS] +optimizer_package = torch.optim +optimizer_class = SGD +lr = 0.001 + +[TRAIN_PARAMS] +training_package = decentralizepy.training.Training +training_class = Training +rounds = 65 +full_epochs = False +batch_size = 8 +shuffle = True +loss_package = torch.nn +loss_class = CrossEntropyLoss + +[COMMUNICATION] +comm_package = decentralizepy.communication.TCPRandomWalkRouting +comm_class = TCPRandomWalkRouting +addresses_filepath = ip_addr_6Machines.json +compression_package = decentralizepy.compression.Eliaszfplossy1 +compression_class = Eliaszfplossy1 +compress = True +sampler = equi + +[SHARING] +sharing_package = decentralizepy.sharing.JwinsDynamicGraph +sharing_class = JwinsDynamicGraph +alpha=0.25 +lower_bound=0.2 +metro_hastings=False +change_based_selection = True +wavelet=sym2 +level= None +accumulation = True +accumulate_averaging_changes = True \ No newline at end of file diff --git a/eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini b/eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini new file mode 100644 index 0000000..d80b17e --- /dev/null +++ b/eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini @@ -0,0 +1,45 @@ +[DATASET] +dataset_package = decentralizepy.datasets.Reddit +dataset_class = Reddit +random_seed = 97 +model_class = RNN +train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train +test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test +; python list of fractions below +sizes = + +[OPTIMIZER_PARAMS] +optimizer_package = torch.optim +optimizer_class = SGD +lr = 0.001 + +[TRAIN_PARAMS] +training_package = decentralizepy.training.Training +training_class = Training +rounds = 47 +full_epochs = False +batch_size = 16 +shuffle = True +loss_package = torch.nn +loss_class = CrossEntropyLoss + +[COMMUNICATION] +comm_package = decentralizepy.communication.TCPRandomWalkRouting +comm_class = TCPRandomWalkRouting +addresses_filepath = ip_addr_6Machines.json +compression_package = decentralizepy.compression.Eliaszfplossy1 +compression_class = Eliaszfplossy1 +compress = True +sampler = equi + +[SHARING] +sharing_package = decentralizepy.sharing.JwinsDynamicGraph +sharing_class = JwinsDynamicGraph +alpha=0.25 +lower_bound=0.2 +metro_hastings=False +change_based_selection = True +wavelet=sym2 +level= None +accumulation = True +accumulate_averaging_changes = True \ No newline at end of file -- GitLab