diff --git a/eval/run_xtimes_cifar.sh b/eval/run_xtimes_cifar.sh
index 42ebb1a3814dfa7e2adc16ef76584f56cae7ccfc..616572b50f3a4f8ac67afbe01df13731c564150d 100755
--- a/eval/run_xtimes_cifar.sh
+++ b/eval/run_xtimes_cifar.sh
@@ -52,7 +52,7 @@ m=`cat $ip_machines | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print
 export PYTHONFAULTHANDLER=1
 
 # Base configs for which the gird search is done
-tests=("step_configs/config_cifar_sharing_dynamicGraphJwins.ini") # ("step_configs/config_cifar_sharing.ini" "step_configs/config_cifar_dpsgdWithRWAsync1.ini" "step_configs/config_cifar_dpsgdWithRWAsync2.ini" "step_configs/config_cifar_dpsgdWithRWAsync4.ini") #"step_configs/config_cifar_partialmodel.ini" "step_configs/config_cifar_topkacc.ini" "step_configs/config_cifar_subsampling.ini" "step_configs/config_cifar_wavelet.ini")
+tests=("step_configs/config_cifar_sharing_dynamicGraphJwins30.ini") # ("step_configs/config_cifar_sharing.ini" "step_configs/config_cifar_dpsgdWithRWAsync1.ini" "step_configs/config_cifar_dpsgdWithRWAsync2.ini" "step_configs/config_cifar_dpsgdWithRWAsync4.ini") #"step_configs/config_cifar_partialmodel.ini" "step_configs/config_cifar_topkacc.ini" "step_configs/config_cifar_subsampling.ini" "step_configs/config_cifar_wavelet.ini")
 # Learning rates
 lr="0.01"
 # Batch size
diff --git a/eval/run_xtimes_reddit_rws.sh b/eval/run_xtimes_reddit_rws.sh
index 2df346266234af80aa5774c6924dd1f7c8b747b8..7b75e8e189e091634d576168d5146b257562cc65 100755
--- a/eval/run_xtimes_reddit_rws.sh
+++ b/eval/run_xtimes_reddit_rws.sh
@@ -42,7 +42,7 @@ graph=96_regular.edges
 config_file=~/tmp/config.ini
 procs_per_machine=16
 machines=6
-global_epochs=160
+global_epochs=80
 eval_file=testing.py
 log_level=DEBUG
 
@@ -54,7 +54,7 @@ export PYTHONFAULTHANDLER=1
 # Base configs for which the gird search is done
 # tests=("step_configs/config_reddit_sharing_topKdynamicGraph.ini")
 # tests=("step_configs/config_reddit_sharing_topKsharingasyncrw.ini" "step_configs/config_reddit_sharing_topKdpsgdrwasync.ini" "step_configs/config_reddit_sharing_topKdpsgdrw.ini")
-tests=("step_configs/config_reddit_sharing_dynamicGraphJwins.ini") # ("step_configs/config_reddit_sharing_dpsgdrwasync0.ini")
+tests=("step_configs/config_reddit_sharing_dynamicGraphJwins30.ini") # ("step_configs/config_reddit_sharing_dpsgdrwasync0.ini")
 # tests=("step_configs/config_reddit_sharing_dpsgdrw.ini" "step_configs/config_reddit_sharing_dpsgdrwasync.ini" "step_configs/config_reddit_sharing_sharingasyncrw.ini" "step_configs/config_reddit_sharing_sharingrw.ini")
 # Learning rates
 lr="1"
diff --git a/eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini b/eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini
new file mode 100644
index 0000000000000000000000000000000000000000..fba08819e4712a6c01114114729ab3f3495dd52c
--- /dev/null
+++ b/eval/step_configs/config_cifar_sharing_dynamicGraphJwins30.ini
@@ -0,0 +1,47 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.CIFAR10
+dataset_class = CIFAR10
+model_class = LeNet
+train_dir = /mnt/nfs/shared/CIFAR
+test_dir = /mnt/nfs/shared/CIFAR
+; python list of fractions below
+sizes =
+random_seed = 99
+partition_niid = True
+shards = 1
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = SGD
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.Training
+training_class = Training
+rounds = 65
+full_epochs = False
+batch_size = 8
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCPRandomWalkRouting
+comm_class = TCPRandomWalkRouting
+addresses_filepath = ip_addr_6Machines.json
+compression_package = decentralizepy.compression.Eliaszfplossy1
+compression_class = Eliaszfplossy1
+compress = True
+sampler = equi
+
+[SHARING]
+sharing_package = decentralizepy.sharing.JwinsDynamicGraph
+sharing_class = JwinsDynamicGraph
+alpha=0.25
+lower_bound=0.2
+metro_hastings=False
+change_based_selection = True
+wavelet=sym2
+level= None
+accumulation = True
+accumulate_averaging_changes = True
\ No newline at end of file
diff --git a/eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini b/eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini
new file mode 100644
index 0000000000000000000000000000000000000000..d80b17e958b44d1c50269af7cca83a58ea8b2447
--- /dev/null
+++ b/eval/step_configs/config_reddit_sharing_dynamicGraphJwins30.ini
@@ -0,0 +1,45 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Reddit
+dataset_class = Reddit
+random_seed = 97
+model_class = RNN
+train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train
+test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test
+; python list of fractions below
+sizes =
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = SGD
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.Training
+training_class = Training
+rounds = 47
+full_epochs = False
+batch_size = 16
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCPRandomWalkRouting
+comm_class = TCPRandomWalkRouting
+addresses_filepath = ip_addr_6Machines.json
+compression_package = decentralizepy.compression.Eliaszfplossy1
+compression_class = Eliaszfplossy1
+compress = True
+sampler = equi
+
+[SHARING]
+sharing_package = decentralizepy.sharing.JwinsDynamicGraph
+sharing_class = JwinsDynamicGraph
+alpha=0.25
+lower_bound=0.2
+metro_hastings=False
+change_based_selection = True
+wavelet=sym2
+level= None
+accumulation = True
+accumulate_averaging_changes = True
\ No newline at end of file