diff --git a/eval/run_xtimes_cifar.sh b/eval/run_xtimes_cifar.sh index 616572b50f3a4f8ac67afbe01df13731c564150d..1d466798bc5527b6edb25bad6b35d8f1fa5c6af8 100755 --- a/eval/run_xtimes_cifar.sh +++ b/eval/run_xtimes_cifar.sh @@ -52,7 +52,7 @@ m=`cat $ip_machines | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print export PYTHONFAULTHANDLER=1 # Base configs for which the gird search is done -tests=("step_configs/config_cifar_sharing_dynamicGraphJwins30.ini") # ("step_configs/config_cifar_sharing.ini" "step_configs/config_cifar_dpsgdWithRWAsync1.ini" "step_configs/config_cifar_dpsgdWithRWAsync2.ini" "step_configs/config_cifar_dpsgdWithRWAsync4.ini") #"step_configs/config_cifar_partialmodel.ini" "step_configs/config_cifar_topkacc.ini" "step_configs/config_cifar_subsampling.ini" "step_configs/config_cifar_wavelet.ini") +tests=("step_configs/config_cifar_dpsgdWithRWAsync4Jwins.ini" "step_configs/config_cifar_dpsgdWithRWAsync4Jwins30.ini") # ("step_configs/config_cifar_sharing.ini" "step_configs/config_cifar_dpsgdWithRWAsync1.ini" "step_configs/config_cifar_dpsgdWithRWAsync2.ini" "step_configs/config_cifar_dpsgdWithRWAsync4.ini") #"step_configs/config_cifar_partialmodel.ini" "step_configs/config_cifar_topkacc.ini" "step_configs/config_cifar_subsampling.ini" "step_configs/config_cifar_wavelet.ini") # Learning rates lr="0.01" # Batch size diff --git a/eval/step_configs/config_cifar_dpsgdWithRWAsync4Jwins.ini b/eval/step_configs/config_cifar_dpsgdWithRWAsync4Jwins.ini new file mode 100644 index 0000000000000000000000000000000000000000..c703433d09fdd3ffeb1502302311497649808732 --- /dev/null +++ b/eval/step_configs/config_cifar_dpsgdWithRWAsync4Jwins.ini @@ -0,0 +1,47 @@ +[DATASET] +dataset_package = decentralizepy.datasets.CIFAR10 +dataset_class = CIFAR10 +model_class = LeNet +train_dir = /mnt/nfs/shared/CIFAR +test_dir = /mnt/nfs/shared/CIFAR +; python list of fractions below +sizes = +random_seed = 99 +partition_niid = True +shards = 1 + +[OPTIMIZER_PARAMS] +optimizer_package = torch.optim +optimizer_class = SGD +lr = 0.001 + +[TRAIN_PARAMS] +training_package = decentralizepy.training.Training +training_class = Training +rounds = 65 +full_epochs = False +batch_size = 8 +shuffle = True +loss_package = torch.nn +loss_class = CrossEntropyLoss + +[COMMUNICATION] +comm_package = decentralizepy.communication.TCPRandomWalk +comm_class = TCPRandomWalk +addresses_filepath = ip_addr_6Machines.json +compression_package = decentralizepy.compression.Eliaszfplossy1 +compression_class = Eliaszfplossy1 +compress = True +sampler = equi_check_history + +[SHARING] +sharing_package = decentralizepy.sharing.JwinsDPSGDAsync +sharing_class = JwinsDPSGDAsync +alpha=0.0833 +lower_bound=0.2 +metro_hastings=False +change_based_selection = True +wavelet=sym2 +level= None +accumulation = True +accumulate_averaging_changes = True diff --git a/eval/step_configs/config_cifar_dpsgdWithRWAsync4Jwins30.ini b/eval/step_configs/config_cifar_dpsgdWithRWAsync4Jwins30.ini new file mode 100644 index 0000000000000000000000000000000000000000..6dcdd1b27c5c8017c0a5396281f9542453ce16cb --- /dev/null +++ b/eval/step_configs/config_cifar_dpsgdWithRWAsync4Jwins30.ini @@ -0,0 +1,47 @@ +[DATASET] +dataset_package = decentralizepy.datasets.CIFAR10 +dataset_class = CIFAR10 +model_class = LeNet +train_dir = /mnt/nfs/shared/CIFAR +test_dir = /mnt/nfs/shared/CIFAR +; python list of fractions below +sizes = +random_seed = 99 +partition_niid = True +shards = 1 + +[OPTIMIZER_PARAMS] +optimizer_package = torch.optim +optimizer_class = SGD +lr = 0.001 + +[TRAIN_PARAMS] +training_package = decentralizepy.training.Training +training_class = Training +rounds = 65 +full_epochs = False +batch_size = 8 +shuffle = True +loss_package = torch.nn +loss_class = CrossEntropyLoss + +[COMMUNICATION] +comm_package = decentralizepy.communication.TCPRandomWalk +comm_class = TCPRandomWalk +addresses_filepath = ip_addr_6Machines.json +compression_package = decentralizepy.compression.Eliaszfplossy1 +compression_class = Eliaszfplossy1 +compress = True +sampler = equi_check_history + +[SHARING] +sharing_package = decentralizepy.sharing.JwinsDPSGDAsync +sharing_class = JwinsDPSGDAsync +alpha=0.25 +lower_bound=0.2 +metro_hastings=False +change_based_selection = True +wavelet=sym2 +level= None +accumulation = True +accumulate_averaging_changes = True