diff --git a/figures/d-cliques-mnist-scaling-fully-connected.png b/figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png similarity index 100% rename from figures/d-cliques-mnist-scaling-fully-connected.png rename to figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png diff --git a/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png new file mode 100644 index 0000000000000000000000000000000000000000..317a815a7d9ad606cec5c5123ce345199043a8b6 Binary files /dev/null and b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png differ diff --git a/main.tex b/main.tex index 954455ee5f38da60b748fcb75f0c741c3f1c886c..70cb15ebe5d0399a364852a3d3592999a556b88f 100644 --- a/main.tex +++ b/main.tex @@ -253,34 +253,37 @@ We solve this problem by decoupling the gradient averaging from the weight avera \end{algorithmic} \end{algorithm} - - \section{Evaluation} \subsection{MNIST and Linear Model} \begin{figure}[htbp] \centering -% To regenerate the figure, from directory results/mnist -% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-09:20:03-CET fully-connected/all/2021-03-10-09:25:19-CET clique-ring/all/2021-03-10-10:15:24-CET fully-connected-cliques/all/2021-03-10-10:19:44-CET --add-min-max --yaxis training-loss --labels '1-node IID bsz=12800' '100-nodes non-IID fully-connected bsz=128' '100-nodes non-IID D-Cliques (Ring) bsz=128' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=128' --legend 'upper right' --save-figure ../../figures/d-cliques-mnist-vs-1-node-training-loss.png -% \begin{subfigure}[b]{0.70\textwidth} -% \centering -% \includegraphics[width=\textwidth]{figures/d-cliques-mnist-vs-1-node-training-loss} -%\caption{\label{fig:d-cliques-mnist-training-loss} Training Loss} -% \end{subfigure} + % To regenerate the figure, from directory results/mnist % python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-09:20:03-CET fully-connected/all/2021-03-10-09:25:19-CET clique-ring/all/2021-03-10-18:14:35-CET fully-connected-cliques/all/2021-03-10-10:19:44-CET --add-min-max --yaxis test-accuracy --labels '1-node IID bsz=12800' '100-nodes non-IID fully-connected bsz=128' '100-nodes non-IID D-Cliques (Ring) bsz=128' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=128' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-vs-1-node-test-accuracy.png - %\begin{subfigure}[b]{\textwidth} - %\centering + \centering \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} -%\caption{\label{fig:d-cliques-mnist-linear} D-Cliques with Linear Model on MNIST.} - %\end{subfigure} -\caption{\label{fig:d-cliques-mnist-linear} D-Cliques with Linear Model on MNIST.} -\end{figure} + \caption{\label{fig:d-cliques-mnist-linear-w-clique-averaging-w-initial-averaging} MNIST: D-Cliques Convergence Speed} + \end{figure} -With and without clique averaging. + + + \begin{figure}[htbp] + \centering + \begin{subfigure}[b]{0.48\textwidth} + \centering + %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} + \caption{\label{fig:mnist-init-clique-avg-effect-ring} Ring} + \end{subfigure} + \begin{subfigure}[b]{0.48\textwidth} + \centering + %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} + \caption{\label{fig:mnist-init-clique-avg-effect-fcc} Fully-Connected-Cliques} + \end{subfigure} +\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed.} +\end{figure} -TODO: Update figure with actual Clique-Ring results \subsection{CIFAR10 and Convolutional Model} @@ -331,17 +334,22 @@ Similar number of maximum hops but no or less clustering than D-Cliques (and no \subsection{Effect of Scaling} -% To regenerate the figure, from directory results/scaling -% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected.png --ymin 80 --add-min-max + \begin{figure}[htbp] \centering - \begin{subfigure}[b]{\textwidth} + % To regenerate the figure, from directory results/scaling +% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max + \begin{subfigure}[b]{0.48\textwidth} \centering - \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-scaling-fully-connected} + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz} \caption{Constant Batch-Size} \end{subfigure} - \begin{subfigure}[b]{\textwidth} + % To regenerate the figure, from directory results/scaling +% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max +\hfill + \begin{subfigure}[b]{0.48\textwidth} \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates} \caption{Constant Nb Updates per Epoch} \end{subfigure} \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} Scaling Behaviour of Fully-Connected D-Clique} diff --git a/results/mnist/no-clique-avg/clique-ring/experiments.sh b/results/mnist/no-clique-avg/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..1bf90baf4d089d037a91f926983f283e5bdf0cb4 --- /dev/null +++ b/results/mnist/no-clique-avg/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 128 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --initial-averaging + done; +done; + diff --git a/results/mnist/no-clique-avg/fully-connected-cliques/experiments.sh b/results/mnist/no-clique-avg/fully-connected-cliques/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..67f547fd215a9c8222017a9ef8b9c30926dbc953 --- /dev/null +++ b/results/mnist/no-clique-avg/fully-connected-cliques/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 128 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --initial-averaging + done; +done; + diff --git a/results/mnist/no-init-no-clique-avg/clique-ring/experiments.sh b/results/mnist/no-init-no-clique-avg/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..4ffcd299614b64aa594c88b00b0bef8ccf2f8b7a --- /dev/null +++ b/results/mnist/no-init-no-clique-avg/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 128 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear + done; +done; + diff --git a/results/mnist/no-init-no-clique-avg/fully-connected-cliques/experiments.sh b/results/mnist/no-init-no-clique-avg/fully-connected-cliques/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..a20fd4d0d412de9845f3ff0ebf454e5ce241aaf8 --- /dev/null +++ b/results/mnist/no-init-no-clique-avg/fully-connected-cliques/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 128 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear + done; +done; + diff --git a/results/mnist/no-init/clique-ring/experiments.sh b/results/mnist/no-init/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..acb22fdb234921b5dd8653295fdb99d4e16a88ec --- /dev/null +++ b/results/mnist/no-init/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 128 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --clique-gradient + done; +done; + diff --git a/results/mnist/no-init/fully-connected-cliques/experiments.sh b/results/mnist/no-init/fully-connected-cliques/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..1bb88fdd8839c591f2d59daf924e588db9c8e60f --- /dev/null +++ b/results/mnist/no-init/fully-connected-cliques/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 128 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --clique-gradient + done; +done; +