diff --git a/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png b/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..fa8184c89057bc1ea8b84870e7f4031b1598d0cd Binary files /dev/null and b/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png differ diff --git a/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png b/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..d7f1f0b0525ae381f214c9b35cad374a060b3034 Binary files /dev/null and b/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png differ diff --git a/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png index 317a815a7d9ad606cec5c5123ce345199043a8b6..0db2804de41c6328a00fdb77251723eaf0725fd1 100644 Binary files a/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png and b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png differ diff --git a/main.tex b/main.tex index 70cb15ebe5d0399a364852a3d3592999a556b88f..4b1b62a38a0a7deef8fd50fc104fa9c31c910106 100644 --- a/main.tex +++ b/main.tex @@ -266,24 +266,68 @@ We solve this problem by decoupling the gradient averaging from the weight avera \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} \caption{\label{fig:d-cliques-mnist-linear-w-clique-averaging-w-initial-averaging} MNIST: D-Cliques Convergence Speed} \end{figure} - - + \begin{figure}[htbp] + \centering + % To regenerate the figure, from directory results/mnist + % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-18:14:35-CET no-clique-avg/clique-ring/all/2021-03-12-10:40:37-CET no-init/clique-ring/all/2021-03-12-10:40:11-CET no-init-no-clique-avg/clique-ring/all/2021-03-12-10:41:03-CET --add-min-max 
--yaxis test-accuracy --labels 'with uniform init., with clique avg.' 'with uniform init., without clique avg.' 'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} + \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} Ring} + \end{subfigure} + % To regenerate the figure, from directory results/mnist + %python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:26-CET no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET --add-min-max --yaxis test-accuracy --labels 'with uniform init., with clique avg.' 'with uniform init., without clique avg.' 'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} + \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected} + \end{subfigure} +\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. 
(100 nodes, non-IID, D-Cliques, bsz=128)} +\end{figure} + \begin{figure}[htbp] \centering \begin{subfigure}[b]{0.48\textwidth} \centering %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} - \caption{\label{fig:mnist-init-clique-avg-effect-ring} Ring} + \caption{\label{fig:mnist-clique-clustering-ring} Ring} \end{subfigure} \begin{subfigure}[b]{0.48\textwidth} \centering %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} - \caption{\label{fig:mnist-init-clique-avg-effect-fcc} Fully-Connected-Cliques} + \caption{\label{fig:mnist-clique-clustering-fcc} Fully-Connected} \end{subfigure} -\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed.} +\caption{\label{fig:d-cliques-mnist-clustering-effect} MNIST: Effects of Clustering within Cliques on Convergence Speed.} \end{figure} + \begin{figure}[htbp] + \centering + % To regenerate the figure, from directory results/scaling +% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz} + \caption{Constant Batch-Size} + \end{subfigure} + % To regenerate the figure, from directory results/scaling +% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 
1000/mnist/fully-connected-cliques/all/2021-03-12-09:13:28-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max +\hfill + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates} + \caption{Constant Number of Updates per Epoch} + \end{subfigure} + \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: Scaling Behaviour of Fully-Connected D-Cliques} + \end{figure} + +Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected. + +(Smallworld?) + +Robustness to not having fully-connected cliques (static and dynamic subsets). + +\clearpage \subsection{CIFAR10 and Convolutional Model} @@ -332,35 +376,6 @@ Similar number of maximum hops but no or less clustering than D-Cliques (and no %\item Random Small-World Graph \end{itemize} -\subsection{Effect of Scaling} - - - \begin{figure}[htbp] - \centering - % To regenerate the figure, from directory results/scaling -% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max - \begin{subfigure}[b]{0.48\textwidth} - \centering - \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz} - \caption{Constant Batch-Size} - \end{subfigure} - % To regenerate the figure, from directory results/scaling -% python ../../../learn-topology/tools/plot_convergence.py 
10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max -\hfill - \begin{subfigure}[b]{0.48\textwidth} - \centering - \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates} - \caption{Constant Nb Updates per Epoch} - \end{subfigure} - \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} Scaling Behaviour of Fully-Connected D-Clique} - \end{figure} - -Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected. - -(Smallworld?) - -Robustness to not having fully-connected cliques (static and dynamic subsets). - \section{Related Work} D2: numerically unstable when $W_{ij}$ rows and columns do not exactly sum to $1$, as the small differences are amplified in a positive feedback loop. More work is therefore required on the algorithm to make it usable with a wider variety of topologies. In comparison, D-cliques do not modify the SGD algorithm and instead simply removes some neighbour contributions that would otherwise bias the direction of the gradient. D-Cliques with D-PSGD are therefore as tolerant to ill-conditioned $W_{ij}$ matrices as regular D-PSGD in an IID setting. 
diff --git a/results/cifar10/no-clique-avg/clique-ring/experiments.sh b/results/cifar10/no-clique-avg/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..db6859cdacdeb50e36d403686350a2edc34c7051 --- /dev/null +++ b/results/cifar10/no-clique-avg/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 20 + ' +LRS=' + 0.002 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --initial-averaging --accuracy-logging-interval 10 --validation-set-ratio 0.5 + done; +done; + diff --git a/results/cifar10/no-clique-avg/fully-connected-cliques/experiments.sh b/results/cifar10/no-clique-avg/fully-connected-cliques/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..2ec8e2ba9bc3fbb845de04ef65cc0e9b5b5c1509 --- /dev/null +++ b/results/cifar10/no-clique-avg/fully-connected-cliques/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 20 + ' +LRS=' + 0.002 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset 
cifar10 --model gn-lenet --initial-averaging --accuracy-logging-interval 10 --validation-set-ratio 0.5 + done; +done; + diff --git a/results/cifar10/no-init-no-clique-avg/clique-ring/experiments.sh b/results/cifar10/no-init-no-clique-avg/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..1360135517a3a8f5066d62e5aa9fab2f631f9023 --- /dev/null +++ b/results/cifar10/no-init-no-clique-avg/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 20 + ' +LRS=' + 0.002 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --accuracy-logging-interval 10 --validation-set-ratio 0.5 + done; +done; + diff --git a/results/cifar10/no-init-no-clique-avg/fully-connected-cliques/experiments.sh b/results/cifar10/no-init-no-clique-avg/fully-connected-cliques/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..caa6afefa6182724278d75b8da372252e1fb4d80 --- /dev/null +++ b/results/cifar10/no-init-no-clique-avg/fully-connected-cliques/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 20 + ' +LRS=' + 0.002 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0.9 
--sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --accuracy-logging-interval 10 --validation-set-ratio 0.5 + done; +done; + diff --git a/results/cifar10/no-init/clique-ring/experiments.sh b/results/cifar10/no-init/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..721ad43862392e31232a580d8974ae3ace7f2344 --- /dev/null +++ b/results/cifar10/no-init/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 20 + ' +LRS=' + 0.002 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --clique-gradient --accuracy-logging-interval 10 --validation-set-ratio 0.5 + done; +done; + diff --git a/results/cifar10/no-init/fully-connected-cliques/experiments.sh b/results/cifar10/no-init/fully-connected-cliques/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..1e1c0272582d052fcf66facff05f6977e759d1e0 --- /dev/null +++ b/results/cifar10/no-init/fully-connected-cliques/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 20 + ' +LRS=' + 0.002 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology 
fully-connected-cliques --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --clique-gradient --accuracy-logging-interval 10 --validation-set-ratio 0.5 + done; +done; + diff --git a/results/scaling/10/mnist/clique-ring/experiments.sh b/results/scaling/10/mnist/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..de6dc66898e5bdf827fb17d6d36a3e2e86baf295 --- /dev/null +++ b/results/scaling/10/mnist/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 1280 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 10 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 1 1 1 1 1 1 1 1 1 1 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. 
--sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --clique-gradient --initial-averaging + done; +done; + diff --git a/results/scaling/1000/mnist/clique-ring/experiments.sh b/results/scaling/1000/mnist/clique-ring/experiments.sh new file mode 100755 index 0000000000000000000000000000000000000000..7bd359b4da0a793fe4b3d27188bd2a43709e6f2a --- /dev/null +++ b/results/scaling/1000/mnist/clique-ring/experiments.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +TOOLS=../../../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS +BSZS=' + 13 + ' +LRS=' + 0.1 + ' +for BSZ in $BSZS; + do for LR in $LRS; + do python sgp-mnist.py --nb-nodes 1000 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 100 100 100 100 100 100 100 100 100 100 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --clique-gradient --initial-averaging + done; +done; +