Added results and new experiments

30aca3a7 · Erick Lavoie · bf26e134 · 30aca3a7 · 30aca3a7 · bf26e134
Commit 30aca3a7 authored 4 years ago by Erick Lavoie
--- a/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
+++ b/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
--- a/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png
+++ b/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png
--- a/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
+++ b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
--- a/main.tex
+++ b/main.tex
@@ -266,24 +266,68 @@ We solve this problem by decoupling the gradient averaging from the weight avera
         \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
         \caption{\label{fig:d-cliques-mnist-linear-w-clique-averaging-w-initial-averaging} MNIST: D-Cliques Convergence Speed}
        \end{figure}
-
-     
     
+    \begin{figure}[htbp]
+     \centering
+     % To regenerate the figure, from directory results/mnist
+     % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-18:14:35-CET no-clique-avg/clique-ring/all/2021-03-12-10:40:37-CET no-init/clique-ring/all/2021-03-12-10:40:11-CET no-init-no-clique-avg/clique-ring/all/2021-03-12-10:41:03-CET --add-min-max --yaxis test-accuracy --labels 'with uniform init., with clique avg.' 'with uniform init., without clique avg.' 'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png
+      \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy}
+         \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} Ring}
+     \end{subfigure}
+     % To regenerate the figure, from directory results/mnist
+     % python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:26-CET no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET --add-min-max --yaxis test-accuracy --labels 'with uniform init., with clique avg.'    'with uniform init., without clique avg.'  'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
+       \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy}
+         \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected}
+     \end{subfigure}
+\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=128)}
+\end{figure}
+
    \begin{figure}[htbp]
     \centering   
      \begin{subfigure}[b]{0.48\textwidth}
         \centering
         %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
-         \caption{\label{fig:mnist-init-clique-avg-effect-ring} Ring}
+         \caption{\label{fig:mnist-clique-clustering-ring} Ring}
     \end{subfigure}
       \begin{subfigure}[b]{0.48\textwidth}
         \centering
         %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
-         \caption{\label{fig:mnist-init-clique-avg-effect-fcc} Fully-Connected-Cliques}
+         \caption{\label{fig:mnist-clique-clustering-fcc} Fully-Connected}
     \end{subfigure}
-\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed.}
+\caption{\label{fig:d-cliques-mnist-clique-clustering-effect} MNIST: Effects of Clustering within Cliques on Convergence Speed.}
 \end{figure}

+     \begin{figure}[htbp]
+         \centering
+         % To regenerate the figure, from directory results/scaling
+% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max
+         \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz}
+         \caption{Constant Batch-Size}
+     \end{subfigure}
+              % To regenerate the figure, from directory results/scaling
+% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-12-09:13:28-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
+\hfill
+      \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
+         \caption{Constant Nb Updates per Epoch}
+     \end{subfigure}
+     \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: Scaling Behaviour of Fully-Connected D-Cliques}
+     \end{figure}
+     
+Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected.
+
+(Smallworld?)
+
+Robustness to not having fully-connected cliques (static and dynamic subsets).
+
+\clearpage

 \subsection{CIFAR10 and Convolutional Model}

@@ -332,35 +376,6 @@ Similar number of maximum hops but no or less clustering than D-Cliques (and no
    %\item Random Small-World Graph
 \end{itemize}

-\subsection{Effect of Scaling}
-
-
-     \begin{figure}[htbp]
-         \centering
-         % To regenerate the figure, from directory results/scaling
-% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max
-         \begin{subfigure}[b]{0.48\textwidth}
-         \centering
-         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz}
-         \caption{Constant Batch-Size}
-     \end{subfigure}
-              % To regenerate the figure, from directory results/scaling
-% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
-\hfill
-      \begin{subfigure}[b]{0.48\textwidth}
-         \centering
-         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
-         \caption{Constant Nb Updates per Epoch}
-     \end{subfigure}
-     \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} Scaling Behaviour of Fully-Connected D-Clique}
-     \end{figure}
-
-Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected.
-
-(Smallworld?)
-
-Robustness to not having fully-connected cliques (static and dynamic subsets).
-
 \section{Related Work}

 D2: numerically unstable when $W_{ij}$ rows and columns do not exactly sum to $1$, as the small differences are amplified in a positive feedback loop. More work is therefore required on the algorithm to make it usable with a wider variety of topologies. In comparison, D-Cliques do not modify the SGD algorithm and instead simply remove some neighbour contributions that would otherwise bias the direction of the gradient. D-Cliques with D-PSGD are therefore as tolerant to ill-conditioned $W_{ij}$ matrices as regular D-PSGD in an IID setting.

--- a/results/cifar10/no-clique-avg/clique-ring/experiments.sh
+++ b/results/cifar10/no-clique-avg/clique-ring/experiments.sh
+#!/usr/bin/env bash
+# CIFAR10 experiment: D-PSGD on a clique-ring topology, 100 nodes, gn-lenet model.
+# This variant passes --initial-averaging and does not pass --clique-gradient.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+# NOTE review: started from the directory of this script, four ../ resolve inside the repository root, whereas the scaling experiment scripts resolve to a sibling of the repository. Confirm the depth.
+TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    20
+    '
+LRS='
+    0.002
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --initial-averaging --accuracy-logging-interval 10 --validation-set-ratio 0.5
+    done;
+done;
+
--- a/results/cifar10/no-clique-avg/fully-connected-cliques/experiments.sh
+++ b/results/cifar10/no-clique-avg/fully-connected-cliques/experiments.sh
+#!/usr/bin/env bash
+# CIFAR10 experiment: D-PSGD on a fully-connected-cliques topology, 100 nodes, gn-lenet model.
+# This variant passes --initial-averaging and does not pass --clique-gradient.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+# NOTE review: started from the directory of this script, four ../ resolve inside the repository root, whereas the scaling experiment scripts resolve to a sibling of the repository. Confirm the depth.
+TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    20
+    '
+LRS='
+    0.002
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --initial-averaging --accuracy-logging-interval 10 --validation-set-ratio 0.5
+    done;
+done;
+
--- a/results/cifar10/no-init-no-clique-avg/clique-ring/experiments.sh
+++ b/results/cifar10/no-init-no-clique-avg/clique-ring/experiments.sh
+#!/usr/bin/env bash
+# CIFAR10 experiment: D-PSGD on a clique-ring topology, 100 nodes, gn-lenet model.
+# This variant passes neither --initial-averaging nor --clique-gradient.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+# NOTE review: started from the directory of this script, four ../ resolve inside the repository root, whereas the scaling experiment scripts resolve to a sibling of the repository. Confirm the depth.
+TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    20
+    '
+LRS='
+    0.002
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --accuracy-logging-interval 10 --validation-set-ratio 0.5
+    done;
+done;
+
--- a/results/cifar10/no-init-no-clique-avg/fully-connected-cliques/experiments.sh
+++ b/results/cifar10/no-init-no-clique-avg/fully-connected-cliques/experiments.sh
+#!/usr/bin/env bash
+# CIFAR10 experiment: D-PSGD on a fully-connected-cliques topology, 100 nodes, gn-lenet model.
+# This variant passes neither --initial-averaging nor --clique-gradient.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+# NOTE review: started from the directory of this script, four ../ resolve inside the repository root, whereas the scaling experiment scripts resolve to a sibling of the repository. Confirm the depth.
+TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    20
+    '
+LRS='
+    0.002
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --accuracy-logging-interval 10 --validation-set-ratio 0.5
+    done;
+done;
+
--- a/results/cifar10/no-init/clique-ring/experiments.sh
+++ b/results/cifar10/no-init/clique-ring/experiments.sh
+#!/usr/bin/env bash
+# CIFAR10 experiment: D-PSGD on a clique-ring topology, 100 nodes, gn-lenet model.
+# This variant passes --clique-gradient and does not pass --initial-averaging.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+# NOTE review: started from the directory of this script, four ../ resolve inside the repository root, whereas the scaling experiment scripts resolve to a sibling of the repository. Confirm the depth.
+TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    20
+    '
+LRS='
+    0.002
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --clique-gradient --accuracy-logging-interval 10 --validation-set-ratio 0.5
+    done;
+done;
+
--- a/results/cifar10/no-init/fully-connected-cliques/experiments.sh
+++ b/results/cifar10/no-init/fully-connected-cliques/experiments.sh
+#!/usr/bin/env bash
+# CIFAR10 experiment: D-PSGD on a fully-connected-cliques topology, 100 nodes, gn-lenet model.
+# This variant passes --clique-gradient and does not pass --initial-averaging.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+# NOTE review: started from the directory of this script, four ../ resolve inside the repository root, whereas the scaling experiment scripts resolve to a sibling of the repository. Confirm the depth.
+TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    20
+    '
+LRS='
+    0.002
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --clique-gradient --accuracy-logging-interval 10 --validation-set-ratio 0.5
+    done;
+done;
+
--- a/results/scaling/10/mnist/clique-ring/experiments.sh
+++ b/results/scaling/10/mnist/clique-ring/experiments.sh
+#!/usr/bin/env bash
+# MNIST scaling experiment: D-PSGD on a clique-ring topology, 10 nodes, linear model, batch size 1280.
+# Passes both --clique-gradient and --initial-averaging.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+TOOLS=../../../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    1280
+    '
+LRS='
+    0.1
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 10 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 1 1 1 1 1 1 1 1 1 1 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --clique-gradient --initial-averaging
+    done;
+done;
+
--- a/results/scaling/1000/mnist/clique-ring/experiments.sh
+++ b/results/scaling/1000/mnist/clique-ring/experiments.sh
+#!/usr/bin/env bash
+# MNIST scaling experiment: D-PSGD on a clique-ring topology, 1000 nodes, linear model, batch size 13.
+# Passes both --clique-gradient and --initial-averaging.
+# sgp-mnist.py is run once per batch size in BSZS and learning rate in LRS.
+# Results are written to the all directory under the directory the script is started from.
+# Extra command-line arguments are forwarded to sgp-mnist.py.
+TOOLS=../../../../../../learn-topology/tools; CWD="$(pwd)"
+# Stop here if the tools directory cannot be entered, so python is never started from the wrong directory.
+cd "$TOOLS" || exit 1
+BSZS='
+    13
+    '
+LRS='
+    0.1
+    '
+# BSZS and LRS are left unquoted on purpose: word splitting yields one value per iteration.
+for BSZ in $BSZS; 
+    do for LR in $LRS;
+        do python sgp-mnist.py --nb-nodes 1000 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 100 100 100 100 100 100 100 100 100 100 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology clique-ring --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear --clique-gradient --initial-averaging
+    done;
+done;
+