Skip to content
Snippets Groups Projects
Commit 30aca3a7 authored by Erick Lavoie's avatar Erick Lavoie
Browse files

Added results and new experiments

parent bf26e134
No related branches found
No related tags found
No related merge requests found
Showing
with 161 additions and 34 deletions
figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png

71.6 KiB

figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png

73 KiB

figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png

43.5 KiB | W: | H:

figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png

49.2 KiB | W: | H:

figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -266,24 +266,68 @@ We solve this problem by decoupling the gradient averaging from the weight avera
\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
\caption{\label{fig:d-cliques-mnist-linear-w-clique-averaging-w-initial-averaging} MNIST: D-Cliques Convergence Speed}
\end{figure}
\begin{figure}[htbp]
\centering
% To regenerate the figure, from directory results/mnist
% python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-18:14:35-CET no-clique-avg/clique-ring/all/2021-03-12-10:40:37-CET no-init/clique-ring/all/2021-03-12-10:40:11-CET no-init-no-clique-avg/clique-ring/all/2021-03-12-10:41:03-CET --add-min-max --yaxis test-accuracy --labels 'with uniform init., with clique avg.' 'with uniform init., without clique avg.' 'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png
\begin{subfigure}[b]{0.48\textwidth}
\centering
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy}
\caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} Ring}
\end{subfigure}
% To regenerate the figure, from directory results/mnist
% python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:26-CET no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET --add-min-max --yaxis test-accuracy --labels 'with uniform init., with clique avg.' 'with uniform init., without clique avg.' 'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
\begin{subfigure}[b]{0.48\textwidth}
\centering
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy}
\caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected}
\end{subfigure}
\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=128)}
\end{figure}
\begin{figure}[htbp]
\centering
\begin{subfigure}[b]{0.48\textwidth}
\centering
%\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
\caption{\label{fig:mnist-init-clique-avg-effect-ring} Ring}
\caption{\label{fig:mnist-clique-clustering-ring} Ring}
\end{subfigure}
\begin{subfigure}[b]{0.48\textwidth}
\centering
%\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
\caption{\label{fig:mnist-init-clique-avg-effect-fcc} Fully-Connected-Cliques}
\caption{\label{fig:mnist-clique-clustering-fcc} Fully-Connected}
\end{subfigure}
\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed.}
\caption{\label{fig:d-cliques-mnist-clustering-effect} MNIST: Effects of Clustering within Cliques on Convergence Speed.}
\end{figure}
\begin{figure}[htbp]
\centering
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max
\begin{subfigure}[b]{0.48\textwidth}
\centering
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz}
\caption{Constant Batch-Size}
\end{subfigure}
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-12-09:13:28-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
\hfill
\begin{subfigure}[b]{0.48\textwidth}
\centering
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
\caption{Constant Number of Updates per Epoch}
\end{subfigure}
\caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: Scaling Behaviour of Fully-Connected D-Cliques}
\end{figure}
Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected.
(Smallworld?)
Robustness to not having fully-connected cliques (static and dynamic subsets).
\clearpage
\subsection{CIFAR10 and Convolutional Model}
......@@ -332,35 +376,6 @@ Similar number of maximum hops but no or less clustering than D-Cliques (and no
%\item Random Small-World Graph
\end{itemize}
\subsection{Effect of Scaling}
\begin{figure}[htbp]
\centering
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max
\begin{subfigure}[b]{0.48\textwidth}
\centering
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz}
\caption{Constant Batch-Size}
\end{subfigure}
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
\hfill
\begin{subfigure}[b]{0.48\textwidth}
\centering
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
\caption{Constant Nb Updates per Epoch}
\end{subfigure}
\caption{\label{fig:d-cliques-mnist-scaling-fully-connected} Scaling Behaviour of Fully-Connected D-Clique}
\end{figure}
Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected.
(Smallworld?)
Robustness to not having fully-connected cliques (static and dynamic subsets).
\section{Related Work}
D2: numerically unstable when $W_{ij}$ rows and columns do not exactly sum to $1$, as the small differences are amplified in a positive feedback loop. More work is therefore required on the algorithm to make it usable with a wider variety of topologies. In comparison, D-Cliques do not modify the SGD algorithm and instead simply remove some neighbour contributions that would otherwise bias the direction of the gradient. D-Cliques with D-PSGD are therefore as tolerant to ill-conditioned $W_{ij}$ matrices as regular D-PSGD in an IID setting.
......
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --dataset cifar10, --model gn-lenet, --topology clique-ring and
# --initial-averaging.
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
20
'
LRS='
0.002
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 10 10 10 10 10 10 10 10 10 10 \
      --global-train-ratios 1 1 1 1 1 1 1 1 1 1 \
      --dist-optimization d-psgd --topology clique-ring --metric dissimilarity \
      --learning-momentum 0.9 --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet \
      --initial-averaging --accuracy-logging-interval 10 --validation-set-ratio 0.5
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --dataset cifar10, --model gn-lenet, --topology fully-connected-cliques
# and --initial-averaging.
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
20
'
LRS='
0.002
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 10 10 10 10 10 10 10 10 10 10 \
      --global-train-ratios 1 1 1 1 1 1 1 1 1 1 \
      --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity \
      --learning-momentum 0.9 --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet \
      --initial-averaging --accuracy-logging-interval 10 --validation-set-ratio 0.5
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --dataset cifar10, --model gn-lenet and --topology clique-ring, passing
# neither --initial-averaging nor --clique-gradient.
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
20
'
LRS='
0.002
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 10 10 10 10 10 10 10 10 10 10 \
      --global-train-ratios 1 1 1 1 1 1 1 1 1 1 \
      --dist-optimization d-psgd --topology clique-ring --metric dissimilarity \
      --learning-momentum 0.9 --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet \
      --accuracy-logging-interval 10 --validation-set-ratio 0.5
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --dataset cifar10, --model gn-lenet and --topology fully-connected-cliques,
# passing neither --initial-averaging nor --clique-gradient.
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
20
'
LRS='
0.002
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 10 10 10 10 10 10 10 10 10 10 \
      --global-train-ratios 1 1 1 1 1 1 1 1 1 1 \
      --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity \
      --learning-momentum 0.9 --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet \
      --accuracy-logging-interval 10 --validation-set-ratio 0.5
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --dataset cifar10, --model gn-lenet, --topology clique-ring and
# --clique-gradient.
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
20
'
LRS='
0.002
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 10 10 10 10 10 10 10 10 10 10 \
      --global-train-ratios 1 1 1 1 1 1 1 1 1 1 \
      --dist-optimization d-psgd --topology clique-ring --metric dissimilarity \
      --learning-momentum 0.9 --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet \
      --clique-gradient --accuracy-logging-interval 10 --validation-set-ratio 0.5
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --dataset cifar10, --model gn-lenet, --topology fully-connected-cliques
# and --clique-gradient.
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
20
'
LRS='
0.002
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 10 10 10 10 10 10 10 10 10 10 \
      --global-train-ratios 1 1 1 1 1 1 1 1 1 1 \
      --dist-optimization d-psgd --topology fully-connected-cliques --metric dissimilarity \
      --learning-momentum 0.9 --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet \
      --clique-gradient --accuracy-logging-interval 10 --validation-set-ratio 0.5
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --nb-nodes 10, --dataset mnist, --model linear, --topology clique-ring,
# --clique-gradient and --initial-averaging (batch size 1280, learning
# rate 0.1).
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
1280
'
LRS='
0.1
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 10 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 1 1 1 1 1 1 1 1 1 1 \
      --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 \
      --dist-optimization d-psgd --topology clique-ring --metric dissimilarity \
      --learning-momentum 0. --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --parallel-training --nb-workers 10 --dataset mnist --model linear \
      --clique-gradient --initial-averaging
  done
done
#!/usr/bin/env bash
# Launches sgp-mnist.py once per (batch size, learning rate) pair with
# --nb-nodes 1000, --dataset mnist, --model linear, --topology clique-ring,
# --clique-gradient and --initial-averaging (batch size 13, learning
# rate 0.1).
#
# Results go to the "all" directory under the directory this script is
# invoked from. Any command-line arguments given to this script are
# forwarded to sgp-mnist.py through "$@".
TOOLS=../../../../../../learn-topology/tools
CWD="$(pwd)"
# Stop here if the tools directory is missing; otherwise python would be
# started from the wrong directory.
cd "$TOOLS" || { printf 'cannot cd to %s\n' "$TOOLS" >&2; exit 1; }
BSZS='
13
'
LRS='
0.1
'
# $BSZS and $LRS are intentionally unquoted: word splitting turns each
# newline-separated list into one value per iteration.
for BSZ in $BSZS; do
  for LR in $LRS; do
    python sgp-mnist.py --nb-nodes 1000 --nb-epochs 100 --local-classes 1 --seed 1 \
      --nodes-per-class 100 100 100 100 100 100 100 100 100 100 \
      --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 \
      --dist-optimization d-psgd --topology clique-ring --metric dissimilarity \
      --learning-momentum 0. --sync-per-mini-batch 1 \
      --results-directory "$CWD/all" --learning-rate "$LR" --batch-size "$BSZ" "$@" \
      --parallel-training --nb-workers 10 --dataset mnist --model linear \
      --clique-gradient --initial-averaging
  done
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment