Skip to content
Snippets Groups Projects
Commit af1c2465 authored by Erick Lavoie's avatar Erick Lavoie
Browse files

Added updated scaling experiments in Annex

parent 820b70d1
No related branches found
No related tags found
No related merge requests found
Showing
with 44 additions and 30 deletions
......@@ -175,50 +175,53 @@ nodes, there is a significant increase in variance between nodes and the
convergence is slower, only marginally for Fully-Connected but
significantly so for Fractal and Ring. Small-world has higher variance between nodes but maintains a convergence speed close to that of Fully-Connected.
% MNIST
% all/2021-10-12-03:00:24-CEST-labostrex116 all/2021-10-11-17:38:20-CEST-labostrex118 all/2021-09-28-23:18:49-CEST-labostrex119 all/2021-09-29-22:12:31-CEST-labostrex112
\begin{figure}[htbp]
\centering
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-14-17:56:26-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-12-03:00:24-CEST-labostrex116 all/2021-10-11-17:38:20-CEST-labostrex118 all/2021-09-28-23:18:49-CEST-labostrex119 all/2021-09-29-22:12:31-CEST-labostrex112 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --ymax 92.5 --ymin 80 --yaxis test-accuracy
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-uneq-classes --topology:interclique-topology smallworld --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-12-03:00:24-CEST-labostrex116 all/2021-10-11-17:38:20-CEST-labostrex118 all/2021-09-28-23:18:49-CEST-labostrex119 all/2021-09-29-22:12:31-CEST-labostrex112 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --ymax 92.5 --ymin 80 --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
\caption{Fully-Connected}
\end{subfigure}
\quad
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/smallworld-logn-cliques/all/2021-03-23-21:44:56-CET 1000/mnist/smallworld-logn-cliques/all/2021-03-23-21:45:39-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-smallworld-cst-updates.png --ymin 80 --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-uneq-classes --topology:interclique-topology smallworld --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-12-03:00:24-CEST-labostrex116 all/2021-10-11-17:38:20-CEST-labostrex118 all/2021-10-13-13:54:20-CEST-labostrex114 all/2021-09-30-11:40:59-CEST-labostrex116 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --ymax 92.5 --ymin 80 --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-mnist-scaling-smallworld-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-mnist-scaling-smallworld-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-smallworld-cst-updates}
\caption{Small-world}
\end{subfigure}
\quad
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/clique-ring/all/2021-03-13-18:22:01-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fractal-cliques/all/2021-03-14-17:41:59-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png --ymin 80 --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-uneq-classes --topology:interclique-topology fractal --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-12-03:00:24-CEST-labostrex116 all/2021-10-11-17:38:20-CEST-labostrex118 all/2021-10-13-13:48:08-CEST-labostrex112 all/2021-09-30-11:39:47-CEST-labostrex114 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --ymax 92.5 --ymin 80 --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates}
\caption{Fractal}
\end{subfigure}
\quad
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/clique-ring/all/2021-03-13-18:22:01-CET ../mnist/clique-ring/all/2021-03-10-18:14:35-CET 1000/mnist/clique-ring/all/2021-03-13-18:22:36-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png --ymin 80 --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-uneq-classes --topology:interclique-topology ring --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-12-03:00:24-CEST-labostrex116 all/2021-10-11-17:38:20-CEST-labostrex118 all/2021-10-13-13:53:50-CEST-labostrex113 all/2021-09-30-11:40:32-CEST-labostrex115 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --ymax 92.5 --ymin 80 --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-mnist-scaling-ring-cliques-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-mnist-scaling-clique-ring-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-ring-cliques-cst-updates}
\caption{Ring}
\end{subfigure}
\caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST:
D-Cliques scaling behavior (constant updates per epoch) for different
inter-clique topologies.\textsuperscript{*}.}
\footnotesize\textsuperscript{*}\textit{These results were obtained with a previous version of the simulator and the partitioning scheme of \autoref{app:extreme-local-skew} but should be consistent with the latest version and the partitioning scheme used in the main text. They will be updated for the final version of the paper to use the latter.}
inter-clique topologies.}
\end{figure}
Figure~\ref{fig:d-cliques-cifar10-scaling-fully-connected} shows the results
......@@ -243,44 +246,50 @@ linear-logarithmic number of edges of Small-world makes it the best compromise
between convergence speed and connectivity, and thus the best choice for
efficient large-scale decentralized learning in practice.
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-eq-classes --nodes:nb-nodes 1 10 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
\begin{figure}[htbp]
\centering
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/fully-connected-cliques/all/2021-03-10-13:58:57-CET 1000/cifar10/fully-connected-cliques/all/2021-03-14-17:41:20-CET --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-eq-classes --topology:interclique-topology fully-connected --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-11-19:57:35-CEST-labostrex116 all/2021-10-11-19:39:08-CEST-labostrex117 all/2021-10-03-19:53:21-CEST-labostrex117 all/2021-10-05-19:38:01-CEST-labostrex114 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-scaling-fully-connected-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-fully-connected-cst-updates}
\caption{Fully-Connected}
\end{subfigure}
\quad
% python ../../../learn-topology/tools/plot_convergence.py ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/smallworld-logn-cliques/all/2021-03-23-22:13:23-CET 1000/cifar10/smallworld-logn-cliques/all/2021-03-23-22:13:57-CET --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-eq-classes --topology:interclique-topology smallworld --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-11-19:57:35-CEST-labostrex116 all/2021-10-11-19:39:08-CEST-labostrex117 all/2021-10-13-13:56:16-CEST-labostrex117 all/2021-10-05-19:40:04-CEST-labostrex117 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-scaling-smallworld-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-smallworld-cst-updates}
\caption{Small-world}
\end{subfigure}
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/fully-connected-cliques/all/2021-03-10-13:58:57-CET 1000/cifar10/fractal-cliques/all/2021-03-14-17:42:46-CET --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates.png --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-eq-classes --topology:interclique-topology fractal --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-11-19:57:35-CEST-labostrex116 all/2021-10-11-19:39:08-CEST-labostrex117 all/2021-10-13-13:55:42-CEST-labostrex115 all/2021-10-05-19:39:26-CEST-labostrex116 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-cifar10-scaling-fractal-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-fractal-cst-updates}
\caption{Fractal}
\end{subfigure}
\quad
% To regenerate the figure, from directory results/scaling
% python ../../../learn-topology/tools/plot_convergence.py ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/clique-ring/all/2021-03-10-11:58:43-CET 1000/cifar10/clique-ring/all/2021-03-14-09:55:24-CET --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-clique-ring-cst-updates.png --add-min-max
% To regenerate the figure, from directory results-v2
% python $TOOLS/analyze/filter.py all/ --nodes:name 2-shards-eq-classes --topology:interclique-topology ring --nodes:nb-nodes 100 1000 --meta:seed 1 | python $TOOLS/analyze/diff.py
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-11-19:57:35-CEST-labostrex116 all/2021-10-11-19:39:08-CEST-labostrex117 all/2021-10-13-13:56:00-CEST-labostrex116 all/2021-10-05-19:40:41-CEST-labostrex119 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --yaxis test-accuracy --save-figure '../mlsys2022style/figures/d-cliques-cifar10-scaling-ring-cst-updates.png' --font-size 18 --linewidth 3
\begin{subfigure}[b]{0.35\textwidth}
\centering
\includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-scaling-clique-ring-cst-updates}
\includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-ring-cst-updates}
\caption{Ring}
\end{subfigure}
\caption{\label{fig:d-cliques-cifar10-scaling-fully-connected} CIFAR10: D-Cliques scaling behavior (constant updates per epoch) for different
inter-clique topologies.\textsuperscript{*}.}
\footnotesize\textsuperscript{*}\textit{These results were obtained with a previous version of the simulator and the partitioning scheme of \autoref{app:extreme-local-skew} but should be consistent with the latest version and the partitioning scheme used in the main text. They will be updated for the final version of the paper to use the latter.}
inter-clique topologies.}
\end{figure}
\section{Additional Experiments with Extreme Node Skew}
......@@ -452,7 +461,7 @@ We provide comments only if the results deviate substantially.
\caption{\label{fig:convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-1-class-per-node} With Clique Averaging}
\end{subfigure}
\caption{\label{fig:cifar10-c-avg-momentum-1-class-per-node} Effect of Clique Averaging and
Momentum on CIFAR10 with LeNet on 100 nodes (Variation of \autoref{fig:cifar10-c-avg-momentum} with 1 class/node.}
Momentum on CIFAR10 with LeNet on 100 nodes (Variation of \autoref{fig:cifar10-c-avg-momentum} with 1 class/node).}
\end{figure}
% From directory 'results-v2':
......
......@@ -255,6 +255,11 @@ want to keep it in the main text, then remove the comment and stick to the
argument ``it is challenging to make a fair comparison so we change the
setting a bit''.}
% MNIST
% python $TOOLS/analyze/diff.py --rundirs all/2021-10-11-17:01:19-CEST-labostrex113 all/2021-10-11-18:32:25-CEST-labostrex113 all/2021-09-29-22:17:30-CEST-labostrex119 all/2021-09-28-12:39:00-CEST-labostrex117 all/2021-09-28-23:18:40-CEST-labostrex118 all/2021-09-29-03:52:47-CEST-labostrex118 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 80 --ymax 92.5 --labels 'diverse random (unbiased)' 'diverse random (biased)' 'random' 'fully-connected' 'd-cliques' 'd-cliques w/o c-avg.'
% CIFAR10
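% TODO: the CIFAR10 run directories are missing after --rundirs in the command below; fill them in before regenerating the figure.
% python $TOOLS/analyze/diff.py --rundirs --pass-through | python $TOOLS/plot/convergence.py --add-min-max --labels 'diverse random (unbiased)' 'diverse random (biased)' 'random' 'fully-connected' 'd-cliques' 'd-cliques w/o c-avg.'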
\begin{figure}[htbp]
\centering
\begin{subfigure}[b]{0.23\textwidth}
......
mlsys2022style/figures/d-cliques-cifar10-scaling-fractal-cst-updates.png

69.2 KiB

mlsys2022style/figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png

65.7 KiB

mlsys2022style/figures/d-cliques-cifar10-scaling-ring-cst-updates.png

67.9 KiB

mlsys2022style/figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png

67.7 KiB

mlsys2022style/figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png

75.3 KiB

mlsys2022style/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png

66.3 KiB

mlsys2022style/figures/d-cliques-mnist-scaling-ring-cliques-cst-updates.png

75.3 KiB

mlsys2022style/figures/d-cliques-mnist-scaling-smallworld-cst-updates.png

67.1 KiB

0% Loading Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment