diff --git a/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering.png b/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering.png new file mode 100644 index 0000000000000000000000000000000000000000..d6d149d21fef72f432f28d4ef13c2ddf1aadb57c Binary files /dev/null and b/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering.png differ diff --git a/figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering.png b/figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering.png new file mode 100644 index 0000000000000000000000000000000000000000..8aeac653b72a944b8ca0c228fc4457e4936f3aba Binary files /dev/null and b/figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering.png differ diff --git a/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies-scattering.png b/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies-scattering.png new file mode 100644 index 0000000000000000000000000000000000000000..d0bcaa653281f055db045555aab5ed5a7bb035a5 Binary files /dev/null and b/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies-scattering.png differ diff --git a/main.bib b/main.bib index 8b6b2f98d8badb0cd88de48f46a9dd01265613d9..37764d25598c39f25f2d1872ee6ef197f17278dd 100644 --- a/main.bib +++ b/main.bib @@ -801,4 +801,13 @@ pages={211-252} eprint={1803.08494}, archivePrefix={arXiv}, primaryClass={cs.CV} +} + +@misc{kong2021consensus, + title={Consensus Control for Decentralized Deep Learning}, + author={Lingjing Kong and Tao Lin and Anastasia Koloskova and Martin Jaggi and Sebastian U. 
Stich}, + year={2021}, + eprint={2102.04828}, + archivePrefix={arXiv}, + primaryClass={cs.LG} } \ No newline at end of file diff --git a/main.tex b/main.tex index 0f75a0bcd39104fd87c56c13e47ed7896d7ee9ec..2b4011484f5cb1c19e6d62273141c75f740fa8b8 100644 --- a/main.tex +++ b/main.tex @@ -433,22 +433,43 @@ We solve this problem by decoupling the gradient averaging from the weight avera \begin{figure}[htbp] \centering - \begin{subfigure}[htb]{0.7\textwidth} + \begin{subfigure}[htb]{0.48\textwidth} % To regenerate the figure, from directory results/mnist % python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET random-10/all/2021-03-17-20:28:12-CET random-10-diverse/all/2021-03-17-20:28:35-CET random-10-diverse-unbiased-grad/all/2021-03-17-20:29:04-CET --labels 'd-clique (fcc)' 'd-clique (fcc) no clique avg. no uniform init.' '10 random edges' '10 random edges (all classes represented)' '10 random edges (all classes repr.) with unbiased grad.' 
--add-min-max --legend 'lower right' --ymin 88 --ymax 92.5 --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png \centering \includegraphics[width=\textwidth]{figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies} \caption{\label{fig:d-cliques-mnist-linear-comparison-to-non-clustered-topologies} Linear Model} \end{subfigure} - - \begin{subfigure}[htb]{0.7\textwidth} + \hfill + \begin{subfigure}[htb]{0.48\textwidth} +% To regenerate the figure, from directory results/mnist +% python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET random-10/all/2021-03-17-20:28:12-CET random-10-diverse/all/2021-03-17-20:28:35-CET random-10-diverse-unbiased-grad/all/2021-03-17-20:29:04-CET --labels 'd-clique (fcc)' 'd-clique (fcc) no clique avg. no uniform init.' '10 random edges' '10 random edges (all classes represented)' '10 random edges (all classes repr.) with unbiased grad.' 
--add-min-max --legend 'upper right' --ymax 0.7 --yaxis scattering --save-figure ../../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies-scattering.png + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies-scattering} + \caption{\label{fig:d-cliques-mnist-linear-comparison-to-non-clustered-topologies-scattering} Linear Model (Scattering)} + \end{subfigure} + \\ + \begin{subfigure}[htb]{0.48\textwidth} % To regenerate the figure, from directory results/mnist/gn-lenet % python ../../../../learn-topology/tools/plot_convergence.py no-init/all/2021-03-22-21:39:54-CET no-init-no-clique-avg/all/2021-03-22-21:40:16-CET random-10/all/2021-03-22-21:41:06-CET random-10-diverse/all/2021-03-22-21:41:46-CET random-10-diverse-unbiased-grad/all/2021-03-22-21:42:04-CET --legend 'lower right' --add-min-max --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random edges (all classes repr.) with unbiased grad.' --ymin 80 --yaxis test-accuracy --save-figure ../../../figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies.png \includegraphics[width=\textwidth]{figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies} \caption{\label{fig:d-cliques-mnist-lenet-comparison-to-non-clustered-topologies} LeNet Model} \end{subfigure} + \hfill + \begin{subfigure}[htb]{0.48\textwidth} +% To regenerate the figure, from directory results/mnist/gn-lenet +% python ../../../../learn-topology/tools/plot_convergence.py no-init/all/2021-03-22-21:39:54-CET no-init-no-clique-avg/all/2021-03-22-21:40:16-CET random-10/all/2021-03-22-21:41:06-CET random-10-diverse/all/2021-03-22-21:41:46-CET random-10-diverse-unbiased-grad/all/2021-03-22-21:42:04-CET --legend 'upper right' --add-min-max --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' 
'10 random edges' '10 random edges (all classes repr.)' '10 random edges (all classes repr.) with unbiased grad.' --ymax 0.7 --yaxis scattering --save-figure ../../../figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering.png + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering} + \caption{\label{fig:d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering} LeNet Model (Scattering)} + \end{subfigure} + + \caption{\label{fig:d-cliques-mnist-comparison-to-non-clustered-topologies} MNIST: Comparison to non-Clustered Topologies} \end{figure} + +\begin{itemize} + \item Clustering does not seem to make a difference in MNIST, even when using a higher-capacity model (LeNet) instead of a linear model (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). +\end{itemize} \begin{figure}[htbp] @@ -568,7 +589,20 @@ In addition, it is important that all nodes are initialized with the same model % To regenerate the figure, from directory results/cifar10 % python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET random-10/all/2021-03-17-20:30:03-CET random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET random-10-diverse-unbiased-gradient-uniform-init/all/2021-03-17-20:31:41-CET --labels 'd-clique (fcc) clique avg., uniform init.' 'd-clique (fcc) no clique avg. no uniform init.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' '10 random (all classes repr.) with unbiased grad., uniform init.' 
--add-min-max --legend 'upper left' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png --ymax 100 - \includegraphics[width=0.6\textwidth]{figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies} + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies} + \caption{LeNet Model: Convergence Speed} + \end{subfigure} + \hfill + % To regenerate the figure, from directory results/cifar10 +% python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET random-10/all/2021-03-17-20:30:03-CET random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET random-10-diverse-unbiased-gradient-uniform-init/all/2021-03-17-20:31:41-CET --labels 'd-clique (fcc) clique avg., uniform init.' 'd-clique (fcc) no clique avg. no uniform init.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' '10 random (all classes repr.) with unbiased grad., uniform init.' 
--add-min-max --legend 'upper right' --yaxis scattering --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering.png --ymax 0.7 + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering} + \caption{\label{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering} LeNet Model: Scattering} + \end{subfigure} + \caption{\label{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies} CIFAR10: Comparison to non-Clustered Topologies} \end{figure} @@ -591,11 +625,13 @@ In addition, it is important that all nodes are initialized with the same model \caption{\label{fig:d-cliques-cifar10-clique-clustering} CIFAR10: Effect of Relaxed Intra-Clique Connectivity.} \end{figure} +\begin{itemize} + \item Except for the random 10 topology, convergence speed seems to be correlated with scattering in CIFAR-10 with the LeNet model (Fig.~\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There is also more difference between topologies both in convergence speed and scattering than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering is computed similarly to Consensus Control for Decentralized Deep Learning~\cite{kong2021consensus}. +\end{itemize} + \section{XP Suppl.} \begin{itemize} - \item Test MNIST avec gn-lenet - \item Plot Scattering (Citer l'article décentralisé) \item Test topology en n-log n https://dl.acm.org/doi/10.1145/335305.335325 \item Enlever les mentions à l'initialisation uniforme \end{itemize}