diff --git a/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png b/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..ea70d6fb5dbece43e554ccf0be0fb62b36f00e72 Binary files /dev/null and b/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png differ diff --git a/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png b/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..3828514a34647ab3974d2c21f69cce7ac94558de Binary files /dev/null and b/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png differ diff --git a/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png b/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png new file mode 100644 index 0000000000000000000000000000000000000000..cfac11b0b8698cf35e7ea9b1c3bf23401f43eaaa Binary files /dev/null and b/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png differ diff --git a/main.tex b/main.tex index 4b1b62a38a0a7deef8fd50fc104fa9c31c910106..c3d0d2ad911b9c655774cbc388bf43f1765d0d5c 100644 --- a/main.tex +++ b/main.tex @@ -283,7 +283,7 @@ We solve this problem by decoupling the gradient averaging from the weight avera \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected} \end{subfigure} -\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=128)} +\caption{\label{fig:d-cliques-mnist-init-clique-avg-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. 
(100 nodes, non-IID, D-Cliques, bsz=128)} \end{figure} \begin{figure}[htbp] @@ -298,27 +298,38 @@ We solve this problem by decoupling the gradient averaging from the weight avera %\includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy} \caption{\label{fig:mnist-clique-clustering-fcc} Fully-Connected} \end{subfigure} -\caption{\label{fig:d-cliques-mnist-initialization-effect} MNIST: Effects of Clustering within Cliques on Convergence Speed.} +\caption{\label{fig:d-cliques-mnist-clique-clustering} MNIST: Effects of Clustering within Cliques on Convergence Speed.} \end{figure} \begin{figure}[htbp] \centering - % To regenerate the figure, from directory results/scaling -% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max - \begin{subfigure}[b]{0.48\textwidth} - \centering - \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz} - \caption{Constant Batch-Size} - \end{subfigure} +% % To regenerate the figure, from directory results/scaling +%% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max +% \begin{subfigure}[b]{0.48\textwidth} +% \centering +% 
\includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz} +% \caption{FCC: Constant Batch-Size} +% \end{subfigure} + % To regenerate the figure, from directory results/scaling % python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-12-09:13:28-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max -\hfill - \begin{subfigure}[b]{0.48\textwidth} + + \begin{subfigure}[b]{0.7\textwidth} \centering \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates} - \caption{Constant Nb Updates per Epoch} + \caption{Fully-Connected} \end{subfigure} - \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: Scaling Behaviour of Fully-Connected D-Clique} + + + % To regenerate the figure, from directory results/scaling +% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/clique-ring/all/ ../mnist/clique-ring/all/ 1000/mnist/clique-ring/all/ --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png --ymin 80 --add-min-max + \begin{subfigure}[b]{0.7\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-clique-ring-cst-updates} + \caption{Ring} + \end{subfigure} + + \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: D-Clique Scaling Behaviour (Constant Updates per Epoch)} \end{figure} Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected. 
@@ -365,6 +376,51 @@ In addition, it is important that all nodes are initialized with the same model \caption{\label{fig:d-cliques-cifar10-convolutional} D-Cliques with Convolutional Network on CIFAR10.} \end{figure} + + + \begin{figure}[htbp] + \centering + % To regenerate the figure, from directory results/cifar10 + % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-11:58:43-CET no-init/clique-ring/all/2021-03-13-18:28:30-CET no-clique-avg/clique-ring/all/2021-03-13-18:27:09-CET no-init-no-clique-avg/clique-ring/all/2021-03-13-18:29:58-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg., with uniform init.' 'with clique avg., without uniform init.' 'without clique avg., with uniform init.' 'without clique avg., without uniform init.' --legend 'lower right' --ymax 75 --save-figure ../../figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy} + \caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy} Ring} + \end{subfigure} + % To regenerate the figure, from directory results/cifar10 + %python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-clique-avg/fully-connected-cliques/all/2021-03-13-18:31:36-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg., with uniform init.' 'with clique avg., without uniform init.' 'without clique avg., with uniform init.' 'without clique avg., without uniform init.' 
--legend 'lower right' --ymax 75 --save-figure ../../figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png + \begin{subfigure}[b]{0.48\textwidth} + \centering + \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy} + \caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected} + \end{subfigure} +\caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect} CIFAR10: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=20)} +\end{figure} + + \begin{figure}[htbp] + \centering + \begin{subfigure}[b]{0.48\textwidth} + \centering + %\includegraphics[width=0.7\textwidth]{figures/d-cliques-cifar10-vs-1-node-test-accuracy} + \caption{\label{fig:cifar10-clique-clustering-ring} Ring} + \end{subfigure} + \begin{subfigure}[b]{0.48\textwidth} + \centering + %\includegraphics[width=0.7\textwidth]{figures/d-cliques-cifar10-vs-1-node-test-accuracy} + \caption{\label{fig:cifar10-clique-clustering-fcc} Fully-Connected} + \end{subfigure} +\caption{\label{fig:d-cliques-cifar10-clique-clustering} CIFAR10: Effects of Clustering within Cliques on Convergence Speed.} +\end{figure} + + \begin{figure}[htbp] + % To regenerate the figure, from directory results/scaling +% python ../../../learn-topology/tools/plot_convergence.py 10/cifar10/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../cifar10/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/cifar10/fully-connected-cliques/all/2021-03-12-09:13:28-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max + \centering + %\includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-fully-connected-cst-updates} + \caption{\label{fig:d-cliques-cifar10-scaling-fully-connected} CIFAR10: Scaling 
Behaviour of Fully-Connected D-Cliques (Constant Updates per Epoch)} + \end{figure} + + + \subsection{Comparison to similar topologies} Similar number of maximum hops but no or less clustering than D-Cliques (and no unbiasing of gradient).