diff --git a/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png b/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png
index ea70d6fb5dbece43e554ccf0be0fb62b36f00e72..0ccb6963045aaf5b094b9fc615698af29fc0af7e 100644
Binary files a/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png and b/figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png differ
diff --git a/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png b/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png
index 3828514a34647ab3974d2c21f69cce7ac94558de..b93f682ae12129c44dca4a586dca8a29905ceccb 100644
Binary files a/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png and b/figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png differ
diff --git a/figures/d-cliques-cifar10-scaling-clique-ring-cst-updates.png b/figures/d-cliques-cifar10-scaling-clique-ring-cst-updates.png
index 6c4c93b6e729131bbcbd6df7c7a5661a46f6cbd5..27cc286a2b97bfef3293a6ff2aa507f25a0e6581 100644
Binary files a/figures/d-cliques-cifar10-scaling-clique-ring-cst-updates.png and b/figures/d-cliques-cifar10-scaling-clique-ring-cst-updates.png differ
diff --git a/figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates.png b/figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates.png
index 768ccbc9a0299971b6779782ceed10a25cb1c8da..d459013ddc9ccc065e0db662ec5136c82abd3b73 100644
Binary files a/figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates.png and b/figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates.png differ
diff --git a/figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png b/figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png
index 4a4e636d896679f934a1d96424b624a77536a512..c0b7435886ca60dd8a498db2b7612b71058b9fe0 100644
Binary files a/figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png and b/figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png differ
diff --git a/figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png b/figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png
new file mode 100644
index 0000000000000000000000000000000000000000..6328b3dbbdb6d8e9767eadd30342a6d3dae1e75e
Binary files /dev/null and b/figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png differ
diff --git a/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png b/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
index fa8184c89057bc1ea8b84870e7f4031b1598d0cd..37073c107f3c783aac9ace1c2d34c7486a4c0e52 100644
Binary files a/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png and b/figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png differ
diff --git a/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png b/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png
index d7f1f0b0525ae381f214c9b35cad374a060b3034..280073d513f4b8ae13b46b0433a9e8981e32fa1f 100644
Binary files a/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png and b/figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png differ
diff --git a/figures/d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy.png b/figures/d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy.png
new file mode 100644
index 0000000000000000000000000000000000000000..770c8317db6d470c84f1a7f09503cb8a4f8c2a4a
Binary files /dev/null and b/figures/d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy.png differ
diff --git a/figures/d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy.png b/figures/d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy.png
new file mode 100644
index 0000000000000000000000000000000000000000..ed7d2a015b73b4a4aa893eb4b835f9ed637239d6
Binary files /dev/null and b/figures/d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy.png differ
diff --git a/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png b/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png
index cfac11b0b8698cf35e7ea9b1c3bf23401f43eaaa..9054cb00cb70851da3a294c57de42dc94b0f371f 100644
Binary files a/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png and b/figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png differ
diff --git a/figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png b/figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png
index 4167bb1096a336d1a4c67c3068cc1e0d7efaba83..2aeb75578c1af098d4a84409bd352023bc24e452 100644
Binary files a/figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png and b/figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png differ
diff --git a/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png
index 1501f7a2459d301c7137131e0bbadca84e4d9219..1e737cfe5d049844ffb3334be3e97e15b2178b9d 100644
Binary files a/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png and b/figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png differ
diff --git a/figures/d-cliques-mnist-scaling-smallworld-cst-updates.png b/figures/d-cliques-mnist-scaling-smallworld-cst-updates.png
new file mode 100644
index 0000000000000000000000000000000000000000..2c97a195d9641c9abb874cd615023fc2ad079e7e
Binary files /dev/null and b/figures/d-cliques-mnist-scaling-smallworld-cst-updates.png differ
diff --git a/main.tex b/main.tex
index d39d217e1e27f81fa1b979961347439a76fa67ac..620356d6102c56b37941099bee5d20d205ba38f1 100644
--- a/main.tex
+++ b/main.tex
@@ -999,7 +999,13 @@ recover the global distribution.
  
  \section{Algorithms}
  
- \begin{algorithm}[h]
+ We present a more detailed and precise explanation of the two main algorithms of the paper: one for D-Clique Construction (Algorithm~\ref{Algorithm:D-Clique-Construction}) and one for establishing a Smallworld interconnection between cliques (Algorithm~\ref{Algorithm:Smallworld}).
+ 
+ \subsection{D-Clique Construction}
+ 
+ Algorithm~\ref{Algorithm:D-Clique-Construction} shows the overall approach for constructing a D-Cliques topology in the non-IID case.\footnote{An IID version of D-Cliques, in which each node has an equal number of examples of all classes, can be implemented by picking $\#L$ nodes per clique at random.} It expects the following inputs: $L$, the set of all classes present in the global distribution $D = \bigcup_{i \in N} D_i$; $N$, the set of all nodes; a function $classes(S)$, which given a subset $S$ of nodes in $N$ returns the set of classes in their joint local distributions ($D_S = \bigcup_{i \in S} D_i$); a function $intraconnect(DC)$, which given $DC$, a set of cliques (set of set of nodes), creates a set of edges ($\{(i,j), \dots \}$) connecting all nodes within each clique to one another; a function $interconnect(DC)$, which given a set of cliques, creates a set of edges ($\{(i,j), \dots \}$) connecting nodes belonging to different cliques; and a function $weights(E)$, which given a set of edges, returns the weighted matrix $W_{ij}$.  Algorithm~\ref{Algorithm:D-Clique-Construction} returns both $W_{ij}$, for use in D-SGD (Algorithms~\ref{Algorithm:D-PSGD} and~\ref{Algorithm:Clique-Unbiased-D-PSGD}), and $DC$, for use with Clique Averaging (Algorithm~\ref{Algorithm:Clique-Unbiased-D-PSGD}).
+ 
+   \begin{algorithm}[h]
    \caption{D-Clique Construction}
    \label{Algorithm:D-Clique-Construction}
    \begin{algorithmic}[1]
@@ -1009,26 +1015,36 @@ recover the global distribution.
         \State~~ fn $\textit{intraconnect}(DC)$ that returns edges intraconnecting cliques of $DC$,
         \State~~ fn $\textit{interconnect}(DC)$ that returns edges interconnecting cliques of $DC$ (Sec.~\ref{section:interclique-topologies})
          \State~~ fn $\textit{weights}(E)$ that assigns weights to edges in $E$ 
+         
         \State $R \leftarrow \{ n~\text{for}~n \in N \}$ \Comment{Remaining nodes}
         \State $DC \leftarrow \emptyset$ \Comment{D-Cliques}
         \State $\textit{C} \leftarrow \emptyset$ \Comment{Current Clique}
         \While{$R \neq \emptyset$}
 		\State $n \leftarrow \text{pick}~1~\text{from}~\{ m \in R | \textit{classes}(\{m\}) \nsubseteq \textit{classes}(\textit{C}) \}$
-		\State $R \leftarrow R \setminus \{ n \}; C \leftarrow C \cup \{ n \}$
+		\State $R \leftarrow R \setminus \{ n \}$
+		\State $C \leftarrow C \cup \{ n \}$
 		\If{$\textit{classes}(C) = L$}
-		    \State $DC \leftarrow DC \cup \{ C \}; C \leftarrow \emptyset$
+		    \State $DC \leftarrow DC \cup \{ C \}$
+		    \State $C \leftarrow \emptyset$
 		\EndIf
         \EndWhile
-        \State \Return $weights(\textit{intraconnect}(DC) \cup \textit{interconnect}(DC))$
+        \State \Return $(weights(\textit{intraconnect}(DC) \cup \textit{interconnect}(DC)), DC)$
    \end{algorithmic}
 \end{algorithm}
+ 
+The implementation builds a single clique by adding nodes with different classes until all classes of the global distribution are represented. All cliques are built one at a time until all nodes are part of cliques. Because all classes are represented on an equal number of nodes, all cliques will have nodes of all classes. And because nodes have examples of a single class, we are guaranteed a valid assignment is possible in a greedy manner.  After cliques are created, edges are added and weights are assigned to edges, using the corresponding input functions.
+
+\subsection{Smallworld Interclique Topology}
 
+Algorithm~\ref{Algorithm:Smallworld} shows the construction of the smallworld interclique topology. It adds a linear number of interclique edges by first arranging cliques on a ring. It then adds a logarithmic number of ``finger'' edges to other cliques on the ring chosen such that there is a constant number of edges added per set, on sets that are exponentially bigger the further away on the ring. ``Finger'' edges are added symmetrically on both sides of the ring to the cliques in each set that are closest to a given set.
 
 \begin{algorithm}[h]
    \caption{$\textit{smallworld}(DC)$:  adds $O(\# N + \log(\# N))$ edges}
    \label{Algorithm:Smallworld}
    \begin{algorithmic}[1]
-        \State \textbf{Require} Set of cliques $DC$ (set of set of nodes), size of neighborhood $ns$ (default 2), function $\textit{least\_edges}(S, E)$ that returns one of the nodes in $S$ with the least number of edges in $E$
+        \State \textbf{Require} set of cliques $DC$ (set of set of nodes)
+        \State ~~size of neighborhood $ns$ (default 2)
+        \State ~~function $\textit{least\_edges}(S, E)$ that returns one of the nodes in $S$ with the least number of edges in $E$
         \State $E \leftarrow \emptyset$ \Comment{Set of Edges}
         \State $L \leftarrow [ C~\text{for}~C \in DC ]$ \Comment{Arrange cliques in a list}
         \For{$i \in \{1,\dots,\#DC\}$} \Comment{For every clique}
@@ -1049,186 +1065,218 @@ recover the global distribution.
         \State \Return E
    \end{algorithmic}
 \end{algorithm}
+
+The algorithm expects a set of cliques $DC$, previously computed by Algorithm~\ref{Algorithm:D-Clique-Construction}; a size of neighbourhood $ns$, which is the number of finger edges to add per set of cliques; and a function \textit{least\_edges}, which given a set of nodes $S$ and an existing set of edges $E = \{(i,j), \dots \}$, returns one of the nodes in $S$ with the least number of edges in $E$. It returns a set of edges $\{(i,j), \dots \}$ with all edges added by the smallworld topology.
+
+The implementation first arranges the cliques of $DC$ on a list, which represents the ring. Traversing the list with increasing indexes is equivalent to traversing the ring in the clockwise direction, and inversely. Then, for every clique $i$ on the ring from which we are computing the distance to others, a number of edges are added. All other cliques are implicitly arranged in mutually exclusive sets, with size and at offset exponentially bigger (doubling at every step). Then, for each of these sets, $ns$ edges are added, both in the clockwise and counter-clockwise directions, always on the nodes with the least number of edges in each clique. The ring edges are implicitly added to the cliques at offset $1$ in both directions.
  
- \section{Other Experiments}
- 
- % REMOVED: Constant Batch-size
-%         % To regenerate the figure, from directory results/scaling
-%% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max
-%         \begin{subfigure}[b]{0.48\textwidth}
-%         \centering
-%         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz}
-%         \caption{FCC: Constant Batch-Size}
-%     \end{subfigure}
+ \section{Additional Experiments}
 
-    \begin{figure}[htbp]
+\subsection{Effect of Clique Averaging and Uniform Initialization}
+
+Section~\ref{section:clique-averaging} explained how Clique Averaging reduces bias and showed that Clique Averaging was significantly beneficial on MNIST with fully-connected D-Cliques. In this section, we provide additional results for the ring topology, as well as for CIFAR10. In addition, during our early exploration, we noticed that ensuring \textit{uniform initialization}, i.e. ensuring that all nodes start with the same model, increased convergence speed when connecting two cliques with 1-2 interclique edges. We therefore also verify whether this effect is still significant with 10 cliques (100 nodes), on a ring and with full connections between cliques, as well as on MNIST and CIFAR10. We also verified what interaction this had with Clique Averaging.
+
+Figure~\ref{fig:d-cliques-mnist-init-clique-avg-effect} shows all the results for MNIST. Comparing Figure~\ref{fig:d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} to~\ref{fig:d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy}, and Figure~\ref{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} to~\ref{fig:d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy} together, we see that Uniform Initialization has imperceptible effects. However, for all four sub-figures, using Clique Averaging has a slightly better average convergence speed, and significantly lower variance between nodes, than not using it. Moreover, the improvement is larger with Fully-Connected D-Cliques.
+
+\begin{figure}[htbp]
      \centering
-     % To regenerate the figure, from directory results/mnist
-     % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-18:14:35-CET no-clique-avg/clique-ring/all/2021-03-12-10:40:37-CET no-init/clique-ring/all/2021-03-12-10:40:11-CET no-init-no-clique-avg/clique-ring/all/2021-03-12-10:41:03-CET --add-min-max --yaxis test-accuracy --labels '  'with uniform init., with clique avg.'    'with uniform init., without clique avg.'  'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png   
+      % To regenerate the figure, from directory results/mnist   
+      % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-18:14:35-CET no-clique-avg/clique-ring/all/2021-03-12-10:40:37-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg.' 'without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy.png
       \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy}
-         \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} Ring}
+         \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-ring-test-accuracy} D-Cliques (Ring), with Uniform Initialization}
+     \end{subfigure}
+     \quad 
+     % To regenerate the figure, from directory results/mnist
+     % python ../../../learn-topology/tools/plot_convergence.py no-init/clique-ring/all/2021-03-12-10:40:11-CET no-init-no-clique-avg/clique-ring/all/2021-03-12-10:41:03-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg.' 'without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy.png   
+      \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy}
+         \caption{\label{fig:d-cliques-mnist-no-init-clique-avg-effect-ring-test-accuracy} D-Cliques (Ring), without Uniform Initialization}
      \end{subfigure}
+     
      % To regenerate the figure, from directory results/mnist
-     %python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:26-CET no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET --add-min-max --yaxis test-accuracy --labels 'with uniform init., with clique avg.'    'with uniform init., without clique avg.'  'without uniform init., with clique avg.' 'without uniform init., without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
+     %python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:26-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg.'    'without clique avg.'  --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy.png
        \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy}
-         \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected}
+         \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect-fcc-test-accuracy} D-Cliques (Fully-Connected), with Uniform Initialization}
      \end{subfigure}
+     \quad
+      % To regenerate the figure, from directory results/mnist
+     %python ../../../learn-topology/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg.' 'without clique avg.' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy.png
+       \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy}
+         \caption{\label{fig:d-cliques-mnist-no-init-clique-avg-effect-fcc-test-accuracy} D-Cliques (Fully-Connected), without Uniform Initialization}
+     \end{subfigure}
+     
+     
 \caption{\label{fig:d-cliques-mnist-init-clique-avg-effect} MNIST: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=128)}
 \end{figure}
 
+Figure~\ref{fig:d-cliques-cifar10-init-clique-avg-effect} shows all the results for CIFAR10. On the one hand, with D-Cliques arranged in a ring, uniform initialization has a small but positive effect on convergence speed, whether Clique Averaging is used or not. With fully-connected D-Cliques,  the effect is significantly smaller and almost negligible, both with and without Clique Averaging. On the other hand, Clique Averaging is always beneficial, by a significantly larger margin for both interclique topologies and with and without uniform initialization. Moreover, the effect is stronger than for MNIST.
+
     \begin{figure}[htbp]
      \centering
      % To regenerate the figure, from directory results/cifar10
-     % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-11:58:43-CET no-init/clique-ring/all/2021-03-13-18:28:30-CET no-clique-avg/clique-ring/all/2021-03-13-18:27:09-CET  no-init-no-clique-avg/clique-ring/all/2021-03-13-18:29:58-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg., with uniform init.' 'with clique avg., without uniform init.'  'without clique avg., with uniform init.'   'without clique avg., without uniform init.' --legend 'lower right' --ymax 75  --save-figure ../../figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png  
+     % python ../../../learn-topology/tools/plot_convergence.py clique-ring/all/2021-03-10-11:58:43-CET no-init/clique-ring/all/2021-03-13-18:28:30-CET no-clique-avg/clique-ring/all/2021-03-13-18:27:09-CET  no-init-no-clique-avg/clique-ring/all/2021-03-13-18:29:58-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg., with uniform init.' 'with clique avg., without uniform init.'  'without clique avg., with uniform init.'   'without clique avg., without uniform init.' --legend 'upper left' --ymax 115  --save-figure ../../figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy.png --font-size 15
       \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy}
-         \caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy} Ring}
+         \caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect-ring-test-accuracy} D-Cliques (Ring)}
      \end{subfigure}
      % To regenerate the figure, from directory results/cifar10
-     %python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-clique-avg/fully-connected-cliques/all/2021-03-13-18:31:36-CET  no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg., with uniform init.' 'with clique avg., without uniform init.'  'without clique avg., with uniform init.'   'without clique avg., without uniform init.' --legend 'lower right'  --ymax 75 --save-figure ../../figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png 
+     %python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-clique-avg/fully-connected-cliques/all/2021-03-13-18:31:36-CET  no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET --add-min-max --yaxis test-accuracy --labels 'with clique avg., with uniform init.' 'with clique avg., without uniform init.'  'without clique avg., with uniform init.'   'without clique avg., without uniform init.' --legend 'upper left'  --ymax 115 --save-figure ../../figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy.png --font-size 15
        \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy}
-         \caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy} Fully-Connected}
+         \caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect-fcc-test-accuracy} D-Cliques (Fully-Connected)}
      \end{subfigure}
-\caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect} CIFAR10: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=20)}
+\caption{\label{fig:d-cliques-cifar10-init-clique-avg-effect} CIFAR10: Effects of Clique Averaging and Uniform Initialization on Convergence Speed. (100 nodes, non-IID, D-Cliques, bsz=20, momentum=0.9)}
 \end{figure}
 
-\begin{figure}[htbp]
-     \centering
-% To regenerate the figure, from directory results/mnist
-% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-09:20:03-CET fully-connected/all/2021-03-10-09:25:19-CET clique-ring/all/2021-03-10-18:14:35-CET fully-connected-cliques/all/2021-03-10-10:19:44-CET --add-min-max --yaxis test-accuracy --labels '1-node IID bsz=12800' '100-nodes non-IID fully-connected bsz=128' '100-nodes non-IID D-Cliques (Ring) bsz=128' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=128' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-vs-1-node-test-accuracy.png
-         \centering
-         \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
-         \caption{\label{fig:d-cliques-mnist-linear-w-clique-averaging-w-initial-averaging} MNIST: D-Cliques Convergence Speed (100 nodes, Constant Updates per Epoch)}
-\end{figure}    
-     
- \begin{figure}[htbp]
-     \centering
-          % To regenerate the figure, from directory results/cifar10
-% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-13:52:58-CET clique-ring/all/2021-03-10-11:58:43-CET fully-connected-cliques/all/2021-03-10-13:58:57-CET --add-min-max --yaxis training-loss --labels '1-node IID bsz=2000' '100-nodes non-IID D-Cliques (Ring) bsz=20' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=20' --legend 'lower right' --save-figure ../../figures/d-cliques-cifar10-vs-1-node-training-loss.png
-     \begin{subfigure}[b]{0.48\textwidth}
-         \centering
-         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-vs-1-node-training-loss}
-\caption{\label{fig:d-cliques-cifar10-training-loss} Training Loss}
-     \end{subfigure}
-     \hfill
-     % To regenerate the figure, from directory results/cifar10
-% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-13:52:58-CET clique-ring/all/2021-03-10-11:58:43-CET fully-connected-cliques/all/2021-03-10-13:58:57-CET --add-min-max --yaxis test-accuracy --labels '1-node IID bsz=2000' '100-nodes non-IID D-Cliques (Ring) bsz=20' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=20' --legend 'lower right' --save-figure ../../figures/d-cliques-cifar10-vs-1-node-test-accuracy.png
-     \begin{subfigure}[b]{0.48\textwidth}
-         \centering
-         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-vs-1-node-test-accuracy}
-\caption{\label{fig:d-cliques-cifar10-test-accuracy}  Test Accuracy}
-     \end{subfigure}
-\caption{\label{fig:d-cliques-cifar10-convolutional-extended} D-Cliques Convergence Speed with Convolutional Network on CIFAR10 (100 nodes, Constant Updates per Epoch).}
-\end{figure}
+We conclude that Uniform Initialization is not so important for convergence speed but that Clique Averaging is always significantly so.
+
+% \subsection{Comparison to Non-Clustered Topologies}    
+%     
+%     \begin{figure}
+%\centering
+%              \begin{subfigure}[htb]{0.48\textwidth}
+%% To regenerate the figure, from directory results/mnist/gn-lenet
+%% python ../../../../learn-topology/tools/plot_convergence.py no-init/all/2021-03-22-21:39:54-CET no-init-no-clique-avg/all/2021-03-22-21:40:16-CET random-10/all/2021-03-22-21:41:06-CET random-10-diverse/all/2021-03-22-21:41:46-CET random-10-diverse-unbiased-grad/all/2021-03-22-21:42:04-CET --legend 'lower right' --add-min-max --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random edges (all classes repr.) with unbiased grad.' --ymin 80 --yaxis test-accuracy --save-figure ../../../figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies.png
+%         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies}
+%         \caption{\label{fig:d-cliques-mnist-lenet-comparison-to-non-clustered-topologies} LeNet Model}
+%        \end{subfigure}
+%        \hfill
+%                      \begin{subfigure}[htb]{0.48\textwidth}
+%% To regenerate the figure, from directory results/mnist/gn-lenet
+%% python ../../../../learn-topology/tools/plot_convergence.py no-init/all/2021-03-22-21:39:54-CET no-init-no-clique-avg/all/2021-03-22-21:40:16-CET random-10/all/2021-03-22-21:41:06-CET random-10-diverse/all/2021-03-22-21:41:46-CET random-10-diverse-unbiased-grad/all/2021-03-22-21:42:04-CET --legend 'upper right' --add-min-max --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random edges (all classes repr.) with unbiased grad.' --ymax 0.7 --yaxis scattering --save-figure ../../../figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering.png
+%         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering}
+%         \caption{\label{fig:d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering} LeNet Model (Scattering)}
+%        \end{subfigure}
+%       
+%         \caption{\label{fig:d-cliques-mnist-comparison-to-non-clustered-topologies} MNIST: Comparison to non-Clustered Topologies}
+%\end{figure}
+%
+% \begin{figure}
+% \centering
+%     % To regenerate the figure, from directory results/cifar10
+%% python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET  random-10/all/2021-03-17-20:30:03-CET  random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET random-10-diverse-unbiased-gradient-uniform-init/all/2021-03-17-20:31:41-CET --labels 'd-clique (fcc) clique avg., uniform init.' 'd-clique (fcc) no clique avg. no uniform init.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' '10 random (all classes repr.) with unbiased grad., uniform init.' --add-min-max --legend 'upper right' --yaxis scattering --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering.png --ymax 0.7
+%        \begin{subfigure}[b]{0.48\textwidth}
+%        \centering
+%         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering}
+%         \caption{\label{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering} LeNet Model: Scattering}
+%     \end{subfigure}  
+%         
+%\caption{\label{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies} CIFAR10: Comparison to non-Clustered Topologies}
+%\end{figure} 
+%
+%
+%\begin{itemize}
+%  \item Clustering does not seem to make a difference in MNIST, even when using a higher-capacity model (LeNet) instead of a linear model. (Fig.\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies})     
+%  \item Except for the random 10 topology, convergence speed seems to be correlated with scattering in CIFAR-10 with LeNet model (Fig.\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There is also more difference between topologies both in convergence speed and scattering than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering computed similar to Consensus Control for Decentralized Deep Learning~\cite{consensus_distance}.
+%\end{itemize}
+%
+%
+
+\clearpage
+
+\subsection{Scaling behaviour as the number of nodes increases}
+ 
+ Section~\ref{section:interclique-topologies} compares the convergence speed of various interclique topologies at a scale of 1000 nodes. In this section, we show the effect of scaling the number of nodes, by comparing the convergence speed with 1, 10, 100, and 1000 nodes, and adjusting the batch size to maintain a constant number of updates per epoch. We present results for Ring, Fractal, Smallworld, and Fully-Connected Cliques interclique topologies.
+ 
+Figure~\ref{fig:d-cliques-mnist-scaling-fully-connected} shows results for MNIST. For all topologies, we notice a perfect scaling up to 100 nodes, i.e., the accuracy curves overlap, with low variance between nodes. Starting at 1000 nodes, there is a significant increase in variance between nodes and the convergence is slower, only marginally for Fully-Connected Cliques but significantly so for Fractal and Ring. Smallworld has higher variance between nodes but has a convergence speed close to that of Fully-Connected Cliques.
+
+Figure~\ref{fig:d-cliques-cifar10-scaling-fully-connected} shows results for CIFAR10. When increasing from 1 to 10 nodes, which results in a single fully-connected clique, there is actually a small increase both in final accuracy and convergence speed. We believe this increase is due to the gradient being computed with exactly the same number of examples for all classes with 10 fully-connected non-IID nodes, while the gradient for a single non-IID node may have a slightly bigger bias because the random sampling does not guarantee the representation of all classes exactly equally. At a scale of 100 nodes, there is no difference between Fully-Connected Cliques and Fractal, as the connections are the same; however, a Ring already shows a significantly slower convergence. At 1000 nodes, the convergence significantly slows for Fractal and Ring, while remaining close, albeit with a larger variance, for Fully-Connected Cliques. Similar to MNIST, Smallworld has higher variance and lower convergence speed than Fully-Connected Cliques but remains close.
 
-\subsection{Scaling behaviour as the number of nodes increases?}
-          
-     \begin{figure}[htbp]
+We therefore conclude that Fully-Connected Cliques and Smallworld have good scaling properties in terms of convergence speed, and that Smallworld, with its linear-logarithmic scaling, is a good compromise between convergence speed and the number of edges required.
+ 
+\begin{figure}[htbp]
          \centering     
               % To regenerate the figure, from directory results/scaling
-% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-14-17:56:26-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
-
-      \begin{subfigure}[b]{0.7\textwidth}
+% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-14-17:56:26-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-updates.png --ymin 80 --add-min-max
+      \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-updates}
-         \caption{Fully-Connected (Cliques), $O(\frac{n^2}{c^2} + nc)$ edges}
+         \caption{Fully-Connected Cliques}
      \end{subfigure}
-     
+     \quad
+       % To regenerate the figure, from directory results/scaling
+% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/fully-connected-cliques/all/2021-03-12-09:13:27-CET ../mnist/smallworld-logn-cliques/all/2021-03-23-21:44:56-CET 1000/mnist/smallworld-logn-cliques/all/2021-03-23-21:45:39-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-smallworld-cst-updates.png --ymin 80 --add-min-max
+      \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-smallworld-cst-updates}
+         \caption{Smallworld}
+     \end{subfigure}
+     \quad
+
           % To regenerate the figure, from directory results/scaling
-% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/clique-ring/all/2021-03-13-18:22:01-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fractal-cliques/all/2021-03-14-17:41:59-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png --ymin 80 --add-min-max
-         \begin{subfigure}[b]{0.7\textwidth}
+% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/clique-ring/all/2021-03-13-18:22:01-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fractal-cliques/all/2021-03-14-17:41:59-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates.png --ymin 80 --add-min-max
+         \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-fractal-cliques-cst-updates}
-         \caption{Fractal, $O(nc)$ edges}
+         \caption{Fractal}
      \end{subfigure}  
-
-     
+     \quad
      % To regenerate the figure, from directory results/scaling
-% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/clique-ring/all/2021-03-13-18:22:01-CET ../mnist/clique-ring/all/2021-03-10-18:14:35-CET 1000/mnist/clique-ring/all/2021-03-13-18:22:36-CET --labels '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png --ymin 80 --add-min-max
-         \begin{subfigure}[b]{0.7\textwidth}
+% python ../../../learn-topology/tools/plot_convergence.py ../mnist/1-node-iid/all/2021-03-10-09:20:03-CET 10/mnist/clique-ring/all/2021-03-13-18:22:01-CET ../mnist/clique-ring/all/2021-03-10-18:14:35-CET 1000/mnist/clique-ring/all/2021-03-13-18:22:36-CET --labels '1 node IID bsz=12800' '10 nodes bsz=1280' '100 nodes bsz=128' '1000 nodes bsz=13' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-clique-ring-cst-updates.png --ymin 80 --add-min-max
+         \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-scaling-clique-ring-cst-updates}
-         \caption{Ring, $O(n)$ edges}
+         \caption{Ring}
      \end{subfigure}  
      
-     \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: D-Clique Scaling Behaviour, where $n$ is the number of nodes, and $c$ the size of a clique (Constant Updates per Epoch).}
-     \end{figure}
+     \caption{\label{fig:d-cliques-mnist-scaling-fully-connected} MNIST: D-Clique Scaling Behaviour (Constant Updates per Epoch)}
+\end{figure}
      
-          \begin{figure}[htbp]
+\begin{figure}[htbp]
          \centering
      
               % To regenerate the figure, from directory results/scaling
 % python ../../../learn-topology/tools/plot_convergence.py ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/fully-connected-cliques/all/2021-03-10-13:58:57-CET 1000/cifar10/fully-connected-cliques/all/2021-03-14-17:41:20-CET --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-fully-connected-cst-updates.png --add-min-max
-
-      \begin{subfigure}[b]{0.7\textwidth}
+      \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-fully-connected-cst-updates}
-         \caption{Fully-Connected (Cliques), $O(\frac{n^2}{c^2} + nc)$ edges}
+         \caption{Fully-Connected Cliques}
+     \end{subfigure}
+     \quad
+     % python ../../../learn-topology/tools/plot_convergence.py ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/smallworld-logn-cliques/all/2021-03-23-22:13:23-CET 1000/cifar10/smallworld-logn-cliques/all/2021-03-23-22:13:57-CET --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-smallworld-cst-updates.png --add-min-max
+      \begin{subfigure}[b]{0.48\textwidth}
+         \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-smallworld-cst-updates}
+         \caption{Smallworld}
      \end{subfigure}
      
+     
           % To regenerate the figure, from directory results/scaling
 % python ../../../learn-topology/tools/plot_convergence.py  ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/fully-connected-cliques/all/2021-03-10-13:58:57-CET 1000/cifar10/fractal-cliques/all/2021-03-14-17:42:46-CET  --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2'  --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates.png --add-min-max
-         \begin{subfigure}[b]{0.7\textwidth}
+         \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-fractal-cliques-cst-updates}
-         \caption{Fractal, $O(nc)$ edges}
+         \caption{Fractal}
      \end{subfigure}  
-
-     
+     \quad
      % To regenerate the figure, from directory results/scaling
 % python ../../../learn-topology/tools/plot_convergence.py  ../cifar10/1-node-iid/all/2021-03-10-13:52:58-CET 10/cifar10/fully-connected-cliques/all/2021-03-13-19:06:02-CET ../cifar10/clique-ring/all/2021-03-10-11:58:43-CET 1000/cifar10/clique-ring/all/2021-03-14-09:55:24-CET  --labels '1 node IID bsz=2000' '10 nodes non-IID bsz=200' '100 nodes non-IID bsz=20' '1000 nodes non-IID bsz=2'   --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-scaling-clique-ring-cst-updates.png --add-min-max
-         \begin{subfigure}[b]{0.7\textwidth}
+         \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-scaling-clique-ring-cst-updates}
-         \caption{Ring, $O(n)$ edges}
+         \caption{Ring}
      \end{subfigure}  
      
-     \caption{\label{fig:d-cliques-cifar10-scaling-fully-connected} CIFAR10: D-Clique Scaling Behaviour, where $n$ is the number of nodes, and $c$ the size of a clique (Constant Updates per Epoch).}
-     \end{figure}
-     
-     \begin{figure}
-\centering
-              \begin{subfigure}[htb]{0.48\textwidth}
-% To regenerate the figure, from directory results/mnist/gn-lenet
-% python ../../../../learn-topology/tools/plot_convergence.py no-init/all/2021-03-22-21:39:54-CET no-init-no-clique-avg/all/2021-03-22-21:40:16-CET random-10/all/2021-03-22-21:41:06-CET random-10-diverse/all/2021-03-22-21:41:46-CET random-10-diverse-unbiased-grad/all/2021-03-22-21:42:04-CET --legend 'lower right' --add-min-max --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random edges (all classes repr.) with unbiased grad.' --ymin 80 --yaxis test-accuracy --save-figure ../../../figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies.png
-         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies}
-         \caption{\label{fig:d-cliques-mnist-lenet-comparison-to-non-clustered-topologies} LeNet Model}
-        \end{subfigure}
-        \hfill
-                      \begin{subfigure}[htb]{0.48\textwidth}
-% To regenerate the figure, from directory results/mnist/gn-lenet
-% python ../../../../learn-topology/tools/plot_convergence.py no-init/all/2021-03-22-21:39:54-CET no-init-no-clique-avg/all/2021-03-22-21:40:16-CET random-10/all/2021-03-22-21:41:06-CET random-10-diverse/all/2021-03-22-21:41:46-CET random-10-diverse-unbiased-grad/all/2021-03-22-21:42:04-CET --legend 'upper right' --add-min-max --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random edges (all classes repr.) with unbiased grad.' --ymax 0.7 --yaxis scattering --save-figure ../../../figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering.png
-         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering}
-         \caption{\label{fig:d-cliques-mnist-lenet-comparison-to-non-clustered-topologies-scattering} LeNet Model (Scattering)}
-        \end{subfigure}
-       
-         \caption{\label{fig:d-cliques-mnist-comparison-to-non-clustered-topologies} MNIST: Comparison to non-Clustered Topologies}
+     \caption{\label{fig:d-cliques-cifar10-scaling-fully-connected} CIFAR10: D-Clique Scaling Behaviour (Constant Updates per Epoch)}
 \end{figure}
 
- \begin{figure}
- \centering
-     % To regenerate the figure, from directory results/cifar10
-% python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-13:58:57-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET  random-10/all/2021-03-17-20:30:03-CET  random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET random-10-diverse-unbiased-gradient-uniform-init/all/2021-03-17-20:31:41-CET --labels 'd-clique (fcc) clique avg., uniform init.' 'd-clique (fcc) no clique avg. no uniform init.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' '10 random (all classes repr.) with unbiased grad., uniform init.' --add-min-max --legend 'upper right' --yaxis scattering --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering.png --ymax 0.7
-        \begin{subfigure}[b]{0.48\textwidth}
-        \centering
-         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering}
-         \caption{\label{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies-scattering} LeNet Model: Scattering}
-     \end{subfigure}  
-         
-\caption{\label{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies} CIFAR10: Comparison to non-Clustered Topologies}
-\end{figure} 
-
 
-\begin{itemize}
-  \item Clustering does not seem to make a difference in MNIST, even when using a higher-capacity model (LeNet) instead of a linear model. (Fig.\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies})     
-  \item Except for the random 10 topology, convergence speed seems to be correlated with scattering in CIFAR-10 with LeNet model (Fig.\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There is also more difference between topologies both in convergence speed and scattering than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering computed similar to Consensus Control for Decentralized Deep Learning~\cite{consensus_distance}.
-\end{itemize}
+%%         % To regenerate the figure, from directory results/scaling
+%%% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected-cst-bsz.png --ymin 80 --add-min-max
+%         \begin{figure}[htbp]
+%         \centering
+%         \includegraphics[width=0.48\textwidth]{figures/d-cliques-mnist-scaling-fully-connected-cst-bsz}
+%         \caption{FCC: Constant Batch-Size}
+%     \end{figure} 
+     
  
 \end{document}