diff --git a/.gitignore b/.gitignore
index 543b22640af89dbf2f2bf35869dad958d978970a..5bc3354174640ed0da7b5015a40b05443a38defa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ figures/.DS_Store
 *.out
 *.pdf
 *.synctex.gz
+**/all/*
diff --git a/figures/d-cliques-cifar10-vs-1-node-test-accuracy.png b/figures/d-cliques-cifar10-vs-1-node-test-accuracy.png
new file mode 100644
index 0000000000000000000000000000000000000000..bd6536ecc57bbde07b8b13aee7563d60dfb5efab
Binary files /dev/null and b/figures/d-cliques-cifar10-vs-1-node-test-accuracy.png differ
diff --git a/figures/d-cliques-cifar10-vs-1-node-training-loss.png b/figures/d-cliques-cifar10-vs-1-node-training-loss.png
index a65874120ab54072e62abfed42f670053f63300f..903229f59916ef4b720869ef1dd798bb98bb5b54 100644
Binary files a/figures/d-cliques-cifar10-vs-1-node-training-loss.png and b/figures/d-cliques-cifar10-vs-1-node-training-loss.png differ
diff --git a/figures/d-cliques-mnist-scaling-fully-connected.png b/figures/d-cliques-mnist-scaling-fully-connected.png
new file mode 100644
index 0000000000000000000000000000000000000000..62bdfd9ff9eb148f74a598e9f805732c86f9b7b9
Binary files /dev/null and b/figures/d-cliques-mnist-scaling-fully-connected.png differ
diff --git a/figures/d-cliques-mnist-vs-1-node-test-accuracy.png b/figures/d-cliques-mnist-vs-1-node-test-accuracy.png
new file mode 100644
index 0000000000000000000000000000000000000000..08596aa9b752541dcf02cdffa0ccb1e30e2e2daa
Binary files /dev/null and b/figures/d-cliques-mnist-vs-1-node-test-accuracy.png differ
diff --git a/figures/d-cliques-mnist-vs-1-node-training-loss.png b/figures/d-cliques-mnist-vs-1-node-training-loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..38b120ac327d6433519c960b43fdc1181de92591
Binary files /dev/null and b/figures/d-cliques-mnist-vs-1-node-training-loss.png differ
diff --git a/main.tex b/main.tex
index d8748c174a45f2aa426c5e22e704707230c668e9..a6516c8b9e10e86fb3c0457dff5f2b8023bbc967 100644
--- a/main.tex
+++ b/main.tex
@@ -257,16 +257,26 @@ We solve this problem by decoupling the gradient averaging from the weight avera
 
 \subsection{MNIST and Linear Model}
 
-% To regenerate the figure, from directory results/mnist
-% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-09:20:03-CET fully-connected/all/2021-03-10-09:25:19-CET clique-ring/all/2021-03-10-10:15:24-CET fully-connected-cliques/all/2021-03-10-10:19:44-CET --add-min-max --yaxis validation-accuracy --labels '1-node IID bsz=12800' '100-nodes non-IID fully-connected bsz=128' '100-nodes non-IID D-Clique (Ring)' '100-nodes non-IID D-Cliques (Fully-Connected)' --legend 'lower right' --ymin 80 --ymax 92.5
      \begin{figure}[htbp]
-         \centering
-         \includegraphics[width=0.7\textwidth]{figures/10-cliques-validation-accuracy}
+     \centering
+% To regenerate the figure, from directory results/mnist
+% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-09:20:03-CET fully-connected/all/2021-03-10-09:25:19-CET clique-ring/all/2021-03-10-10:15:24-CET fully-connected-cliques/all/2021-03-10-10:19:44-CET --add-min-max --yaxis training-loss --labels '1-node IID bsz=12800' '100-nodes non-IID fully-connected bsz=128' '100-nodes non-IID D-Cliques (Ring) bsz=128' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=128' --legend 'upper right' --save-figure ../../figures/d-cliques-mnist-vs-1-node-training-loss.png
+%     \begin{subfigure}[b]{0.70\textwidth}
+%         \centering
+%         \includegraphics[width=\textwidth]{figures/d-cliques-mnist-vs-1-node-training-loss}
+%\caption{\label{fig:d-cliques-mnist-training-loss} Training Loss}
+%     \end{subfigure}
+% To regenerate the figure, from directory results/mnist
+% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-09:20:03-CET fully-connected/all/2021-03-10-09:25:19-CET clique-ring/all/2021-03-10-18:14:35-CET fully-connected-cliques/all/2021-03-10-10:19:44-CET --add-min-max --yaxis test-accuracy --labels '1-node IID bsz=12800' '100-nodes non-IID fully-connected bsz=128' '100-nodes non-IID D-Cliques (Ring) bsz=128' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=128' --legend 'lower right' --ymin 85 --ymax 92.5 --save-figure ../../figures/d-cliques-mnist-vs-1-node-test-accuracy.png
+     %\begin{subfigure}[b]{\textwidth}
+         %\centering
+         \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-vs-1-node-test-accuracy}
+%\caption{\label{fig:d-cliques-mnist-linear} D-Cliques with Linear Model on MNIST.}
+     %\end{subfigure}
 \caption{\label{fig:d-cliques-mnist-linear} D-Cliques with Linear Model on MNIST.}
-     \end{figure}
+\end{figure}
 
-TODO: Update figure to use decoupled gradient averaging (will probably reduce variance and accelerate convergence speed)\\
-TODO: Add D-Cliques arranged in a Ring instead of Fully-Connected
+TODO: Update figure with actual Clique-Ring results
 
 \subsection{CIFAR10 and Convolutional Model}
 
@@ -283,25 +293,27 @@ x_i^{(k-\frac{1}{2})} \leftarrow x_i^{(k-1)} - \gamma v_i^{(k)}
 
 In addition, it is important that all nodes start from the same model value; otherwise, the random initialization of models introduces another source of variance that persists over many steps. In combination with D-Cliques (Algorithm~\ref{Algorithm:Clique-Unbiased-D-PSGD}), this provides the convergence results of Figure~\ref{fig:d-cliques-cifar10-convolutional}. To assess how far this is from an ``optimal'' solution, in which the delay introduced by multiple hops between nodes is completely removed, we also show the convergence speed of a single node that computes its average gradient from all the samples obtained by all nodes in a single round, i.e. the distributed average is computed exactly at every step. The results show that, apart from the variance introduced by the multiple hops between nodes, which slows down the distributed averaging of models, the average convergence speed is close to this optimum.
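+% For illustration only: a minimal, hypothetical Python sketch of the points
+% above, namely identical initialization on every node and gradient averaging
+% inside the clique decoupled from the weight averaging with neighbours. This
+% is not the experiment code: the names (cliques, neighbours, local_gradient),
+% the standard momentum form and the uniform weight averaging are simplifying
+% assumptions.
+%
+% import numpy as np
+%
+% def d_cliques_sketch(num_nodes, cliques, neighbours, local_gradient,
+%                      dim, gamma=0.1, beta=0.9, steps=100):
+%     """cliques[i], neighbours[i]: lists of node ids (both including i);
+%     local_gradient(i, x): gradient of node i's local loss at x."""
+%     # Identical initialization: every node starts from the same copy.
+%     x0 = np.random.randn(dim)
+%     x = [x0.copy() for _ in range(num_nodes)]
+%     v = [np.zeros(dim) for _ in range(num_nodes)]
+%     for _ in range(steps):
+%         # 1) Gradient averaging, decoupled from weight averaging:
+%         #    each node averages gradients over its clique.
+%         g = [local_gradient(i, x[i]) for i in range(num_nodes)]
+%         x_half = []
+%         for i in range(num_nodes):
+%             g_clique = np.mean([g[j] for j in cliques[i]], axis=0)
+%             v[i] = beta * v[i] + g_clique        # momentum v_i^{(k)} (assumed form)
+%             x_half.append(x[i] - gamma * v[i])   # x_i^{(k-1/2)}
+%         # 2) Weight averaging with neighbours (uniform weights for simplicity).
+%         x = [np.mean([x_half[j] for j in neighbours[i]], axis=0)
+%              for i in range(num_nodes)]
+%     return x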
 
+
 \begin{figure}[htbp]
      \centering
+     % To regenerate the figure, from directory results/cifar10
+% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-13:52:58-CET clique-ring/all/2021-03-10-11:58:43-CET fully-connected-cliques/all/2021-03-10-13:58:57-CET --add-min-max --yaxis training-loss --labels '1-node IID bsz=2000' '100-nodes non-IID D-Cliques (Ring) bsz=20' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=20' --legend 'lower right' --save-figure ../../figures/d-cliques-cifar10-vs-1-node-training-loss.png
      \begin{subfigure}[b]{0.48\textwidth}
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-vs-1-node-training-loss}
 \caption{\label{fig:d-cliques-cifar10-training-loss} Training Loss}
      \end{subfigure}
+     \hfill
+     % To regenerate the figure, from directory results/cifar10
+% python ../../../learn-topology/tools/plot_convergence.py 1-node-iid/all/2021-03-10-13:52:58-CET clique-ring/all/2021-03-10-11:58:43-CET fully-connected-cliques/all/2021-03-10-13:58:57-CET --add-min-max --yaxis test-accuracy --labels '1-node IID bsz=2000' '100-nodes non-IID D-Cliques (Ring) bsz=20' '100-nodes non-IID D-Cliques (Fully-Connected) bsz=20' --legend 'lower right' --save-figure ../../figures/d-cliques-cifar10-vs-1-node-test-accuracy.png
      \begin{subfigure}[b]{0.48\textwidth}
          \centering
-         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-vs-1-node-validation-accuracy}
-\caption{\label{fig:d-cliques-cifar10-validation-accuracy}  Validation Accuracy}
+         \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-vs-1-node-test-accuracy}
+\caption{\label{fig:d-cliques-cifar10-test-accuracy}  Test Accuracy}
      \end{subfigure}
 \caption{\label{fig:d-cliques-cifar10-convolutional} D-Cliques with Convolutional Network on CIFAR10.}
 \end{figure}
 
-
-TODO: Add D-Cliques arranged in a Ring instead of Fully-Connected
-
-
 \subsection{Comparison to similar topologies}
 
 Similar maximum number of hops between nodes, but with less clustering than D-Cliques, or none at all (and no unbiasing of the gradient).
@@ -315,6 +327,14 @@ Similar number of maximum hops but no or less clustering than D-Cliques (and no
 
 \subsection{Effect of Scaling}
 
+% To regenerate the figure, from directory results/scaling
+% python ../../../learn-topology/tools/plot_convergence.py 10/mnist/fully-connected-cliques/all/2021-03-10-14:40:35-CET ../mnist/fully-connected-cliques/all/2021-03-10-10:19:44-CET 1000/mnist/fully-connected-cliques/all/2021-03-10-16:44:35-CET --labels '10 nodes bsz=128' '100 nodes bsz=128' '1000 nodes bsz=128 (45)' --legend 'lower right' --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-scaling-fully-connected.png --ymin 80 --add-min-max
+     \begin{figure}[htbp]
+         \centering
+         \includegraphics[width=0.7\textwidth]{figures/d-cliques-mnist-scaling-fully-connected}
+\caption{\label{fig:d-cliques-mnist-scaling-fully-connected} Scaling Behaviour of Fully-Connected D-Cliques}
+     \end{figure}
+
 Show scaling effect for 10, 100, 1000 nodes (with decreasing sample sizes) for Clique Ring, Hierarchical, Fully-Connected.
 
 Robustness to not having fully-connected cliques (static and dynamic subsets).