diff --git a/mlsys2022style/appendix.tex b/mlsys2022style/appendix.tex
index 89e7201e60ba84fff9f9551fb20e0db28b529235..d53c3d335302e2fe16cb32d1e354280cbeb2eda0 100644
--- a/mlsys2022style/appendix.tex
+++ b/mlsys2022style/appendix.tex
@@ -359,5 +359,59 @@ using the corresponding input functions.
 
 \subsection{Evaluation}
 \label{section:ideal-cliques-evaluation}
+
+\subsubsection{Convergence Speed of D-Cliques Compared to Fully-Connected}
+
+% From directory 'results-v2':
+% MNIST
+% python $TOOLS/analyze/filter.py all --dataset:name mnist --topology:name fully-connected d-cliques/ideal --nodes:name max-local-skew --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-28-12:39:00-CEST-labostrex117 all/2021-09-28-23:18:40-CEST-labostrex118 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 80 --ymax 92.5 --yaxis test-accuracy --labels 'fully-connected' 'd-cliques (fc) w/ cliq-avg' --save-figure ../mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-1-class-per-node.png --linestyles 'solid' 'dashed'
+% CIFAR10
+% python $TOOLS/analyze/filter.py all --dataset:name cifar10 --topology:name fully-connected d-cliques/ideal --nodes:name max-local-skew --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-10-03-16:09:21-CEST-labostrex112 all/2021-10-03-19:45:14-CEST-labostrex118 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 0 --ymax 100 --yaxis test-accuracy --labels 'fully-connected w/ momentum' 'd-cliques (fc) w/ cliq-avg and momentum' --save-figure ../mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-1-class-per-node.png --linestyles 'solid' 'dashed' --legend 'lower right'
+
+\begin{figure}[htbp]
+    \centering        
+    \begin{subfigure}[b]{0.35\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{figures/convergence-speed-mnist-dc-fc-vs-fc-1-class-per-node}
+    \caption{\label{fig:convergence-speed-mnist-dc-fc-vs-fc-1-class-per-node} MNIST}
+    \end{subfigure}
+    \hfill
+    \begin{subfigure}[b]{0.35\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{figures/convergence-speed-cifar10-dc-fc-vs-fc-1-class-per-node}
+    \caption{\label{fig:convergence-speed-cifar10-dc-fc-vs-fc-1-class-per-node} CIFAR10}
+    \end{subfigure}
+\caption{\label{fig:convergence-speed-dc-vs-fc-1-class-per-node} Convergence Speed of D-Cliques Compared to Fully-Connected on 100 Nodes (1 class/node).}
+\end{figure}
+
+\subsubsection{Effect of Removing Intra-clique Edges}
+
+% From directory 'results-v2':
+% MNIST
+% python $TOOLS/analyze/filter.py all --dataset:name mnist --topology:name d-cliques/ideal --nodes:name max-local-skew --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+
+% w/o Clique Averaging
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-29-03:52:47-CEST-labostrex118 all/2021-10-02-21:26:18-CEST-labostrex113 all/2021-10-03-06:33:52-CEST-labostrex113 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 89 --ymax 92.5 --yaxis test-accuracy --labels 'full intra-connectivity' '-1 edge/clique' '-5 edges/clique' --save-figure ../mlsys2022style/figures/d-cliques-ideal-wo-clique-avg-impact-of-edge-removal.png --linestyles 'solid' 'dashed' 'dotted' --font-size 18
+% w/ Clique Averaging
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-28-23:18:40-CEST-labostrex118 all/2021-10-02-16:50:53-CEST-labostrex113  all/2021-10-03-02:00:23-CEST-labostrex113 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 89 --ymax 92.5 --yaxis test-accuracy  --labels 'full intra-connectivity' '-1 edge/clique' '-5 edges/clique' --save-figure ../mlsys2022style/figures/d-cliques-ideal-w-clique-avg-impact-of-edge-removal.png --linestyles 'solid' 'dashed' 'dotted' --font-size 18
+
+\begin{figure}[t]
+     \centering
+
+\begin{subfigure}[b]{0.23\textwidth}
+     \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-ideal-wo-clique-avg-impact-of-edge-removal}
+\caption{\label{fig:d-cliques-ideal-wo-clique-avg-impact-of-edge-removal} Without Clique Averaging}
+\end{subfigure}
+\hfill
+\begin{subfigure}[b]{0.23\textwidth}
+     \centering
+         \includegraphics[width=\textwidth]{figures/d-cliques-ideal-w-clique-avg-impact-of-edge-removal}
+\caption{\label{fig:d-cliques-ideal-w-clique-avg-impact-of-edge-removal} With Clique Averaging}
+\end{subfigure}
+\caption{\label{fig:d-cliques-ideal-intra-connectivity} Impact of Intra-clique Connectivity Failures on MNIST (1 class/node). Y axis starts at 89.}
+\end{figure}
      
  
\ No newline at end of file
diff --git a/mlsys2022style/d-cliques.tex b/mlsys2022style/d-cliques.tex
index 547d03ffc85d7fca8006c78505a08174ad84afc2..ef818f865cf6e3dc34faece5db7fc08cd66be86c 100644
--- a/mlsys2022style/d-cliques.tex
+++ b/mlsys2022style/d-cliques.tex
@@ -71,6 +71,13 @@ edge with all other cliques, see Figure~\ref{fig:d-cliques-figure} for the
 corresponding D-Cliques network in the case of $n=100$ nodes and $L=10$
 classes. We will explore sparser inter-clique topologies in Section~\ref{section:interclique-topologies}.
 
+\begin{figure}[htbp]
+    \centering
+    \includegraphics[width=0.20\textwidth]{../figures/fully-connected-cliques}
+    \caption{\label{fig:d-cliques-figure} D-Cliques (fully-connected
+    cliques) example with 1 class/node.}
+\end{figure}
+
 We construct D-Cliques by initializing cliques at random, using at most $M$
 nodes to limit the intra-clique communication costs, then we 
 swap nodes between pairs of cliques chosen at random such that the swap
diff --git a/mlsys2022style/exp.tex b/mlsys2022style/exp.tex
index 81d9548d1c9ae3c5590b7bf92c9ccd266cf1aad5..15ba54e6e179265ddf61f9d759fef467e44cd04e 100644
--- a/mlsys2022style/exp.tex
+++ b/mlsys2022style/exp.tex
@@ -77,126 +77,152 @@ mini-batch size, both approaches are equivalent.
 
 \subsection{D-Cliques match the Convergence Speed of Fully-Connected with a Fraction of the Edges}
 
-\begin{figure}[t]
-    \centering 
-             
-    \begin{subfigure}[b]{0.20\textwidth}
+% From directory 'results-v2':
+% MNIST
+% python $TOOLS/analyze/filter.py all --dataset:name mnist --topology:name fully-connected d-cliques/greedy-swap --nodes:name 2-shards-uneq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-28-23:16:47-CEST-labostrex117 all/2021-09-28-23:18:49-CEST-labostrex119 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 80 --ymax 92.5 --yaxis test-accuracy --labels 'fully-connected' 'd-cliques (fc) w/ cliq-avg' --save-figure ../mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-2-shards-per-node.png --linestyles 'solid' 'dashed' --font-size 18
+% CIFAR10
+% python $TOOLS/analyze/filter.py all --dataset:name cifar10 --topology:name fully-connected d-cliques/greedy-swap --nodes:name 2-shards-eq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-10-02-18:58:22-CEST-labostrex114 all/2021-10-03-19:53:21-CEST-labostrex117 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 0 --ymax 100 --yaxis test-accuracy --labels 'fully-connected w/ mom.' 'd-cliques (fc) w/ c-avg, mom.' --save-figure ../mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-2-shards-per-node.png --linestyles 'solid' 'dashed' --legend 'lower right' --font-size 18
+
+\begin{figure}[htbp]
+    \centering        
+    \begin{subfigure}[b]{0.23\textwidth}
     \centering
-    \includegraphics[width=\textwidth]{../figures/fully-connected-cliques}
-    \caption{\label{fig:d-cliques-figure} D-Cliques (fully-connected
-    cliques)}
+    \includegraphics[width=\textwidth]{figures/convergence-speed-mnist-dc-fc-vs-fc-2-shards-per-node}
+    \caption{\label{fig:convergence-speed-mnist-dc-fc-vs-fc-2-shards-per-node} MNIST}
     \end{subfigure}
     \hfill
-    % To regenerate figure, from results/mnist
-    % python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py fully-connected/all/2021-03-10-09:25:19-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET --add-min-max --yaxis test-accuracy --ymin 80 --ymax 92.5 --labels '100 nodes non-IID fully-connected' '100 nodes non-IID d-cliques' --save-figure ../../figures/d-cliques-mnist-vs-fully-connected.png --legend 'lower right' --font-size 16 --linestyles 'solid' 'dashed'
-    \begin{subfigure}[b]{0.26\textwidth}
+    \begin{subfigure}[b]{0.23\textwidth}
     \centering
-    \includegraphics[width=\textwidth]{../figures/d-cliques-mnist-vs-fully-connected.png}
-    \caption{\label{fig:d-cliques-example-convergence-speed} Convergence Speed
-    on MNIST}
+    \includegraphics[width=\textwidth]{figures/convergence-speed-cifar10-dc-fc-vs-fc-2-shards-per-node}
+    \caption{\label{fig:convergence-speed-cifar10-dc-fc-vs-fc-2-shards-per-node} CIFAR10}
     \end{subfigure}
-    
-\caption{\label{fig:d-cliques-example} D-Cliques topology and convergence
-speed on MNIST.}
+\caption{\label{fig:convergence-speed-dc-vs-fc-2-shards-per-node} Convergence Speed of D-Cliques Constructed with Greedy Swap Compared to Fully-Connected on 100 Nodes (2 shards/node).}
 \end{figure}
 
-Figure~\ref{fig:d-cliques-example-convergence-speed} illustrates the
-performance of D-Cliques on MNIST with $n=100$ nodes. Observe that the
+
+Figure~\ref{fig:convergence-speed-dc-vs-fc-2-shards-per-node} illustrates the
+convergence speed of D-Cliques with $n=100$ nodes on MNIST (with Clique Averaging) and CIFAR10 (with Clique Averaging and Momentum). Observe that the
 convergence speed is
 very close
 to that of a fully-connected topology, and significantly better than with
 a ring or a grid (see Figure~\ref{fig:iid-vs-non-iid-problem}). With 
 100 nodes, it offers a reduction of $\approx90\%$ in the number of edges
-compared to a fully-connected topology. Nonetheless, there is still
-significant variance in the accuracy across nodes, which is due to the bias
-introduced by inter-clique edges. We address this issue in the next section.
+compared to a fully-connected topology.
 
 \subsection{D-Cliques Converge Faster than Random Graphs}
 
-We demonstrate the advantages of D-Cliques over alternative sparse topologies
-that have a similar number of edges. First, we consider topologies in which
-the neighbors of each node are selected at random (hence without any clique
-structure).
-Specifically, for $n=100$ nodes, we
-construct a random topology such that each node has exactly 10 edges, which is
-similar to the average 9.9 edges of our D-Cliques topology 
-(Figure~\ref{fig:d-cliques-figure}). To better understand the role of
-the clique structure beyond merely ensuring class representativity among
-neighbors,
-we also compare to a random topology similar to the one described above except
-that edges are
-chosen such that each node has neighbors of all possible classes. Finally, we
-also implement an analog of Clique Averaging for these random topologies,
-where all nodes de-bias their gradient based on the class distribution of
-their neighbors. In the latter case, since nodes do not form a clique, each
-node obtains a different average gradient.
-
-The results for MNIST and CIFAR10 are shown in
-Figure~\ref{fig:d-cliques-comparison-to-non-clustered-topologies}. For MNIST,
-a purely random topology has higher variance and lower convergence speed than
-D-Cliques (with or without Clique Averaging), while a random topology with
-class representativity performs similarly as D-Cliques without Clique
-Averaging. However and perhaps surprisingly, a random topology with unbiased
-gradient performs slightly worse than without it. In any case, D-Cliques with
-Clique Averaging outperforms all random topologies, showing that the clique
-structure has a small but noticeable effect on the average accuracy and
-significantly reduces the variance across nodes in this setup.
-
-\begin{figure}[t]
-     \centering     
-         \begin{subfigure}[b]{0.35\textwidth}
-% To regenerate the figure, from directory results/mnist
-% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET  random-10/all/2021-07-23-11:59:56-CEST  random-10-diverse/all/2021-03-17-20:28:35-CET --labels 'd-clique (fcc)' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes represented)' --add-min-max --legend 'lower right' --ymin 80 --ymax 92.5 --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png --font-size 13 --linestyles 'solid' 'dashed' 'dotted' 'dashdot'
-         \centering
-         \includegraphics[width=\textwidth]{../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies}
-                  \caption{MNIST with Linear Model}
-         \end{subfigure}
-                 \hfill                      
-% To regenerate the figure, from directory results/cifar10
-% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET random-10/all/2021-07-23-14:33:48-CEST  random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' --add-min-max --legend 'upper left' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png --ymax 119 --font-size 13  --linestyles 'solid' 'dashed' 'dotted' 'dashdot' 'solid' --markers '' '' '' '' 'o'
-        \begin{subfigure}[b]{0.35\textwidth}
-        \centering
-         \includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}
-         \caption{CIFAR10 with LeNet}
-     \end{subfigure} 
- \caption{\label{fig:d-cliques-comparison-to-non-clustered-topologies} Comparison to Non-Clustered Topologies} 
-\end{figure}
-
-On the harder CIFAR10 dataset with a deep convolutional network, the
-differences are much more dramatic:
-D-Cliques with Clique Averaging and momentum turns out to be critical for fast
-convergence.
-Crucially, all random topologies fail to converge to a good solution. This
-confirms that our clique structure is important to reduce variance
-across nodes and improve the convergence. The difference with the previous
-experiment seems to be due to both the use of a higher capacity model and to
-the intrinsic characteristics of the datasets.
-
-While the previous experiments suggest that our clique structure is
-instrumental in obtaining good performance, one may wonder whether
-intra-clique full connectivity is actually necessary.
-Figure~\ref{fig:d-cliques-intra-connectivity} shows the convergence speed of
-a D-Cliques topology where cliques have been sparsified by randomly
-removing 1 or 5 undirected edges per clique (out of 45). Strikingly, both for MNIST and
-CIFAR10, removing just a single edge from the cliques has a
-significant effect on the
-convergence speed. On CIFAR10, it even entirely negates the
-benefits of D-Cliques.
-
-Overall, these results show that achieving fast convergence on non-IID
-data with sparse topologies requires a very careful design, as we have
-proposed with D-Cliques.
+%We demonstrate the advantages of D-Cliques over alternative sparse topologies
+%that have a similar number of edges. First, we consider topologies in which
+%the neighbors of each node are selected at random (hence without any clique
+%structure).
+%Specifically, for $n=100$ nodes, we
+%construct a random topology such that each node has exactly 10 edges, which is
+%similar to the average 9.9 edges of our D-Cliques topology 
+%(Figure~\ref{fig:d-cliques-figure}). To better understand the role of
+%the clique structure beyond merely ensuring class representativity among
+%neighbors,
+%we also compare to a random topology similar to the one described above except
+%that edges are
+%chosen such that each node has neighbors of all possible classes. Finally, we
+%also implement an analog of Clique Averaging for these random topologies,
+%where all nodes de-bias their gradient based on the class distribution of
+%their neighbors. In the latter case, since nodes do not form a clique, each
+%node obtains a different average gradient.
+%
+%The results for MNIST and CIFAR10 are shown in
+%Figure~\ref{fig:d-cliques-comparison-to-non-clustered-topologies}. For MNIST,
+%a purely random topology has higher variance and lower convergence speed than
+%D-Cliques (with or without Clique Averaging), while a random topology with
+%class representativity performs similarly as D-Cliques without Clique
+%Averaging. However and perhaps surprisingly, a random topology with unbiased
+%gradient performs slightly worse than without it. In any case, D-Cliques with
+%Clique Averaging outperforms all random topologies, showing that the clique
+%structure has a small but noticeable effect on the average accuracy and
+%significantly reduces the variance across nodes in this setup.
+%
+%\begin{figure}[t]
+%     \centering     
+%         \begin{subfigure}[b]{0.35\textwidth}
+%% To regenerate the figure, from directory results/mnist
+%% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET  random-10/all/2021-07-23-11:59:56-CEST  random-10-diverse/all/2021-03-17-20:28:35-CET --labels 'd-clique (fcc)' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes represented)' --add-min-max --legend 'lower right' --ymin 80 --ymax 92.5 --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png --font-size 13 --linestyles 'solid' 'dashed' 'dotted' 'dashdot'
+%         \centering
+%         \includegraphics[width=\textwidth]{../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies}
+%                  \caption{MNIST with Linear Model}
+%         \end{subfigure}
+%                 \hfill                      
+%% To regenerate the figure, from directory results/cifar10
+%% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET random-10/all/2021-07-23-14:33:48-CEST  random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' --add-min-max --legend 'upper left' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png --ymax 119 --font-size 13  --linestyles 'solid' 'dashed' 'dotted' 'dashdot' 'solid' --markers '' '' '' '' 'o'
+%        \begin{subfigure}[b]{0.35\textwidth}
+%        \centering
+%         \includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}
+%         \caption{CIFAR10 with LeNet}
+%     \end{subfigure} 
+% \caption{\label{fig:d-cliques-comparison-to-non-clustered-topologies} Comparison to Non-Clustered Topologies} 
+%\end{figure}
+%
+%On the harder CIFAR10 dataset with a deep convolutional network, the
+%differences are much more dramatic:
+%D-Cliques with Clique Averaging and momentum turns out to be critical for fast
+%convergence.
+%Crucially, all random topologies fail to converge to a good solution. This
+%confirms that our clique structure is important to reduce variance
+%across nodes and improve the convergence. The difference with the previous
+%experiment seems to be due to both the use of a higher capacity model and to
+%the intrinsic characteristics of the datasets.
+%
+%While the previous experiments suggest that our clique structure is
+%instrumental in obtaining good performance, one may wonder whether
+%intra-clique full connectivity is actually necessary.
+%Figure~\ref{fig:d-cliques-intra-connectivity} shows the convergence speed of
+%a D-Cliques topology where cliques have been sparsified by randomly
+%removing 1 or 5 undirected edges per clique (out of 45). Strikingly, both for MNIST and
+%CIFAR10, removing just a single edge from the cliques has a
+%significant effect on the
+%convergence speed. On CIFAR10, it even entirely negates the
+%benefits of D-Cliques.
+%
+%Overall, these results show that achieving fast convergence on non-IID
+%data with sparse topologies requires a very careful design, as we have
+%proposed with D-Cliques.
 
 \subsection{Cliques built with Greedy Swap Converge Significantly Faster than Random Cliques}
 
+% From directory 'results-v2':
+% MNIST
+% python $TOOLS/analyze/filter.py all --dataset:name mnist --topology:name d-cliques/random-cliques d-cliques/greedy-swap --nodes:name 2-shards-uneq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-29-22:12:59-CEST-labostrex114 all/2021-09-28-23:18:49-CEST-labostrex119 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 80 --ymax 92.5 --yaxis test-accuracy --labels 'd-cliques random' 'd-cliques greedy-swap' --save-figure ../mlsys2022style/figures/convergence-speed-mnist-dc-random-vs-dc-gs-2-shards-per-node.png --linestyles 'solid' 'dashed' --font-size 18
+% CIFAR10
+%  python $TOOLS/analyze/filter.py all --dataset:name cifar10 --topology:name d-cliques/random-cliques d-cliques/greedy-swap --nodes:name 2-shards-eq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-10-04-21:18:33-CEST-labostrex117 all/2021-10-03-19:53:21-CEST-labostrex117 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 0 --ymax 100 --yaxis test-accuracy --labels 'd-cliques random' 'd-cliques greedy-swap' --save-figure ../mlsys2022style/figures/convergence-speed-cifar10-dc-random-vs-dc-gs-2-shards-per-node.png --linestyles 'solid' 'dashed' --font-size 18
+
+\begin{figure}[htbp]
+    \centering        
+    \begin{subfigure}[b]{0.23\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{figures/convergence-speed-mnist-dc-random-vs-dc-gs-2-shards-per-node}
+    \caption{\label{fig:convergence-speed-mnist-dc-random-vs-dc-gs-2-shards-per-node} MNIST}
+    \end{subfigure}
+    \hfill
+    \begin{subfigure}[b]{0.23\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{figures/convergence-speed-cifar10-dc-random-vs-dc-gs-2-shards-per-node}
+    \caption{\label{fig:convergence-speed-cifar10-dc-random-vs-dc-gs-2-shards-per-node} CIFAR10}
+    \end{subfigure}
+\caption{\label{fig:convergence-speed-dc-random-vs-dc-gs-2-shards-per-node} Convergence Speed of D-Cliques Constructed Randomly vs.\ with Greedy Swap on 100 Nodes (2 shards/node).}
+\end{figure}
+
 \subsection{Clique Averaging and Momentum are Necessary}
 
-% To regenerate figure, from results/mnist:
-% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py fully-connected/all/2021-03-10-09:25:19-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET  no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET --add-min-max --yaxis test-accuracy --labels '100 nodes non-IID fully-connected' '100 nodes non-IID d-cliques w/o clique avg.' '100 nodes d-cliques non-IID w/ clique avg.' --legend 'lower right' --ymin 89 --ymax 92.5 --font-size 13 --save-figure ../../figures/d-clique-mnist-clique-avg.png --linestyles 'solid' 'dashed' 'dotted'
-\begin{figure}[t]
+% From directory 'results-v2':
+% MNIST
+% python $TOOLS/analyze/filter.py all --dataset:name mnist --topology:name d-cliques/greedy-swap --nodes:name 2-shards-uneq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-29-03:53:42-CEST-labostrex119 all/2021-09-28-23:18:49-CEST-labostrex119 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 89 --ymax 92.5 --yaxis test-accuracy --labels 'd-cliques w/o c-avg.' 'd-cliques w/ c-avg.' --save-figure ../mlsys2022style/figures/convergence-speed-mnist-dc-no-c-avg-vs-c-avg-2-shards-per-node.png --linestyles 'solid' 'dashed' --font-size 18
+\begin{figure}[htbp]
          \centering
-         \includegraphics[width=0.35\textwidth]{../figures/d-clique-mnist-clique-avg}
-\caption{\label{fig:d-clique-mnist-clique-avg} Effect of Clique Averaging on MNIST. Y-axis starts at 89.}
+         \includegraphics[width=0.35\textwidth]{figures/convergence-speed-mnist-dc-no-c-avg-vs-c-avg-2-shards-per-node}
+\caption{\label{fig:d-clique-mnist-clique-avg} Effect of Clique Averaging on MNIST. Y axis starts at 89.}
 \end{figure}
 
 As illustrated in Figure~\ref{fig:d-clique-mnist-clique-avg}, Clique Averaging
@@ -206,72 +232,65 @@ fully-connected topology. Note that Clique Averaging induces a small
 additional cost, as gradients
 and models need to be sent in two separate rounds of messages. Nonetheless, compared to fully connecting all nodes, the total number of messages is reduced by $\approx 80\%$.
 
-\begin{figure}[t]
-    \centering 
-    % To regenerate figure, from results/cifar10
-    % python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py 1-node-iid/all/2021-03-10-13:52:58-CET  no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET no-init-no-clique-avg-no-momentum/fully-connected-cliques/all/2021-03-26-13:47:35-CET/ --legend 'upper right' --add-min-max --labels '1-node IID w/ momentum'  '100 nodes non-IID d-cliques w/ momentum' '100 nodes non-IID d-cliques w/o momentum'  --font-size 14 --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-momentum-non-iid-effect.png --ymax 100 --linestyles 'solid' 'dashed' 'dotted'         
-    \begin{subfigure}[b]{0.35\textwidth}
+
+% CIFAR10
+%  python $TOOLS/analyze/filter.py all --dataset:name cifar10 --topology:name d-cliques/greedy-swap --nodes:name 2-shards-eq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% w/o Clique Averaging
+% python $TOOLS/analyze/diff.py --rundirs all/2021-10-03-23:37:42-CEST-labostrex117 all/2021-10-04-03:13:46-CEST-labostrex117  --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 0 --ymax 100 --yaxis test-accuracy --labels 'd-cliques w/o momentum' 'd-cliques w/ momentum' --save-figure ../mlsys2022style/figures/convergence-speed-cifar10-wo-c-avg-no-mom-vs-mom-2-shards-per-node.png --linestyles 'solid' 'dashed' --font-size 18
+% w/ Clique Averaging
+% python $TOOLS/analyze/diff.py --rundirs all/2021-10-03-16:10:34-CEST-labostrex117 all/2021-10-03-19:53:21-CEST-labostrex117 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 0 --ymax 100 --yaxis test-accuracy --labels 'd-cliques w/o momentum' 'd-cliques w/ momentum' --save-figure ../mlsys2022style/figures/convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-2-shards-per-node.png --linestyles 'solid' 'dashed' --font-size 18
+
+\begin{figure}[htbp]
+    \centering        
+    \begin{subfigure}[b]{0.23\textwidth}
     \centering
-    \includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-momentum-non-iid-effect}
-    \caption{\label{fig:d-cliques-cifar10-momentum-non-iid-effect} Without Clique Averaging }
+    \includegraphics[width=\textwidth]{figures/convergence-speed-cifar10-wo-c-avg-no-mom-vs-mom-2-shards-per-node}
+    \caption{\label{fig:convergence-speed-cifar10-wo-c-avg-no-mom-vs-mom-2-shards-per-node} Without Clique Averaging }
     \end{subfigure}
     \hfill
-    % To regenerate figure, from results/cifar10
-    % python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py 1-node-iid/all/2021-03-10-13:52:58-CET no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET --legend 'upper right' --add-min-max --labels '1-node IID w/ momentum' '100 nodes non-IID d-clique w/ momentum' --font-size 14 --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-momentum-non-iid-clique-avg-effect.png --ymax 100 --linestyles 'solid' 'dashed' 'dotted' 
-    \begin{subfigure}[b]{0.35\textwidth}
+    \begin{subfigure}[b]{0.23\textwidth}
     \centering
-    \includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-momentum-non-iid-clique-avg-effect}
-    \caption{\label{fig:d-cliques-cifar10-momentum-non-iid-clique-avg-effect} With Clique Averaging}
+    \includegraphics[width=\textwidth]{figures/convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-2-shards-per-node}
+    \caption{\label{fig:convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-2-shards-per-node} With Clique Averaging}
     \end{subfigure}
-\caption{\label{fig:cifar10-momentum} Non-IID Effect of Momentum on CIFAR10 with LeNet}
+\caption{\label{fig:cifar10-c-avg-momentum} Effect of Clique Averaging and Momentum on CIFAR10 with LeNet.}
 \end{figure}
 
 As shown in
-Figure~\ref{fig:d-cliques-cifar10-momentum-non-iid-clique-avg-effect}, 
+Figure~\ref{fig:cifar10-c-avg-momentum}, 
 the use of Clique Averaging restores the benefits of momentum and closes the gap
 with the centralized setting.
 
-\subsection{Full Intraclique Connectivity is Necessary}
+\subsection{D-Cliques Tolerate Intra-Clique Connectivity Failures}
 
-\begin{figure*}[t]
+% From directory 'results-v2':
+% MNIST
+% python $TOOLS/analyze/filter.py all --dataset:name mnist --topology:name d-cliques/greedy-swap --nodes:name 2-shards-uneq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
+% w/o Clique Averaging
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-29-03:53:42-CEST-labostrex119 all/2021-10-01-21:44:14-CEST-labostrex113 all/2021-10-02-06:53:40-CEST-labostrex113 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 89 --ymax 92.5 --yaxis test-accuracy --labels 'full intra-connectivity' '-1 edge/clique' '-5 edges/clique' --save-figure ../mlsys2022style/figures/d-cliques-wo-clique-avg-impact-of-edge-removal.png --linestyles 'solid' 'dashed' 'dotted' --font-size 18
+% w/ Clique Averaging
+% python $TOOLS/analyze/diff.py --rundirs all/2021-09-28-23:18:49-CEST-labostrex119 all/2021-10-01-17:08:42-CEST-labostrex113 all/2021-10-02-02:17:43-CEST-labostrex113 --pass-through | python $TOOLS/plot/convergence.py --add-min-max --ymin 89 --ymax 92.5 --yaxis test-accuracy  --labels 'full intra-connectivity' '-1 edge/clique' '-5 edges/clique' --save-figure ../mlsys2022style/figures/d-cliques-w-clique-avg-impact-of-edge-removal.png --linestyles 'solid' 'dashed' 'dotted' --font-size 18
+\begin{figure}[htbp]
      \centering
 
-\begin{subfigure}[htbp]{0.4\textwidth}
+\begin{subfigure}[b]{0.23\textwidth}
      \centering   
-% To regenerate the figure, from directory results/mnist
-% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET rm-1-edge/all/2021-03-18-17:28:27-CET rm-1-edge-unbiased-grad/all/2021-03-18-17:28:47-CET --add-min-max --ymin 85 --ymax 92.5 --legend 'lower right' --yaxis test-accuracy --labels 'fcc, clique grad.' 'fcc -1 edge/clique, no clique grad.' 'fcc -1 edge/clique, clique grad.' --save-figure ../../figures/d-cliques-mnist-clique-clustering-fcc-minus-1-edge.png  --font-size 13  --linestyle 'solid' 'dashed' 'dotted' 
-         \includegraphics[width=\textwidth]{../figures/d-cliques-mnist-clique-clustering-fcc-minus-1-edge}     
-\caption{\label{fig:d-cliques-mnist-clique-clustering-minus-1-edge} MNIST (-1 edge/clique)}
+         \includegraphics[width=\textwidth]{figures/d-cliques-wo-clique-avg-impact-of-edge-removal}     
+\caption{\label{fig:d-cliques-wo-clique-avg-impact-of-edge-removal} Without Clique Averaging }
 \end{subfigure}
 \hfill
-\begin{subfigure}[htbp]{0.4\textwidth}
+\begin{subfigure}[b]{0.23\textwidth}
      \centering
-% To regenerate the figure, from directory results/cifar10
-% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET rm-1-edge/all/2021-03-18-17:29:58-CET rm-1-edge-unbiased-grad/all/2021-03-18-17:30:17-CET --add-min-max --ymax 80 --legend 'upper left' --yaxis test-accuracy --labels 'fcc, clique grad.' 'fcc -1 edge/clique, no clique grad.' 'fcc -1 edge/clique, clique grad.' --save-figure ../../figures/d-cliques-cifar10-clique-clustering-fcc-minus-1-edge.png --font-size 13 --linestyle 'solid' 'dashed' 'dotted'
-         \includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-clique-clustering-fcc-minus-1-edge}
-\caption{\label{fig:d-cliques-cifar10-clique-clustering-minus-1-edge} CIFAR10 (-1 edge/clique)}
+         \includegraphics[width=\textwidth]{figures/d-cliques-w-clique-avg-impact-of-edge-removal}
+\caption{\label{fig:d-cliques-w-clique-avg-impact-of-edge-removal} With Clique Averaging}
 \end{subfigure}
+\caption{\label{fig:d-cliques-intra-connectivity} MNIST: Impact of Intra-Clique Connectivity Failures. Y axis starts at 89.}
+\end{figure}
 
-%\begin{subfigure}[htbp]{0.35\textwidth}
-%     \centering  
-%% To regenerate the figure, from directory results/mnist
-%% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET rm-5-edges/all/2021-03-18-17:29:10-CET rm-5-edges-unbiased-grad/all/2021-03-18-17:29:36-CET --add-min-max --ymin 85 --ymax 92.5 --legend 'lower right' --yaxis test-accuracy --labels 'fcc, clique grad.' 'fcc -5 edges/clique, no clique grad.' 'fcc -5 edges/clique, clique grad.' --save-figure ../../figures/d-cliques-mnist-clique-clustering-fcc-minus-5-edges.png  --font-size 13 --linestyle 'solid' 'dashed' 'dotted'   
-%         \includegraphics[width=\textwidth]{../figures/d-cliques-mnist-clique-clustering-fcc-minus-5-edges}     
-%\caption{\label{fig:d-cliques-mnist-clique-clustering-minus-5-edges} MNIST (-5 edges/clique)}
-%\end{subfigure}
-%\hfill
-%\begin{subfigure}[htbp]{0.35\textwidth}
-%     \centering
-%% To regenerate the figure, from directory results/cifar10
-%% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET rm-5-edges/all/2021-03-18-17:30:38-CET rm-5-edges-unbiased-grad/all/2021-03-18-17:31:04-CET --add-min-max --ymax 80 --legend 'upper left' --yaxis test-accuracy --labels 'fcc, clique grad.' 'fcc -5 edges/clique, no clique grad.'  'fcc -5 edges/clique, clique grad.' --save-figure ../../figures/d-cliques-cifar10-clique-clustering-fcc-minus-5-edges.png --font-size 13 --linestyle 'solid' 'dashed' 'dotted'
-%         \includegraphics[width=\textwidth]{../figures/d-cliques-cifar10-clique-clustering-fcc-minus-5-edges}
-%\caption{\label{fig:d-cliques-cifar10-clique-clustering-minus-5-edges} CIFAR10 (-5 edges/clique)}
-%\end{subfigure}
-
-\caption{\label{fig:d-cliques-intra-connectivity} Importance of Intra-Clique Full-Connectivity}
-\end{figure*}
+% CIFAR10
+% python $TOOLS/analyze/filter.py all --dataset:name cifar10 --topology:name d-cliques/greedy-swap --nodes:name 2-shards-eq-classes --meta:seed 1 --nodes:nb-nodes 100 | python $TOOLS/analyze/diff.py
 
-\subsection{D-Cliques appear to Scale with Sparser Inter-Clique Topologies}
+\subsection{D-Cliques Scale with Sparser Inter-Clique Topologies}
 
 In this last series of experiments, we evaluate the effect of choosing sparser
 inter-clique topologies on the convergence speed for a larger network of 1000
diff --git a/mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-1-class-per-node.png b/mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-1-class-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..959007f20e288f4d9cbfc338648d26359a931055
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-1-class-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..d952518d4db139adb158e7319b765eadffee3604
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-cifar10-dc-fc-vs-fc-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-cifar10-dc-random-vs-dc-gs-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-cifar10-dc-random-vs-dc-gs-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..1bdf7fc40c52e81a26af648d0dcc0bf220eb92eb
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-cifar10-dc-random-vs-dc-gs-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..af4490a544e0f8bd11f587487d36e99bd570a2f6
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-cifar10-w-c-avg-no-mom-vs-mom-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-cifar10-wo-c-avg-no-mom-vs-mom-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-cifar10-wo-c-avg-no-mom-vs-mom-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..512f1d8a88c1d94ae7cfde6c23482ef724b7d4fc
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-cifar10-wo-c-avg-no-mom-vs-mom-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-1-class-per-node.png b/mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-1-class-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..8660b5a92b68c12305033e5dcea9e00523c57eee
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-1-class-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..6a2a0cff0055474e62e8c0f7fe453c1120d38809
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-mnist-dc-fc-vs-fc-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-mnist-dc-no-c-avg-vs-c-avg-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-mnist-dc-no-c-avg-vs-c-avg-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc12fa66d2f0ab0ad8aafc5f54c0fa92098e9fd8
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-mnist-dc-no-c-avg-vs-c-avg-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-mnist-dc-random-vs-dc-gs-2-shards-per-node.png b/mlsys2022style/figures/convergence-speed-mnist-dc-random-vs-dc-gs-2-shards-per-node.png
new file mode 100644
index 0000000000000000000000000000000000000000..11b44ef1ff37e8131cd1bc53a4fe163d0953bf7e
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-mnist-dc-random-vs-dc-gs-2-shards-per-node.png differ
diff --git a/mlsys2022style/figures/convergence-speed-mnist-dc-rm-1-edge-vs-full.png b/mlsys2022style/figures/convergence-speed-mnist-dc-rm-1-edge-vs-full.png
new file mode 100644
index 0000000000000000000000000000000000000000..4a485b828ff612739aa9352130dd289c9e9c0647
Binary files /dev/null and b/mlsys2022style/figures/convergence-speed-mnist-dc-rm-1-edge-vs-full.png differ
diff --git a/mlsys2022style/figures/d-cliques-ideal-w-clique-avg-impact-of-edge-removal.png b/mlsys2022style/figures/d-cliques-ideal-w-clique-avg-impact-of-edge-removal.png
new file mode 100644
index 0000000000000000000000000000000000000000..40019cf866e7c11762fd0eefc69790b3a1a8d4e3
Binary files /dev/null and b/mlsys2022style/figures/d-cliques-ideal-w-clique-avg-impact-of-edge-removal.png differ
diff --git a/mlsys2022style/figures/d-cliques-ideal-wo-clique-avg-impact-of-edge-removal.png b/mlsys2022style/figures/d-cliques-ideal-wo-clique-avg-impact-of-edge-removal.png
new file mode 100644
index 0000000000000000000000000000000000000000..c4e9a291af91a1e93fcd997275363ac07afe2aef
Binary files /dev/null and b/mlsys2022style/figures/d-cliques-ideal-wo-clique-avg-impact-of-edge-removal.png differ
diff --git a/mlsys2022style/figures/d-cliques-w-clique-avg-impact-of-edge-removal.png b/mlsys2022style/figures/d-cliques-w-clique-avg-impact-of-edge-removal.png
new file mode 100644
index 0000000000000000000000000000000000000000..46b8e770d7c9f84aeb5ffe9b480612dec1ee9741
Binary files /dev/null and b/mlsys2022style/figures/d-cliques-w-clique-avg-impact-of-edge-removal.png differ
diff --git a/mlsys2022style/figures/d-cliques-wo-clique-avg-impact-of-edge-removal.png b/mlsys2022style/figures/d-cliques-wo-clique-avg-impact-of-edge-removal.png
new file mode 100644
index 0000000000000000000000000000000000000000..40019cf866e7c11762fd0eefc69790b3a1a8d4e3
Binary files /dev/null and b/mlsys2022style/figures/d-cliques-wo-clique-avg-impact-of-edge-removal.png differ