diff --git a/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png b/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png
index a85f283d5f3b2c2b77efa225416bb2490aa952fd..4ecd8427e1c3dc504b81deec70041abe0491de91 100644
Binary files a/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png and b/figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png differ
diff --git a/figures/d-cliques-mnist-clique-clustering-fcc.png b/figures/d-cliques-mnist-clique-clustering-fcc.png
index 980ed24a1a87cd2b2aa1d27ec1ef9759fa8bc2a9..8401f7594cfacd0a70818db6adec708ffcb029f1 100644
Binary files a/figures/d-cliques-mnist-clique-clustering-fcc.png and b/figures/d-cliques-mnist-clique-clustering-fcc.png differ
diff --git a/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png b/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png
index 404e2c9fbd95184cd5bdcff76b166c0a5ebb79b6..ff066d43b0f69ca31037362f16c2f2dd3b65018f 100644
Binary files a/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png and b/figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png differ
diff --git a/main.tex b/main.tex
index 2a2c99f843320c2a1a3771193396298d7e77e158..a3b662489b3643fff909627ee5ec641ecca932f6 100644
--- a/main.tex
+++ b/main.tex
@@ -204,7 +204,7 @@ design is presented in Section~\ref{section:d-cliques}) along with an
 empirical illustration of its benefits. In
 Section~\ref{section:clique-averaging-momentum}, we
 show how to further reduce bias with Clique Averaging and how to use it to
-implement momentum.  We present the results or our extensive experimental
+implement momentum.  We present the results of our extensive experimental
 study in  Section~\ref{section:non-clustered}. We review some related work in
  Section~\ref{section:related-work}, and conclude with promising directions
  for future work in Section~\ref{section:conclusion}.
@@ -489,13 +489,13 @@ amount of messages traveling on the network, it also introduces its own
 bias.
 Figure~\ref{fig:connected-cliques-bias} illustrates the problem on the
 simple case of two cliques connected by one inter-clique edge (here,
-between the green node of the left clique and the purple node of the right
+between the green node of the left clique and the pink node of the right
 clique). Let us focus on node A. With weights computed as in \eqref{eq:metro},
 node A's self-weight is $\frac{12}
 {110}$, the weight between A and the green node connected to B is
 $\frac{10}{110}$, and
 all other neighbors of A have a weight of $\frac{11}{110}$. Therefore, the
-gradient at A is biased towards its own class (purple) and against the green
+gradient at A is biased towards its own class (pink) and against the green
 class. A similar bias holds for all other nodes
 without inter-clique edges with respect to their respective classes. For node
 B, all its edge weights (including its self-weight) are equal to $\frac{1}
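
To make the quoted weights easy to verify, the following Python sketch (not part of the patch) reproduces them for two fully-connected cliques of 10 nodes joined by a single inter-clique edge. It assumes that eq:metro is the standard Metropolis-Hastings rule W_ij = 1 / (1 + max(deg(i), deg(j))), with the self-weight completing each row to 1; the node labels are illustrative and not taken from the figure.

from fractions import Fraction

c = 10                                   # clique size: one node per class
left  = [f"L{i}" for i in range(c)]      # left clique
right = [f"R{i}" for i in range(c)]      # right clique

# Each clique is fully connected; one inter-clique edge joins L0 and R0.
adj = {u: set(left) - {u} for u in left}
adj.update({u: set(right) - {u} for u in right})
adj["L0"].add("R0"); adj["R0"].add("L0")
deg = {u: len(adj[u]) for u in adj}

def metro(u, v):
    # assumed form of eq:metro for neighbouring nodes u and v
    return Fraction(1, 1 + max(deg[u], deg[v]))

def weights(u):
    w = {v: metro(u, v) for v in adj[u]}
    w[u] = 1 - sum(w.values())           # self-weight completes the row sum to 1
    return w

A = weights("R1")   # a node without an inter-clique edge
B = weights("R0")   # the node holding the inter-clique edge
print(A["R1"], A["R0"], A["R2"])  # 6/55 = 12/110, 1/11 = 10/110, 1/10 = 11/110
print(set(B.values()))            # a single value, 1/11, self-weight included
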
@@ -648,14 +648,14 @@ significantly reduces the variance across nodes in this setup.
      \centering     
          \begin{subfigure}[b]{0.48\textwidth}
 % To regenerate the figure, from directory results/mnist
-% python ../../../learn-topology/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET  random-10/all/2021-03-17-20:28:12-CET  random-10-diverse/all/2021-03-17-20:28:35-CET --labels 'd-clique (fcc)' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes represented)' --add-min-max --legend 'lower right' --ymin 88 --ymax 92.5 --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png --font-size 13
+% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py fully-connected-cliques/all/2021-03-10-10:19:44-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-12-11:12:49-CET  random-10/all/2021-07-23-11:59:56-CEST  random-10-diverse/all/2021-03-17-20:28:35-CET --labels 'd-clique (fcc)' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes represented)' --add-min-max --legend 'lower right' --ymin 80 --ymax 92.5 --yaxis test-accuracy --save-figure ../../figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies.png --font-size 13
          \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-linear-comparison-to-non-clustered-topologies}
                   \caption{MNIST with Linear Model}
          \end{subfigure}
                  \hfill                      
 % To regenerate the figure, from directory results/cifar10
-% python ../../../learn-topology/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET random-10/all/2021-03-17-20:30:03-CET  random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' --add-min-max --legend 'upper left' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png --ymax 119 --font-size 13
+% python ../../../../Software/non-iid-topology-simulator/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-13-18:32:55-CET no-init-no-clique-avg/fully-connected-cliques/all/2021-03-13-18:34:35-CET random-10/all/2021-07-23-14:33:48-CEST  random-10-diverse/all/2021-03-17-20:30:41-CET random-10-diverse-unbiased-gradient/all/2021-03-17-20:31:14-CET --labels 'd-clique (fcc) clique avg.' 'd-clique (fcc) no clique avg.' '10 random edges' '10 random edges (all classes repr.)' '10 random (all classes repr.) with unbiased grad.' --add-min-max --legend 'upper left' --yaxis test-accuracy --save-figure ../../figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies.png --ymax 119 --font-size 13
         \begin{subfigure}[b]{0.48\textwidth}
         \centering
          \includegraphics[width=\textwidth]{figures/d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}
@@ -695,7 +695,7 @@ proposed with D-Cliques.
 \begin{subfigure}[htbp]{0.48\textwidth}
      \centering   
 % To regenerate the figure, from directory results/mnist
-% python ../../../learn-topology/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET rm-1-edge/all/2021-03-18-17:28:27-CET rm-5-edges/all/2021-03-18-17:29:10-CET rm-1-edge-unbiased-grad/all/2021-03-18-17:28:47-CET rm-5-edges-unbiased-grad/all/2021-03-18-17:29:36-CET --add-min-max --ymin 85 --ymax 92.5 --legend 'lower right' --yaxis test-accuracy --labels 'fcc with clique grad.' 'fcc -1 edge/clique, no clique avg.' 'fcc -5 edges/clique, no clique avg.'  'fcc -1 edge/clique, clique avg.' 'fcc -5 edges/clique, clique avg.' --save-figure ../../figures/d-cliques-mnist-clique-clustering-fcc.png  --font-size 13   
+% python ../../../learn-topology/tools/plot_convergence.py no-init/fully-connected-cliques/all/2021-03-12-11:12:01-CET rm-1-edge/all/2021-03-18-17:28:27-CET rm-5-edges/all/2021-03-18-17:29:10-CET rm-1-edge-unbiased-grad/all/2021-03-18-17:28:47-CET rm-5-edges-unbiased-grad/all/2021-03-18-17:29:36-CET --add-min-max --ymin 85 --ymax 92.5 --legend 'lower right' --yaxis test-accuracy --labels 'fcc with clique grad.' 'fcc -1 edge/clique, no clique grad.' 'fcc -5 edges/clique, no clique grad.'  'fcc -1 edge/clique, clique grad.' 'fcc -5 edges/clique, clique grad.' --save-figure ../../figures/d-cliques-mnist-clique-clustering-fcc.png  --font-size 13   
          \includegraphics[width=\textwidth]{figures/d-cliques-mnist-clique-clustering-fcc}     
 \caption{\label{fig:d-cliques-mnist-clique-clustering} MNIST}
 \end{subfigure}
@@ -717,9 +717,9 @@ proposed with D-Cliques.
 
 So far, we have used a fully-connected inter-clique topology for D-Cliques,
 which has the advantage of bounding the
-average shortest path to $2$ between any pair of nodes. This choice requires $
+shortest path between any pair of nodes to $3$. This choice requires $
 \frac{n}{c}(\frac{n}{c} - 1)$ inter-clique edges, which scales quadratically
-in the number of nodes $n$ for a given clique size $c$. This can become significant at larger scales when $n$ is
+in the number of nodes $n$ for a given clique size $c$\footnote{We count \textit{directed} edges in this analysis; the number of undirected edges is half that, which does not change the asymptotic behavior.}. This can become significant at larger scales when $n$ is
 large compared to $c$.
 
 In this last series of experiments, we evaluate the effect of choosing sparser
@@ -730,7 +730,7 @@ to create cliques as a starting point.
 
 The inter-clique topology with (almost) fewest possible edges is a 
 \textit{ring}, which
-uses $\frac{n}{c} - 1$ inter-clique edges and therefore scales linearly in
+uses $\frac{2n}{c}$ inter-clique edges and therefore scales linearly in
 $n$.
 We also consider another topology that scales linearly and achieves a
 logarithmic
@@ -741,18 +741,23 @@ assembled in
 larger groups of $c$ cliques that are connected internally with one edge per
 pair of cliques, but with only one edge between pairs of larger groups. The
 topology is built recursively such that $c$ groups will themselves form a
-larger group at the next level up. This results in at most $nc$ edges per node if edges are evenly distributed, and therefore also scales linearly in the number of nodes.
+larger group at the next level up. This results in at most $c$ edges per node
+if edges are evenly distributed: each group adds at most $c-1$ edges to the
+other groups at its level, and these can always be assigned to nodes that do
+not yet have an inter-clique edge, since every group keeps one such node in
+reserve for the next level. Each node thus has at most one inter-clique edge in
+addition to its $c-1$ intra-clique edges, so the $n$ nodes have at most $nc$ edges in total and this fractal scheme also scales linearly in the number of nodes.
 
 Finally, we propose to connect cliques according to a
 small-world-like topology~\cite{watts2000small} applied on top of a
 ring~\cite{stoica2003chord}. In this scheme, cliques are first arranged in a
 ring. Then each clique adds symmetric edges, both clockwise and
-counter-clockwise on the ring, with the $ns$ closest cliques in sets of
+counter-clockwise on the ring, with the $m$ closest cliques in sets of
 cliques that are exponentially bigger the further they are on the ring (see
 Algorithm~\ref{Algorithm:Smallworld} in the appendix for
 details on the construction). This ensures a good connectivity with other
 cliques that are close on the ring, while still keeping the average shortest
-path small. This scheme uses $\frac{n}{c}*2(ns)\log(\frac{n}{c})$ inter-clique edges and
+path small. This scheme uses $2m\frac{n}{c}\log(\frac{n}{c})$ inter-clique edges and
 therefore grows in the order of $O(n\log(n))$ with the number of nodes.
 
 Figure~\ref{fig:d-cliques-cifar10-convolutional} shows the convergence
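
The scaling claims for the four inter-clique topologies can be sanity-checked by evaluating the edge-count expressions given in the text. The Python sketch below (not part of the patch) does that for a few values of n with clique size c and small-world parameter m fixed; it counts directed edges, as in the footnote above, and uses the text's upper bound of c edges per node for the fractal scheme rather than an exact count.

import math

def fully_connected(n, c):
    # n/c cliques, one directed edge per ordered pair of cliques: quadratic in n
    k = n // c
    return k * (k - 1)

def ring(n, c):
    # each of the n/c cliques keeps one edge per ring neighbour: 2n/c, linear in n
    return 2 * n // c

def fractal_bound(n, c):
    # upper bound from the text: at most c edges per node, i.e. nc, linear in n
    return n * c

def small_world(n, c, m):
    # (n/c) * 2m * log(n/c) as stated in the text (logarithm base not specified there)
    k = n // c
    return round(k * 2 * m * math.log(k))

c, m = 10, 2
for n in (1_000, 10_000, 100_000):
    print(n, fully_connected(n, c), ring(n, c), fractal_bound(n, c), small_world(n, c, m))

For n = 100000 and c = 10, the fully-connected scheme already requires about 10^8 directed inter-clique edges, against 20000 for the ring, at most 10^6 for the fractal bound, and a few hundred thousand for the small-world scheme, which is the motivation for the sparser designs evaluated here.
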
diff --git a/results/cifar10/random-10/experiments.sh b/results/cifar10/random-10/experiments.sh
index dc2eeb99061ff42500ffbf66909c4c701e229c51..fa3888d84f5ed69a0809ca5a826b224194452d10 100755
--- a/results/cifar10/random-10/experiments.sh
+++ b/results/cifar10/random-10/experiments.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS
+TOOLS=../../$(cat ../../toolspath); CWD="$(pwd)"; cd $TOOLS
 BSZS='
     20
     '
@@ -8,7 +8,7 @@ LRS='
     '
 for BSZ in $BSZS; 
     do for LR in $LRS;
-        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology random-10 --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --accuracy-logging-interval 10 --validation-set-ratio 0.5
+        do python simulate.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 1 1 1 1 1 1 1 1 1 1 --dist-optimization d-psgd --topology random-10 --metric dissimilarity --learning-momentum 0.9 --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --single-process --nb-logging-processes 10 --dataset cifar10 --model gn-lenet --accuracy-logging-interval 10 --validation-set-ratio 0.5
     done;
 done;
 
diff --git a/results/mnist/random-10/experiments.sh b/results/mnist/random-10/experiments.sh
index ba5dc2ab6e3f8d6a0a258f95b5134224313e785d..34237776ecb22b664a7bb5cb0c263c542c552b8c 100755
--- a/results/mnist/random-10/experiments.sh
+++ b/results/mnist/random-10/experiments.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-TOOLS=../../../../learn-topology/tools; CWD="$(pwd)"; cd $TOOLS
+TOOLS=../../$(cat ../../toolspath); CWD="$(pwd)"; cd $TOOLS
 BSZS='
     128
     '
@@ -8,7 +8,7 @@ LRS='
     '
 for BSZ in $BSZS; 
     do for LR in $LRS;
-        do python sgp-mnist.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology random-10 --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear
+        do python simulate.py --nb-nodes 100 --nb-epochs 100 --local-classes 1 --seed 1 --nodes-per-class 10 10 10 10 10 10 10 10 10 10 --global-train-ratios 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 0.802568 --dist-optimization d-psgd --topology random-10 --metric dissimilarity --learning-momentum 0. --sync-per-mini-batch 1 --results-directory $CWD/all --learning-rate $LR --batch-size $BSZ "$@" --parallel-training --nb-workers 10 --dataset mnist --model linear
     done;
 done;
 
diff --git a/results/toolspath b/results/toolspath
new file mode 100644
index 0000000000000000000000000000000000000000..188c1f8d4314da5be13ca5efd5c6bb8f002e9837
--- /dev/null
+++ b/results/toolspath
@@ -0,0 +1 @@
+../../../Software/non-iid-topology-simulator/tools/
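
The new results/toolspath file replaces the hard-coded ../../../../learn-topology/tools prefix with a single place to configure where the simulator tools live. As a purely illustrative Python sketch (the absolute checkout location is hypothetical), this is how the TOOLS=../../$(cat ../../toolspath) line in results/cifar10/random-10/experiments.sh resolves:

import os

# Hypothetical checkout location; only the relative structure matters.
script_dir = "/home/user/repo/results/cifar10/random-10"            # where experiments.sh runs
toolspath  = "../../../Software/non-iid-topology-simulator/tools/"  # contents of results/toolspath

tools = os.path.normpath(os.path.join(script_dir, "../../", toolspath))
print(tools)  # -> /home/Software/non-iid-topology-simulator/tools

Both experiments.sh files in this diff read the same file, so relocating the simulator only requires editing results/toolspath rather than every script.
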