diff --git a/main.bib b/main.bib
index 4050742108ab0b3eaf983658a868286aa8b87df0..52b6cbd1217fbadadd30ceb4e822b83b15957fa2 100644
--- a/main.bib
+++ b/main.bib
@@ -605,12 +605,11 @@ pages={211-252}
   year={2010}
 }
 
-@incollection{lian2017d-psgd,
+@inproceedings{lian2017d-psgd,
   title = {{Can Decentralized Algorithms Outperform Centralized Algorithms? A Case Study for Decentralized Parallel Stochastic Gradient Descent}},
   author = {Lian, Xiangru and Zhang, Ce and Zhang, Huan and Hsieh, Cho-Jui and Zhang, Wei and Liu, Ji},
-  booktitle = {Advances in Neural Information Processing Systems},
-  year = {2017},
-  publisher = {Curran Associates, Inc.},
+  booktitle = {NIPS},
+  year = {2017}
 }
 
 @article{nedic2016sgp, 
@@ -674,8 +673,7 @@ pages={211-252}
   title = 	 {{$D^2$: Decentralized Training over Decentralized Data}},
   author = 	 {Tang, Hanlin and Lian, Xiangru and Yan, Ming and Zhang, Ce and Liu, Ji},
   booktitle = 	 {ICML},
-  year = 	 {2018},
-  publisher = 	 {PMLR}
+  year = 	 {2018}
 }
 
 @article{xiao2007distributed,
@@ -721,7 +719,7 @@ pages={211-252}
   title={Small worlds: The dynamics of networks between order and randomness},
   author={Watts, Duncan J},
   year={2000},
-  publisher={Princeton University Press Princeton}
+  publisher={Princeton University Press}
 }
 
 % Random Model Walk !!!
@@ -790,15 +788,6 @@ pages={211-252}
       primaryClass={cs.CV}
 }
 
-@misc{kong2021consensus,
-      title={Consensus Control for Decentralized Deep Learning}, 
-      author={Lingjing Kong and Tao Lin and Anastasia Koloskova and Martin Jaggi and Sebastian U. Stich},
-      year={2021},
-      eprint={2102.04828},
-      archivePrefix={arXiv},
-      primaryClass={cs.LG}
-}
-
 @article{krizhevsky2009learning,
   title={{Learning Multiple Layers of Features from Tiny Images}},
   author={Krizhevsky, Alex},
@@ -832,8 +821,7 @@ pages={211-252}
     title = {On the importance of initialization and momentum in deep learning}, 
     author = {Ilya Sutskever and James Martens and George Dahl and Geoffrey Hinton}, 
     booktitle = {ICML}, 
-    year = {2013}, 
-    publisher = {PMLR}
+    year = {2013}
 }
 
 @article{lecun1998gradient,
diff --git a/main.tex b/main.tex
index 682998c66160e8a46d4b5c33c5fd4c243ff733a0..c5c3f98e890388d7778809177fa70b79eb6ceb4c 100644
--- a/main.tex
+++ b/main.tex
@@ -807,7 +807,7 @@ We have proposed D-Cliques, a sparse topology that recovers the convergence spee
 
 \begin{itemize}
   \item Clustering does not seem to make a difference in MNIST, even when using a higher-capacity model (LeNet) instead of a linear model. (Fig.\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies})     
-  \item Except for the random 10 topology, convergence speed seems to be correlated with scattering in CIFAR-10 with LeNet model (Fig.\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There is also more difference between topologies both in convergence speed and scattering than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering computed similar to Consensus Control for Decentralized Deep Learning~\cite{kong2021consensus}.
+  \item Except for the random 10 topology, convergence speed seems to be correlated with scattering in CIFAR-10 with the LeNet model (Fig.~\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There is also a larger difference between topologies, both in convergence speed and in scattering, than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering is computed similarly to the consensus distance of Consensus Control for Decentralized Deep Learning~\cite{consensus_distance}.
 \end{itemize}
  
 \end{document}