Commit c5bcd972 authored by aurelien.bellet

small fixes in refs

parent ed8898db
@@ -605,12 +605,11 @@ pages={211-252}
 year={2010}
 }
-@incollection{lian2017d-psgd,
+@inproceedings{lian2017d-psgd,
 title = {{Can Decentralized Algorithms Outperform Centralized Algorithms? A Case Study for Decentralized Parallel Stochastic Gradient Descent}},
 author = {Lian, Xiangru and Zhang, Ce and Zhang, Huan and Hsieh, Cho-Jui and Zhang, Wei and Liu, Ji},
-booktitle = {Advances in Neural Information Processing Systems},
-year = {2017},
-publisher = {Curran Associates, Inc.},
+booktitle = {NIPS},
+year = {2017}
 }
 @article{nedic2016sgp,
@@ -674,8 +673,7 @@ pages={211-252}
 title = {{$D^2$: Decentralized Training over Decentralized Data}},
 author = {Tang, Hanlin and Lian, Xiangru and Yan, Ming and Zhang, Ce and Liu, Ji},
 booktitle = {ICML},
-year = {2018},
-publisher = {PMLR}
+year = {2018}
 }
 @article{xiao2007distributed,
@@ -721,7 +719,7 @@ pages={211-252}
 title={Small worlds: The dynamics of networks between order and randomness},
 author={Watts, Duncan J},
 year={2000},
-publisher={Princeton University Press Princeton}
+publisher={Princeton University Press}
 }
 % Random Model Walk !!!
@@ -790,15 +788,6 @@ pages={211-252}
 primaryClass={cs.CV}
 }
-@misc{kong2021consensus,
-title={Consensus Control for Decentralized Deep Learning},
-author={Lingjing Kong and Tao Lin and Anastasia Koloskova and Martin Jaggi and Sebastian U. Stich},
-year={2021},
-eprint={2102.04828},
-archivePrefix={arXiv},
-primaryClass={cs.LG}
-}
 @article{krizhevsky2009learning,
 title={{Learning Multiple Layers of Features from Tiny Images}},
 author={Krizhevsky, Alex},
@@ -832,8 +821,7 @@ pages={211-252}
 title = {On the importance of initialization and momentum in deep learning},
 author = {Ilya Sutskever and James Martens and George Dahl and Geoffrey Hinton},
 booktitle = {ICML},
-year = {2013},
-publisher = {PMLR}
+year = {2013}
 }
 @article{lecun1998gradient,
@@ -807,7 +807,7 @@ We have proposed D-Cliques, a sparse topology that recovers the convergence speed
 \begin{itemize}
 \item Clustering does not seem to make a difference on MNIST, even when using a higher-capacity model (LeNet) instead of a linear model (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}).
-\item Except for the random 10 topology, convergence speed seems to be correlated with scattering on CIFAR-10 with the LeNet model (Fig.~\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There are also larger differences between topologies, in both convergence speed and scattering, than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering is computed as in Consensus Control for Decentralized Deep Learning~\cite{kong2021consensus}.
+\item Except for the random 10 topology, convergence speed seems to be correlated with scattering on CIFAR-10 with the LeNet model (Fig.~\ref{fig:d-cliques-cifar10-linear-comparison-to-non-clustered-topologies}). There are also larger differences between topologies, in both convergence speed and scattering, than for MNIST (Fig.~\ref{fig:d-cliques-mnist-comparison-to-non-clustered-topologies}). Scattering is computed as in Consensus Control for Decentralized Deep Learning~\cite{consensus_distance}.
 \end{itemize}
 \end{document}
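
Note: the scattering measure cited in the last item matches the consensus distance of Kong et al. (2021), i.e. the mean squared deviation of the n local models from their average. A minimal sketch of that definition, using the cited paper's notation rather than anything defined in this diff ($x_t^{(i)}$ is node $i$'s model at step $t$):

% Consensus distance ("scattering") as defined in
% "Consensus Control for Decentralized Deep Learning" (Kong et al., 2021);
% symbols follow that paper, not this repository.
\begin{equation}
  \Xi_t^2 = \frac{1}{n} \sum_{i=1}^{n} \bigl\| \bar{x}_t - x_t^{(i)} \bigr\|_2^2,
  \qquad
  \bar{x}_t = \frac{1}{n} \sum_{i=1}^{n} x_t^{(i)}.
\end{equation}

Intuitively, a larger $\Xi_t$ means the local models have drifted further apart, which is why it is a natural quantity to compare across topologies alongside convergence speed.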