Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
% Personalized models learned collaboratively over a network (AISTATS 2017).
@inproceedings{Vanhaesebrouck2017a,
  title     = {{D}ecentralized {C}ollaborative {L}earning of {P}ersonalized {M}odels over {N}etworks},
  author    = {Paul Vanhaesebrouck and Aur\'elien Bellet and Marc Tommasi},
  booktitle = {{AISTATS}},
  year      = {2017},
}
% Joint learning of personalized models and the collaboration graph (AISTATS 2020).
@inproceedings{Zantedeschi2020a,
  title     = {{F}ully {D}ecentralized {J}oint {L}earning of {P}ersonalized {M}odels and {C}ollaboration {G}raphs},
  author    = {Valentina Zantedeschi and Aur\'elien Bellet and Marc Tommasi},
  booktitle = {{AISTATS}},
  year      = {2020},
}
% Federated multi-task learning (NIPS 2017).
@inproceedings{smith2017federated,
  author    = {Smith, Virginia and Chiang, Chao-Kai and Sanjabi, Maziar and Talwalkar, Ameet S.},
  title     = {{Federated Multi-Task Learning}},
  booktitle = {NIPS},
  year      = {2017},
}
% Personalized federated learning: lower bounds and optimal algorithms (NeurIPS 2020).
% Accented letters use LaTeX escapes in BibTeX "special character" form so that
% classic 8-bit BibTeX sorts and prints the names correctly.
@inproceedings{perso_fl_mean,
  author    = {Filip Hanzely and Slavom{\'\i}r Hanzely and Samuel Horv{\'a}th and Peter Richt{\'a}rik},
  title     = {{Lower Bounds and Optimal Algorithms for Personalized Federated Learning}},
  booktitle = {NeurIPS},
  year      = {2020},
}
% Meta-learning (MAML) approach to personalized federated learning (NeurIPS 2020).
@inproceedings{maml,
  author    = {Alireza Fallah and Aryan Mokhtari and Asuman Ozdaglar},
  title     = {{Personalized Federated Learning with Theoretical Guarantees: A Model-Agnostic Meta-Learning Approach}},
  booktitle = {NeurIPS},
  year      = {2020},
}
% Personalized federated learning via Moreau envelopes (NeurIPS 2020).
@inproceedings{moreau,
  author    = {Canh T. Dinh and Nguyen H. Tran and Tuan Dung Nguyen},
  title     = {{Personalized Federated Learning with Moreau Envelopes}},
  booktitle = {NeurIPS},
  year      = {2020},
}
% arXiv preprint; the arXiv identifier is carried in the institution field.
@techreport{momentum_noniid,
  author      = {Tao Lin and Sai Praneeth Karimireddy and Sebastian U. Stich and Martin Jaggi},
  title       = {{Quasi-Global Momentum: Accelerating Decentralized Deep Learning on Heterogeneous Data}},
  institution = {arXiv:2102.04761},
  year        = {2021},
}
% arXiv preprint; the arXiv identifier is carried in the institution field.
% The camel-case name is braced so sentence-casing styles do not turn it into
% "Tornadoaggregate".
@techreport{tornado,
  author      = {Jin-Woo Lee and Jaehoon Oh and Sungsu Lim and Se-Young Yun and Jae-Gil Lee},
  title       = {{TornadoAggregate}: Accurate and Scalable Federated Learning via the Ring-Based Architecture},
  institution = {arXiv:2012.03214},
  year        = {2020},
}
% arXiv preprint; the arXiv identifier is carried in the institution field.
@techreport{cross_gradient,
  author      = {Yasaman Esfandiari and Sin Yong Tan and Zhanhong Jiang and Aditya Balu and Ethan Herron and Chinmay Hegde and Soumik Sarkar},
  title       = {{Cross-Gradient Aggregation for Decentralized Learning from Non-IID data}},
  institution = {arXiv:2103.02051},
  year        = {2021},
}
% arXiv preprint; the arXiv identifier is carried in the institution field.
@techreport{consensus_distance,
  author      = {Lingjing Kong and Tao Lin and Anastasia Koloskova and Martin Jaggi and Sebastian U. Stich},
  title       = {{Consensus Control for Decentralized Deep Learning}},
  institution = {arXiv:2102.04828},
  year        = {2021},
}
% Gossip dual averaging for pairwise objectives (ICML 2016).
@inproceedings{Colin2016a,
  title     = {{G}ossip {D}ual {A}veraging for {D}ecentralized {O}ptimization of {P}airwise {F}unctions},
  author    = {Igor Colin and Aur\'elien Bellet and Joseph Salmon and St\'ephan Cl\'emen\c{c}on},
  booktitle = {{ICML}},
  year      = {2016},
}
% SCAFFOLD: stochastic controlled averaging (ICML 2020).
@inproceedings{scaffold,
  author    = {Sai Praneeth Karimireddy and Satyen Kale and Mehryar Mohri and Sashank J. Reddi and Sebastian U. Stich and Ananda Theertha Suresh},
  title     = {{SCAFFOLD: Stochastic Controlled Averaging for On-Device Federated Learning}},
  booktitle = {ICML},
  year      = {2020},
}
% Topology design for cross-silo federated learning (NeurIPS 2020).
@inproceedings{marfoq,
  author    = {Othmane Marfoq and Chuan Xu and Giovanni Neglia and Richard Vidal},
  title     = {{Throughput-Optimal Topology Design for Cross-Silo Federated Learning}},
  booktitle = {NeurIPS},
  year      = {2020},
}
% Asynchronous decentralized parallel SGD (ICML 2018).
@inproceedings{Lian2018,
  author    = {Xiangru Lian and Wei Zhang and Ce Zhang and Ji Liu},
  title     = {{Asynchronous Decentralized Parallel Stochastic Gradient Descent}},
  booktitle = {ICML},
  year      = {2018},
}
% Federated optimization in heterogeneous networks (MLSys 2020).
@inproceedings{fedprox,
  title     = {{Federated Optimization in Heterogeneous Networks}},
  author    = {Tian Li and Anit Kumar Sahu and Manzil Zaheer and Maziar Sanjabi and Ameet Talwalkar and Virginia Smith},
  booktitle = {MLSys},
  year      = {2020},
}
% Non-IID data in decentralized machine learning (ICML 2020).
@inproceedings{quagmire,
  author    = {Kevin Hsieh and Amar Phanishayee and Onur Mutlu and Phillip B. Gibbons},
  title     = {{The Non-IID Data Quagmire of Decentralized Machine Learning}},
  booktitle = {ICML},
  year      = {2020},
}
% Communication-efficient learning from decentralized data (AISTATS 2017).
% The compound surname is braced as one unit: unbraced, BibTeX treats the
% lowercase "y" as the end of a von-part and keeps only "Arcas" as the last name.
@inproceedings{mcmahan2016communication,
  author    = {McMahan, H. Brendan and Moore, Eider and Ramage, Daniel and Hampson, Seth and {Ag{\"u}era y Arcas}, Blaise},
  title     = {Communication-efficient learning of deep networks from decentralized data},
  booktitle = {AISTATS},
  year      = {2017},
}
% Role of topology in decentralized gradient methods (AISTATS 2020).
@inproceedings{neglia2020,
  author    = {Giovanni Neglia and Chuan Xu and Don Towsley and Gianmarco Calbi},
  title     = {Decentralized gradient methods: does topology matter?},
  booktitle = {AISTATS},
  year      = {2020},
}
% arXiv preprint; the arXiv identifier is carried in the institution field.
% The accented letter uses a LaTeX escape so classic 8-bit BibTeX handles it.
@techreport{amp_dec,
  author      = {Edwige Cyffers and Aur{\'e}lien Bellet},
  title       = {{Privacy Amplification by Decentralization}},
  institution = {arXiv:2012.05326},
  year        = {2020},
}
% Dual averaging for distributed optimization (IEEE TAC, 2012).
% keywords, owner, timestamp and date-modified are reference-manager metadata.
@article{Duchi2012a,
  author        = {John C. Duchi and Alekh Agarwal and Martin J. Wainwright},
  title         = {{D}ual {A}veraging for {D}istributed {O}ptimization: {C}onvergence {A}nalysis and {N}etwork {S}caling},
  journal       = {{IEEE} {T}ransactions on {A}utomatic {C}ontrol},
  volume        = {57},
  number        = {3},
  pages         = {592--606},
  year          = {2012},
  keywords      = {optimization, distributed},
  owner         = {aurelien},
  timestamp     = {2013.09.16},
  date-modified = {2014-10-30 15:23:27 +0000},
}
% Empirical comparison of gossip learning and federated learning (JPDC, 2021).
% Accented letters use LaTeX escapes for classic 8-bit BibTeX; the first
% author's surname matches the spelling used in the ormandi2013gossip entry.
@article{jelasity,
  author  = {Istv{\'a}n Heged{\H{u}}s and G{\'a}bor Danner and M{\'a}rk Jelasity},
  title   = {{Decentralized learning works: An empirical comparison of gossip learning and federated learning}},
  journal = {Journal of Parallel and Distributed Computing},
  volume  = {148},
  pages   = {109--124},
  year    = {2021},
}
% Survey on topology and communication/computation trade-offs (Proc. IEEE, 2018).
% Same work as the nedic2018network entry; both keys are kept for existing citations.
% The accented letter uses a LaTeX escape so classic 8-bit BibTeX handles it.
@article{Nedic18,
  author  = {Angelia Nedi{\'c} and Alex Olshevsky and Michael G. Rabbat},
  title   = {{Network Topology and Communication-Computation Tradeoffs in Decentralized Optimization}},
  journal = {Proceedings of the IEEE},
  volume  = {106},
  number  = {5},
  pages   = {953--976},
  year    = {2018},
}
% arXiv preprint; "and others" is BibTeX's et-al. token for the remaining authors.
@techreport{kairouz2019advances,
  author      = {Peter Kairouz and others},
  title       = {{Advances and Open Problems in Federated Learning}},
  institution = {arXiv:1912.04977},
  year        = {2019},
}
% The lasso paper. Volume and issue added so article styles can print a
% complete reference -- NOTE(review): confirm 58(1) against the journal record.
@article{tibshirani1996regression,
  author    = {Tibshirani, Robert},
  title     = {Regression shrinkage and selection via the lasso},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume    = {58},
  number    = {1},
  pages     = {267--288},
  year      = {1996},
  publisher = {JSTOR},
}
% Exact matrix completion via convex optimization.
% Journal name capitalised in full; styles print this field verbatim.
@article{candes2009exact,
  author    = {Cand{\`e}s, Emmanuel J and Recht, Benjamin},
  title     = {Exact matrix completion via convex optimization},
  journal   = {Foundations of Computational Mathematics},
  volume    = {9},
  number    = {6},
  pages     = {717--772},
  year      = {2009},
  publisher = {Springer},
}
% Near-optimal matrix completion by convex relaxation (IEEE Trans. IT, 2010).
@article{candes2010power,
  author    = {Cand{\`e}s, Emmanuel J and Tao, Terence},
  title     = {The power of convex relaxation: Near-optimal matrix completion},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {56},
  number    = {5},
  pages     = {2053--2080},
  year      = {2010},
  publisher = {IEEE},
}
% A simpler approach to matrix completion (JMLR, 2011).
% The exporter put the month in the issue-number field; it is stored as the
% standard month macro instead.
@article{recht2011simpler,
  author  = {Recht, Benjamin},
  title   = {A simpler approach to matrix completion},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {3413--3430},
  month   = dec,
  year    = {2011},
}
% Quantum state tomography via compressed sensing.
% pages holds the article number. Journal name capitalised in full because
% styles print this field verbatim.
@article{gross2010quantum,
  author    = {Gross, David and Liu, Yi-Kai and Flammia, Steven T and Becker, Stephen and Eisert, Jens},
  title     = {Quantum state tomography via compressed sensing},
  journal   = {Physical Review Letters},
  volume    = {105},
  number    = {15},
  pages     = {150401},
  year      = {2010},
  publisher = {APS},
}
% Low-rank matrix recovery from few coefficients (IEEE Trans. IT, 2011).
@article{gross2011recovering,
  author    = {Gross, David},
  title     = {Recovering low-rank matrices from few coefficients in any basis},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {57},
  number    = {3},
  pages     = {1548--1566},
  year      = {2011},
  publisher = {IEEE},
}
% Nuclear-norm penalization for noisy low-rank matrix completion.
% Volume and issue added so article styles can print a complete reference --
% NOTE(review): confirm 39(5) against the journal record.
@article{koltchinskii2011nuclear,
  author    = {Koltchinskii, Vladimir and Lounici, Karim and Tsybakov, Alexandre B},
  title     = {Nuclear-norm penalization and optimal rates for noisy low-rank matrix completion},
  journal   = {The Annals of Statistics},
  volume    = {39},
  number    = {5},
  pages     = {2302--2329},
  year      = {2011},
  publisher = {JSTOR},
}
% arXiv preprint; the arXiv identifier is carried in the journal field.
@article{bhojanapalli2016global,
  author  = {Bhojanapalli, Srinadh and Neyshabur, Behnam and Srebro, Nathan},
  title   = {Global Optimality of Local Search for Low Rank Matrix Recovery},
  journal = {arXiv preprint arXiv:1605.07221},
  year    = {2016},
}
% Matrix factorization techniques for recommender systems (Computer, 2009).
% The exporter's trailing "and others" is dropped (the article has three
% authors) and the publisher is reduced from a postal address to its name.
@article{koren2009matrix,
  author    = {Koren, Yehuda and Bell, Robert and Volinsky, Chris},
  title     = {Matrix factorization techniques for recommender systems},
  journal   = {Computer},
  volume    = {42},
  number    = {8},
  pages     = {30--37},
  year      = {2009},
  publisher = {IEEE},
}
% Phase retrieval via matrix completion (SIAM Review, 2015).
% First author spelled with the accent, as in the other entries for the same
% author in this file; journal name capitalised in full.
@article{candes2015phase,
  author    = {Cand{\`e}s, Emmanuel J and Eldar, Yonina C and Strohmer, Thomas and Voroninski, Vladislav},
  title     = {Phase retrieval via matrix completion},
  journal   = {SIAM Review},
  volume    = {57},
  number    = {2},
  pages     = {225--251},
  year      = {2015},
  publisher = {SIAM},
}
% Robust video denoising using low-rank matrix completion (CVPR 2010).
% Title carries no trailing period (the style adds punctuation); the exporter's
% "Citeseer" organization is omitted because it is an index, not the sponsor.
@inproceedings{ji2010robust,
  author    = {Ji, Hui and Liu, Chaoqiang and Shen, Zuowei and Xu, Yuhong},
  title     = {Robust video denoising using low rank matrix completion},
  booktitle = {CVPR},
  pages     = {1791--1798},
  year      = {2010},
}
% Robust photometric stereo via low-rank matrix completion (ACCV 2010).
@inproceedings{wu2010robust,
  author       = {Wu, Lun and Ganesh, Arvind and Shi, Boxin and Matsushita, Yasuyuki and Wang, Yongtian and Ma, Yi},
  title        = {Robust photometric stereo via low-rank matrix completion and recovery},
  booktitle    = {Asian Conference on Computer Vision},
  pages        = {703--717},
  year         = {2010},
  organization = {Springer},
}
% Transduction with matrix completion (NIPS 2010).
% Proceedings title capitalised like the other entries for this venue.
@inproceedings{goldberg2010transduction,
  author    = {Goldberg, Andrew and Recht, Ben and Xu, Junming and Nowak, Robert and Zhu, Xiaojin},
  title     = {Transduction with matrix completion: Three birds with one stone},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {757--765},
  year      = {2010},
}
% On-line weather monitoring based on matrix completion (ICDCS 2014).
% Proceedings title written in natural word order instead of the exporter's
% inverted "..., 2014 IEEE 34th International Conference on" form.
@inproceedings{xie2014learning,
  author       = {Xie, Kun and Wang, Lele and Wang, Xin and Wen, Jigang and Xie, Gaogang},
  title        = {Learning from the past: intelligent on-line weather monitoring based on matrix completion},
  booktitle    = {IEEE 34th International Conference on Distributed Computing Systems (ICDCS)},
  pages        = {176--185},
  year         = {2014},
  organization = {IEEE},
}
% Nuclear norm and bilinear factorization for low-rank decomposition (ICCV 2013).
@inproceedings{cabral2013unifying,
  author    = {Cabral, Ricardo and De La Torre, Fernando and Costeira, Jo{\~a}o P and Bernardino, Alexandre},
  title     = {Unifying nuclear norm and bilinear factorization approaches for low-rank matrix decomposition},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {2488--2495},
  year      = {2013},
}
% Matrix completion for multi-label image classification (NIPS 2011).
% The exporter's placeholder volume/number/pages (201, 1, 2) are omitted and
% the title carries no trailing period (the style adds punctuation).
@inproceedings{cabral2011matrix,
  author    = {Cabral, Ricardo Silveira and De la Torre, Fernando and Costeira, Jo{\~a}o Paulo and Bernardino, Alexandre},
  title     = {Matrix Completion for Multi-label Image Classification},
  booktitle = {NIPS},
  year      = {2011},
}
% Multi-task learning tutorial. The exporter had stored a garbled URL as the
% booktitle; the venue is read off that URL (siam.org/meetings/sdm12) and the
% URL itself is moved to a note. NOTE(review): confirm the venue wording.
@inproceedings{zhou2012multi,
  author    = {Zhou, Jiayu and Chen, Jianhui and Ye, Jieping},
  title     = {Multi-task learning: Theory, algorithms, and applications},
  booktitle = {Tutorial at the SIAM International Conference on Data Mining (SDM)},
  year      = {2012},
  note      = {\url{https://www.siam.org/meetings/sdm12/zhou_chen_ye.pdf}},
}
% SDPT3 solver paper. The dash is written as a TeX em dash (---) for 8-bit
% BibTeX, the product names are braced so sentence-casing styles keep their
% capitals, and the journal name is capitalised in full.
@article{toh1999sdpt3,
  author    = {Toh, Kim-Chuan and Todd, Michael J and T{\"u}t{\"u}nc{\"u}, Reha H},
  title     = {{SDPT3}---a {MATLAB} software package for semidefinite programming, version 1.3},
  journal   = {Optimization Methods and Software},
  volume    = {11},
  number    = {1-4},
  pages     = {545--581},
  year      = {1999},
  publisher = {Taylor \& Francis},
}
% SeDuMi toolbox paper. Product names are braced so sentence-casing styles keep
% their capitals; journal name capitalised in full.
@article{sturm1999using,
  author    = {Sturm, Jos F},
  title     = {Using {SeDuMi} 1.02, a {MATLAB} toolbox for optimization over symmetric cones},
  journal   = {Optimization Methods and Software},
  volume    = {11},
  number    = {1-4},
  pages     = {625--653},
  year      = {1999},
  publisher = {Taylor \& Francis},
}
% Singular value thresholding for matrix completion (SIAM J. Optim., 2010).
@article{cai2010singular,
  author    = {Cai, Jian-Feng and Cand{\`e}s, Emmanuel J and Shen, Zuowei},
  title     = {A singular value thresholding algorithm for matrix completion},
  journal   = {SIAM Journal on Optimization},
  volume    = {20},
  number    = {4},
  pages     = {1956--1982},
  year      = {2010},
  publisher = {SIAM},
}
% Fixed point and Bregman iterative methods for rank minimization.
% The proper noun is braced so sentence-casing styles keep its capital.
@article{ma2011fixed,
  author    = {Ma, Shiqian and Goldfarb, Donald and Chen, Lifeng},
  title     = {Fixed point and {Bregman} iterative methods for matrix rank minimization},
  journal   = {Mathematical Programming},
  volume    = {128},
  number    = {1-2},
  pages     = {321--353},
  year      = {2011},
  publisher = {Springer},
}
% Sparse approximate solutions to semidefinite programs (LATIN 2008).
@inproceedings{hazan2008sparse,
  author       = {Hazan, Elad},
  title        = {Sparse approximate solutions to semidefinite programs},
  booktitle    = {Latin American Symposium on Theoretical Informatics},
  pages        = {306--316},
  year         = {2008},
  organization = {Springer},
}
% A simple algorithm for nuclear norm regularized problems (ICML 2010).
% The second author's surname is restored with its accent (the exporter had
% truncated it) and the spurious "and others" is dropped.
@inproceedings{jaggi2010simple,
  author    = {Jaggi, Martin and Sulovsk{\'y}, Marek},
  title     = {A simple algorithm for nuclear norm regularized problems},
  booktitle = {Proceedings of the 27th International Conference on Machine Learning (ICML-10)},
  pages     = {471--478},
  year      = {2010},
}
% Distributed Frank-Wolfe (CoRR preprint). The algorithm name is braced so
% sentence-casing styles keep "Frank-Wolfe"; the exporter's "Citeseer"
% publisher is omitted because it is an index, not a publisher.
@article{bellet2014distributed,
  author  = {Bellet, Aur{\'e}lien and Liang, Yingyu and Garakani, Alireza Bagheri and Balcan, Maria-Florina and Sha, Fei},
  title   = {Distributed {Frank-Wolfe} algorithm: A unified framework for communication-efficient sparse learning},
  journal = {CoRR, abs/1404.2644},
  year    = {2014},
}
% Parallel and distributed block-coordinate Frank-Wolfe (ICML 2016).
% The algorithm name is braced so sentence-casing styles keep its capitals.
@inproceedings{wang2016parallel,
  author    = {Wang, Yu-Xiang and Sadhanala, Veeranjaneyulu and Dai, Wei and Neiswanger, Willie and Sra, Suvrit and Xing, Eric},
  title     = {Parallel and distributed block-coordinate {Frank-Wolfe} algorithms},
  booktitle = {International Conference on Machine Learning},
  pages     = {1548--1557},
  year      = {2016},
}
% Block-coordinate Frank-Wolfe for structural SVMs (arXiv preprint).
% The algorithm name and the acronym are braced so sentence-casing styles keep
% their capitals.
@article{lacoste2012block,
  author  = {Lacoste-Julien, Simon and Jaggi, Martin and Schmidt, Mark and Pletscher, Patrick},
  title   = {Block-coordinate {Frank-Wolfe} optimization for structural {SVMs}},
  journal = {arXiv preprint arXiv:1207.4747},
  year    = {2012},
}
% The original Frank-Wolfe paper. Journal name capitalised in full because
% styles print this field verbatim.
@article{frank1956algorithm,
  author    = {Frank, Marguerite and Wolfe, Philip},
  title     = {An algorithm for quadratic programming},
  journal   = {Naval Research Logistics Quarterly},
  volume    = {3},
  number    = {1-2},
  pages     = {95--110},
  year      = {1956},
  publisher = {Wiley Online Library},
}
% Coresets, sparse greedy approximation and Frank-Wolfe (ACM TALG, 2010).
% The algorithm name is braced so sentence-casing styles keep its capitals.
@article{clarkson2010coresets,
  author    = {Clarkson, Kenneth L},
  title     = {Coresets, sparse greedy approximation, and the {Frank-Wolfe} algorithm},
  journal   = {ACM Transactions on Algorithms (TALG)},
  volume    = {6},
  number    = {4},
  pages     = {63},
  year      = {2010},
  publisher = {ACM},
}
% Revisiting Frank-Wolfe (ICML 2013). The algorithm name is braced against
% sentence casing, the title has no trailing period, and the venue drops the
% exporter's "(1)" volume suffix to match the other ICML entries.
@inproceedings{jaggi2013revisiting,
  author    = {Jaggi, Martin},
  title     = {Revisiting {Frank-Wolfe}: Projection-Free Sparse Convex Optimization},
  booktitle = {ICML},
  pages     = {427--435},
  year      = {2013},
}
% Trace norm regularization and multi-task learning (SIAM J. Optim., 2010).
@article{pong2010trace,
  author    = {Pong, Ting Kei and Tseng, Paul and Ji, Shuiwang and Ye, Jieping},
  title     = {Trace norm regularization: Reformulations, algorithms, and multi-task learning},
  journal   = {SIAM Journal on Optimization},
  volume    = {20},
  number    = {6},
  pages     = {3465--3489},
  year      = {2010},
  publisher = {SIAM},
}
% Large-scale image classification with trace-norm regularization (CVPR 2012).
% Proceedings title written in natural word order; the fourth author's surname
% carries its accent (dotless i under the acute).
@inproceedings{harchaoui2012large,
  author       = {Harchaoui, Zaid and Douze, Matthijs and Paulin, Mattis and Dud{\'\i}k, Miroslav and Malick, J{\'e}r{\^o}me},
  title        = {Large-scale image classification with trace-norm regularization},
  booktitle    = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {3386--3393},
  year         = {2012},
  organization = {IEEE},
}
% ImageNet Large Scale Visual Recognition Challenge (IJCV, 2015).
% The page range uses the BibTeX double hyphen (en dash).
@article{ILSVRC15,
  author  = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
  title   = {{ImageNet Large Scale Visual Recognition Challenge}},
  journal = {International Journal of Computer Vision (IJCV)},
  volume  = {115},
  number  = {3},
  pages   = {211--252},
  year    = {2015},
  doi     = {10.1007/s11263-015-0816-y},
}
% Deep residual learning for image recognition (CVPR 2016).
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {770--778},
  year      = {2016},
}
% Keras software reference; the repository URL is in howpublished.
@misc{chollet2015keras,
  author       = {Chollet, Fran\c{c}ois},
  title        = {Keras},
  howpublished = {\url{https://github.com/fchollet/keras}},
  publisher    = {GitHub},
  year         = {2015},
}
% Consistency of trace norm minimization (JMLR, 2008).
% The exporter put the month in the issue-number field; it is stored as the
% standard month macro instead.
@article{bach2008consistency,
  author  = {Bach, Francis R},
  title   = {Consistency of trace norm minimization},
  journal = {Journal of Machine Learning Research},
  volume  = {9},
  pages   = {1019--1048},
  month   = jun,
  year    = {2008},
}
% Spark: cluster computing with working sets (HotCloud 2010).
% The ordinal in the proceedings title is "2nd" (the exporter had "2Nd") and
% the one-page "10--10" range is written as a single page.
@inproceedings{Zaharia:2010:SCC:1863103.1863113,
  author    = {Zaharia, Matei and Chowdhury, Mosharaf and Franklin, Michael J. and Shenker, Scott and Stoica, Ion},
  title     = {Spark: Cluster Computing with Working Sets},
  booktitle = {Proceedings of the 2nd USENIX Conference on Hot Topics in Cloud Computing},
  series    = {HotCloud'10},
  year      = {2010},
  location  = {Boston, MA},
  pages     = {10},
  numpages  = {1},
  url       = {http://dl.acm.org/citation.cfm?id=1863103.1863113},
  acmid     = {1863113},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}
% Web reference (Stack Overflow question). A misc entry with neither author nor
% key triggers a BibTeX sort warning, so a key is supplied. The title is
% reconstructed from the URL slug -- NOTE(review): confirm against the page.
@misc{reduce,
  key          = {stackoverflow},
  title        = {How to force {Spark} to perform reduction locally},
  publisher    = {stackoverflow},
  howpublished = {\url{http://stackoverflow.com/questions/37422222/how-to-force-spark-to-perform-reduction-locally}},
}
% Web reference (Hortonworks community question). A misc entry with neither
% author nor key triggers a BibTeX sort warning, so a key is supplied. The title
% is reconstructed from the URL slug -- NOTE(review): confirm against the page.
@misc{ht,
  key          = {hortonworks},
  title        = {{Spark} and hyper-threading},
  publisher    = {hortonworks},
  howpublished = {\url{https://community.hortonworks.com/questions/52561/spark-and-hyper-threading.html}},
}
% Decentralized Frank-Wolfe for convex and non-convex problems (IEEE TAC, 2017).
% The algorithm name is braced so sentence-casing styles keep its capitals.
@article{wai2017decentralized,
  author    = {Wai, Hoi-To and Lafond, Jean and Scaglione, Anna and Moulines, Eric},
  title     = {Decentralized {Frank-Wolfe} Algorithm for Convex and Non-convex Problems},
  journal   = {IEEE Transactions on Automatic Control},
  year      = {2017},
  publisher = {IEEE},
}
% Distributed training of conditional maximum entropy models (NIPS 2009).
% First author's surname written with its internal capital ("McDonald").
@inproceedings{mcdonald2009efficient,
  author    = {McDonald, Ryan and Mohri, Mehryar and Silberman, Nathan and Walker, Dan and Mann, Gideon S},
  title     = {Efficient large-scale distributed training of conditional maximum entropy models},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1231--1239},
  year      = {2009},
}
% Parallelized stochastic gradient descent (NIPS 2010).
% Proceedings title capitalised like the other entries for this venue.
@inproceedings{zinkevich2010parallelized,
  author    = {Zinkevich, Martin and Weimer, Markus and Li, Lihong and Smola, Alex J},
  title     = {Parallelized stochastic gradient descent},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2595--2603},
  year      = {2010},
}
% MapReduce (CACM, 2008). The camel-case name is braced so sentence-casing
% styles do not turn it into "Mapreduce".
@article{dean2008mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: simplified data processing on large clusters},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  publisher = {ACM},
}
% Largest-eigenvalue estimation by power and Lanczos methods.
% The proper noun is braced against sentence casing; journal name capitalised
% in full because styles print this field verbatim.
@article{kuczynski1992estimating,
  author    = {Kuczy{\'n}ski, J and Wo{\'z}niakowski, H},
  title     = {Estimating the largest eigenvalue by the power and {Lanczos} algorithms with a random start},
  journal   = {SIAM Journal on Matrix Analysis and Applications},
  volume    = {13},
  number    = {4},
  pages     = {1094--1122},
  year      = {1992},
  publisher = {SIAM},
}
% Global linear convergence of Frank-Wolfe variants (NIPS 2015).
% The algorithm name is braced so sentence-casing styles keep its capitals.
@inproceedings{lacoste2015global,
  author    = {Lacoste-Julien, Simon and Jaggi, Martin},
  title     = {On the global linear convergence of {Frank-Wolfe} optimization variants},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {496--504},
  year      = {2015},
}
% Faster Frank-Wolfe rates over strongly convex sets (ICML 2015).
% The algorithm name is braced against sentence casing; the title carries no
% trailing period (the style adds punctuation).
@inproceedings{garber2015faster,
  author    = {Garber, Dan and Hazan, Elad},
  title     = {Faster Rates for the {Frank-Wolfe} Method over Strongly-Convex Sets},
  booktitle = {ICML},
  pages     = {541--549},
  year      = {2015},
}
% Chapter in the edited volume "Integer and Nonlinear Programming".
% Typed as incollection (the container is a book, not a journal), with the
% publisher name and city in separate fields. NOTE(review): confirm the
% container is a book; the editor is not recorded here.
@incollection{wolfe1970convergence,
  author    = {Wolfe, Philip},
  title     = {Convergence theory in nonlinear programming},
  booktitle = {Integer and Nonlinear Programming},
  pages     = {1--36},
  year      = {1970},
  publisher = {North-Holland},
  address   = {Amsterdam},
}
% Comments on Wolfe's away step (Math. Programming, 1986).
% The quotation marks are written as TeX quotes (` and ') for 8-bit BibTeX and
% the proper noun is braced against sentence casing.
@article{guelat1986some,
  author    = {Gu{\'e}lat, Jacques and Marcotte, Patrice},
  title     = {Some comments on {Wolfe's} `away step'},
  journal   = {Mathematical Programming},
  volume    = {35},
  number    = {1},
  pages     = {110--119},
  year      = {1986},
  publisher = {Springer},
}
% Conditional gradient with linear convergence for convex linear systems.
@article{beck2004conditional,
  author    = {Beck, Amir and Teboulle, Marc},
  title     = {A conditional gradient method with linear rate of convergence for solving convex linear systems},
  journal   = {Mathematical Methods of Operations Research},
  volume    = {59},
  number    = {2},
  pages     = {235--247},
  year      = {2004},
  publisher = {Springer},
}
% von Neumann and Frank--Wolfe algorithms with away steps (SIAM J. Optim., 2016).
% Proper nouns are braced so sentence-casing styles keep their case; the first
% author's surname carries its tilde.
@article{pena2016neumann,
  author    = {Pe{\~n}a, Javier and Rodr{\'\i}guez, Daniel and Soheili, Negar},
  title     = {On the {von Neumann} and {Frank--Wolfe} Algorithms with Away Steps},
  journal   = {SIAM Journal on Optimization},
  volume    = {26},
  number    = {1},
  pages     = {499--512},
  year      = {2016},
  publisher = {SIAM},
}
% Modified Frank--Wolfe for minimum-volume enclosing ellipsoids.
% The exporter had parsed the given name "Damla" as part of the surname; the
% first author is written surname-first with accents. Journal spelled
% "Optimization Methods and Software", as in the other entries for it.
% NOTE(review): confirm the first author's name spelling.
@article{damla2008linear,
  author    = {Ahipa{\c{s}}ao{\u{g}}lu, S. Damla and Sun, Peng and Todd, Michael J},
  title     = {Linear convergence of a modified {Frank--Wolfe} algorithm for computing minimum-volume enclosing ellipsoids},
  journal   = {Optimization Methods and Software},
  volume    = {23},
  number    = {1},
  pages     = {5--19},
  year      = {2008},
  publisher = {Taylor \& Francis},
}
% A novel Frank--Wolfe algorithm for large-scale SVM training.
% The algorithm name, the acronym and the capital that follows the mid-title
% period are braced so sentence-casing styles leave them alone.
@article{nanculef2014novel,
  author    = {{\~N}anculef, Ricardo and Frandi, Emanuele and Sartori, Claudio and Allende, H{\'e}ctor},
  title     = {A novel {Frank--Wolfe} algorithm. {Analysis} and applications to large-scale {SVM} training},
  journal   = {Information Sciences},
  volume    = {285},
  pages     = {66--99},
  year      = {2014},
  publisher = {Elsevier},
}
% Approximate conditional gradient descent for multi-class classification (AAAI 2017).
% The title carries no trailing period (the style adds punctuation).
@inproceedings{liu2017approximate,
  author    = {Liu, Zhuanghua and Tsang, Ivor},
  title     = {Approximate Conditional Gradient Descent on Multi-Class Classification},
  booktitle = {AAAI},
  pages     = {2301--2307},
  year      = {2017},
}
% Distributing Frank-Wolfe via Map-Reduce. The exported entry was an article
% with neither journal nor year, which BibTeX reports as missing fields.
% NOTE(review): venue and year (ICDM 2017) are supplied from memory -- confirm.
@inproceedings{moharrerdistributing,
  author    = {Moharrer, Armin and Ioannidis, Stratis},
  title     = {Distributing {Frank-Wolfe} via {Map-Reduce}},
  booktitle = {IEEE International Conference on Data Mining (ICDM)},
  year      = {2017},
}
% Lifted coordinate descent for trace-norm regularization (AISTATS 2012).
% First author's surname carries its accent (dotless i under the acute).
@inproceedings{dudik2012lifted,
  author    = {Dud{\'\i}k, Miroslav and Harchaoui, Zaid and Malick, J{\'e}r{\^o}me},
  title     = {Lifted coordinate descent for learning with trace-norm regularization},
  booktitle = {Artificial Intelligence and Statistics},
  pages     = {327--336},
  year      = {2012},
}
% Accelerated proximal gradient for nuclear-norm regularized least squares.
% The exporter had stored the page range (615-640) in the number field and a
% stray "15" in pages; the range is moved to pages.
% NOTE(review): issue number 3 is supplied from memory -- confirm.
@article{toh2010accelerated,
  author  = {Toh, Kim-Chuan and Yun, Sangwoon},
  title   = {An accelerated proximal gradient algorithm for nuclear norm regularized linear least squares problems},
  journal = {Pacific Journal of Optimization},
  volume  = {6},
  number  = {3},
  pages   = {615--640},
  year    = {2010},
}
% D-PSGD: decentralized parallel SGD case study (NIPS 2017).
@inproceedings{lian2017d-psgd,
  author    = {Lian, Xiangru and Zhang, Ce and Zhang, Huan and Hsieh, Cho-Jui and Zhang, Wei and Liu, Ji},
  title     = {{Can Decentralized Algorithms Outperform Centralized Algorithms? A Case Study for Decentralized Parallel Stochastic Gradient Descent}},
  booktitle = {NIPS},
  year      = {2017},
}
% Stochastic gradient-push on time-varying directed graphs (IEEE TAC, 2016).
% The accented letter uses a LaTeX escape for 8-bit BibTeX and the page range
% uses the double hyphen (en dash).
@article{nedic2016sgp,
  author  = {Nedi{\'c}, Angelia and Olshevsky, Alex},
  title   = {Stochastic Gradient-Push for Strongly Convex Functions on Time-Varying Directed Graphs},
  journal = {IEEE Transactions on Automatic Control},
  volume  = {61},
  number  = {12},
  pages   = {3936--3947},
  year    = {2016},
}
% Stochastic gradient push for distributed deep learning.
% The venue is a conference, so the entry is typed inproceedings with the
% conference name in booktitle rather than journal.
@inproceedings{assran2019stochastic,
  author    = {Mahmoud Assran and Nicolas Loizou and Nicolas Ballas and Michael Rabbat},
  title     = {Stochastic Gradient Push for Distributed Deep Learning},
  booktitle = {International Conference on Machine Learning},
  year      = {2019},
}
% Book chapter introducing PyTorch. Product and language names are written
% with their usual capitalisation and braced against sentence casing.
@incollection{ketkar2017introduction,
  author    = {Ketkar, Nikhil},
  title     = {Introduction to {PyTorch}},
  booktitle = {Deep Learning with {Python}},
  pages     = {195--208},
  year      = {2017},
  publisher = {Springer},
}
% Randomized gossip algorithms (IEEE Trans. IT, 2006).
@article{boyd2006randomized,
  author    = {Boyd, Stephen and Ghosh, Arpita and Prabhakar, Balaji and Shah, Devavrat},
  title     = {{Randomized Gossip Algorithms}},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {52},
  number    = {6},
  pages     = {2508--2530},
  year      = {2006},
  publisher = {IEEE},
  doi       = {10.1109/TIT.2006.874516},
}
% Gossip-based computation of aggregate information.
% "Foundations of Computer Science" is a symposium, so the entry is typed
% inproceedings with the venue in booktitle.
% NOTE(review): confirm the exact proceedings title.
@inproceedings{kempe2003gossip,
  author       = {Kempe, David and Dobra, Alin and Gehrke, Johannes},
  title        = {{Gossip-based Computation of Aggregate Information}},
  booktitle    = {IEEE Symposium on Foundations of Computer Science (FOCS)},
  year         = {2003},
  organization = {IEEE},
}
% Survey on topology and communication/computation trade-offs (Proc. IEEE, 2018).
% Same work as the Nedic18 entry; both keys are kept for existing citations.
@article{nedic2018network,
  author    = {Nedi{\'c}, Angelia and Olshevsky, Alex and Rabbat, Michael G},
  title     = {{Network Topology and Communication-Computation Tradeoffs in Decentralized Optimization}},
  journal   = {Proceedings of the IEEE},
  volume    = {106},
  number    = {5},
  pages     = {953--976},
  year      = {2018},
  publisher = {IEEE},
}
% D^2: decentralized training over decentralized data (ICML 2018).
@inproceedings{tang18a,
  author    = {Tang, Hanlin and Lian, Xiangru and Yan, Ming and Zhang, Ce and Liu, Ji},
  title     = {{$D^2$: Decentralized Training over Decentralized Data}},
  booktitle = {ICML},
  year      = {2018},
}
% Distributed average consensus with least-mean-square deviation.
% Journal name capitalised like the other entry for this journal in the file.
@article{xiao2007distributed,
  author    = {Xiao, Lin and Boyd, Stephen and Kim, Seung-Jean},
  title     = {{Distributed Average Consensus with Least-Mean-Square Deviation}},
  journal   = {Journal of Parallel and Distributed Computing},
  volume    = {67},
  number    = {1},
  pages     = {33--46},
  year      = {2007},
  publisher = {Elsevier},
}
% MNIST dataset web page; the URL is in howpublished.
@misc{mnistWebsite,
  author       = {LeCun, Yann and Cortes, Corinna and Burges, Christopher J.C.},
  title        = {{The MNIST database of handwritten digits}},
  howpublished = {\url{http://yann.lecun.com/exdb/mnist/}},
  year         = {2020},
}
% arXiv preprint, identified through the eprint fields.
@misc{shallue2018measuring,
  author        = {Christopher J. Shallue and Jaehoon Lee and Joseph Antognini and Jascha Sohl-Dickstein and Roy Frostig and George E. Dahl},
  title         = {{Measuring the Effects of Data Parallelism on Neural Network Training}},
  year          = {2018},
  eprint        = {1811.03600},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% The small-world networks paper (Nature, 1998).
% The quotation marks are written as TeX quotes for 8-bit BibTeX, the space
% lost before "networks" is restored, and the journal name is capitalised.
@article{watts1998collective,
  author    = {Watts, Duncan J and Strogatz, Steven H},
  title     = {Collective dynamics of `small-world' networks},
  journal   = {Nature},
  volume    = {393},
  number    = {6684},
  pages     = {440--442},
  year      = {1998},
  publisher = {Nature Publishing Group},
}
% Book on small-world networks (Princeton University Press).
@book{watts2000small,
  author    = {Watts, Duncan J},
  title     = {Small worlds: The dynamics of networks between order and randomness},
  publisher = {Princeton University Press},
  year      = {2000},
}
% Random Model Walk !!!
@article{ormandi2013gossip,
  author    = {Orm{\'a}ndi, R{\'o}bert and Heged{\H{u}}s, Istv{\'a}n and Jelasity, M{\'a}rk},
  title     = {Gossip learning with linear models on fully distributed data},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {25},
  number    = {4},
  pages     = {556--571},
  year      = {2013},
  publisher = {Wiley Online Library},
}
% Random Model Walk application to mobile computing
% School spelled out; the exported value "szte" is presumably the abbreviation
% of the University of Szeged -- NOTE(review): confirm.
@phdthesis{berta2020collaborative,
  author = {Berta, {\'A}rp{\'a}d},
  title  = {Collaborative Mobile Gossip Learning},
  school = {University of Szeged},
  year   = {2020},
}
% Scalable SGD (fully connected topology but not complete averaging every step and asynchronous local updates)
% The method name and the acronym are braced so sentence-casing styles do not
% render them as "Swarmsgd" and "sgd".
@misc{nadiradze2020swarmsgd,
  author        = {Giorgi Nadiradze and Amirmojtaba Sabour and Dan Alistarh and Aditya Sharma and Ilia Markov and Vitaly Aksenov},
  title         = {{SwarmSGD}: Scalable Decentralized {SGD} with Local Updates},
  year          = {2020},
  eprint        = {1910.12308},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% Theoretical analysis of fully decentralized SGD.
% Cites the ICML 2020 version: https://proceedings.icml.cc/paper/2020/file/6c2e49911b68d315555d5b3eb0dd45bf-Paper.pdf
% (preprint: arXiv:2003.10422). The acronym is braced against sentence casing.
@inproceedings{koloskova2020unified,
  author    = {Koloskova, Anastasia and Loizou, Nicolas and Boreiri, Sadra and Jaggi, Martin and Stich, Sebastian U},
  title     = {A unified theory of decentralized {SGD} with changing topology and local updates},
  booktitle = {ICML},
  year      = {2020},
}
% arXiv preprint, identified through the eprint fields.
@misc{gaur2020training,
  author        = {Divya Gaur and Joachim Folz and Andreas Dengel},
  title         = {{Training Deep Neural Networks Without Batch Normalization}},
  year          = {2020},
  eprint        = {2008.07970},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% arXiv preprint, identified through the eprint fields.
@misc{you2017large,
  author        = {Yang You and Igor Gitman and Boris Ginsburg},
  title         = {Large Batch Training of Convolutional Networks},
  year          = {2017},
  eprint        = {1708.03888},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% arXiv preprint, identified through the eprint fields.
@misc{wu2018group,
  author        = {Yuxin Wu and Kaiming He},
  title         = {Group Normalization},
  year          = {2018},
  eprint        = {1803.08494},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% Technical report (the URL file name ends in "-TR"). As an article the entry
% had no journal and standard styles ignored its howpublished URL, so it is
% typed techreport with the URL in note.
% NOTE(review): institution inferred from the cs.toronto.edu host -- confirm.
@techreport{krizhevsky2009learning,
  author      = {Krizhevsky, Alex},
  title       = {{Learning Multiple Layers of Features from Tiny Images}},
  institution = {University of Toronto},
  year        = {2009},
  note        = {\url{https://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf}},
}
% Fast linear iterations for distributed averaging (Systems & Control Letters, 2004).
@article{xiao2004fast,
  author    = {Xiao, Lin and Boyd, Stephen},
  title     = {Fast linear iterations for distributed averaging},
  journal   = {Systems \& Control Letters},
  volume    = {53},
  number    = {1},
  pages     = {65--78},
  year      = {2004},
  publisher = {Elsevier},
}
% Gossip-based peer sampling (ACM TOCS, 2007).
% The exported pages value "8--es" is an export placeholder, presumably for an
% article-numbered paper; only the leading number is kept.
% NOTE(review): confirm the article number.
@article{jelasity2007gossip,
  author    = {Jelasity, M{\'a}rk and Voulgaris, Spyros and Guerraoui, Rachid and Kermarrec, Anne-Marie and Van Steen, Maarten},
  title     = {Gossip-based peer sampling},
  journal   = {ACM Transactions on Computer Systems (TOCS)},
  volume    = {25},
  number    = {3},
  pages     = {8},
  year      = {2007},
  publisher = {ACM New York, NY, USA},
}
% Initialization and momentum in deep learning (ICML 2013).
@inproceedings{pmlr-v28-sutskever13,
  author    = {Ilya Sutskever and James Martens and George Dahl and Geoffrey Hinton},
  title     = {On the importance of initialization and momentum in deep learning},
  booktitle = {ICML},
  year      = {2013},
}
% Gradient-based learning applied to document recognition (Proc. IEEE, 1998).
% Publisher acronym written in capitals, as in the other IEEE entries.
@article{lecun1998gradient,
  author    = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
  title     = {{Gradient-based Learning Applied to Document Recognition}},
  journal   = {Proceedings of the IEEE},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  year      = {1998},
  publisher = {IEEE},
}
% Chord peer-to-peer lookup protocol. Journal name capitalised in full because
% styles print this field verbatim.
@article{stoica2003chord,
  author    = {Stoica, Ion and Morris, Robert and Liben-Nowell, David and Karger, David R and Kaashoek, M Frans and Dabek, Frank and Balakrishnan, Hari},
  title     = {Chord: a scalable peer-to-peer lookup protocol for internet applications},
  journal   = {IEEE/ACM Transactions on Networking},
  volume    = {11},
  number    = {1},
  pages     = {17--32},
  year      = {2003},
  publisher = {IEEE},
}