From 7575bd984c6e58eda1a668b38abfaf70565fa581 Mon Sep 17 00:00:00 2001 From: Rishi Sharma <rishi.sharma@epfl.ch> Date: Thu, 9 Dec 2021 08:53:09 +0100 Subject: [PATCH] Separate files shared_params --- eval/ip_addr_4Machines.json | 6 +++++ eval/ip_addr_5Machines.json | 7 +++++ src/decentralizepy/sharing/PartialModel.py | 31 +++++++++++----------- 3 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 eval/ip_addr_4Machines.json create mode 100644 eval/ip_addr_5Machines.json diff --git a/eval/ip_addr_4Machines.json b/eval/ip_addr_4Machines.json new file mode 100644 index 0000000..8b7765a --- /dev/null +++ b/eval/ip_addr_4Machines.json @@ -0,0 +1,6 @@ +{ + "0": "10.90.41.130", + "1": "10.90.41.131", + "2": "10.90.41.132", + "3": "10.90.41.133" +} \ No newline at end of file diff --git a/eval/ip_addr_5Machines.json b/eval/ip_addr_5Machines.json new file mode 100644 index 0000000..5501242 --- /dev/null +++ b/eval/ip_addr_5Machines.json @@ -0,0 +1,7 @@ +{ + "0": "10.90.41.129", + "1": "10.90.41.130", + "2": "10.90.41.131", + "3": "10.90.41.132", + "4": "10.90.41.133" +} \ No newline at end of file diff --git a/src/decentralizepy/sharing/PartialModel.py b/src/decentralizepy/sharing/PartialModel.py index 69875ad..424fdcb 100644 --- a/src/decentralizepy/sharing/PartialModel.py +++ b/src/decentralizepy/sharing/PartialModel.py @@ -1,6 +1,7 @@ import json import logging import os +from pathlib import Path import numpy import torch @@ -28,6 +29,10 @@ class PartialModel(Sharing): self.alpha = alpha self.dict_ordered = dict_ordered self.communication_round = 0 + self.folder_path = os.path.join( + self.log_dir, "shared_params/{}".format(self.rank) + ) + Path(self.folder_path).mkdir(parents=True, exist_ok=True) def extract_top_gradients(self): logging.info("Summing up gradients") @@ -48,26 +53,20 @@ class PartialModel(Sharing): with torch.no_grad(): _, G_topk = self.extract_top_gradients() - if self.communication_round: - with open( - os.path.join( - self.log_dir, "{}_shared_params.json".format(self.rank) - ), - "r", - ) as inf: - shared_params = json.load(inf) - else: - shared_params = dict() - shared_params["order"] = list(self.model.state_dict().keys()) - shapes = dict() - for k, v in self.model.state_dict().items(): - shapes[k] = list(v.shape) - shared_params["shapes"] = shapes + shared_params = dict() + shared_params["order"] = list(self.model.state_dict().keys()) + shapes = dict() + for k, v in self.model.state_dict().items(): + shapes[k] = list(v.shape) + shared_params["shapes"] = shapes shared_params[self.communication_round] = G_topk.tolist() with open( - os.path.join(self.log_dir, "{}_shared_params.json".format(self.rank)), + os.path.join( + self.folder_path, + "{}_shared_params.json".format(self.communication_round + 1), + ), "w", ) as of: json.dump(shared_params, of) -- GitLab