diff --git a/exps/LFNA/basic-same.py b/exps/LFNA/basic-same.py
index 99a7861..b565f4d 100644
--- a/exps/LFNA/basic-same.py
+++ b/exps/LFNA/basic-same.py
@@ -58,6 +58,7 @@ def main(args):
     # build model
     model = get_model(**model_kwargs)
     print(model)
+    model.analyze_weights()
     # build optimizer
     optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr, amsgrad=True)
     criterion = torch.nn.MSELoss()
@@ -168,7 +169,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--epochs",
         type=int,
-        default=1000,
+        default=300,
         help="The total number of epochs.",
     )
     parser.add_argument(
diff --git a/lib/models/xcore.py b/lib/models/xcore.py
index 08c03a6..143278c 100644
--- a/lib/models/xcore.py
+++ b/lib/models/xcore.py
@@ -40,13 +40,11 @@ def get_model(config: Dict[Text, Any], **kwargs):
         norm_cls = super_name2norm[kwargs["norm_cls"]]
         sub_layers, last_dim = [], kwargs["input_dim"]
         for i, hidden_dim in enumerate(kwargs["hidden_dims"]):
-            sub_layers.extend(
-                [
-                    norm_cls(last_dim, elementwise_affine=False),
-                    SuperLinear(last_dim, hidden_dim),
-                    act_cls(),
-                ]
-            )
+            # skip normalization for a single feature, since it would zero the input
+            if last_dim > 1:
+                sub_layers.append(norm_cls(last_dim, elementwise_affine=False))
+            sub_layers.append(SuperLinear(last_dim, hidden_dim))
+            sub_layers.append(act_cls())
             last_dim = hidden_dim
         sub_layers.append(SuperLinear(last_dim, kwargs["output_dim"]))
         model = SuperSequential(*sub_layers)
diff --git a/lib/xlayers/super_module.py b/lib/xlayers/super_module.py
index 58a6993..c99be99 100644
--- a/lib/xlayers/super_module.py
+++ b/lib/xlayers/super_module.py
@@ -66,6 +66,16 @@ class SuperModule(abc.ABC, nn.Module):
             container.append(name, buf, False)
         return container
 
+    def analyze_weights(self):
+        # print the shape, mean, and std of every learnable parameter
+        with torch.no_grad():
+            for name, param in self.named_parameters():
+                shapestr = "[{:10s}] shape={:}".format(name, list(param.shape))
+                finalstr = shapestr + " {:.2f} +- {:.2f}".format(
+                    param.mean(), param.std()
+                )
+                print(finalstr)
+
     @property
     def abstract_search_space(self):
         raise NotImplementedError