update

2023-05-14 10:57:08 +08:00
parent a37e99a057
commit 993d55076e
4 changed files with 281 additions and 1 deletions
--- a/zero-cost-nas/foresight/pruners/measures/gradsign.py
+++ b/zero-cost-nas/foresight/pruners/measures/gradsign.py
@@ -0,0 +1,76 @@
 # Copyright 2021 Samsung Electronics Co., Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #     http://www.apache.org/licenses/LICENSE-2.0
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
 import torch
 from torch import nn
 import numpy as np
 from . import measure
 def get_flattened_metric(net, metric):
    """Concatenate a per-layer metric over every Conv2d/Linear layer of ``net``.

    ``metric`` is called once for each matching layer, in ``net.modules()``
    order, and its result is flattened with ``.flatten()``.  The flattened
    pieces are joined with ``np.concatenate`` and returned.
    """
    wanted_types = (nn.Conv2d, nn.Linear)
    pieces = [
        metric(module).flatten()
        for module in net.modules()
        if isinstance(module, wanted_types)
    ]
    return np.concatenate(pieces)
 def get_grad_conflict(net, inputs, targets, loss_fn):
    """Score how consistently the per-sample weight gradients agree in sign.

    A separate forward/backward pass is run for every sample of the batch.
    After each pass the weight gradients of all Conv2d/Linear layers are
    flattened (through ``get_flattened_metric``) and reduced to their signs.
    The signs are summed over the samples, and the score is the sum of the
    absolute per-weight totals, with NaN entries ignored.

    Args:
        net: model to evaluate; its gradients are cleared before every pass
            and hold the last sample's gradients on return.
        inputs: batch, indexed along its first dimension.
        targets: targets, indexed the same way as ``inputs``.
        loss_fn: callable ``loss_fn(outputs, targets)`` whose result supports
            ``backward()``.

    Returns:
        The gradient-sign score as a NumPy scalar.

    Raises:
        ValueError: if the batch contains no samples.
    """
    def _weight_grad(layer):
        # A layer that received no gradient in this pass contributes zeros.
        grad = layer.weight.grad
        if grad is None:
            grad = torch.zeros_like(layer.weight)
        return grad.detach().cpu().numpy()

    num_samples = inputs.shape[0]
    if num_samples == 0:
        raise ValueError("need at least one sample to compute the gradient-sign score")

    # Keep only a running sum of the signs: the score never needs the full
    # (samples x weights) gradient matrix, so there is no reason to hold it.
    sign_sum = None
    for i in range(num_samples):
        net.zero_grad()
        # List indexing keeps the batch dimension.  Calling the module (rather
        # than ``forward``) makes sure registered hooks still run.
        outputs = net(inputs[[i]])
        loss = loss_fn(outputs, targets[[i]])
        loss.backward()
        signs = np.sign(get_flattened_metric(net, _weight_grad))
        sign_sum = signs if sign_sum is None else sign_sum + signs
    return np.nansum(np.abs(sign_sum))
 def get_gradsign(input, target, net, device, loss_fn):
    """Move the model and the batch to ``device`` and return the gradsign score.

    The score comes from a single ``get_grad_conflict`` call; it is wrapped in
    a one-element list and averaged with ``np.mean`` before being returned.
    """
    net = net.to(device)
    batch = input.to(device)
    labels = target.to(device)
    scores = [get_grad_conflict(net=net, inputs=batch, targets=labels, loss_fn=loss_fn)]
    return np.mean(scores)
@measure('gradsign', bn=True)
def compute_gradsign(net, inputs, targets, split_data=1, loss_fn=None):
    """Entry point decorated with ``measure('gradsign', bn=True)``.

    Clears the gradients of ``net`` and evaluates ``get_gradsign`` on the
    device that ``inputs`` lives on.  Any exception raised by the computation
    is printed and ``np.nan`` is returned instead.  ``split_data`` is accepted
    but not used here.
    """
    target_device = inputs.device
    net.zero_grad()
    try:
        return get_gradsign(inputs, targets, net, target_device, loss_fn)
    except Exception as err:
        print(err)
        return np.nan
--- a/zero-cost-nas/foresight/pruners/measures/ntk.py
+++ b/zero-cost-nas/foresight/pruners/measures/ntk.py
@@ -0,0 +1,94 @@
 # Copyright 2021 Samsung Electronics Co., Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #     http://www.apache.org/licenses/LICENSE-2.0
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
 import torch
 import numpy as np
 from . import measure
 def recal_bn(network, inputs, targets, recalbn, device):
    """Reset the BatchNorm2d running statistics of ``network`` and re-estimate them.

    Every BatchNorm2d layer has its running mean/variance and batch counter
    zeroed and its momentum set to ``None``.  The network is then switched to
    training mode and fed up to ``recalbn`` items of ``inputs`` under
    ``torch.no_grad()``.

    Args:
        network: model whose BatchNorm2d layers are recalibrated in place.
        inputs: iterable of input batches.
        targets: iterable paired with ``inputs``; it only bounds the number of
            iterations (through ``zip``), its values are not used.
        recalbn: maximum number of batches to run through the network.
        device: device the batches are moved to before the forward pass.

    Returns:
        The same ``network`` object, left in training mode.
    """
    for m in network.modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            # Layers created with track_running_stats=False carry no buffers.
            if m.running_mean is not None:
                m.running_mean.data.fill_(0)
                m.running_var.data.fill_(0)
                m.num_batches_tracked.data.zero_()
            m.momentum = None
    network.train()
    with torch.no_grad():
        # Distinct loop names: the parameters are no longer shadowed.
        for i, (batch, _) in enumerate(zip(inputs, targets)):
            if i >= recalbn:
                break
            # ``.to`` works for CPU as well as CUDA devices.
            batch = batch.to(device, non_blocking=True)
            # Only the side effect on the BN buffers matters; the output is
            # discarded whatever its structure.
            network(batch)
    return network
 def get_ntk_n(inputs, targets, network, device, recalbn=0, train_mode=False, num_batch=1):
    """Return an eigenvalue ratio of the empirical NTK Gram matrix of ``network``.

    The network is put in eval mode and ``inputs`` is run through it.  For
    each sample, the sample's logits are back-propagated with a gradient of
    ones and the gradients of all parameters whose name contains ``'weight'``
    are flattened into one vector.  The Gram matrix of those vectors is
    eigen-decomposed and ``smallest / largest`` eigenvalue is returned.

    Args:
        inputs: input batch tensor; moved to ``device``.
        targets: unused.
        network: model to evaluate.  If it returns a tuple, element 1 is
            taken as the logits.  It is left in eval mode with cleared grads.
        device: device the inputs are moved to (CPU or CUDA).
        recalbn: unused.
        train_mode: unused.
        num_batch: number of times the same batch is processed; every pass
            appends another set of per-sample gradient vectors.

    Returns:
        The ratio as a NumPy float, with NaN replaced by ``100000.0``.
    """
    network.eval()
    # ``.to`` keeps the function usable on CPU-only machines.
    inputs = inputs.to(device, non_blocking=True)

    per_sample_grads = []
    for _ in range(num_batch):
        network.zero_grad()
        logit = network(inputs)
        if isinstance(logit, tuple):
            logit = logit[1]  # 201 networks: return features and logits
        for idx in range(len(inputs)):
            sample_logit = logit[idx:idx + 1]
            # The graph is shared by all samples of the batch, so keep it.
            sample_logit.backward(torch.ones_like(sample_logit), retain_graph=True)
            grad = [
                # reshape (not view) also copes with non-contiguous gradients
                W.grad.reshape(-1).detach()
                for name, W in network.named_parameters()
                if 'weight' in name and W.grad is not None
            ]
            per_sample_grads.append(torch.cat(grad, -1))
            network.zero_grad()
            torch.cuda.empty_cache()

    grads = torch.stack(per_sample_grads, 0)
    ntk = torch.einsum('nc,mc->nm', [grads, grads])
    eigenvalues, _ = torch.linalg.eigh(ntk)  # ascending
    # NOTE(review): this is smallest/largest, i.e. the reciprocal of the usual
    # condition number, while the NaN fallback is a large value -- confirm the
    # orientation is intended.
    ratio = (eigenvalues[0] / eigenvalues[-1]).item()
    return np.nan_to_num(ratio, copy=True, nan=100000.0)
@measure('ntk', bn=True)
def compute_ntk(net, inputs, targets, split_data=1, loss_fn=None):
    """Entry point decorated with ``measure('ntk', bn=True)``.

    Clears the gradients of ``net`` and evaluates ``get_ntk_n`` on the device
    that ``inputs`` lives on.  Any exception raised by the computation is
    printed and ``np.nan`` is returned instead.  ``split_data`` and
    ``loss_fn`` are accepted but not used here.
    """
    target_device = inputs.device
    net.zero_grad()
    try:
        return get_ntk_n(inputs, targets, net, target_device)
    except Exception as err:
        print(err)
        return np.nan
--- a/zero-cost-nas/foresight/pruners/measures/zen.py
+++ b/zero-cost-nas/foresight/pruners/measures/zen.py
@@ -0,0 +1,110 @@
 # Copyright 2021 Samsung Electronics Co., Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #     http://www.apache.org/licenses/LICENSE-2.0
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
 import torch
 from torch import nn
 import numpy as np
 from . import measure
 def network_weight_gaussian_init(net: nn.Module):
    """Re-initialise ``net`` in place and return it.

    Conv2d and Linear weights are drawn with ``nn.init.normal_`` and their
    biases (when present) are zeroed.  BatchNorm2d scale is set to one and
    shift to zero when the layer has those parameters.  All other modules
    are left untouched.
    """
    with torch.no_grad():
        for m in net.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                # affine=False layers have weight/bias set to None; test for
                # that explicitly rather than swallowing every exception.
                if m.weight is not None:
                    nn.init.ones_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
    return net
 def get_zen(gpu, model, mixup_gamma=1e-2, resolution=32, batch_size=64, repeat=32,
             fp16=False):
    """Return the average Zen-style score of ``model`` over ``repeat`` trials.

    Each trial re-initialises the model in place with
    ``network_weight_gaussian_init`` (the model's existing weights are
    overwritten), draws a random batch plus a perturbed copy of it, and
    compares ``model.forward_pre_GAP`` on both.  The trial score is the log
    of the mean per-sample absolute output difference plus, for every
    BatchNorm2d layer that has a running variance, the log of the square root
    of its mean running variance.

    Args:
        gpu: anything accepted by ``torch.device``; ``None`` selects the CPU.
        model: module exposing ``forward_pre_GAP``; its output is summed over
            dimensions 1, 2 and 3.
        mixup_gamma: scale of the random perturbation added to the input.
        resolution: height and width of the random inputs (3 channels).
        batch_size: number of random samples per trial.
        repeat: number of trials to average over.
        fp16: draw the random inputs in half precision instead of float32.

    Returns:
        The mean trial score as a Python float.
    """
    device = torch.device(gpu) if gpu is not None else torch.device('cpu')
    dtype = torch.half if fp16 else torch.float32
    input_shape = [batch_size, 3, resolution, resolution]

    nas_score_list = []
    with torch.no_grad():
        for _ in range(repeat):
            network_weight_gaussian_init(model)
            base_input = torch.randn(size=input_shape, device=device, dtype=dtype)
            noise = torch.randn(size=input_shape, device=device, dtype=dtype)
            mixup_input = base_input + mixup_gamma * noise
            output = model.forward_pre_GAP(base_input)
            mixup_output = model.forward_pre_GAP(mixup_input)
            nas_score = torch.sum(torch.abs(output - mixup_output), dim=[1, 2, 3])
            nas_score = torch.mean(nas_score)

            # compute BN scaling
            log_bn_scaling_factor = 0.0
            for m in model.modules():
                # Layers without running statistics are skipped explicitly
                # instead of hiding the failure behind a bare except.
                if isinstance(m, nn.BatchNorm2d) and m.running_var is not None:
                    bn_scaling_factor = torch.sqrt(torch.mean(m.running_var))
                    log_bn_scaling_factor += torch.log(bn_scaling_factor)

            nas_score = torch.log(nas_score) + log_bn_scaling_factor
            nas_score_list.append(float(nas_score))

    return float(np.mean(nas_score_list))
@measure('zen', bn=True)
def compute_zen(net, inputs, targets, split_data=1, loss_fn=None):
    """Entry point decorated with ``measure('zen', bn=True)``.

    Clears the gradients of ``net`` and evaluates ``get_zen`` with the device
    that ``inputs`` lives on.  Any exception raised by the computation is
    printed and ``np.nan`` is returned instead.  ``targets``, ``split_data``
    and ``loss_fn`` are accepted but not used here.
    """
    target_device = inputs.device
    net.zero_grad()
    try:
        return get_zen(target_device, net)
    except Exception as err:
        print(err)
        return np.nan
--- a/zero-cost-nas/foresight/pruners/predictive.py
+++ b/zero-cost-nas/foresight/pruners/predictive.py
@@ -108,7 +108,7 @@ def find_measures(net_orig,                  # neural network
    measures = {}
    for k,v in measures_arr.items():
-        if k in ['jacob_cov', 'meco', 'zico']:
+        if k in ['jacob_cov', 'var', 'cor', 'norm', 'meco', 'zico', 'ntk', 'gradsign', 'zen']:
            measures[k] = v
        else:
            measures[k] = sum_arr(v)