95 lines
3.4 KiB
Python
95 lines
3.4 KiB
Python
# Copyright 2021 Samsung Electronics Co., Ltd.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# =============================================================================
|
|
|
|
import torch
|
|
import numpy as np
|
|
|
|
from . import measure
|
|
|
|
|
|
def recal_bn(network, inputs, targets, recalbn, device):
    """Re-estimate the BatchNorm2d running statistics of ``network``.

    Every ``BatchNorm2d`` layer that tracks running statistics has them
    zeroed and its ``momentum`` set to ``None`` (PyTorch then keeps a
    cumulative average instead of an exponential one).  The network is put
    in train mode and fed up to ``recalbn`` batches without gradient
    tracking so that the statistics are rebuilt from those batches.

    Args:
        network: ``torch.nn.Module`` to recalibrate; modified in place and
            left in train mode.
        inputs: Iterable of input batches (tensors).
        targets: Iterable paired with ``inputs``.  Its values are not used,
            but iteration stops at the shorter of the two iterables.
        recalbn: Maximum number of batches to forward.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The same ``network`` object.
    """
    for m in network.modules():
        # Layers built with track_running_stats=False have no buffers to reset.
        if isinstance(m, torch.nn.BatchNorm2d) and m.running_mean is not None:
            m.running_mean.data.fill_(0)
            m.running_var.data.fill_(0)
            m.num_batches_tracked.data.zero_()
            m.momentum = None
    network.train()
    with torch.no_grad():
        for i, (batch, _) in enumerate(zip(inputs, targets)):
            if i >= recalbn:
                break
            # `.to` (rather than `.cuda`) lets this run on CPU devices as well.
            # The output is discarded: only the BN side effect matters, so the
            # network may return a tensor or a tuple of any length.
            network(batch.to(device=device, non_blocking=True))
    return network
|
|
|
|
|
|
def get_ntk_n(inputs, targets, network, device, recalbn=0, train_mode=False, num_batch=1):
    """Return the condition number of the empirical NTK of ``network``.

    For each sample in ``inputs`` the network output for that sample is
    back-propagated on its own (with an all-ones upstream gradient), and the
    gradients of every parameter whose name contains ``'weight'`` are
    flattened into one row.  The Gram matrix of those rows is the empirical
    neural tangent kernel; the result is the ratio of its largest to its
    smallest eigenvalue.

    Args:
        inputs: Batch tensor; ``len(inputs)`` is the number of samples.
        targets: Unused; kept for interface compatibility.
        network: ``torch.nn.Module`` to evaluate.  As a side effect it is
            switched to eval mode and its gradients are cleared.  If it
            returns a tuple, element 1 is taken as the logits.
        device: Device the inputs are moved to before the forward pass.
        recalbn: Unused; kept for interface compatibility.
        train_mode: Unused; kept for interface compatibility.
        num_batch: Number of passes over ``inputs``; each pass contributes
            ``len(inputs)`` rows.  Must be at least 1, otherwise there are
            no gradients to stack.

    Returns:
        The condition number as returned by ``np.nan_to_num``, with NaN
        replaced by ``100000.0``.
    """
    network.eval()
    # `.to` (rather than `.cuda`) keeps the measure usable on CPU devices.
    inputs = inputs.to(device=device, non_blocking=True)

    per_sample_grads = []
    for _ in range(num_batch):
        network.zero_grad()
        batch = inputs.clone()
        logit = network(batch)
        if isinstance(logit, tuple):
            logit = logit[1]  # 201 networks: return features and logits
        for idx in range(len(batch)):
            sample_logit = logit[idx:idx + 1]
            # retain_graph: the same forward graph is reused for every sample.
            sample_logit.backward(torch.ones_like(sample_logit), retain_graph=True)
            # reshape (not view) also handles non-contiguous gradients.
            grad = [
                W.grad.reshape(-1).detach()
                for name, W in network.named_parameters()
                if 'weight' in name and W.grad is not None
            ]
            # torch.cat copies, so clearing the grads below cannot alias the row.
            per_sample_grads.append(torch.cat(grad, -1))
            network.zero_grad()
            if batch.is_cuda:
                torch.cuda.empty_cache()

    jacobian = torch.stack(per_sample_grads, 0)
    ntk = torch.einsum('nc,mc->nm', jacobian, jacobian)
    eigenvalues, _ = torch.linalg.eigh(ntk)  # ascending
    return np.nan_to_num((eigenvalues[-1] / eigenvalues[0]).item(), copy=True, nan=100000.0)
|
|
|
|
@measure('ntk', bn=True)
def compute_ntk(net, inputs, targets, split_data=1, loss_fn=None):
    """Compute the NTK condition-number score for ``net``.

    Clears any gradients held by ``net`` and delegates to ``get_ntk_n`` on
    the device that ``inputs`` lives on.  ``split_data`` and ``loss_fn`` are
    accepted but not used here.

    Returns:
        The value returned by ``get_ntk_n``, or ``np.nan`` if it raised; in
        that case the exception is printed rather than propagated.
    """
    target_device = inputs.device
    # Start from clean gradients before the per-sample backward passes.
    net.zero_grad()

    try:
        return get_ntk_n(inputs, targets, net, target_device)
    except Exception as err:
        print(err)
        return np.nan
|