# Copyright 2021 Samsung Electronics Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

import torch
import numpy as np

from . import measure


def recal_bn(network, inputs, targets, recalbn, device):
    """Reset and re-estimate the BatchNorm2d running statistics of ``network``.

    Every ``torch.nn.BatchNorm2d`` module has its running mean, running
    variance and batch counter zeroed and its momentum set to ``None``. The
    network is then switched to training mode and fed up to ``recalbn``
    batches under ``torch.no_grad()`` so the statistics are rebuilt.

    Args:
        network: Module whose BatchNorm2d statistics are recalibrated in place.
        inputs: Iterable of input batches (tensors).
        targets: Iterable iterated in lockstep with ``inputs``; its items are
            not used, but iteration stops at the shorter of the two iterables.
        recalbn: Maximum number of batches to forward through the network.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The same ``network`` object, left in training mode.
    """
    for module in network.modules():
        if isinstance(module, torch.nn.BatchNorm2d):
            module.running_mean.data.fill_(0)
            module.running_var.data.fill_(0)
            module.num_batches_tracked.data.zero_()
            # momentum=None selects a cumulative (simple) moving average, so
            # the zeroed statistics are fully replaced by the observed batches.
            module.momentum = None
    network.train()
    with torch.no_grad():
        for i, (batch, _) in enumerate(zip(inputs, targets)):
            if i >= recalbn:
                break
            # .to() instead of .cuda() so a CPU device is accepted as well.
            batch = batch.to(device=device, non_blocking=True)
            # Only the side effect on the BN statistics matters; the output is
            # discarded whatever its structure is.
            network(batch)
    return network


def get_ntk_n(inputs, targets, network, device, recalbn=0, train_mode=False, num_batch=1):
    """Compute an eigenvalue ratio of the empirical NTK Gram matrix.

    For every sample in ``inputs`` the gradient of its logits (back-propagated
    with an all-ones upstream gradient) with respect to all parameters whose
    name contains ``'weight'`` is flattened into one row. The Gram matrix of
    these rows is eigendecomposed and the ratio of the smallest to the largest
    eigenvalue is returned.

    Args:
        inputs: Batch tensor; its first dimension indexes samples.
        targets: Unused; kept for interface compatibility.
        network: Module to evaluate. It is put into eval mode and its
            gradients are zeroed as a side effect.
        device: Device the inputs are moved to. The network is not moved, so
            it has to live on that device already.
        recalbn: Unused; kept for interface compatibility.
        train_mode: Unused; the network is always evaluated in eval mode.
        num_batch: Number of passes over ``inputs``. Every pass reuses the
            same batch, so values above 1 add duplicate gradient rows. A value
            below 1 collects no rows and makes ``torch.stack`` fail.

    Returns:
        ``eigenvalues[0] / eigenvalues[-1]`` (eigenvalues in ascending order)
        passed through ``np.nan_to_num`` with NaN mapped to ``100000.0``.
    """
    network.eval()

    # .to() instead of .cuda(): identical for CUDA devices, and additionally
    # works when ``device`` is a CPU device.
    inputs = inputs.to(device=device, non_blocking=True)

    sample_grads = []
    for _ in range(num_batch):
        network.zero_grad()
        logit = network(inputs.clone())
        if isinstance(logit, tuple):
            # 201 networks: return features and logits
            # NOTE(review): "201" presumably refers to NAS-Bench-201 - confirm.
            logit = logit[1]
        for idx in range(len(inputs)):
            sample_logit = logit[idx:idx + 1]
            # retain_graph: the same forward graph is back-propagated once per
            # sample.
            sample_logit.backward(torch.ones_like(sample_logit), retain_graph=True)
            # reshape (not view) so non-contiguous gradients are handled too;
            # torch.cat below copies the data before the grads are cleared.
            grad = [
                W.grad.reshape(-1).detach()
                for name, W in network.named_parameters()
                if 'weight' in name and W.grad is not None
            ]
            sample_grads.append(torch.cat(grad, -1))
            network.zero_grad()
            if inputs.is_cuda:
                torch.cuda.empty_cache()

    grads = torch.stack(sample_grads, 0)
    ntk = torch.einsum('nc,mc->nm', grads, grads)
    eigenvalues, _ = torch.linalg.eigh(ntk)  # ascending
    return np.nan_to_num((eigenvalues[0] / eigenvalues[-1]).item(), copy=True, nan=100000.0)


@measure('ntk', bn=True)
def compute_ntk(net, inputs, targets, split_data=1, loss_fn=None):
    """Return the NTK eigenvalue ratio of ``net`` on ``inputs``.

    Delegates to ``get_ntk_n`` using the device ``inputs`` already lives on.

    Args:
        net: Network to score; its gradients are zeroed before the computation.
        inputs: Input batch tensor.
        targets: Forwarded to ``get_ntk_n``.
        split_data: Unused by this function.
        loss_fn: Unused by this function.

    Returns:
        The value computed by ``get_ntk_n``, or ``np.nan`` if it raised.
    """
    device = inputs.device
    # Compute gradients (but don't apply them)
    net.zero_grad()
    try:
        conds = get_ntk_n(inputs, targets, net, device)
    except Exception as e:
        # Best effort: report the failure and fall back to NaN instead of
        # propagating the exception to the caller.
        print(e)
        conds = np.nan
    return conds