update scripts-cluster
@@ -7,6 +7,7 @@ import torchvision.transforms as transforms
 from utils import Cutout
 from .TieredImageNet import TieredImageNet
 
+
 Dataset2Class = {'cifar10' : 10,
                  'cifar100': 100,
                  'tiered'  : -1,
@@ -59,11 +60,11 @@ def get_datasets(name, root, cutout):
   else: raise TypeError("Unknow dataset : {:}".format(name))
 
   if name == 'cifar10':
-    train_data = dset.CIFAR10(root, train=True, transform=train_transform, download=True)
-    test_data  = dset.CIFAR10(root, train=True, transform=test_transform , download=True)
+    train_data = dset.CIFAR10(root, train=True , transform=train_transform, download=True)
+    test_data  = dset.CIFAR10(root, train=False, transform=test_transform , download=True)
   elif name == 'cifar100':
-    train_data = dset.CIFAR100(root, train=True, transform=train_transform, download=True)
-    test_data  = dset.CIFAR100(root, train=True, transform=test_transform , download=True)
+    train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
+    test_data  = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
   elif name == 'imagenet-1k' or name == 'imagenet-100':
     train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
     test_data  = dset.ImageFolder(osp.join(root, 'val'), train_transform)
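The CIFAR hunk above fixes the evaluation split: both lines previously loaded the training set, so "test" accuracy was being measured on training data. (The imagenet branch above still passes train_transform for the 'val' folder, which looks like the same kind of slip.) A minimal sketch, assuming only torchvision, of what the corrected lines load:

# Sketch, not part of the commit: train=False selects the held-out 10k split,
# so the two datasets no longer overlap.
import torchvision.datasets as dset
import torchvision.transforms as transforms

transform  = transforms.ToTensor()
train_data = dset.CIFAR10('./data', train=True , transform=transform, download=True)
test_data  = dset.CIFAR10('./data', train=False, transform=transform, download=True)
print(len(train_data), len(test_data))  # 50000 10000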
@@ -1,12 +1,5 @@
 from .model_search    import Network
-from .model_search_v1 import NetworkV1
-from .model_search_f1 import NetworkF1
-# acceleration model
-from .model_search_f1_acc2 import NetworkFACC1
-from .model_search_acc2 import NetworkACC2
-from .model_search_v3 import NetworkV3
-from .model_search_v4 import NetworkV4
-from .model_search_v5 import NetworkV5
 from .CifarNet import NetworkCIFAR
 from .ImageNet import NetworkImageNet
 
@@ -128,7 +128,7 @@ class Transition(nn.Module):
 
     self.ops2 = nn.ModuleList(
                   [nn.Sequential(
-                      nn.MaxPool2d(3, stride=1, padding=1),
+                      nn.MaxPool2d(3, stride=2, padding=1),
                       nn.BatchNorm2d(C, affine=True)),
                    nn.Sequential(
                       nn.MaxPool2d(3, stride=2, padding=1),
@@ -144,7 +144,8 @@ class Transition(nn.Module):
     if self.training and drop_prob > 0.:
       X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)
 
-    X2 = self.ops2[0] (X0+X1)
+    #X2 = self.ops2[0] (X0+X1)
+    X2 = self.ops2[0] (s0)
     X3 = self.ops2[1] (s1)
     if self.training and drop_prob > 0.:
       X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
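The two Transition hunks are coupled: ops2[0] now pools the raw cell input s0 instead of the already-reduced X0+X1, so its stride moves from 1 to 2 to keep that branch's output at the halved resolution, matching ops2[1](s1). A quick sketch of the shape arithmetic, assuming a 32x32 feature map:

# Sketch: with kernel 3 and padding 1, stride 1 preserves H x W while stride 2 halves it.
import torch
import torch.nn as nn

x = torch.randn(2, 16, 32, 32)
print(nn.MaxPool2d(3, stride=1, padding=1)(x).shape)  # torch.Size([2, 16, 32, 32])
print(nn.MaxPool2d(3, stride=2, padding=1)(x).shape)  # torch.Size([2, 16, 16, 16])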
@@ -1,180 +0,0 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
    if use_sum > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    else:
      clist = []
      for j, cpu_weight in enumerate(cpu_weights):
        if abs(cpu_weight) > 1e-10:
          clist.append( weights[j] * self._ops[j](x) )
      assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
      return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkACC2(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkACC2, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau        = 5
    self.use_gumbel = True

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
        else               : weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
        else               : weights = F.softmax(self.alphas_normal, dim=-1)

      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype
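The deleted model_search_acc2.py is the Gumbel-softmax variant: MixedOp inspects cpu_weights and, when few entries are non-zero, evaluates only the selected candidate ops. Hard Gumbel samples make that the common case; a small sketch of the property it relies on, assuming 14 edges and 8 candidate operations:

# Sketch: F.gumbel_softmax with hard=True returns one-hot rows (straight-through
# estimator), so most entries are exactly zero and the |w| > 1e-10 test above
# lets MixedOp skip every unselected operation.
import torch
import torch.nn.functional as F

alphas  = torch.zeros(14, 8, requires_grad=True)
weights = F.gumbel_softmax(alphas, tau=5, hard=True)
print(weights[0])            # a one-hot row
print((weights != 0).sum())  # 14, one non-zero entry per edge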
@@ -1,167 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkF1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkF1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      if reduction:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    return -1

  def get_tau(self):
    return -1

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        s0, s1 = s1, cell(s0, s1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
        s0, s1 = s1, cell(s0, s1, weights)
      #print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      #gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=None       , reduce_concat=concat
      )
    return genotype
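model_search_f1.py fixed every reduction cell to the hand-crafted Transition module, so only the normal-cell alphas were searched and the genotype carried no reduce gene. A sketch, assuming the package's Genotype and an 8-op PRIMITIVES list, of what genotype() returns:

# Sketch: each gene entry is (op_name, input_node, weight); two input edges are
# kept per intermediate node, giving 2 * steps = 8 entries for the normal cell.
net  = NetworkF1(C=16, num_classes=10, layers=8)
geno = net.genotype()
print(len(geno.normal))  # 8
print(geno.reduce)       # None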
@@ -1,183 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
    if use_sum > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    else:
      clist = []
      for j, cpu_weight in enumerate(cpu_weights):
        if abs(cpu_weight) > 1e-10:
          clist.append( weights[j] * self._ops[j](x) )
      assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
      return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkFACC1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkFACC1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier
    self.tau     = 5
    self.use_gumbel = True

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      if reduction:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        s0, s1 = s1, cell(s0, s1)
      else:
        if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
        else               : weights = F.softmax(self.alphas_normal, dim=-1)
        s0, s1 = s1, cell(s0, s1, weights)
      #print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      #gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=None       , reduce_concat=concat
      )
    return genotype
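model_search_f1_acc2.py combined both ideas: fixed Transition reduction cells plus the sparse Gumbel forward. Its temperature is meant to be annealed from outside through set_tau; a hypothetical linear schedule (the real one lives in the training scripts, not in this diff) might look like:

# Assumed schedule: anneal the Gumbel temperature from 5.0 down to 0.1.
tau_max, tau_min, epochs = 5.0, 0.1, 50
net = NetworkFACC1(C=16, num_classes=10, layers=8)
for epoch in range(epochs):
  net.set_tau(tau_max - (tau_max - tau_min) * epoch / (epochs - 1))
  # ... one epoch of weight and architecture updates ...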
@@ -1,161 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    return -1

  def get_tau(self):
    return -1

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_normal, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype
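All of these search networks expose the same arch_parameters()/base_parameters() split, which exists so the two groups can be optimized separately in the usual DARTS bi-level fashion. A sketch with illustrative hyper-parameters (the actual values live in the training scripts):

# Sketch: one optimizer per parameter group; the alphas typically use Adam
# while the network weights use SGD with momentum.
import torch

net = NetworkV1(C=16, num_classes=10, layers=8)
w_optimizer = torch.optim.SGD (net.base_parameters(), lr=0.025, momentum=0.9, weight_decay=3e-4)
a_optimizer = torch.optim.Adam(net.arch_parameters(), lr=3e-4, betas=(0.5, 0.999), weight_decay=1e-3)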
@@ -1,171 +0,0 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select



class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0:
        indicator = all_select( len(states) )
      else:
        indicator = random_select( len(states), 0.5 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist) / sum(indicator)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV3(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV3, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau        = 5

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype
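NetworkV3, V4 and V5 depend on random_select and all_select from construct_utils, whose implementation is not part of this diff. A hypothetical sketch consistent with how they are used (V3/V4's Cell.forward divides by sum(indicator), so an all-zero draw must be impossible):

# Assumed helpers: all_select keeps every input edge; random_select keeps each
# edge with probability p but never returns all zeros.
import random

def all_select(n):
  return [1] * n

def random_select(n, p):
  indicator = [1 if random.random() < p else 0 for _ in range(n)]
  if sum(indicator) == 0:
    indicator[random.randrange(n)] = 1
  return indicator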
@@ -1,176 +0,0 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    indicators = random_select( len(cpu_weights), 0.5 )
    clist, ws = [], []
    for w, indicator, op in zip(weights, indicators, self._ops):
      if indicator:
        clist.append( w * op(x) )
        ws.append( w )
    return sum(clist) / sum(ws)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0:
        indicator = all_select( len(states) )
      else:
        indicator = random_select( len(states), 0.5 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist) / sum(indicator)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV4(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV4, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau        = 5

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype
@@ -1,174 +0,0 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    clist = []
    for j, cpu_weight in enumerate(cpu_weights):
      if abs(cpu_weight) > 1e-10:
        clist.append( weights[j] * self._ops[j](x) )
    assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
    if len(clist) == 1: return clist[0]
    else              : return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0: indicator = all_select( len(states) )
      else     : indicator = random_select( len(states), 0.6 )

      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV5(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV5, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps  = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau        = 5

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
      else:
        weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype
@@ -2,6 +2,7 @@ import torch
 import torch.nn as nn
 import numpy as np
 
+
 def count_parameters_in_MB(model):
   if isinstance(model, nn.Module):
     return np.sum(np.prod(v.size()) for v in model.parameters())/1e6
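The last hunk only adds a blank line before count_parameters_in_MB (PEP 8 spacing for a top-level function). For reference, a sketch of the nn.Module path shown above:

# Sketch: nn.Linear(512, 1000) has 512*1000 + 1000 = 513000 parameters,
# so the helper reports 0.513 ("MB" here meaning millions of parameters).
import torch.nn as nn

model = nn.Linear(512, 1000)
print(count_parameters_in_MB(model))  # 0.513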