update scripts-cluster

Author: Xuanyi Dong, 2019-03-31 22:49:43 +08:00
parent 280c9f3099
commit 4bac459bf9
20 changed files with 118 additions and 1248 deletions

.gitignore

@@ -102,3 +102,4 @@ main_main.py
 # Device
 scripts-nas/.nfs00*
 */.nfs00*
+*.DS_Store

README.md

@@ -1,17 +1,16 @@
-# GDAS
+# Searching for A Robust Neural Architecture in Four GPU Hours
+By Xuanyi Dong and Yi Yang
-University of Technology Sydney
+We propose a Gradient-based neural architecture search approach using a Differentiable Architecture Sampler (GDAS).
-Requirements
+## Requirements
-- PyTorch 1.0
+- PyTorch 1.0.1
 - Python 3.6
 - opencv
 ```
 conda install pytorch torchvision cuda100 -c pytorch
 ```
-## Algorithm
+## Usage
 Train the searched CNN on CIFAR
 ```
@@ -26,6 +25,11 @@ CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14
 CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14
 ```
+Evaluate a trained CNN model
+```
+CUDA_VISIBLE_DEVICES=0 python ./exps-cnn/evaluate.py --data_path $TORCH_HOME/cifar.python --checkpoint ${checkpoint-path}
+CUDA_VISIBLE_DEVICES=0 python ./exps-cnn/evaluate.py --data_path $TORCH_HOME/ILSVRC2012 --checkpoint ${checkpoint-path}
+```
 Train the searched RNN
 ```
@@ -36,3 +40,13 @@ CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-WT2.sh DARTS_V1
 CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-WT2.sh DARTS_V2
 CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-WT2.sh GDAS
 ```
+## Citation
+```
+@inproceedings{dong2019search,
+  title     = {Searching for A Robust Neural Architecture in Four GPU Hours},
+  author    = {Dong, Xuanyi and Yang, Yi},
+  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year      = {2019}
+}
+```
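A minimal sketch of the sampling idea behind GDAS (illustrative stand-in code, not from this repository): each edge keeps one architecture parameter per candidate operation, and a hard Gumbel-softmax draws a single operation per forward pass while remaining differentiable with respect to those parameters.
```
import torch
import torch.nn as nn
import torch.nn.functional as F

# stand-in candidate operations for a single edge
ops = nn.ModuleList([
  nn.Conv2d(16, 16, 3, padding=1),
  nn.AvgPool2d(3, stride=1, padding=1),
  nn.MaxPool2d(3, stride=1, padding=1),
])
alphas = nn.Parameter(torch.zeros(len(ops)))  # architecture parameters for this edge
tau    = 5.0                                  # temperature, annealed as search proceeds

x = torch.randn(2, 16, 8, 8)
weights = F.gumbel_softmax(alphas, tau=tau, hard=True)  # differentiable one-hot sample
index   = int(weights.argmax())
out     = weights[index] * ops[index](x)  # only the sampled operation is executed
```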

exps-cnn/evaluate.py

@@ -0,0 +1,49 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from nas import model_types as models
from train_utils import main_procedure
from train_utils_imagenet import main_procedure_imagenet
from scheduler import load_config

parser = argparse.ArgumentParser("Evaluate-CNN")
parser.add_argument('--data_path' , type=str, help='Path to dataset.')
parser.add_argument('--checkpoint', type=str, help='Path to the checkpoint (.pth) to evaluate.')
args = parser.parse_args()
assert torch.cuda.is_available(), 'torch.cuda is not available'


def main():
  assert os.path.isdir( args.data_path ), 'invalid data-path : {:}'.format(args.data_path)
  assert os.path.isfile( args.checkpoint ), 'invalid checkpoint : {:}'.format(args.checkpoint)

  checkpoint = torch.load( args.checkpoint )
  xargs      = checkpoint['args']            # the argparse namespace saved by the training script
  config     = load_config(xargs.model_config)
  genotype   = models[xargs.arch]
  # clear GPU cache
  torch.cuda.empty_cache()
  # passing the saved state_dict as the pure-evaluate argument runs a test-only pass, no training
  if xargs.dataset == 'imagenet':
    main_procedure_imagenet(config, args.data_path, xargs, genotype, xargs.init_channels, xargs.layers, checkpoint['state_dict'], None)
  else:
    main_procedure(config, xargs.dataset, args.data_path, xargs, genotype, xargs.init_channels, xargs.layers, checkpoint['state_dict'], None)


if __name__ == '__main__':
  main()
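For reference, evaluate.py relies on the training script having saved a checkpoint with at least the two keys read above: 'args', the training-time argparse namespace, and 'state_dict', the searched network's weights. A sketch of that contract with stand-in values (the namespace fields mirror the attributes accessed above; the model is a placeholder):
```
import argparse, torch
import torch.nn as nn

# stand-ins for the artifacts a training run would produce
train_args = argparse.Namespace(model_config='path/to/model.config', arch='GDAS_V1',
                                dataset='cifar10', init_channels=36, layers=20)
basemodel  = nn.Linear(4, 2)  # placeholder for the searched CNN

torch.save({'args': train_args, 'state_dict': basemodel.state_dict()}, 'ckpt.pth')

# evaluate.py recovers both pieces and runs a test-only pass:
ckpt = torch.load('ckpt.pth')
xargs, weights = ckpt['args'], ckpt['state_dict']
```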


@@ -19,7 +19,7 @@ from train_utils_imagenet import main_procedure_imagenet
 from scheduler import load_config
-parser = argparse.ArgumentParser("cifar")
+parser = argparse.ArgumentParser("Train-CNN")
 parser.add_argument('--data_path', type=str, help='Path to dataset')
 parser.add_argument('--dataset', type=str, choices=['imagenet', 'cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
 parser.add_argument('--arch', type=str, choices=models.keys(), help='the searched model.')
@@ -38,6 +38,7 @@ args = parser.parse_args()
 assert torch.cuda.is_available(), 'torch.cuda is not available'

 if args.manualSeed is None:
   args.manualSeed = random.randint(1, 10000)
 random.seed(args.manualSeed)
@@ -72,9 +73,9 @@ def main():
   # clear GPU cache
   torch.cuda.empty_cache()
   if args.dataset == 'imagenet':
-    main_procedure_imagenet(config, args.data_path, args, genotype, args.init_channels, args.layers, log)
+    main_procedure_imagenet(config, args.data_path, args, genotype, args.init_channels, args.layers, None, log)
   else:
-    main_procedure(config, args.dataset, args.data_path, args, genotype, args.init_channels, args.layers, log)
+    main_procedure(config, args.dataset, args.data_path, args, genotype, args.init_channels, args.layers, None, log)
   log.close()

train_utils.py

@@ -2,7 +2,7 @@ import os, sys, time
 from copy import deepcopy
 import torch
 import torchvision.transforms as transforms
+from shutil import copyfile
 from utils import print_log, obtain_accuracy, AverageMeter
 from utils import time_string, convert_secs2time
@@ -11,6 +11,7 @@ from utils import Cutout
 from nas import NetworkCIFAR as Network
 from datasets import get_datasets

 def obtain_best(accuracies):
   if len(accuracies) == 0: return (0, 0)
   tops = [value for key, value in accuracies.items()]
@@ -18,7 +19,7 @@ def obtain_best(accuracies):
   return s2b[-1]

-def main_procedure(config, dataset, data_path, args, genotype, init_channels, layers, log):
+def main_procedure(config, dataset, data_path, args, genotype, init_channels, layers, pure_evaluate, log):

   train_data, test_data, class_num = get_datasets(dataset, data_path, config.cutout)
@@ -57,10 +58,17 @@ def main_procedure(config, dataset, data_path, args, genotype, init_channels, la
   else:
     raise ValueError('Can not find the schedular type : {:}'.format(config.type))

-  checkpoint_path = os.path.join(args.save_path, 'checkpoint-{:}-model.pth'.format(dataset))
-  if os.path.isfile(checkpoint_path):
-    checkpoint  = torch.load( checkpoint_path )
+  checkpoint_path = os.path.join(args.save_path, 'checkpoint-{:}-model.pth'.format(dataset))
+  checkpoint_best = os.path.join(args.save_path, 'checkpoint-{:}-best.pth'.format(dataset))
+  if pure_evaluate:
+    print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
+    basemodel.load_state_dict( pure_evaluate )
+    with torch.no_grad():
+      valid_acc1, valid_acc5, valid_los = _train(test_loader, model, criterion, optimizer, 'test', -1, config, args.print_freq, log)
+    return (valid_acc1, valid_acc5)
+  elif os.path.isfile(checkpoint_path):
+    checkpoint  = torch.load( checkpoint_path )
     start_epoch = checkpoint['epoch']
     basemodel.load_state_dict(checkpoint['state_dict'])
     optimizer.load_state_dict(checkpoint['optimizer'])
@@ -96,12 +104,14 @@ def main_procedure(config, dataset, data_path, args, genotype, init_channels, la
                 'accuracies': accuracies},
                 checkpoint_path)
     best_acc = obtain_best( accuracies )
+    if accuracies[epoch] == best_acc: copyfile(checkpoint_path, checkpoint_best)
     print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
     print_log('----> Save into {:}'.format(checkpoint_path), log)

     # measure elapsed time
     epoch_time.update(time.time() - start_time)
     start_time = time.time()

+  return obtain_best( accuracies )

 def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):

train_utils_imagenet.py

@@ -3,7 +3,7 @@ from copy import deepcopy
 import torch
 import torch.nn as nn
 import torchvision.transforms as transforms
+from shutil import copyfile
 from utils import print_log, obtain_accuracy, AverageMeter
 from utils import time_string, convert_secs2time
@@ -37,7 +37,7 @@ class CrossEntropyLabelSmooth(nn.Module):
     return loss

-def main_procedure_imagenet(config, data_path, args, genotype, init_channels, layers, log):
+def main_procedure_imagenet(config, data_path, args, genotype, init_channels, layers, pure_evaluate, log):

   # training data and testing data
   train_data, valid_data, class_num = get_datasets('imagenet-1k', data_path, -1)
@@ -48,8 +48,6 @@ def main_procedure_imagenet(config, data_path, args, genotype, init_channels, la
   valid_queue = torch.utils.data.DataLoader(
     valid_data, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers)

-  class_num = 1000

   print_log('-------------------------------------- main-procedure', log)
   print_log('config : {:}'.format(config), log)
   print_log('genotype : {:}'.format(genotype), log)
@@ -84,9 +82,16 @@ def main_procedure_imagenet(config, data_path, args, genotype, init_channels, la
   checkpoint_path = os.path.join(args.save_path, 'checkpoint-imagenet-model.pth')
-  if os.path.isfile(checkpoint_path):
-    checkpoint = torch.load( checkpoint_path )
+  checkpoint_best = os.path.join(args.save_path, 'checkpoint-imagenet-best.pth')
+  if pure_evaluate:
+    print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
+    basemodel.load_state_dict( pure_evaluate )
+    with torch.no_grad():
+      valid_acc1, valid_acc5, valid_los = _train(valid_queue, model, criterion, None, 'test' , -1, config, args.print_freq, log)
+    return (valid_acc1, valid_acc5)
+  elif os.path.isfile(checkpoint_path):
+    checkpoint = torch.load( checkpoint_path )
     start_epoch = checkpoint['epoch']
     basemodel.load_state_dict(checkpoint['state_dict'])
     optimizer.load_state_dict(checkpoint['optimizer'])
@@ -122,12 +127,14 @@ def main_procedure_imagenet(config, data_path, args, genotype, init_channels, la
                 'accuracies': accuracies},
                 checkpoint_path)
     best_acc = obtain_best( accuracies )
+    if accuracies[epoch] == best_acc: copyfile(checkpoint_path, checkpoint_best)
     print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
     print_log('----> Save into {:}'.format(checkpoint_path), log)

     # measure elapsed time
     epoch_time.update(time.time() - start_time)
     start_time = time.time()

+  return obtain_best( accuracies )

 def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):


@@ -7,6 +7,7 @@ import torchvision.transforms as transforms
 from utils import Cutout
 from .TieredImageNet import TieredImageNet

 Dataset2Class = {'cifar10' : 10,
                  'cifar100': 100,
                  'tiered'  : -1,
@@ -60,10 +61,10 @@ def get_datasets(name, root, cutout):
   if name == 'cifar10':
     train_data = dset.CIFAR10(root, train=True , transform=train_transform, download=True)
-    test_data  = dset.CIFAR10(root, train=True , transform=test_transform , download=True)
+    test_data  = dset.CIFAR10(root, train=False, transform=test_transform , download=True)
   elif name == 'cifar100':
     train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
-    test_data  = dset.CIFAR100(root, train=True , transform=test_transform , download=True)
+    test_data  = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
   elif name == 'imagenet-1k' or name == 'imagenet-100':
     train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
     test_data  = dset.ImageFolder(osp.join(root, 'val'),   train_transform)
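The change above fixes a real evaluation bug: with train=True the test loader was re-reading the 50,000-image CIFAR training split, so reported test accuracy was measured on training data. With train=False torchvision returns the held-out 10,000-image split:
```
import torchvision.datasets as dset

test_data = dset.CIFAR10('./data', train=False, download=True)
print(len(test_data))  # 10000: the held-out test split
```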

lib/nas/__init__.py

@@ -1,12 +1,5 @@
 from .model_search import Network
-from .model_search_v1 import NetworkV1
-from .model_search_f1 import NetworkF1
-# acceleration model
-from .model_search_f1_acc2 import NetworkFACC1
-from .model_search_acc2 import NetworkACC2
-from .model_search_v3 import NetworkV3
-from .model_search_v4 import NetworkV4
-from .model_search_v5 import NetworkV5
 from .CifarNet import NetworkCIFAR
 from .ImageNet import NetworkImageNet

lib/nas/construct_utils.py

@@ -128,7 +128,7 @@ class Transition(nn.Module):
     self.ops2 = nn.ModuleList(
                   [nn.Sequential(
-                     nn.MaxPool2d(3, stride=1, padding=1),
+                     nn.MaxPool2d(3, stride=2, padding=1),
                      nn.BatchNorm2d(C, affine=True)),
                    nn.Sequential(
                      nn.MaxPool2d(3, stride=2, padding=1),
@@ -144,7 +144,8 @@ class Transition(nn.Module):
     if self.training and drop_prob > 0.:
       X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)
-    X2 = self.ops2[0] (X0+X1)
+    #X2 = self.ops2[0] (X0+X1)
+    X2 = self.ops2[0] (s0)
     X3 = self.ops2[1] (s1)
     if self.training and drop_prob > 0.:
       X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)

lib/nas/model_search_acc2.py

@@ -1,180 +0,0 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
    if use_sum > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    else:
      clist = []
      for j, cpu_weight in enumerate(cpu_weights):
        if abs(cpu_weight) > 1e-10:
          clist.append( weights[j] * self._ops[j](x) )
      assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
      return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkACC2(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkACC2, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5
    self.use_gumbel = True
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        if self.use_gumbel: weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
        else              : weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        if self.use_gumbel: weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
        else              : weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype
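The MixedOp in the deleted file above is what made this variant an "acceleration model": when the hard Gumbel-softmax yields a (near) one-hot weight vector, it evaluates only the operations with non-zero weight rather than all candidates. A standalone sketch of that shortcut, with element-wise functions standing in for the real operations:
```
import torch

def sparse_mixed_op(x, weights, ops, eps=1e-10):
  # evaluate only the candidates whose architecture weight is non-zero;
  # given a one-hot sample, this runs a single op instead of all of them
  cpu_weights = weights.tolist()
  outputs = [weights[j] * ops[j](x) for j, w in enumerate(cpu_weights) if abs(w) > eps]
  assert len(outputs) > 0, 'invalid weights : {:}'.format(cpu_weights)
  return sum(outputs)

ops = [torch.relu, torch.tanh, torch.sigmoid]         # stand-in operations
w   = torch.tensor([0., 1., 0.], requires_grad=True)  # e.g. a hard Gumbel sample
y   = sparse_mixed_op(torch.randn(3), w, ops)
```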

lib/nas/model_search_f1.py

@@ -1,167 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkF1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkF1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      if reduction:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    return -1

  def get_tau(self):
    return -1

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        s0, s1 = s1, cell(s0, s1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
        s0, s1 = s1, cell(s0, s1, weights)
      #print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      #gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=None       , reduce_concat=concat
      )
    return genotype

lib/nas/model_search_f1_acc2.py

@@ -1,183 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
    if use_sum > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    else:
      clist = []
      for j, cpu_weight in enumerate(cpu_weights):
        if abs(cpu_weight) > 1e-10:
          clist.append( weights[j] * self._ops[j](x) )
      assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
      return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkFACC1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkFACC1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    self.tau = 5
    self.use_gumbel = True
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      if reduction:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        s0, s1 = s1, cell(s0, s1)
      else:
        if self.use_gumbel: weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
        else              : weights = F.softmax(self.alphas_normal, dim=-1)
        s0, s1 = s1, cell(s0, s1, weights)
      #print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      #gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=None       , reduce_concat=concat
      )
    return genotype

lib/nas/model_search_v1.py

@@ -1,161 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    return -1

  def get_tau(self):
    return -1

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_normal, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype

lib/nas/model_search_v3.py

@@ -1,171 +0,0 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0:
        indicator = all_select( len(states) )
      else:
        indicator = random_select( len(states), 0.5 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist) / sum(indicator)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV3(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV3, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype

lib/nas/model_search_v4.py

@@ -1,176 +0,0 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    indicators = random_select( len(cpu_weights), 0.5 )
    clist, ws = [], []
    for w, indicator, op in zip(weights, indicators, self._ops):
      if indicator:
        clist.append( w * op(x) )
        ws.append( w )
    return sum(clist) / sum(ws)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0:
        indicator = all_select( len(states) )
      else:
        indicator = random_select( len(states), 0.5 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist) / sum(indicator)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV4(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV4, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype

lib/nas/model_search_v5.py

@@ -1,174 +0,0 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    clist = []
    for j, cpu_weight in enumerate(cpu_weights):
      if abs(cpu_weight) > 1e-10:
        clist.append( weights[j] * self._ops[j](x) )
    assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
    if len(clist) == 1: return clist[0]
    else              : return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier
    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)
    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0: indicator = all_select( len(states) )
      else     : indicator = random_select( len(states), 0.6 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)
    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV5(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV5, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )
    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5
    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
      else:
        weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype


@@ -2,6 +2,7 @@ import torch
 import torch.nn as nn
 import numpy as np

 def count_parameters_in_MB(model):
   if isinstance(model, nn.Module):
     return np.sum(np.prod(v.size()) for v in model.parameters())/1e6

scripts-cluster/README.md

@@ -9,4 +9,5 @@ bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 PTB-GDAS 1 "bash ./scripts-
 ## CNN
 ```
 bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 CIFAR10-CUT-GDAS-F1 1 "bash ./scripts-cnn/train-cifar.sh GDAS_F1 cifar10 cut"
+bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 IMAGENET-GDAS-F1 1 "bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14"
 ```


@@ -6,9 +6,11 @@ sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
   `pwd`/hadoop-data \
   afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets

-tar xvf ./hadoop-data/cifar.python.tar -C ./data/data/
+export TORCH_HOME="./data/data/"
+tar xvf ./hadoop-data/cifar.python.tar -C ${TORCH_HOME}
+#tar xvf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}

-cifar_dir="./data/data/cifar.python"
+cifar_dir="${TORCH_HOME}/cifar.python"
 if [ -d ${cifar_dir} ]; then
   echo "Find cifar-dir: "${cifar_dir}
 else
@@ -16,7 +18,6 @@ else
   exit 1
 fi
 echo "CHECK-DATA-DIR DONE"
-export TORCH_HOME="./data/data/"

 # config python


@@ -24,6 +24,8 @@ if [ ! -f ${PY_C} ]; then
   PY_C="python"
 else
   echo "Cluster Run with Python: "${PY_C}
+  echo "Unzip ILSVRC2012"
+  tar xvf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
 fi

 ${PY_C} --version