update README

This commit is contained in:
D-X-Y 2019-09-28 20:18:18 +10:00
parent 180702ab8e
commit f8f3f382e0
18 changed files with 9 additions and 779 deletions

View File

@ -50,7 +50,7 @@ Highlight: we equip one-shot NAS with an architecture sampler and train network
<img src="https://d-x-y.github.com/resources/paper-icon/ICCV-2019-SETN.png" width="450">
### Usage
Train the searched SETN-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
Please use the following scripts to train the searched SETN-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
```
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 SETN 96 -1
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 SETN 96 -1
@ -64,12 +64,13 @@ Searching codes come soon!
We proposed a gradient-based searching algorithm using differentiable architecture sampling (improving DARTS with Gumbel-softmax sampling).
<img src="https://d-x-y.github.com/resources/paper-icon/CVPR-2019-GDAS.png" width="350">
<img src="https://d-x-y.github.com/resources/paper-icon/CVPR-2019-GDAS.png" width="300">
The old version is located at [`others/GDAS`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS) and a paddlepaddle implementation is locate at [`others/paddlepaddle`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/paddlepaddle).
### Usage
Train the searched GDAS-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
Please use the following scripts to train the searched GDAS-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
```
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 GDAS_V1 96 -1
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 GDAS_V1 96 -1

View File

@ -1,10 +0,0 @@
{
"dataset" : ["str", "cifar"],
"arch" : ["str", "densenet"],
"depth" : ["int", 100],
"growthRate": ["int", 12],
"reduction" : ["float", 0.5],
"bottleneck": ["bool", 1],
"super_type": ["str" , "basic"],
"zero_init_residual" : ["bool", "0"]
}

View File

@ -1,9 +0,0 @@
{
"dataset" : ["str", "imagenet"],
"arch" : ["str", "MobileNetV2"],
"width_mult" : ["float", 1.0],
"dropout" : ["float", 0.0],
"input_channel" : ["int", 32],
"last_channel" : ["int", 1280],
"block_name" : ["str", "InvertedResidual"]
}

View File

@ -1,9 +0,0 @@
{
"dataset" : ["str", "imagenet"],
"arch" : ["str", "MobileNetV2"],
"width_mult" : ["float", 1.0],
"dropout" : ["float", 0.2],
"input_channel" : ["int", 32],
"last_channel" : ["int", 1280],
"block_name" : ["str", "InvertedResidual"]
}

View File

@ -1,6 +0,0 @@
{
"dataset" : ["str", "imagenet"],
"arch" : ["str", "ShuffleNetV2"],
"width_multi" : ["float", 2.0],
"stages" : ["int", [24, 244, 488, 976, 2048]]
}

View File

@ -1,6 +0,0 @@
{
"dataset" : ["str", "imagenet"],
"arch" : ["str", "ShuffleNetV2"],
"width_multi" : ["float", 1.5],
"stages" : ["int", [24, 176, 352, 704, 1024]]
}

View File

@ -1,6 +0,0 @@
{
"dataset" : ["str", "imagenet"],
"arch" : ["str", "ShuffleNetV2"],
"width_multi" : ["float", 1.0],
"stages" : ["int", [24, 116, 232, 464, 1024]]
}

View File

@ -1,14 +0,0 @@
{
"scheduler": ["str", "cos"],
"eta_min" : ["float", "0.0"],
"epochs" : ["int", "150"],
"warmup" : ["int", "0"],
"gamma" : ["float", "0.98"],
"optim" : ["str", "SGD"],
"LR" : ["float", "0.05"],
"decay" : ["float", "0.00004"],
"momentum" : ["float", "0.9"],
"nesterov" : ["bool", "0"],
"criterion": ["str", "Softmax"],
"auxiliary": ["float", "-1"]
}

View File

@ -1,15 +0,0 @@
{
"scheduler": ["str", "cos"],
"eta_min" : ["float", "0.0"],
"epochs" : ["int", "150"],
"warmup" : ["int", "0"],
"gamma" : ["float", "0.98"],
"optim" : ["str", "SGD"],
"LR" : ["float", "0.05"],
"decay" : ["float", "0.00004"],
"momentum" : ["float", "0.9"],
"nesterov" : ["bool", "0"],
"criterion": ["str", "SmoothSoftmax"],
"label_smooth": ["float", 0.1],
"auxiliary": ["float", "-1"]
}

View File

@ -1,14 +0,0 @@
{
"scheduler": ["str", "linear"],
"LR_min" : ["float", "0.0"],
"epochs" : ["int", "240"],
"warmup" : ["int", "0"],
"optim" : ["str", "SGD"],
"LR" : ["float", "0.5"],
"decay" : ["float", "0.00004"],
"momentum" : ["float", "0.9"],
"nesterov" : ["bool", "1"],
"criterion": ["str", "SmoothSoftmax"],
"label_smooth": ["float", 0.1],
"auxiliary": ["float", "-1"]
}

View File

@ -1,14 +0,0 @@
{
"scheduler": ["str", "exponential"],
"epochs" : ["int", "480"],
"warmup" : ["int", "0"],
"gamma" : ["float", "0.98"],
"optim" : ["str", "SGD"],
"LR" : ["float", "0.045"],
"decay" : ["float", "0.00004"],
"momentum" : ["float", "0.9"],
"nesterov" : ["bool", "0"],
"criterion": ["str", "SmoothSoftmax"],
"label_smooth": ["float", 0.1],
"auxiliary": ["float", "-1"]
}

View File

@ -1,105 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
import torch.nn.functional as F
from .initialization import initialize_resnet
class Bottleneck(nn.Module):
def __init__(self, nChannels, growthRate):
super(Bottleneck, self).__init__()
interChannels = 4*growthRate
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(interChannels)
self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, padding=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat((x, out), 1)
return out
class SingleLayer(nn.Module):
def __init__(self, nChannels, growthRate):
super(SingleLayer, self).__init__()
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, padding=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = torch.cat((x, out), 1)
return out
class Transition(nn.Module):
def __init__(self, nChannels, nOutChannels):
super(Transition, self).__init__()
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = F.avg_pool2d(out, 2)
return out
class DenseNet(nn.Module):
def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
super(DenseNet, self).__init__()
if bottleneck: nDenseBlocks = int( (depth-4) / 6 )
else : nDenseBlocks = int( (depth-4) / 3 )
self.message = 'CifarDenseNet : block : {:}, depth : {:}, reduction : {:}, growth-rate = {:}, class = {:}'.format('bottleneck' if bottleneck else 'basic', depth, reduction, growthRate, nClasses)
nChannels = 2*growthRate
self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, bias=False)
self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
nChannels += nDenseBlocks*growthRate
nOutChannels = int(math.floor(nChannels*reduction))
self.trans1 = Transition(nChannels, nOutChannels)
nChannels = nOutChannels
self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
nChannels += nDenseBlocks*growthRate
nOutChannels = int(math.floor(nChannels*reduction))
self.trans2 = Transition(nChannels, nOutChannels)
nChannels = nOutChannels
self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
nChannels += nDenseBlocks*growthRate
self.act = nn.Sequential(
nn.BatchNorm2d(nChannels), nn.ReLU(inplace=True),
nn.AvgPool2d(8))
self.fc = nn.Linear(nChannels, nClasses)
self.apply(initialize_resnet)
def get_message(self):
return self.message
def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
layers = []
for i in range(int(nDenseBlocks)):
if bottleneck:
layers.append(Bottleneck(nChannels, growthRate))
else:
layers.append(SingleLayer(nChannels, growthRate))
nChannels += growthRate
return nn.Sequential(*layers)
def forward(self, inputs):
out = self.conv1( inputs )
out = self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.dense3(out)
features = self.act(out)
features = features.view(features.size(0), -1)
out = self.fc(features)
return features, out

View File

@ -1,172 +0,0 @@
# Deep Residual Learning for Image Recognition, CVPR 2016
import torch.nn as nn
from .initialization import initialize_resnet
def conv3x3(in_planes, out_planes, stride=1, groups=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
def conv1x1(in_planes, out_planes, stride=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
super(BasicBlock, self).__init__()
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
super(Bottleneck, self).__init__()
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = conv3x3(width, width, stride, groups)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block_name, layers, deep_stem, num_classes, zero_init_residual, groups, width_per_group):
super(ResNet, self).__init__()
#planes = [int(width_per_group * groups * 2 ** i) for i in range(4)]
if block_name == 'BasicBlock' : block= BasicBlock
elif block_name == 'Bottleneck': block= Bottleneck
else : raise ValueError('invalid block-name : {:}'.format(block_name))
if not deep_stem:
self.conv = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
nn.BatchNorm2d(64), nn.ReLU(inplace=True))
else:
self.conv = nn.Sequential(
nn.Conv2d( 3, 32, kernel_size=3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(32), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32), nn.ReLU(inplace=True),
nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(64), nn.ReLU(inplace=True))
self.inplanes = 64
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64 , layers[0], stride=1, groups=groups, base_width=width_per_group)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, groups=groups, base_width=width_per_group)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2, groups=groups, base_width=width_per_group)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2, groups=groups, base_width=width_per_group)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
self.message = 'block = {:}, layers = {:}, deep_stem = {:}, num_classes = {:}'.format(block, layers, deep_stem, num_classes)
self.apply( initialize_resnet )
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride, groups, base_width):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
if stride == 2:
downsample = nn.Sequential(
nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
conv1x1(self.inplanes, planes * block.expansion, 1),
nn.BatchNorm2d(planes * block.expansion),
)
elif stride == 1:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
nn.BatchNorm2d(planes * block.expansion),
)
else: raise ValueError('invalid stride [{:}] for downsample'.format(stride))
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, groups, base_width))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, 1, None, groups, base_width))
return nn.Sequential(*layers)
def get_message(self):
return self.message
def forward(self, x):
x = self.conv(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.fc(features)
return features, logits

View File

@ -1,101 +0,0 @@
# MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR 2018
from torch import nn
from .initialization import initialize_resnet
class ConvBNReLU(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
super(ConvBNReLU, self).__init__()
padding = (kernel_size - 1) // 2
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False)
self.bn = nn.BatchNorm2d(out_planes)
self.relu = nn.ReLU6(inplace=True)
def forward(self, x):
out = self.conv( x )
out = self.bn ( out )
out = self.relu( out )
return out
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, num_classes, width_mult, input_channel, last_channel, block_name, dropout):
super(MobileNetV2, self).__init__()
if block_name == 'InvertedResidual':
block = InvertedResidual
else:
raise ValueError('invalid block name : {:}'.format(block_name))
inverted_residual_setting = [
# t, c, n, s
[1, 16 , 1, 1],
[6, 24 , 2, 2],
[6, 32 , 3, 2],
[6, 64 , 4, 2],
[6, 96 , 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# building first layer
input_channel = int(input_channel * width_mult)
self.last_channel = int(last_channel * max(1.0, width_mult))
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = int(c * width_mult)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(self.last_channel, num_classes),
)
self.message = 'MobileNetV2 : width_mult={:}, in-C={:}, last-C={:}, block={:}, dropout={:}'.format(width_mult, input_channel, last_channel, block_name, dropout)
# weight initialization
self.apply( initialize_resnet )
def get_message(self):
return self.message
def forward(self, inputs):
features = self.features(inputs)
vectors = features.mean([2, 3])
predicts = self.classifier(vectors)
return features, predicts

View File

@ -1,133 +0,0 @@
import functools
import torch
import torch.nn as nn
__all__ = ['ShuffleNetV2']
def channel_shuffle(x, groups):
batchsize, num_channels, height, width = x.data.size()
channels_per_group = num_channels // groups
# reshape
x = x.view(batchsize, groups, channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
# flatten
x = x.view(batchsize, -1, height, width)
return x
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride):
super(InvertedResidual, self).__init__()
if not (1 <= stride <= 3):
raise ValueError('illegal stride value')
self.stride = stride
branch_features = oup // 2
assert (self.stride != 1) or (inp == branch_features << 1)
pw_conv11 = functools.partial(nn.Conv2d, kernel_size=1, stride=1, padding=0, bias=False)
dw_conv33 = functools.partial(self.depthwise_conv, kernel_size=3, stride=self.stride, padding=1)
if self.stride > 1:
self.branch1 = nn.Sequential(
dw_conv33(inp, inp),
nn.BatchNorm2d(inp),
pw_conv11(inp, branch_features),
nn.BatchNorm2d(branch_features),
nn.ReLU(inplace=True),
)
self.branch2 = nn.Sequential(
pw_conv11(inp if (self.stride > 1) else branch_features, branch_features),
nn.BatchNorm2d(branch_features),
nn.ReLU(inplace=True),
dw_conv33(branch_features, branch_features),
nn.BatchNorm2d(branch_features),
pw_conv11(branch_features, branch_features),
nn.BatchNorm2d(branch_features),
nn.ReLU(inplace=True),
)
@staticmethod
def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)
def forward(self, x):
if self.stride == 1:
x1, x2 = x.chunk(2, dim=1)
out = torch.cat((x1, self.branch2(x2)), dim=1)
else:
out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
out = channel_shuffle(out, 2)
return out
class ShuffleNetV2(nn.Module):
def __init__(self, num_classes, stages):
super(ShuffleNetV2, self).__init__()
self.stage_out_channels = stages
assert len(stages) == 5, 'invalid stages : {:}'.format(stages)
self.message = 'stages: ' + ' '.join([str(x) for x in stages])
input_channels = 3
output_channels = self.stage_out_channels[0]
self.conv1 = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
nn.BatchNorm2d(output_channels),
nn.ReLU(inplace=True),
)
input_channels = output_channels
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
stage_names = ['stage{:}'.format(i) for i in [2, 3, 4]]
stage_repeats = [4, 8, 4]
for name, repeats, output_channels in zip(
stage_names, stage_repeats, self.stage_out_channels[1:]):
seq = [InvertedResidual(input_channels, output_channels, 2)]
for i in range(repeats - 1):
seq.append(InvertedResidual(output_channels, output_channels, 1))
setattr(self, name, nn.Sequential(*seq))
input_channels = output_channels
output_channels = self.stage_out_channels[-1]
self.conv5 = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(output_channels),
nn.ReLU(inplace=True),
)
self.fc = nn.Linear(output_channels, num_classes)
def get_message(self):
return self.message
def forward(self, inputs):
x = self.conv1( inputs )
x = self.maxpool(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.conv5(x)
features = x.mean([2, 3]) # globalpool
predicts = self.fc(features)
return features, predicts
#@staticmethod
#def _getStages(mult):
# stages = {
# '0.5': [24, 48, 96 , 192, 1024],
# '1.0': [24, 116, 232, 464, 1024],
# '1.5': [24, 176, 352, 704, 1024],
# '2.0': [24, 244, 488, 976, 2048],
# }
# return stages[str(mult)]

View File

@ -11,15 +11,12 @@ from .clone_weights import init_from_model
def get_cifar_models(config):
from .CifarResNet import CifarResNet
from .CifarDenseNet import DenseNet
from .CifarWideResNet import CifarWideResNet
super_type = getattr(config, 'super_type', 'basic')
if super_type == 'basic':
if config.arch == 'resnet':
return CifarResNet(config.module, config.depth, config.class_num, config.zero_init_residual)
elif config.arch == 'densenet':
return DenseNet(config.growthRate, config.depth, config.reduction, config.class_num, config.bottleneck)
elif config.arch == 'wideresnet':
return CifarWideResNet(config.depth, config.wide_factor, config.class_num, config.dropout)
else:
@ -44,10 +41,8 @@ def get_cifar_models(config):
def get_imagenet_models(config):
super_type = getattr(config, 'super_type', 'basic')
if super_type == 'basic':
return get_imagenet_models_basic(config)
# NAS searched architecture
elif super_type.startswith('infer'):
if super_type.startswith('infer'):
assert len(super_type.split('-')) == 2, 'invalid super_type : {:}'.format(super_type)
infer_mode = super_type.split('-')[1]
if infer_mode == 'shape':
@ -65,20 +60,6 @@ def get_imagenet_models(config):
raise ValueError('invalid super-type : {:}'.format(super_type))
def get_imagenet_models_basic(config):
from .ImagenetResNet import ResNet
from .MobileNet import MobileNetV2
from .ShuffleNetV2 import ShuffleNetV2
if config.arch == 'resnet':
return ResNet(config.block_name, config.layers, config.deep_stem, config.class_num, config.zero_init_residual, config.groups, config.width_per_group)
elif config.arch == 'MobileNetV2':
return MobileNetV2(config.class_num, config.width_mult, config.input_channel, config.last_channel, config.block_name, config.dropout)
elif config.arch == 'ShuffleNetV2':
return ShuffleNetV2(config.class_num, config.stages)
else:
raise ValueError('invalid arch : {:}'.format( config.arch ))
def obtain_model(config):
if config.dataset == 'cifar':
return get_cifar_models(config)

View File

@ -1,139 +0,0 @@
# SphereFace: Deep Hypersphere Embedding for Face Recognition
#
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
def myphi(x,m):
x = x * m
return 1-x**2/math.factorial(2)+x**4/math.factorial(4)-x**6/math.factorial(6) + \
x**8/math.factorial(8) - x**9/math.factorial(9)
class AngleLinear(nn.Module):
def __init__(self, in_features, out_features, m = 4, phiflag=True):
super(AngleLinear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = nn.Parameter(torch.Tensor(in_features,out_features))
self.weight.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5)
self.phiflag = phiflag
self.m = m
self.mlambda = [
lambda x: x**0,
lambda x: x**1,
lambda x: 2*x**2-1,
lambda x: 4*x**3-3*x,
lambda x: 8*x**4-8*x**2+1,
lambda x: 16*x**5-20*x**3+5*x
]
def forward(self, input):
x = input # size=(B,F) F is feature len
w = self.weight # size=(F,Classnum) F=in_features Classnum=out_features
ww = w.renorm(2,1,1e-5).mul(1e5)
xlen = x.pow(2).sum(1).pow(0.5) # size=B
wlen = ww.pow(2).sum(0).pow(0.5) # size=Classnum
cos_theta = x.mm(ww) # size=(B,Classnum)
cos_theta = cos_theta / xlen.view(-1,1) / wlen.view(1,-1)
cos_theta = cos_theta.clamp(-1,1)
if self.phiflag:
cos_m_theta = self.mlambda[self.m](cos_theta)
with torch.no_grad():
theta = cos_theta.acos()
k = (self.m*theta/3.14159265).floor()
n_one = k*0.0 - 1
phi_theta = (n_one**k) * cos_m_theta - 2*k
else:
theta = cos_theta.acos()
phi_theta = myphi(theta,self.m)
phi_theta = phi_theta.clamp(-1*self.m,1)
cos_theta = cos_theta * xlen.view(-1,1)
phi_theta = phi_theta * xlen.view(-1,1)
output = (cos_theta,phi_theta)
return output # size=(B,Classnum,2)
class SphereFace20(nn.Module):
def __init__(self, classnum=10574):
super(SphereFace20, self).__init__()
self.classnum = classnum
#input = B*3*112*96
self.conv1_1 = nn.Conv2d(3,64,3,2,1) #=>B*64*56*48
self.relu1_1 = nn.PReLU(64)
self.conv1_2 = nn.Conv2d(64,64,3,1,1)
self.relu1_2 = nn.PReLU(64)
self.conv1_3 = nn.Conv2d(64,64,3,1,1)
self.relu1_3 = nn.PReLU(64)
self.conv2_1 = nn.Conv2d(64,128,3,2,1) #=>B*128*28*24
self.relu2_1 = nn.PReLU(128)
self.conv2_2 = nn.Conv2d(128,128,3,1,1)
self.relu2_2 = nn.PReLU(128)
self.conv2_3 = nn.Conv2d(128,128,3,1,1)
self.relu2_3 = nn.PReLU(128)
self.conv2_4 = nn.Conv2d(128,128,3,1,1) #=>B*128*28*24
self.relu2_4 = nn.PReLU(128)
self.conv2_5 = nn.Conv2d(128,128,3,1,1)
self.relu2_5 = nn.PReLU(128)
self.conv3_1 = nn.Conv2d(128,256,3,2,1) #=>B*256*14*12
self.relu3_1 = nn.PReLU(256)
self.conv3_2 = nn.Conv2d(256,256,3,1,1)
self.relu3_2 = nn.PReLU(256)
self.conv3_3 = nn.Conv2d(256,256,3,1,1)
self.relu3_3 = nn.PReLU(256)
self.conv3_4 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
self.relu3_4 = nn.PReLU(256)
self.conv3_5 = nn.Conv2d(256,256,3,1,1)
self.relu3_5 = nn.PReLU(256)
self.conv3_6 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
self.relu3_6 = nn.PReLU(256)
self.conv3_7 = nn.Conv2d(256,256,3,1,1)
self.relu3_7 = nn.PReLU(256)
self.conv3_8 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
self.relu3_8 = nn.PReLU(256)
self.conv3_9 = nn.Conv2d(256,256,3,1,1)
self.relu3_9 = nn.PReLU(256)
self.conv4_1 = nn.Conv2d(256,512,3,2,1) #=>B*512*7*6
self.relu4_1 = nn.PReLU(512)
self.conv4_2 = nn.Conv2d(512,512,3,1,1)
self.relu4_2 = nn.PReLU(512)
self.conv4_3 = nn.Conv2d(512,512,3,1,1)
self.relu4_3 = nn.PReLU(512)
self.fc5 = nn.Linear(512*7*6,512)
self.fc6 = AngleLinear(512, self.classnum)
def forward(self, x):
x = self.relu1_1(self.conv1_1(x))
x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))
x = self.relu2_1(self.conv2_1(x))
x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))
x = self.relu3_1(self.conv3_1(x))
x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))
x = self.relu4_1(self.conv4_1(x))
x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))
x = x.view(x.size(0),-1)
features = self.fc5(x)
logits = self.fc6(features)
return features, logits

View File

@ -2,7 +2,7 @@
We propose A Gradient-based neural architecture search approach using Differentiable Architecture Sampler (GDAS).
<img src="https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS/data/GDAS.png" width="520">
<img src="https://github.com/D-X-Y/NAS-Projects/blob/master/others/GDAS/data/GDAS.png" width="520">
Figure-1. We utilize a DAG to represent the search space of a neural cell. Different operations (colored arrows) transform one node (square) to its intermediate features (little circles). Meanwhile, each node is the sum of the intermediate features transformed from the previous nodes. As indicated by the solid connections, the neural cell in the proposed GDAS is a sampled sub-graph of this DAG. Specifically, among the intermediate features between every two nodes, GDAS samples one feature in a differentiable way.
### Requirements
@ -51,9 +51,10 @@ You can also find some pre-trained models in [Google Driver](https://drive.googl
### Experimental Results
<img src="https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS/data/imagenet-results.png" width="700">
Figure-2. Top-1 and top-5 errors on ImageNet.
<img src="https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS/data/imagenet-results.png" width="700">
Figure-2. Top-1 and top-5 errors on ImageNet.
### Correction