update README
This commit is contained in:
parent
180702ab8e
commit
f8f3f382e0
@ -50,7 +50,7 @@ Highlight: we equip one-shot NAS with an architecture sampler and train network
|
||||
<img src="https://d-x-y.github.com/resources/paper-icon/ICCV-2019-SETN.png" width="450">
|
||||
|
||||
### Usage
|
||||
Train the SETN-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
|
||||
Please use the following scripts to train the SETN-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
|
||||
```
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 SETN 96 -1
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 SETN 96 -1
|
||||
@ -64,12 +64,13 @@ Searching codes come soon!
|
||||
|
||||
We proposed a gradient-based searching algorithm using differentiable architecture sampling (improving DARTS with Gumbel-softmax sampling).
|
||||
|
||||
<img src="https://d-x-y.github.com/resources/paper-icon/CVPR-2019-GDAS.png" width="350">
|
||||
<img src="https://d-x-y.github.com/resources/paper-icon/CVPR-2019-GDAS.png" width="300">
|
||||
|
||||
The old version is located at [`others/GDAS`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS) and a PaddlePaddle implementation is located at [`others/paddlepaddle`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/paddlepaddle).
|
||||
|
||||
### Usage
|
||||
Train the GDAS-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
|
||||
|
||||
Please use the following scripts to train the GDAS-searched CNN on CIFAR-10, CIFAR-100, and ImageNet.
|
||||
```
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 GDAS_V1 96 -1
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 GDAS_V1 96 -1
|
||||
|
@ -1,10 +0,0 @@
|
||||
{
|
||||
"dataset" : ["str", "cifar"],
|
||||
"arch" : ["str", "densenet"],
|
||||
"depth" : ["int", 100],
|
||||
"growthRate": ["int", 12],
|
||||
"reduction" : ["float", 0.5],
|
||||
"bottleneck": ["bool", 1],
|
||||
"super_type": ["str" , "basic"],
|
||||
"zero_init_residual" : ["bool", "0"]
|
||||
}
|
@ -1,9 +0,0 @@
|
||||
{
|
||||
"dataset" : ["str", "imagenet"],
|
||||
"arch" : ["str", "MobileNetV2"],
|
||||
"width_mult" : ["float", 1.0],
|
||||
"dropout" : ["float", 0.0],
|
||||
"input_channel" : ["int", 32],
|
||||
"last_channel" : ["int", 1280],
|
||||
"block_name" : ["str", "InvertedResidual"]
|
||||
}
|
@ -1,9 +0,0 @@
|
||||
{
|
||||
"dataset" : ["str", "imagenet"],
|
||||
"arch" : ["str", "MobileNetV2"],
|
||||
"width_mult" : ["float", 1.0],
|
||||
"dropout" : ["float", 0.2],
|
||||
"input_channel" : ["int", 32],
|
||||
"last_channel" : ["int", 1280],
|
||||
"block_name" : ["str", "InvertedResidual"]
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
{
|
||||
"dataset" : ["str", "imagenet"],
|
||||
"arch" : ["str", "ShuffleNetV2"],
|
||||
"width_multi" : ["float", 2.0],
|
||||
"stages" : ["int", [24, 244, 488, 976, 2048]]
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
{
|
||||
"dataset" : ["str", "imagenet"],
|
||||
"arch" : ["str", "ShuffleNetV2"],
|
||||
"width_multi" : ["float", 1.5],
|
||||
"stages" : ["int", [24, 176, 352, 704, 1024]]
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
{
|
||||
"dataset" : ["str", "imagenet"],
|
||||
"arch" : ["str", "ShuffleNetV2"],
|
||||
"width_multi" : ["float", 1.0],
|
||||
"stages" : ["int", [24, 116, 232, 464, 1024]]
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
{
|
||||
"scheduler": ["str", "cos"],
|
||||
"eta_min" : ["float", "0.0"],
|
||||
"epochs" : ["int", "150"],
|
||||
"warmup" : ["int", "0"],
|
||||
"gamma" : ["float", "0.98"],
|
||||
"optim" : ["str", "SGD"],
|
||||
"LR" : ["float", "0.05"],
|
||||
"decay" : ["float", "0.00004"],
|
||||
"momentum" : ["float", "0.9"],
|
||||
"nesterov" : ["bool", "0"],
|
||||
"criterion": ["str", "Softmax"],
|
||||
"auxiliary": ["float", "-1"]
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
{
|
||||
"scheduler": ["str", "cos"],
|
||||
"eta_min" : ["float", "0.0"],
|
||||
"epochs" : ["int", "150"],
|
||||
"warmup" : ["int", "0"],
|
||||
"gamma" : ["float", "0.98"],
|
||||
"optim" : ["str", "SGD"],
|
||||
"LR" : ["float", "0.05"],
|
||||
"decay" : ["float", "0.00004"],
|
||||
"momentum" : ["float", "0.9"],
|
||||
"nesterov" : ["bool", "0"],
|
||||
"criterion": ["str", "SmoothSoftmax"],
|
||||
"label_smooth": ["float", 0.1],
|
||||
"auxiliary": ["float", "-1"]
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
{
|
||||
"scheduler": ["str", "linear"],
|
||||
"LR_min" : ["float", "0.0"],
|
||||
"epochs" : ["int", "240"],
|
||||
"warmup" : ["int", "0"],
|
||||
"optim" : ["str", "SGD"],
|
||||
"LR" : ["float", "0.5"],
|
||||
"decay" : ["float", "0.00004"],
|
||||
"momentum" : ["float", "0.9"],
|
||||
"nesterov" : ["bool", "1"],
|
||||
"criterion": ["str", "SmoothSoftmax"],
|
||||
"label_smooth": ["float", 0.1],
|
||||
"auxiliary": ["float", "-1"]
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
{
|
||||
"scheduler": ["str", "exponential"],
|
||||
"epochs" : ["int", "480"],
|
||||
"warmup" : ["int", "0"],
|
||||
"gamma" : ["float", "0.98"],
|
||||
"optim" : ["str", "SGD"],
|
||||
"LR" : ["float", "0.045"],
|
||||
"decay" : ["float", "0.00004"],
|
||||
"momentum" : ["float", "0.9"],
|
||||
"nesterov" : ["bool", "0"],
|
||||
"criterion": ["str", "SmoothSoftmax"],
|
||||
"label_smooth": ["float", 0.1],
|
||||
"auxiliary": ["float", "-1"]
|
||||
}
|
@ -1,105 +0,0 @@
|
||||
##################################################
|
||||
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
|
||||
##################################################
|
||||
import math, torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from .initialization import initialize_resnet
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Dense layer with a 1x1 bottleneck convolution.

    Applies BN-ReLU-Conv1x1 (to ``4 * growthRate`` channels) followed by
    BN-ReLU-Conv3x3 (to ``growthRate`` channels) and concatenates the result
    to the input along dimension 1.
    """

    def __init__(self, nChannels, growthRate):
        super().__init__()
        hidden = 4 * growthRate
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(nChannels, hidden, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        self.conv2 = nn.Conv2d(hidden, growthRate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        squeezed = self.conv1(F.relu(self.bn1(x)))
        grown = self.conv2(F.relu(self.bn2(squeezed)))
        # The input is kept and the new feature maps are appended to it.
        return torch.cat((x, grown), 1)
|
||||
|
||||
|
||||
class SingleLayer(nn.Module):
    """Dense layer without a bottleneck.

    Applies BN-ReLU-Conv3x3 producing ``growthRate`` channels and concatenates
    them to the input along dimension 1.
    """

    def __init__(self, nChannels, growthRate):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        grown = self.conv1(F.relu(self.bn1(x)))
        return torch.cat((x, grown), 1)
|
||||
|
||||
|
||||
class Transition(nn.Module):
    """Transition between dense stages: BN-ReLU-Conv1x1 then 2x2 average pooling."""

    def __init__(self, nChannels, nOutChannels):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, bias=False)

    def forward(self, x):
        projected = self.conv1(F.relu(self.bn1(x)))
        return F.avg_pool2d(projected, 2)
|
||||
|
||||
|
||||
class DenseNet(nn.Module):
    """DenseNet with three dense stages separated by two transition layers.

    Args:
        growthRate: channels added by every dense layer.
        depth: total depth; each stage holds ``(depth - 4) / 6`` layers with
            bottlenecks, ``(depth - 4) / 3`` without (truncated to int).
        reduction: channel multiplier applied by each transition layer.
        nClasses: output size of the final linear layer.
        bottleneck: use ``Bottleneck`` layers if true, ``SingleLayer`` otherwise.

    ``forward`` returns ``(features, logits)``.
    """

    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
        super().__init__()

        layers_per_stage = int((depth - 4) / (6 if bottleneck else 3))

        self.message = 'CifarDenseNet : block : {:}, depth : {:}, reduction : {:}, growth-rate = {:}, class = {:}'.format('bottleneck' if bottleneck else 'basic', depth, reduction, growthRate, nClasses)

        width = 2 * growthRate
        self.conv1 = nn.Conv2d(3, width, kernel_size=3, padding=1, bias=False)

        # Stages 1 and 2 are each followed by a channel-reducing transition.
        for stage in (1, 2):
            dense = self._make_dense(width, growthRate, layers_per_stage, bottleneck)
            setattr(self, 'dense{:}'.format(stage), dense)
            width += layers_per_stage * growthRate
            reduced = int(math.floor(width * reduction))
            setattr(self, 'trans{:}'.format(stage), Transition(width, reduced))
            width = reduced

        self.dense3 = self._make_dense(width, growthRate, layers_per_stage, bottleneck)
        width += layers_per_stage * growthRate

        self.act = nn.Sequential(
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(8),
        )
        self.fc = nn.Linear(width, nClasses)

        self.apply(initialize_resnet)

    def get_message(self):
        """Return the configuration summary string built in ``__init__``."""
        return self.message

    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
        """Stack ``nDenseBlocks`` dense layers, widening the input by ``growthRate`` each time."""
        layer_cls = Bottleneck if bottleneck else SingleLayer
        stack = []
        for _ in range(int(nDenseBlocks)):
            stack.append(layer_cls(nChannels, growthRate))
            nChannels += growthRate
        return nn.Sequential(*stack)

    def forward(self, inputs):
        hidden = self.conv1(inputs)
        hidden = self.trans1(self.dense1(hidden))
        hidden = self.trans2(self.dense2(hidden))
        hidden = self.dense3(hidden)
        pooled = self.act(hidden)
        features = pooled.view(pooled.size(0), -1)
        logits = self.fc(features)
        return features, logits
|
@ -1,172 +0,0 @@
|
||||
# Deep Residual Learning for Image Recognition, CVPR 2016
|
||||
import torch.nn as nn
|
||||
from .initialization import initialize_resnet
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1, groups=1):
    """Build a bias-free 3x3 convolution with padding 1."""
    options = dict(kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
    return nn.Conv2d(in_planes, out_planes, **options)
|
||||
|
||||
|
||||
def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 convolution."""
    options = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **options)
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Only ``groups=1`` and ``base_width=64`` are accepted; any other value
    raises ``ValueError``. ``downsample``, when given, is applied to the input
    before it is added to the residual branch.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
        super().__init__()
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        # conv1 (and downsample, if any) carry the stride of the block.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions.

    The inner width is ``int(planes * (base_width / 64.)) * groups`` and the
    block outputs ``planes * expansion`` channels. ``downsample``, when given,
    is applied to the input before it is added to the residual branch.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
        super().__init__()
        inner = int(planes * (base_width / 64.)) * groups
        outer = planes * self.expansion
        # conv2 (and downsample, if any) carry the stride of the block.
        self.conv1 = conv1x1(inplanes, inner)
        self.bn1 = nn.BatchNorm2d(inner)
        self.conv2 = conv3x3(inner, inner, stride, groups)
        self.bn2 = nn.BatchNorm2d(inner)
        self.conv3 = conv1x1(inner, outer)
        self.bn3 = nn.BatchNorm2d(outer)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))

        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)
|
||||
|
||||
|
||||
class ResNet(nn.Module):
    """ResNet built from ``BasicBlock`` or ``Bottleneck`` residual blocks.

    Args:
        block_name: ``'BasicBlock'`` or ``'Bottleneck'``; anything else raises
            ``ValueError``.
        layers: number of blocks in each of the four stages.
        deep_stem: use three 3x3 convolutions instead of one 7x7 convolution
            as the stem.
        num_classes: output size of the final linear layer.
        zero_init_residual: zero the weight of the last BatchNorm of every
            residual block after initialization.
        groups, width_per_group: forwarded to every block.

    ``forward`` returns ``(features, logits)``.
    """

    def __init__(self, block_name, layers, deep_stem, num_classes, zero_init_residual, groups, width_per_group):
        super().__init__()

        if block_name == 'BasicBlock':
            block = BasicBlock
        elif block_name == 'Bottleneck':
            block = Bottleneck
        else:
            raise ValueError('invalid block-name : {:}'.format(block_name))

        if deep_stem:
            stem = []
            for c_in, c_out, stride in ((3, 32, 2), (32, 32, 1), (32, 64, 1)):
                stem.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=stride, padding=1, bias=False))
                stem.append(nn.BatchNorm2d(c_out))
                stem.append(nn.ReLU(inplace=True))
            self.conv = nn.Sequential(*stem)
        else:
            self.conv = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
            )
        self.inplanes = 64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        for index, (planes, stride) in enumerate(((64, 1), (128, 2), (256, 2), (512, 2))):
            stage = self._make_layer(block, planes, layers[index], stride=stride, groups=groups, base_width=width_per_group)
            setattr(self, 'layer{:}'.format(index + 1), stage)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.message = 'block = {:}, layers = {:}, deep_stem = {:}, num_classes = {:}'.format(block, layers, deep_stem, num_classes)

        self.apply(initialize_resnet)

        # Zero the last BN scale of each residual branch so that every block
        # starts out as an identity mapping (see https://arxiv.org/abs/1706.02677).
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)
                elif isinstance(module, BasicBlock):
                    nn.init.constant_(module.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride, groups, base_width):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            if stride == 2:
                # Strided shortcut: average-pool first, then a stride-1 projection.
                shortcut = [nn.AvgPool2d(kernel_size=2, stride=2, padding=0)]
            elif stride == 1:
                shortcut = []
            else:
                raise ValueError('invalid stride [{:}] for downsample'.format(stride))
            shortcut.append(conv1x1(self.inplanes, out_planes, 1))
            shortcut.append(nn.BatchNorm2d(out_planes))
            downsample = nn.Sequential(*shortcut)

        stage = [block(self.inplanes, planes, stride, downsample, groups, base_width)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes, 1, None, groups, base_width))

        return nn.Sequential(*stage)

    def get_message(self):
        """Return the configuration summary string built in ``__init__``."""
        return self.message

    def forward(self, x):
        x = self.maxpool(self.conv(x))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        features = pooled.view(pooled.size(0), -1)
        logits = self.fc(features)

        return features, logits
|
@ -1,101 +0,0 @@
|
||||
# MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR 2018
|
||||
from torch import nn
|
||||
from .initialization import initialize_resnet
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Module):
    """Bias-free convolution followed by BatchNorm and ReLU6.

    Padding is ``(kernel_size - 1) // 2``.
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super().__init__()
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU6(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expansion, depthwise 3x3, linear 1x1.

    The expansion layer is omitted when ``expand_ratio == 1``. The input is
    added to the output only when ``stride == 1`` and ``inp == oup``.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        width = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        # pw (point-wise expansion)
        expand = [ConvBNReLU(inp, width, kernel_size=1)] if expand_ratio != 1 else []
        # dw (depthwise)
        depthwise = ConvBNReLU(width, width, stride=stride, groups=width)
        # pw-linear (projection without activation)
        project = [nn.Conv2d(width, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup)]
        self.conv = nn.Sequential(*expand, depthwise, *project)

    def forward(self, x):
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
|
||||
|
||||
|
||||
class MobileNetV2(nn.Module):
    """MobileNetV2 classifier.

    Args:
        num_classes: output size of the classifier.
        width_mult: multiplier applied to the channel count of every layer.
        input_channel: channel count of the stem convolution before scaling.
        last_channel: channel count of the last convolution before scaling;
            it is scaled by ``max(1.0, width_mult)``.
        block_name: must be ``'InvertedResidual'``; anything else raises
            ``ValueError``.
        dropout: dropout probability used in the classifier.

    ``forward`` returns ``(features, predicts)`` where ``features`` is the
    output of ``self.features`` (before spatial averaging).
    """

    def __init__(self, num_classes, width_mult, input_channel, last_channel, block_name, dropout):
        super().__init__()
        if block_name == 'InvertedResidual':
            block = InvertedResidual
        else:
            raise ValueError('invalid block name : {:}'.format(block_name))
        inverted_residual_setting = [
            # t (expand ratio), c (channels), n (repeats), s (stride of first repeat)
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # building first layer
        # A separate running counter keeps the `input_channel` argument intact,
        # so the message below reports the configured value rather than the
        # width of the last inverted-residual stage.
        channels = int(input_channel * width_mult)
        self.last_channel = int(last_channel * max(1.0, width_mult))
        features = [ConvBNReLU(3, channels, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(channels, output_channel, stride, expand_ratio=t))
                channels = output_channel
        # building last several layers
        features.append(ConvBNReLU(channels, self.last_channel, kernel_size=1))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(self.last_channel, num_classes),
        )
        self.message = 'MobileNetV2 : width_mult={:}, in-C={:}, last-C={:}, block={:}, dropout={:}'.format(width_mult, input_channel, last_channel, block_name, dropout)

        # weight initialization
        self.apply(initialize_resnet)

    def get_message(self):
        """Return the configuration summary string built in ``__init__``."""
        return self.message

    def forward(self, inputs):
        features = self.features(inputs)
        # Average over dimensions 2 and 3 before the classifier.
        vectors = features.mean([2, 3])
        predicts = self.classifier(vectors)
        return features, predicts
|
@ -1,133 +0,0 @@
|
||||
import functools
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
__all__ = ['ShuffleNetV2']
|
||||
|
||||
|
||||
def channel_shuffle(x, groups):
    """Interleave the channels of a 4-D tensor across ``groups`` groups.

    Dimension 1 is viewed as ``(groups, channels // groups)``, the two axes
    are swapped, and the result is flattened back to four dimensions.
    """
    batch, channels, rows, cols = x.size()
    per_group = channels // groups

    grouped = x.view(batch, groups, per_group, rows, cols)
    swapped = torch.transpose(grouped, 1, 2).contiguous()

    return swapped.view(batch, -1, rows, cols)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
    """Two-branch ShuffleNetV2 unit.

    With ``stride == 1`` the input is split in half along dimension 1: one half
    passes through unchanged and the other goes through ``branch2``. With a
    larger stride both ``branch1`` and ``branch2`` process the full input. The
    concatenated result is channel-shuffled with two groups.

    Raises:
        ValueError: if ``stride`` is outside ``[1, 3]``.
    """

    def __init__(self, inp, oup, stride):
        super().__init__()

        if not (1 <= stride <= 3):
            raise ValueError('illegal stride value')
        self.stride = stride

        half = oup // 2
        assert (self.stride != 1) or (inp == half << 1)

        def pointwise(c_in, c_out):
            return nn.Conv2d(c_in, c_out, kernel_size=1, stride=1, padding=0, bias=False)

        def depthwise(c_in, c_out):
            return self.depthwise_conv(c_in, c_out, kernel_size=3, stride=self.stride, padding=1)

        # branch1 only exists for strided units.
        if self.stride > 1:
            self.branch1 = nn.Sequential(
                depthwise(inp, inp),
                nn.BatchNorm2d(inp),
                pointwise(inp, half),
                nn.BatchNorm2d(half),
                nn.ReLU(inplace=True),
            )

        branch2_in = inp if (self.stride > 1) else half
        self.branch2 = nn.Sequential(
            pointwise(branch2_in, half),
            nn.BatchNorm2d(half),
            nn.ReLU(inplace=True),
            depthwise(half, half),
            nn.BatchNorm2d(half),
            pointwise(half, half),
            nn.BatchNorm2d(half),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        """Build a convolution whose ``groups`` equals its input channel count."""
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        if self.stride == 1:
            kept, processed = x.chunk(2, dim=1)
            parts = (kept, self.branch2(processed))
        else:
            parts = (self.branch1(x), self.branch2(x))

        return channel_shuffle(torch.cat(parts, dim=1), 2)
|
||||
|
||||
|
||||
class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 classifier.

    Args:
        num_classes: output size of the final linear layer.
        stages: five channel counts: stem, stage2, stage3, stage4 and the
            final 1x1 convolution.

    ``forward`` returns ``(features, predicts)``.
    """

    def __init__(self, num_classes, stages):
        super().__init__()

        self.stage_out_channels = stages
        assert len(stages) == 5, 'invalid stages : {:}'.format(stages)
        self.message = 'stages: ' + ' '.join([str(x) for x in stages])

        stem_width = self.stage_out_channels[0]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, stem_width, 3, 2, 1, bias=False),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
        )

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Each stage starts with one stride-2 unit followed by stride-1 units.
        width = stem_width
        stage_plan = zip((2, 3, 4), (4, 8, 4), self.stage_out_channels[1:])
        for stage_id, repeats, stage_width in stage_plan:
            units = [InvertedResidual(width, stage_width, 2)]
            for _ in range(repeats - 1):
                units.append(InvertedResidual(stage_width, stage_width, 1))
            setattr(self, 'stage{:}'.format(stage_id), nn.Sequential(*units))
            width = stage_width

        head_width = self.stage_out_channels[-1]
        self.conv5 = nn.Sequential(
            nn.Conv2d(width, head_width, 1, 1, 0, bias=False),
            nn.BatchNorm2d(head_width),
            nn.ReLU(inplace=True),
        )

        self.fc = nn.Linear(head_width, num_classes)

    def get_message(self):
        """Return the stage summary string built in ``__init__``."""
        return self.message

    def forward(self, inputs):
        hidden = self.maxpool(self.conv1(inputs))
        hidden = self.stage2(hidden)
        hidden = self.stage3(hidden)
        hidden = self.stage4(hidden)
        hidden = self.conv5(hidden)
        features = hidden.mean([2, 3])  # global average pooling
        predicts = self.fc(features)
        return features, predicts
|
||||
|
||||
#@staticmethod
|
||||
#def _getStages(mult):
|
||||
# stages = {
|
||||
# '0.5': [24, 48, 96 , 192, 1024],
|
||||
# '1.0': [24, 116, 232, 464, 1024],
|
||||
# '1.5': [24, 176, 352, 704, 1024],
|
||||
# '2.0': [24, 244, 488, 976, 2048],
|
||||
# }
|
||||
# return stages[str(mult)]
|
@ -11,15 +11,12 @@ from .clone_weights import init_from_model
|
||||
|
||||
def get_cifar_models(config):
|
||||
from .CifarResNet import CifarResNet
|
||||
from .CifarDenseNet import DenseNet
|
||||
from .CifarWideResNet import CifarWideResNet
|
||||
|
||||
super_type = getattr(config, 'super_type', 'basic')
|
||||
if super_type == 'basic':
|
||||
if config.arch == 'resnet':
|
||||
return CifarResNet(config.module, config.depth, config.class_num, config.zero_init_residual)
|
||||
elif config.arch == 'densenet':
|
||||
return DenseNet(config.growthRate, config.depth, config.reduction, config.class_num, config.bottleneck)
|
||||
elif config.arch == 'wideresnet':
|
||||
return CifarWideResNet(config.depth, config.wide_factor, config.class_num, config.dropout)
|
||||
else:
|
||||
@ -44,10 +41,8 @@ def get_cifar_models(config):
|
||||
|
||||
def get_imagenet_models(config):
|
||||
super_type = getattr(config, 'super_type', 'basic')
|
||||
if super_type == 'basic':
|
||||
return get_imagenet_models_basic(config)
|
||||
# NAS searched architecture
|
||||
elif super_type.startswith('infer'):
|
||||
if super_type.startswith('infer'):
|
||||
assert len(super_type.split('-')) == 2, 'invalid super_type : {:}'.format(super_type)
|
||||
infer_mode = super_type.split('-')[1]
|
||||
if infer_mode == 'shape':
|
||||
@ -65,20 +60,6 @@ def get_imagenet_models(config):
|
||||
raise ValueError('invalid super-type : {:}'.format(super_type))
|
||||
|
||||
|
||||
def get_imagenet_models_basic(config):
    """Instantiate the ImageNet model selected by ``config.arch``.

    Supported values are ``'resnet'``, ``'MobileNetV2'`` and ``'ShuffleNetV2'``;
    anything else raises ``ValueError``. Constructor arguments are read from
    attributes of ``config``.
    """
    from .ImagenetResNet import ResNet
    from .MobileNet import MobileNetV2
    from .ShuffleNetV2 import ShuffleNetV2

    if config.arch == 'resnet':
        return ResNet(
            config.block_name,
            config.layers,
            config.deep_stem,
            config.class_num,
            config.zero_init_residual,
            config.groups,
            config.width_per_group,
        )
    if config.arch == 'MobileNetV2':
        return MobileNetV2(
            config.class_num,
            config.width_mult,
            config.input_channel,
            config.last_channel,
            config.block_name,
            config.dropout,
        )
    if config.arch == 'ShuffleNetV2':
        return ShuffleNetV2(config.class_num, config.stages)
    raise ValueError('invalid arch : {:}'.format( config.arch ))
|
||||
|
||||
|
||||
def obtain_model(config):
|
||||
if config.dataset == 'cifar':
|
||||
return get_cifar_models(config)
|
||||
|
@ -1,139 +0,0 @@
|
||||
# SphereFace: Deep Hypersphere Embedding for Face Recognition
|
||||
#
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
|
||||
def myphi(x, m):
    """Evaluate a truncated power series in ``x * m``.

    Computes ``1 - t**2/2! + t**4/4! - t**6/6! + t**8/8! - t**9/9!`` with
    ``t = x * m``.
    """
    # NOTE(review): the last term uses power 9; a cosine Taylor series would
    # continue with t**10/10!. Kept as-is; confirm whether this is intended.
    scaled = x * m
    total = 1
    for power, sign in ((2, -1), (4, 1), (6, -1), (8, 1), (9, -1)):
        term = scaled**power / math.factorial(power)
        total = total + term if sign > 0 else total - term
    return total
|
||||
|
||||
class AngleLinear(nn.Module):
    """Angular-margin linear layer.

    ``forward`` returns the pair ``(cos_theta, phi_theta)``, both of shape
    ``(batch, out_features)`` and both scaled by the L2 norm of each input row.

    Args:
        in_features: length of each input feature vector.
        out_features: number of classes.
        m: margin multiplier; indexes ``self.mlambda``.
        phiflag: if true, ``phi_theta`` is built from the polynomial
            ``self.mlambda[m]``; otherwise from ``myphi``.
    """

    def __init__(self, in_features, out_features, m = 4, phiflag=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.Tensor(in_features, out_features))
        # Uniform init, then rescale the sub-tensors along dim 1.
        self.weight.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.phiflag = phiflag
        self.m = m
        # mlambda[k](cos(t)) == cos(k * t) for k = 0..5.
        self.mlambda = [
            lambda x: x**0,
            lambda x: x**1,
            lambda x: 2*x**2-1,
            lambda x: 4*x**3-3*x,
            lambda x: 8*x**4-8*x**2+1,
            lambda x: 16*x**5-20*x**3+5*x
        ]

    def forward(self, input):
        feats = input         # size=(B,F), F is the feature length
        normed_w = self.weight.renorm(2, 1, 1e-5).mul(1e5)  # size=(F,Classnum)
        feat_norm = feats.pow(2).sum(1).pow(0.5)            # size=B
        w_norm = normed_w.pow(2).sum(0).pow(0.5)            # size=Classnum

        cos_theta = feats.mm(normed_w)                      # size=(B,Classnum)
        cos_theta = cos_theta / feat_norm.view(-1, 1) / w_norm.view(1, -1)
        cos_theta = cos_theta.clamp(-1, 1)

        if self.phiflag:
            cos_m_theta = self.mlambda[self.m](cos_theta)
            # NOTE(review): the source lost its indentation; it is assumed that
            # only the acos call runs without gradient tracking - confirm.
            with torch.no_grad():
                theta = cos_theta.acos()
            k = (self.m*theta/3.14159265).floor()
            n_one = k*0.0 - 1
            phi_theta = (n_one**k) * cos_m_theta - 2*k
        else:
            theta = cos_theta.acos()
            phi_theta = myphi(theta, self.m)
            phi_theta = phi_theta.clamp(-1*self.m, 1)

        scale = feat_norm.view(-1, 1)
        cos_theta = cos_theta * scale
        phi_theta = phi_theta * scale
        return (cos_theta, phi_theta)  # two tensors of size=(B,Classnum)
|
||||
|
||||
|
||||
class SphereFace20(nn.Module):
    """Four-stage convolutional network with PReLU residual units.

    Every stage opens with a stride-2 3x3 convolution and is followed by
    residual units made of two stride-1 3x3 convolutions. The flattened output
    goes through ``fc5`` (``512*7*6 -> 512``) and ``fc6`` (``AngleLinear``).
    Layers are stored as ``conv{stage}_{index}`` / ``relu{stage}_{index}``.

    ``forward`` returns ``(features, logits)`` where ``features`` is the
    ``fc5`` output and ``logits`` is whatever ``fc6`` returns.
    """

    # (stage, input channels, output channels, number of residual units)
    _LAYOUT = ((1, 3, 64, 1), (2, 64, 128, 2), (3, 128, 256, 4), (4, 256, 512, 1))

    def __init__(self, classnum=10574):
        super().__init__()
        self.classnum = classnum
        # input = B*3*112*96; the stages yield B*64*56*48, B*128*28*24,
        # B*256*14*12 and B*512*7*6.
        for stage, c_in, c_out, units in self._LAYOUT:
            self._add_conv_prelu(stage, 1, c_in, c_out, 2)
            for index in range(2, 2 * units + 2):
                self._add_conv_prelu(stage, index, c_out, c_out, 1)

        self.fc5 = nn.Linear(512*7*6, 512)
        self.fc6 = AngleLinear(512, self.classnum)

    def _add_conv_prelu(self, stage, index, c_in, c_out, stride):
        """Register ``conv{stage}_{index}`` (3x3, padding 1) and its PReLU."""
        tag = '{:}_{:}'.format(stage, index)
        setattr(self, 'conv' + tag, nn.Conv2d(c_in, c_out, 3, stride, 1))
        setattr(self, 'relu' + tag, nn.PReLU(c_out))

    def _conv_prelu(self, x, stage, index):
        """Apply ``conv{stage}_{index}`` followed by ``relu{stage}_{index}``."""
        tag = '{:}_{:}'.format(stage, index)
        return getattr(self, 'relu' + tag)(getattr(self, 'conv' + tag)(x))

    def forward(self, x):
        for stage, _, _, units in self._LAYOUT:
            x = self._conv_prelu(x, stage, 1)
            for unit in range(units):
                first = 2 * unit + 2
                inner = self._conv_prelu(x, stage, first)
                x = x + self._conv_prelu(inner, stage, first + 1)

        x = x.view(x.size(0), -1)
        features = self.fc5(x)
        logits = self.fc6(features)
        return features, logits
|
@ -2,7 +2,7 @@
|
||||
|
||||
We propose a Gradient-based neural architecture search approach using a Differentiable Architecture Sampler (GDAS).
|
||||
|
||||
<img src="https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS/data/GDAS.png" width="520">
|
||||
<img src="https://github.com/D-X-Y/NAS-Projects/blob/master/others/GDAS/data/GDAS.png?raw=true" width="520">
|
||||
Figure-1. We utilize a DAG to represent the search space of a neural cell. Different operations (colored arrows) transform one node (square) to its intermediate features (little circles). Meanwhile, each node is the sum of the intermediate features transformed from the previous nodes. As indicated by the solid connections, the neural cell in the proposed GDAS is a sampled sub-graph of this DAG. Specifically, among the intermediate features between every two nodes, GDAS samples one feature in a differentiable way.
|
||||
|
||||
### Requirements
|
||||
@ -51,9 +51,10 @@ You can also find some pre-trained models in [Google Driver](https://drive.googl
|
||||
|
||||
|
||||
### Experimental Results
|
||||
<img src="https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS/data/imagenet-results.png" width="700">
|
||||
Figure-2. Top-1 and top-5 errors on ImageNet.
|
||||
|
||||
<img src="https://github.com/D-X-Y/NAS-Projects/blob/master/others/GDAS/data/imagenet-results.png?raw=true" width="700">
|
||||
|
||||
Figure-2. Top-1 and top-5 errors on ImageNet.
|
||||
|
||||
### Correction
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user