This commit is contained in:
D-X-Y 2019-02-01 01:27:38 +11:00
commit 13e908f4df
104 changed files with 102494 additions and 0 deletions

104
.gitignore vendored Executable file
View File

@ -0,0 +1,104 @@
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# Pycharm project
.idea
snapshots
*.pytorch
*.tar.bz
data
.*.swp
main_main.py
*.pdf
*/*.pdf
# Device
scripts-nas/.nfs00*
*/.nfs00*

21
LICENSE Executable file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Xuanyi Dong
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

17
README.md Normal file
View File

@ -0,0 +1,17 @@
# GDAS
By Xuanyi Dong and Yi Yang
University of Technology Sydney
Requirements
- PyTorch 1.0
- Python 3.6
```
conda install pytorch torchvision cuda100 -c pytorch
```
## Algorithm
Searching CNNs
```
```

View File

@ -0,0 +1,27 @@
{
"data_name" : ["str", "PTB"],
"data_path" : ["str", "./data/data/penn"],
"emsize" : ["int", 850],
"nhid" : ["int", 850],
"nhidlast" : ["int", 850],
"LR" : ["float", 20],
"clip" : ["float", 0.25],
"epochs" : ["int", 3000],
"train_batch": ["int", 64],
"eval_batch": ["int", 10],
"test_batch": ["int", 1],
"bptt" : ["int", 35],
"dropout" : ["float", 0.75],
"dropouth" : ["float", 0.25],
"dropoutx" : ["float", 0.75],
"dropouti" : ["float", 0.2],
"dropoute" : ["float", 0.1],
"nonmono" : ["int", 5],
"alpha" : ["float", 0],
"beta" : ["float", 1e-3],
"wdecay" : ["float", 8e-7],
"max_seq_len_delta" : ["int", 20]
}

View File

@ -0,0 +1,27 @@
{
"data_name" : ["str", "WT2"],
"data_path" : ["str", "./data/data/wikitext-2"],
"emsize" : ["int", 700],
"nhid" : ["int", 700],
"nhidlast" : ["int", 700],
"LR" : ["float", 20],
"clip" : ["float", 0.25],
"epochs" : ["int", 3000],
"train_batch": ["int", 64],
"eval_batch": ["int", 10],
"test_batch": ["int", 1],
"bptt" : ["int", 35],
"dropout" : ["float", 0.75],
"dropouth" : ["float", 0.15],
"dropoutx" : ["float", 0.75],
"dropouti" : ["float", 0.2],
"dropoute" : ["float", 0.1],
"nonmono" : ["int", 5],
"alpha" : ["float", 0],
"beta" : ["float", 1e-3],
"wdecay" : ["float", 5e-7],
"max_seq_len_delta" : ["int", 20]
}

8
configs/cos1800.config Normal file
View File

@ -0,0 +1,8 @@
{
"type" : ["str", "cosine"],
"batch_size": ["int", 128],
"epochs" : ["int", 1800],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0001],
"LR" : ["float", 0.2]
}

8
configs/cos600.config Normal file
View File

@ -0,0 +1,8 @@
{
"type" : ["str", "cosine"],
"batch_size": ["int", 128],
"epochs" : ["int", 600],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0005],
"LR" : ["float", 0.2]
}

View File

@ -0,0 +1,13 @@
{
"type" : ["str", "cosine"],
"batch_size": ["int", 128],
"epochs" : ["int", 600],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0005],
"LR" : ["float", 0.025],
"auxiliary" : ["bool", 1],
"auxiliary_weight" : ["float", 0.4],
"grad_clip" : ["float", 5],
"cutout" : ["int", 16],
"drop_path_prob" : ["float", 0.2]
}

View File

@ -0,0 +1,13 @@
{
"type" : ["str", "cosine"],
"batch_size": ["int", 96],
"epochs" : ["int", 600],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0003],
"LR" : ["float", 0.025],
"auxiliary" : ["bool", 1],
"auxiliary_weight" : ["float", 0.4],
"grad_clip" : ["float", 5],
"cutout" : ["int", 16],
"drop_path_prob" : ["float", 0.2]
}

View File

@ -0,0 +1,13 @@
{
"type" : ["str", "cosine"],
"batch_size": ["int", 96],
"epochs" : ["int", 600],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0003],
"LR" : ["float", 0.025],
"auxiliary" : ["bool", 1],
"auxiliary_weight" : ["float", 0.4],
"grad_clip" : ["float", 5],
"cutout" : ["int", 0],
"drop_path_prob" : ["float", 0.3]
}

View File

@ -0,0 +1,15 @@
{
"type" : ["str", "steplr"],
"batch_size": ["int", 128],
"epochs" : ["int", 250],
"decay_period": ["int", 1],
"gamma" : ["float", 0.97],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.00003],
"LR" : ["float", 0.1],
"label_smooth": ["float", 0.1],
"auxiliary" : ["bool", 1],
"auxiliary_weight" : ["float", 0.4],
"grad_clip" : ["float", 5],
"drop_path_prob" : ["float", 0]
}

10
configs/pyramidC10.config Normal file
View File

@ -0,0 +1,10 @@
{
"type" : ["str", "multistep"],
"batch_size": ["int", 128],
"epochs" : ["int", 300],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0001],
"LR" : ["float", 0.1],
"milestones": ["int", [150, 225]],
"gammas" : ["float", [0.1, 0.1]]
}

View File

@ -0,0 +1,10 @@
{
"type" : ["str", "multistep"],
"batch_size": ["int", 128],
"epochs" : ["int", 300],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0001],
"LR" : ["float", 0.5],
"milestones": ["int", [150, 225]],
"gammas" : ["float", [0.1, 0.1]]
}

10
configs/resnet165.config Normal file
View File

@ -0,0 +1,10 @@
{
"type" : ["str", "multistep"],
"batch_size": ["int", 128],
"epochs" : ["int", 165],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0001],
"LR" : ["float", 0.01],
"milestones": ["int", [1, 83, 124]],
"gammas" : ["float", [10, 0.1, 0.1]]
}

10
configs/resnet200.config Normal file
View File

@ -0,0 +1,10 @@
{
"type" : ["str", "multistep"],
"batch_size": ["int", 128],
"epochs" : ["int", 200],
"momentum" : ["float", 0.9],
"decay" : ["float", 0.0005],
"LR" : ["float", 0.01],
"milestones": ["int", [1 , 60, 120, 160]],
"gammas" : ["float", [10, 0.2, 0.2, 0.2]]
}

49
data/Get-PTB-WT2.sh Normal file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Download the language-modelling corpora used by the RNN experiments and lay
# them out as <dataset>/{train,valid,test}.txt under ./data:
#   wikitext-2, wikitext-103, penn (word level) and pennchar (character level).
# Adapted from https://github.com/salesforce/awd-lstm-lm
#
# Usage:    bash Get-PTB-WT2.sh
# Requires: wget, unzip, tar

# Stop at the first failing command or unset variable. Without this a failed
# download or `cd` would let the later `mv` / `rm -rf` run in the wrong place.
set -eu

# Rename the wiki.<split>.tokens files inside directory $1 to <split>.txt.
rename_wikitext_splits() {
  local dir=$1
  local split
  for split in train valid test; do
    mv -- "${dir}/wiki.${split}.tokens" "${dir}/${split}.txt"
  done
}

# Move the PTB split files named <$1>.<split>.txt out of the extracted
# simple-examples archive into directory $2 as <split>.txt.
collect_ptb_splits() {
  local prefix=$1
  local dest=$2
  local split
  mkdir -p -- "${dest}"
  for split in train test valid; do
    mv -- "simple-examples/data/${prefix}.${split}.txt" "${dest}/${split}.txt"
  done
}

echo "=== Acquiring datasets ==="
echo "---"
mkdir -p save
mkdir -p data
cd data

echo "- Downloading WikiText-2 (WT2)"
wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
unzip -q wikitext-2-v1.zip
rename_wikitext_splits wikitext-2

echo "- Downloading WikiText-103 (WT103)"
# No --quiet here, as in the original script: wget's progress output stays on.
wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
unzip -q wikitext-103-v1.zip
rename_wikitext_splits wikitext-103

echo "- Downloading Penn Treebank (PTB)"
wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
tar -xzf simple-examples.tgz
collect_ptb_splits ptb penn

echo "- Downloading Penn Treebank (Character)"
collect_ptb_splits ptb.char pennchar

# Everything needed has been moved out of the extracted archive.
rm -rf simple-examples/

echo "---"
echo "Happy language modeling :)"

90
data/README.BACK Executable file
View File

@ -0,0 +1,90 @@
# EraseReLU: A Simple Way to Ease the Training of Deep Convolution Neural Networks
This project implements [this paper](https://arxiv.org/abs/1709.07634) in [PyTorch](https://pytorch.org). The implementation refers to [ResNeXt-DenseNet](https://github.com/D-X-Y/ResNeXt-DenseNet)
## Usage
All the model definitions are located in the directory `models`.
All the training scripts are located in the directories `scripts` and `Xscripts`.
To train the ResNet-110 with EraseReLU on CIFAR-10:
```bash
sh scripts/warmup_train_2gpu.sh resnet110_erase cifar10
```
To train the original ResNet-110 on CIFAR-10:
```bash
sh scripts/warmup_train_2gpu.sh resnet110 cifar10
```
### MiniImageNet for PatchShuffle
```
sh scripts-shuffle/train_resnet_00000.sh ResNet18
sh scripts-shuffle/train_resnet_10000.sh ResNet18
sh scripts-shuffle/train_resnet_11000.sh ResNet18
```
```
sh scripts-shuffle/train_pmd_00000.sh PMDNet18_300
sh scripts-shuffle/train_pmd_00000.sh PMDNet34_300
sh scripts-shuffle/train_pmd_00000.sh PMDNet50_300
sh scripts-shuffle/train_pmd_11000.sh PMDNet18_300
sh scripts-shuffle/train_pmd_11000.sh PMDNet34_300
sh scripts-shuffle/train_pmd_11000.sh PMDNet50_300
```
### ImageNet
- Use the script `train_imagenet.sh` to train models in PyTorch.
- Or you can use the code in `extra_torch` to train models in Torch.
#### Group Normalization
```
sh Xscripts/train_vgg_gn.sh 0,1,2,3,4,5,6,7 vgg16_gn 256
sh Xscripts/train_vgg_gn.sh 0,1,2,3,4,5,6,7 vgg16_gn 64
sh Xscripts/train_vgg_gn.sh 0,1,2,3,4,5,6,7 vgg16_gn 16
sh Xscripts/train_res_gn.sh 0,1,2,3,4,5,6,7 resnext50_32_4_gn 16
```
| Model | Batch Size | Top-1 Error | Top-5 Error |
|:--------------:|:----------:|:-----------:|:-----------:|
| VGG16-GN | 256 | 28.82 | 9.64 |
## Results
| Model | Error on CIFAR-10 | Error on CIFAR-100|
|:--------------:|:-----------------:|:-----------------:|
| ResNet-56 | 6.97 | 30.60 |
| ResNet-56 (ER) | 6.23 | 28.56 |
## Citation
If you find this project helps your research, please consider citing the paper:
```
@article{dong2017eraserelu,
title={EraseReLU: A Simple Way to Ease the Training of Deep Convolution Neural Networks},
author={Dong, Xuanyi and Kang, Guoliang and Zhan, Kun and Yang, Yi},
journal={arXiv preprint arXiv:1709.07634},
year={2017}
}
```
## Download the ImageNet dataset
The ImageNet Large Scale Visual Recognition Challenge (ILSVRC) dataset has 1000 categories and 1.2 million images. The images do not need to be preprocessed or packaged in any database, but the validation images need to be moved into appropriate subfolders.
1. Download the images from http://image-net.org/download-images
2. Extract the training data:
```bash
mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
cd ..
```
3. Extract the validation data and move images to subfolders:
```bash
mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xvf ILSVRC2012_img_val.tar
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
```

5
data/README.md Normal file
View File

@ -0,0 +1,5 @@
# Tiny-ImageNet
The official website is [here](https://tiny-imagenet.herokuapp.com/). Please run `python tiny-imagenet.py` to generate the correct format of Tiny ImageNet for training.
# PTB and WT2
`bash Get-PTB-WT2.sh`

1000
data/classes.txt Normal file

File diff suppressed because it is too large Load Diff

3761
data/data/penn/test.txt Normal file

File diff suppressed because it is too large Load Diff

42068
data/data/penn/train.txt Normal file

File diff suppressed because it is too large Load Diff

3370
data/data/penn/valid.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

36718
data/data/wikitext-2/train.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

53
data/tiny-imagenet.py Normal file
View File

@ -0,0 +1,53 @@
import os, sys
from pathlib import Path
url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
def load_val():
  """Read the Tiny-ImageNet validation annotation file.

  Parses ``tiny-imagenet-200/val/val_annotations.txt`` (relative to the
  current working directory) as tab-separated text. The first column of each
  line is taken as the image file name and the second as its label; any
  further columns are ignored.

  Returns:
    A pair ``(images, labels)`` of equally long lists of strings.

  Raises:
    IndexError: if a non-blank line has fewer than two tab-separated fields.
  """
  path = 'tiny-imagenet-200/val/val_annotations.txt'
  # The context manager closes the file even if reading raises.
  with open(path, 'r') as cfile:
    # Skip blank lines (e.g. a trailing empty line) instead of failing on them.
    content = [line.strip().split('\t') for line in cfile if line.strip()]
  images = [fields[0] for fields in content]
  labels = [fields[1] for fields in content]
  return images, labels
def main():
  """Download Tiny-ImageNet and regroup its validation images by label.

  Steps, all relative to the current working directory:
    1. fetch the archive at `url` with wget and unpack it with unzip,
       after removing any existing ``tiny-imagenet-200`` directory;
    2. read the validation annotations via `load_val`;
    3. copy every validation image into ``new_val/<label>/``;
    4. replace the original ``val`` directory with the regrouped one.
  """
  # Function-scope import: only this function needs shutil.
  import shutil

  os.system("wget {:}".format(url))
  os.system("rm -rf tiny-imagenet-200")
  os.system("unzip -o tiny-imagenet-200.zip")
  images, labels = load_val()
  savedir = 'tiny-imagenet-200/new_val'
  os.makedirs(savedir, exist_ok=True)
  for image, label in zip(images, labels):
    cdir = os.path.join(savedir, label)
    os.makedirs(cdir, exist_ok=True)
    ori_path = os.path.join('tiny-imagenet-200/val/images', image)
    # Copy in-process rather than spawning one `cp` shell per image; a failed
    # copy now raises instead of being silently ignored.
    shutil.copy(ori_path, cdir)
  # Swap the flat validation folder for the per-label layout built above.
  shutil.rmtree('tiny-imagenet-200/val')
  shutil.move(savedir, 'tiny-imagenet-200/val')
def generate_salt_pepper():
  """Write a salt-and-pepper-noised copy of the validation images.

  For every sub-directory of ``tiny-imagenet-200/val``, each ``*.JPEG`` file
  is read with OpenCV, passed through ``iaa.SaltAndPepper(p=0.2)`` and written
  under the same name to ``tiny-imagenet-200/val-noise/<sub-directory>/``.
  Asserts that the source directory exists before doing anything.
  """
  src_root = Path('tiny-imagenet-200/val')
  dst_root = Path('tiny-imagenet-200/val-noise')
  assert src_root.exists(), '{:} does not exist'.format(src_root)
  # Imported only after the assert, so a missing dataset is reported first.
  from imgaug import augmenters as iaa
  import cv2
  noiser = iaa.SaltAndPepper(p=0.2)
  for class_dir in src_root.iterdir():
    if class_dir.is_dir():
      out_dir = dst_root / class_dir.name
      if not out_dir.exists():
        os.makedirs(str(out_dir))
      for src_image in class_dir.glob('*.JPEG'):
        noisy = noiser.augment_image(cv2.imread(str(src_image)))
        cv2.imwrite(str(out_dir / src_image.name), noisy)
      print ('{:} done'.format(class_dir))
if __name__ == "__main__":
  # NOTE(review): main() (download, unzip, regroup validation images) is
  # commented out, so running this file only calls generate_salt_pepper(),
  # which asserts that tiny-imagenet-200/val already exists. Confirm that
  # this is the intended default.
  #main()
  generate_salt_pepper()

310
exps-cnn/acc_search_v2.py Normal file
View File

@ -0,0 +1,310 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from nas import Network, NetworkACC2, NetworkV3, NetworkV4, NetworkV5, NetworkFACC1
from nas import return_alphas_str
from train_utils import main_procedure
from scheduler import load_config
# Maps each accepted --arch value to the search-network class it selects.
Networks = {'base': Network, 'acc2': NetworkACC2, 'facc1': NetworkFACC1, 'NetworkV3': NetworkV3, 'NetworkV4': NetworkV4, 'NetworkV5': NetworkV5}

# Command-line interface. Options without a default are None when omitted.
parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--dataset', type=str, choices=['cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
parser.add_argument('--batch_size', type=int, help='the batch size')
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
parser.add_argument('--tau_max', type=float, help='initial tau')
parser.add_argument('--tau_min', type=float, help='minimum tau')
parser.add_argument('--momentum', type=float, help='momentum')
parser.add_argument('--weight_decay', type=float, help='weight decay')
parser.add_argument('--epochs', type=int, help='num of training epochs')
# architecture learning rate
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
# network size
parser.add_argument('--init_channels', type=int, help='num of init channels')
parser.add_argument('--layers', type=int, help='total number of layers')
# regularisation and the configuration of the final model
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str , help='the model configuration')
# resume
parser.add_argument('--resume', type=str , help='the resume path')
parser.add_argument('--only_base',action='store_true', default=False, help='only train the searched model')
# split data
parser.add_argument('--validate', action='store_true', default=False, help='split train-data int train/val or not')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text always promised a default of 200; actually set it, because
# train()/infer() compute `step % args.print_freq` and fail on None.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Draw a seed when none is given, then seed Python's and PyTorch's RNGs.
if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def _load_search_checkpoint(path, basemodel, base_optimizer, arch_optimizer, base_scheduler):
  """Restore the search state saved at `path` into the given objects.

  Returns:
    ``(start_epoch, genotypes)`` as stored under the checkpoint's 'epoch'
    and 'genotypes' keys.
  """
  checkpoint = torch.load(path)
  basemodel.load_state_dict( checkpoint['state_dict'] )
  base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
  arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
  base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
  return checkpoint['epoch'], checkpoint['genotypes']


def _search(log):
  """Run the architecture search configured by the global `args`.

  Builds the CIFAR data loaders, the search network and its two optimizers,
  resumes from a checkpoint when one is available, runs the epoch loop (or
  skips it when --only_base is given) and finally hands the resulting
  genotype to `main_procedure`. All progress messages go through `print_log`
  with the open log file `log`.
  """
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("Torch version : {}".format(torch.__version__), log)
  print_log("CUDA version : {}".format(torch.version.cuda), log)
  print_log("cuDNN version : {}".format(cudnn.version()), log)
  print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
  args.dataset = args.dataset.lower()

  # Mean + Std
  if args.dataset == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif args.dataset == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  else:
    raise TypeError("Unknow dataset : {:}".format(args.dataset))
  # Data Augmentation
  if args.dataset == 'cifar10' or args.dataset == 'cifar100':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
             transforms.Normalize(mean, std)]
    # --cutout has no default, so treat an omitted value like "no cutout"
    # instead of failing on `None > 0`.
    if args.cutout is not None and args.cutout > 0 : lists += [Cutout(args.cutout)]
    train_transform = transforms.Compose(lists)
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
  else:
    raise TypeError("Unknow dataset : {:}".format(args.dataset))
  # Datasets
  if args.dataset == 'cifar10':
    train_data = dset.CIFAR10(args.data_path, train= True, transform=train_transform, download=True)
    test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform , download=True)
    num_classes = 10
  elif args.dataset == 'cifar100':
    train_data = dset.CIFAR100(args.data_path, train= True, transform=train_transform, download=True)
    test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform , download=True)
    num_classes = 100
  else:
    raise TypeError("Unknow dataset : {:}".format(args.dataset))
  # Data Loader
  if args.validate:
    # Split the training set itself: the first `train_portion` of a shuffled
    # index list is used for training, the remainder as the held-out loader.
    indices = list(range(len(train_data)))
    split = int(args.train_portion * len(indices))
    random.shuffle(indices)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                     sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
                     pin_memory=True, num_workers=args.workers)
    test_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                     sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
                     pin_memory=True, num_workers=args.workers)
  else:
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)

  # network and criterion
  criterion = torch.nn.CrossEntropyLoss().cuda()
  basemodel = Networks[args.arch](args.init_channels, num_classes, args.layers)
  model = torch.nn.DataParallel(basemodel).cuda()
  print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
  print_log("Train-transformation : {:}\nTest--transformation : {:}".format(train_transform, test_transform), log)

  # optimizer and LR-scheduler
  base_optimizer = torch.optim.SGD (basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
  #base_optimizer = torch.optim.Adam(basemodel.base_parameters(), lr=args.learning_rate_max, betas=(0.5, 0.999), weight_decay=args.weight_decay)
  base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
  arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

  # snapshot: an explicit --resume file wins over the checkpoint in save_path
  checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
  if args.resume is not None and os.path.isfile(args.resume):
    start_epoch, genotypes = _load_search_checkpoint(args.resume, basemodel, base_optimizer, arch_optimizer, base_scheduler)
    print_log('Load resume from {:} with start-epoch = {:}'.format(args.resume, start_epoch), log)
  elif os.path.isfile(checkpoint_path):
    start_epoch, genotypes = _load_search_checkpoint(checkpoint_path, basemodel, base_optimizer, arch_optimizer, base_scheduler)
    print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
  else:
    start_epoch, genotypes = 0, {}
    print_log('Train model-search from scratch.', log)

  config = load_config(args.model_config)

  if args.only_base:
    print_log('---- Only Train the Searched Model ----', log)
    # NOTE(review): the meaning of the literals 36 and 20 is defined by
    # train_utils.main_procedure, which is not visible here — confirm.
    main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
    return

  # Main loop
  start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
  for epoch in range(start_epoch, args.epochs):
    base_scheduler.step()

    # tau moves linearly from tau_max towards tau_min as epochs advance
    basemodel.set_tau( args.tau_max - epoch*1.0/args.epochs*(args.tau_max-args.tau_min) )
    #if epoch + 2 == args.epochs:
    #  torch.cuda.empty_cache()
    #  basemodel.set_gumbel(False)

    need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}] [Batch={:d}], tau={:}'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr()), args.batch_size, basemodel.get_tau()), log)

    genotype = basemodel.genotype()
    print_log('genotype = {:}'.format(genotype), log)

    print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)

    # training
    train_acc1, train_acc5, train_obj, train_time \
        = train(train_loader, test_loader, model, criterion, base_optimizer, arch_optimizer, epoch, log)
    total_train_time += train_time
    # validation
    valid_acc1, valid_acc5, valid_obj = infer(test_loader, model, criterion, epoch, log)
    print_log('{:03d}/{:03d}, Train-Accuracy = {:.2f}, Test-Accuracy = {:.2f}'.format(epoch, args.epochs, train_acc1, valid_acc1), log)

    # save genotype
    genotypes[epoch] = basemodel.genotype()
    # save checkpoint ('epoch' stores the next epoch to run)
    torch.save({'epoch' : epoch + 1,
                'args' : deepcopy(args),
                'state_dict': basemodel.state_dict(),
                'genotypes' : genotypes,
                'base_optimizer' : base_optimizer.state_dict(),
                'arch_optimizer' : arch_optimizer.state_dict(),
                'base_scheduler' : base_scheduler.state_dict()},
               checkpoint_path)
    print_log('----> Save into {:}'.format(checkpoint_path), log)

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

  print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

  # clear GPU cache
  torch.cuda.empty_cache()
  main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)


def main():
  """Entry point: set up the per-seed output directory and log, then search.

  Rewrites ``args.save_path`` to ``<save_path>/seed-<manualSeed>``, creates
  that directory, opens ``log-seed-<manualSeed>.txt`` inside it for writing
  and runs `_search`. The log file is closed on every exit path, including
  the --only_base early return and failures.
  """
  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  os.makedirs(args.save_path, exist_ok=True)
  log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
  try:
    _search(log)
  finally:
    log.close()
def train(train_queue, valid_queue, model, criterion, base_optimizer, arch_optimizer, epoch, log):
  """Run one search epoch, alternating architecture and weight updates.

  For every batch of `train_queue`, the architecture optimizer first takes a
  step on a batch drawn from `valid_queue`; the base optimizer then takes a
  step on the training batch, with gradient-norm clipping at
  ``args.grad_clip``. Progress is logged every ``args.print_freq`` steps and
  on the last step.

  Args:
    train_queue: iterable of (inputs, targets) batches for the weight update.
    valid_queue: iterable of (inputs, targets) batches for the architecture
      update; it is restarted whenever it is exhausted.
    model: wrapped network; ``model.module.base_parameters()`` must exist.
    criterion: loss callable applied to (logits, targets).
    base_optimizer: optimizer for the network weights.
    arch_optimizer: optimizer for the architecture parameters.
    epoch: current epoch index, used only in log messages.
    log: log file handle passed to `print_log`.

  Returns:
    ``(top1.avg, top5.avg, objs.avg, batch_time.sum)`` for the training
    batches: average top-1 / top-5 precision, average loss and the summed
    per-step time.
  """
  data_time, batch_time = AverageMeter(), AverageMeter()
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
  model.train()

  valid_iter = iter(valid_queue)
  end = time.time()
  for step, (inputs, targets) in enumerate(train_queue):
    batch = inputs.size(0)

    # NOTE(review): inputs are passed to the model without an explicit
    # .cuda(); presumably the DataParallel wrapper places them — confirm.
    #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # Draw the next search batch, restarting the iterator once it runs out.
    # Only StopIteration is handled, so real data-loading errors and
    # KeyboardInterrupt are no longer swallowed and retried.
    try:
      input_search, target_search = next(valid_iter)
    except StopIteration:
      valid_iter = iter(valid_queue)
      input_search, target_search = next(valid_iter)

    target_search = target_search.cuda(non_blocking=True)

    # update the architecture
    arch_optimizer.zero_grad()
    output_search = model(input_search)
    arch_loss = criterion(output_search, target_search)
    arch_loss.backward()
    arch_optimizer.step()

    # update the parameters
    base_optimizer.zero_grad()
    logits = model(inputs)
    loss = criterion(logits, targets)

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
    base_optimizer.step()

    prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
    objs.update(loss.item() , batch)
    top1.update(prec1.item(), batch)
    top5.update(prec5.item(), batch)

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

    if step % args.print_freq == 0 or (step+1) == len(train_queue):
      Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
      Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
      Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
      print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg, batch_time.sum
def infer(valid_queue, model, criterion, epoch, log):
  """Evaluate `model` on `valid_queue` without computing gradients.

  Logs running loss and precision every ``args.print_freq`` steps and on the
  last step.

  Returns:
    ``(top1.avg, top5.avg, objs.avg)``: average top-1 precision, average
    top-5 precision and average loss over the queue.
  """
  loss_meter = AverageMeter()
  acc1_meter = AverageMeter()
  acc5_meter = AverageMeter()
  model.eval()

  with torch.no_grad():
    for step, (inputs, targets) in enumerate(valid_queue):
      num, _channels, _height, _width = inputs.size()
      targets = targets.cuda(non_blocking=True)

      logits = model(inputs)
      batch_loss = criterion(logits, targets)
      acc1, acc5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))

      loss_meter.update(batch_loss.item(), num)
      acc1_meter.update(acc1.item(), num)
      acc5_meter.update(acc5.item(), num)

      if step % args.print_freq == 0 or (step+1) == len(valid_queue):
        header = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
        stats = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=loss_meter, top1=acc1_meter, top5=acc5_meter)
        print_log(header + ' ' + stats, log)

  return acc1_meter.avg, acc5_meter.avg, loss_meter.avg
# Run the search only when this file is executed as a script.
if __name__ == '__main__':
  main()

397
exps-cnn/acc_search_v3.py Normal file
View File

@ -0,0 +1,397 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from nas import Network, NetworkACC2, NetworkV3, NetworkV4, NetworkV5, NetworkFACC1
from nas import return_alphas_str
from train_utils import main_procedure
from scheduler import load_config
# Maps each accepted --arch value to the search-network class it selects.
Networks = {'base': Network, 'acc2': NetworkACC2, 'facc1': NetworkFACC1, 'NetworkV3': NetworkV3, 'NetworkV4': NetworkV4, 'NetworkV5': NetworkV5}

# Command-line interface. Options without a default are None when omitted.
parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--dataset', type=str, choices=['cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
parser.add_argument('--batch_size', type=int, help='the batch size')
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
parser.add_argument('--tau_max', type=float, help='initial tau')
parser.add_argument('--tau_min', type=float, help='minimum tau')
parser.add_argument('--momentum', type=float, help='momentum')
parser.add_argument('--weight_decay', type=float, help='weight decay')
parser.add_argument('--epochs', type=int, help='num of training epochs')
# architecture learning rate
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
# network size
parser.add_argument('--init_channels', type=int, help='num of init channels')
parser.add_argument('--layers', type=int, help='total number of layers')
# regularisation and the configuration of the final model
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str , help='the model configuration')
# resume
parser.add_argument('--resume', type=str , help='the resume path')
parser.add_argument('--only_base',action='store_true', default=False, help='only train the searched model')
# split data
parser.add_argument('--validate', action='store_true', default=False, help='split train-data int train/val or not')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text always promised a default of 200; actually set it so that an
# omitted flag does not leave args.print_freq as None.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Draw a seed when none is given, then seed Python's and PyTorch's RNGs.
if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def main():
    """Search an architecture on CIFAR-10/100, then train the searched genotype.

    All settings come from the module-level ``args``.  A checkpoint is written
    to ``checkpoint-search.pth`` inside the seed-specific save directory after
    every epoch; on start-up ``--resume`` is tried first, then that checkpoint.

    Raises:
        TypeError: if ``args.dataset`` is neither ``cifar10`` nor ``cifar100``.
    """
    # Init logger
    args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("CUDA version : {}".format(torch.version.cuda), log)
    print_log("cuDNN version : {}".format(cudnn.version()), log)
    print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
    args.dataset = args.dataset.lower()

    # Per-dataset normalisation statistics, dataset class and class count.
    # Unknown names are rejected once here, so no later branch needs to re-check.
    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
        dataset_cls, num_classes = dset.CIFAR10, 10
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
        dataset_cls, num_classes = dset.CIFAR100, 100
    else:
        raise TypeError("Unknow dataset : {:}".format(args.dataset))

    # Data Augmentation
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
             transforms.Normalize(mean, std)]
    if args.cutout > 0 : lists += [Cutout(args.cutout)]
    train_transform = transforms.Compose(lists)
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

    # Datasets
    train_data = dataset_cls(args.data_path, train= True, transform=train_transform, download=True)
    test_data = dataset_cls(args.data_path, train=False, transform=test_transform , download=True)

    # Data Loader
    if args.validate:
        # Both loaders read the training set; a shuffled index list is cut at
        # train_portion so the two samplers see disjoint subsets.
        indices = list(range(len(train_data)))
        split = int(args.train_portion * len(indices))
        random.shuffle(indices)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                                   sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
                                                   pin_memory=True, num_workers=args.workers)
        test_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                                  sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
                                                  pin_memory=True, num_workers=args.workers)
    else:
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)

    # network and criterion
    criterion = torch.nn.CrossEntropyLoss().cuda()
    basemodel = Networks[args.arch](args.init_channels, num_classes, args.layers)
    model = torch.nn.DataParallel(basemodel).cuda()
    print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
    print_log("Train-transformation : {:}\nTest--transformation : {:}".format(train_transform, test_transform), log)

    # optimizer and LR-scheduler
    base_optimizer = torch.optim.SGD (basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
    #base_optimizer = torch.optim.Adam(basemodel.base_parameters(), lr=args.learning_rate_max, betas=(0.5, 0.999), weight_decay=args.weight_decay)
    base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

    # snapshot: an explicit --resume file wins over the run's own checkpoint
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
    if args.resume is not None and os.path.isfile(args.resume):
        load_kind, load_path = 'resume', args.resume
    elif os.path.isfile(checkpoint_path):
        load_kind, load_path = 'checkpoint', checkpoint_path
    else:
        load_kind, load_path = None, None
    if load_path is not None:
        checkpoint = torch.load(load_path)
        start_epoch = checkpoint['epoch']
        basemodel.load_state_dict( checkpoint['state_dict'] )
        base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
        arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
        base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
        genotypes = checkpoint['genotypes']
        print_log('Load {:} from {:} with start-epoch = {:}'.format(load_kind, load_path, start_epoch), log)
    else:
        start_epoch, genotypes = 0, {}
        print_log('Train model-search from scratch.', log)

    config = load_config(args.model_config)

    if args.only_base:
        print_log('---- Only Train the Searched Model ----', log)
        main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
        # Close the log on this early exit as well.
        log.close()
        return

    # Main loop
    start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
    # train_joint returns no architecture accuracy.  Seed the value so the
    # per-epoch summary cannot hit an unbound name when the first epoch that
    # runs is the final (joint) one, e.g. epochs == 1 or a resume at the last
    # epoch.  Otherwise the final epoch keeps reporting the previous value.
    Arch__acc1 = float('nan')
    for epoch in range(start_epoch, args.epochs):
        base_scheduler.step()
        # tau moves linearly from tau_max towards tau_min as epochs advance
        basemodel.set_tau( args.tau_max - epoch*1.0/args.epochs*(args.tau_max-args.tau_min) )
        #if epoch + 1 == args.epochs:
        #  torch.cuda.empty_cache()
        #  basemodel.set_gumbel(False)

        need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}] [Batch={:d}], tau={:}'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr()), args.batch_size, basemodel.get_tau()), log)

        genotype = basemodel.genotype()
        print_log('genotype = {:}'.format(genotype), log)
        print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)

        # training: the last epoch updates weights and architecture jointly,
        # every earlier epoch runs a weight pass followed by an architecture pass
        if epoch + 1 == args.epochs:
            train_acc1, train_acc5, train_obj, train_time \
                = train_joint(train_loader, test_loader, model, criterion, base_optimizer, arch_optimizer, epoch, log)
            total_train_time += train_time
        else:
            train_acc1, train_acc5, train_obj, train_time \
                = train_base(train_loader, None, model, criterion, base_optimizer, None, epoch, log)
            total_train_time += train_time
            Arch__acc1, Arch__acc5, Arch__obj, train_time \
                = train_arch(None , test_loader, model, criterion, None, arch_optimizer, epoch, log)
            total_train_time += train_time
        # validation
        valid_acc1, valid_acc5, valid_obj = infer(test_loader, model, criterion, epoch, log)
        print_log('{:03d}/{:03d}, Train-Accuracy = {:.2f}, Arch-Accuracy = {:.2f}, Test-Accuracy = {:.2f}'.format(epoch, args.epochs, train_acc1, Arch__acc1, valid_acc1), log)

        # save genotype
        genotypes[epoch] = basemodel.genotype()
        # save checkpoint ('epoch' stores the next epoch to run)
        torch.save({'epoch' : epoch + 1,
                    'args' : deepcopy(args),
                    'state_dict': basemodel.state_dict(),
                    'genotypes' : genotypes,
                    'base_optimizer' : base_optimizer.state_dict(),
                    'arch_optimizer' : arch_optimizer.state_dict(),
                    'base_scheduler' : base_scheduler.state_dict()},
                   checkpoint_path)
        print_log('----> Save into {:}'.format(checkpoint_path), log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

    # clear GPU cache
    torch.cuda.empty_cache()
    main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
    log.close()
def train_base(train_queue, _, model, criterion, base_optimizer, __, epoch, log):
    """Run one pass over ``train_queue`` updating only the base weights.

    The ``_`` and ``__`` parameters are unused placeholders.

    Returns:
        (top-1 average, top-5 average, average loss, summed batch time)
    """
    data_time, batch_time = AverageMeter(), AverageMeter()
    objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    tic = time.time()
    for step, (inputs, targets) in enumerate(train_queue):
        batch, _c, _h, _w = inputs.size()
        #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
        data_time.update(time.time() - tic)

        # one optimisation step on the base parameters, with gradient clipping
        base_optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
        base_optimizer.step()

        prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        for meter, value in ((objs, loss), (top1, prec1), (top5, prec5)):
            meter.update(value.item(), batch)

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        if not (step % args.print_freq == 0 or (step+1) == len(train_queue)):
            continue
        Sstr = ' TRAIN-BASE ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
        Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
        Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
        print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

    return top1.avg, top5.avg, objs.avg, batch_time.sum
def train_arch(_, valid_queue, model, criterion, __, arch_optimizer, epoch, log):
    """Run one pass over ``valid_queue`` stepping only ``arch_optimizer``.

    The ``_`` and ``__`` parameters are unused placeholders.

    Returns:
        (top-1 average, top-5 average, average loss, summed batch time)
    """
    data_time, batch_time = AverageMeter(), AverageMeter()
    objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    tic = time.time()
    for step, (inputs, targets) in enumerate(valid_queue):
        batch, _c, _h, _w = inputs.size()
        #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
        data_time.update(time.time() - tic)

        # one optimisation step on the architecture parameters
        arch_optimizer.zero_grad()
        outputs = model(inputs)
        arch_loss = criterion(outputs, targets)
        arch_loss.backward()
        arch_optimizer.step()

        prec1, prec5 = obtain_accuracy(outputs.data, targets.data, topk=(1, 5))
        for meter, value in ((objs, arch_loss), (top1, prec1), (top5, prec5)):
            meter.update(value.item(), batch)

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        if not (step % args.print_freq == 0 or (step+1) == len(valid_queue)):
            continue
        Sstr = ' TRAIN-ARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
        Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
        Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
        print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

    return top1.avg, top5.avg, objs.avg, batch_time.sum
def train_joint(train_queue, valid_queue, model, criterion, base_optimizer, arch_optimizer, epoch, log):
    """Alternate an architecture step and a weight step for every training batch.

    For each batch of ``train_queue`` one batch is drawn from ``valid_queue``
    (restarting that loader when it runs out) to step ``arch_optimizer``; the
    training batch then steps ``base_optimizer``.  The reported statistics
    cover the weight step only.

    Returns:
        (top-1 average, top-5 average, average loss, summed batch time)
    """
    data_time, batch_time = AverageMeter(), AverageMeter()
    objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    valid_iter = iter(valid_queue)
    end = time.time()
    for step, (inputs, targets) in enumerate(train_queue):
        batch, C, H, W = inputs.size()

        #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
        data_time.update(time.time() - end)

        # get the next minibatch from the search queue, cycling when exhausted.
        # Only exhaustion is handled: real loader errors and KeyboardInterrupt
        # must propagate instead of silently restarting the iterator.
        try:
            input_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            input_search, target_search = next(valid_iter)

        target_search = target_search.cuda(non_blocking=True)

        # update the architecture
        arch_optimizer.zero_grad()
        output_search = model(input_search)
        arch_loss = criterion(output_search, target_search)
        arch_loss.backward()
        arch_optimizer.step()

        # update the parameters
        base_optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
        base_optimizer.step()

        prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        objs.update(loss.item() , batch)
        top1.update(prec1.item(), batch)
        top5.update(prec5.item(), batch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % args.print_freq == 0 or (step+1) == len(train_queue):
            Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
            Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
            print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

    return top1.avg, top5.avg, objs.avg, batch_time.sum
def infer(valid_queue, model, criterion, epoch, log):
    """Evaluate ``model`` on ``valid_queue`` with gradients disabled.

    Returns:
        (top-1 average, top-5 average, average loss)
    """
    objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    model.eval()

    with torch.no_grad():
        for step, (inputs, targets) in enumerate(valid_queue):
            batch, _c, _h, _w = inputs.size()
            targets = targets.cuda(non_blocking=True)

            logits = model(inputs)
            loss = criterion(logits, targets)

            prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
            for meter, value in ((objs, loss), (top1, prec1), (top5, prec5)):
                meter.update(value.item(), batch)

            if not (step % args.print_freq == 0 or (step+1) == len(valid_queue)):
                continue
            Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
            print_log(Sstr + ' ' + Lstr, log)

    return top1.avg, top5.avg, objs.avg
# Call main() only when this file is executed as a script.
if __name__ == '__main__':
    main()

94
exps-cnn/cvpr-vis.py Normal file
View File

@ -0,0 +1,94 @@
# Usage: python ./exps-cnn/cvpr-vis.py --save_dir ./snapshots/NAS-VIS/
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from nas import DMS_V1, DMS_F1
from nas_rnn import DARTS_V2, GDAS
from graphviz import Digraph
# Command line: a single option naming the output directory for the plots.
parser = argparse.ArgumentParser("Visualize the Networks")
parser.add_argument(
    '--save_dir',
    type=str,
    help='The directory to save the network plot.',
)
args = parser.parse_args()
def plot_cnn(genotype, filename):
    """Render a CNN cell genotype as a left-to-right PDF graph.

    ``genotype`` is a sequence of ``(op, j, weight)`` triples, two per
    intermediate node.  Input index 0 and 1 map to the ``c_{k-2}`` and
    ``c_{k-1}`` nodes; any other ``j`` refers to intermediate node ``j - 2``.
    ``weight`` is unpacked but not drawn.  The graph is rendered to ``filename``.
    """
    edge_style = dict(fontsize='20', fontname="times")
    node_style = dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times")
    g = Digraph(format='pdf', edge_attr=edge_style, node_attr=node_style, engine='dot')
    g.body.extend(['rankdir=LR'])

    # the two cell inputs
    for name in ("c_{k-2}", "c_{k-1}"):
        g.node(name, fillcolor='darkseagreen2')

    assert len(genotype) % 2 == 0, '{:}'.format(genotype)
    steps = len(genotype) // 2

    # intermediate nodes
    for node in range(steps):
        g.node(str(node), fillcolor='lightblue')

    # entries 2*i and 2*i+1 both feed intermediate node i
    for node in range(steps):
        for offset in (0, 1):
            op, j, weight = genotype[2 * node + offset]
            src = "c_{k-2}" if j == 0 else ("c_{k-1}" if j == 1 else str(j-2))
            g.edge(src, str(node), label=op, fillcolor="gray")

    # every intermediate node feeds the cell output
    g.node("c_{k}", fillcolor='palegoldenrod')
    for node in range(steps):
        g.edge(str(node), "c_{k}", fillcolor="gray")

    g.render(filename, view=False)
def plot_rnn(genotype, filename):
    """Render a recurrent cell genotype as a left-to-right PDF graph.

    ``genotype`` is a sequence of ``(op, j)`` pairs; the i-th pair draws an
    edge labelled ``op`` from node ``j`` to node ``i + 1``.  Node ``0`` is fed
    by ``x_{t}`` and ``h_{t-1}``, and nodes ``1..len(genotype)`` all feed
    ``h_{t}``.  The graph is rendered to ``filename``.
    """
    edge_style = dict(fontsize='20', fontname="times")
    node_style = dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times")
    g = Digraph(format='pdf', edge_attr=edge_style, node_attr=node_style, engine='dot')
    g.body.extend(['rankdir=LR'])

    # inputs and the initial node they feed
    g.node("x_{t}", fillcolor='darkseagreen2')
    g.node("h_{t-1}", fillcolor='darkseagreen2')
    g.node("0", fillcolor='lightblue')
    g.edge("x_{t}", "0", fillcolor="gray")
    g.edge("h_{t-1}", "0", fillcolor="gray")

    steps = len(genotype)
    node_names = [str(idx) for idx in range(1, steps + 1)]

    for name in node_names:
        g.node(name, fillcolor='lightblue')

    for idx, (op, j) in enumerate(genotype):
        g.edge(str(j), str(idx + 1), label=op, fillcolor="gray")

    # output node collects every intermediate node
    g.node("h_{t}", fillcolor='palegoldenrod')
    for name in node_names:
        g.edge(name, "h_{t}", fillcolor="gray")

    g.render(filename, view=False)
if __name__ == '__main__':
    save_dir = Path(args.save_dir)
    # (plot function, genotype holder, attribute to plot, output file stem),
    # rendered in this order
    jobs = [
        (plot_cnn, DMS_V1, 'normal', 'DMS_V1-normal'),
        (plot_cnn, DMS_V1, 'reduce', 'DMS_V1-reduce'),
        (plot_cnn, DMS_F1, 'normal', 'DMS_F1-normal'),
        (plot_rnn, DARTS_V2, 'recurrent', 'DARTS-V2-RNN'),
        (plot_rnn, GDAS, 'recurrent', 'GDAS-V1-RNN'),
    ]
    for plotter, holder, attr, stem in jobs:
        save_path = str(save_dir / stem)
        plotter(getattr(holder, attr), save_path)

312
exps-cnn/meta_search.py Normal file
View File

@ -0,0 +1,312 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from datasets import TieredImageNet, MetaBatchSampler
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from meta_nas import return_alphas_str, MetaNetwork
from train_utils import main_procedure
from scheduler import load_config
# Search networks selectable through --arch.
Networks = {'meta': MetaNetwork}

parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
parser.add_argument('--n_way', type=int, help='N-WAY.')
parser.add_argument('--k_shot', type=int, help='K-SHOT.')
# Learning Parameters
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
parser.add_argument('--momentum', type=float, help='momentum')
parser.add_argument('--weight_decay', type=float, help='weight decay')
parser.add_argument('--epochs', type=int, help='num of training epochs')
# architecture learning rate
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
# network size
parser.add_argument('--init_channels', type=int, help='num of init channels')
parser.add_argument('--layers', type=int, help='total number of layers')
# regularisation and model configuration
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str , help='the model configuration')
# resume
parser.add_argument('--resume', type=str , help='the resume path')
parser.add_argument('--only_base',action='store_true', default=False, help='only train the searched model')
# split data
parser.add_argument('--validate', action='store_true', default=False, help='split train-data int train/val or not')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text promises a default of 200; without it `step % args.print_freq`
# and `args.print_freq*4` raise TypeError whenever the flag is omitted.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Draw a seed when none is given; main() writes it to the log.
if args.manualSeed is None:
    args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def main():
    """Run the meta architecture search on TieredImageNet, then train on CIFAR.

    All settings come from the module-level ``args``.  A checkpoint is written
    to ``checkpoint-meta-search.pth`` inside the seed-specific save directory
    after every epoch; on start-up ``--resume`` is tried first, then that
    checkpoint.  The CIFAR data directory is derived from the ``TORCH_HOME``
    environment variable, which must be set.
    """
    # Init logger
    args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("CUDA version : {}".format(torch.version.cuda), log)
    print_log("cuDNN version : {}".format(cudnn.version()), log)
    print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)

    # Mean + Std
    means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    # Data Augmentation
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(),
             transforms.Normalize(means, stds)]
    if args.cutout > 0 : lists += [Cutout(args.cutout)]
    train_transform = transforms.Compose(lists)
    test_transform = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(means, stds)])

    train_data = TieredImageNet(args.data_path, 'train', train_transform)
    test_data = TieredImageNet(args.data_path, 'val' , test_transform )

    # The samplers are asked for k_shot * 2 items per class; get_loss later
    # divides each class's rows into a support part and a query part.
    train_sampler = MetaBatchSampler(train_data.labels, args.n_way, args.k_shot * 2, len(train_data) // (args.n_way*args.k_shot))
    test_sampler = MetaBatchSampler( test_data.labels, args.n_way, args.k_shot * 2, len( test_data) // (args.n_way*args.k_shot))

    train_loader = torch.utils.data.DataLoader(train_data, batch_sampler=train_sampler)
    test_loader = torch.utils.data.DataLoader( test_data, batch_sampler= test_sampler)

    # network
    basemodel = Networks[args.arch](args.init_channels, args.layers, head='imagenet')
    model = torch.nn.DataParallel(basemodel).cuda()
    print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
    print_log("Train-transformation : {:}\nTest--transformation : {:}".format(train_transform, test_transform), log)

    # optimizer and LR-scheduler
    #base_optimizer = torch.optim.SGD (basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
    base_optimizer = torch.optim.Adam(basemodel.base_parameters(), lr=args.learning_rate_max, betas=(0.5, 0.999), weight_decay=args.weight_decay)
    base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

    # snapshot: an explicit --resume file wins over the run's own checkpoint
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-meta-search.pth')
    if args.resume is not None and os.path.isfile(args.resume):
        load_kind, load_path = 'resume', args.resume
    elif os.path.isfile(checkpoint_path):
        load_kind, load_path = 'checkpoint', checkpoint_path
    else:
        load_kind, load_path = None, None
    if load_path is not None:
        checkpoint = torch.load(load_path)
        start_epoch = checkpoint['epoch']
        basemodel.load_state_dict( checkpoint['state_dict'] )
        base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
        arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
        base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
        genotypes = checkpoint['genotypes']
        print_log('Load {:} from {:} with start-epoch = {:}'.format(load_kind, load_path, start_epoch), log)
    else:
        start_epoch, genotypes = 0, {}
        print_log('Train model-search from scratch.', log)

    config = load_config(args.model_config)

    if args.only_base:
        print_log('---- Only Train the Searched Model ----', log)
        CIFAR_DATA_DIR = os.environ['TORCH_HOME'] + '/cifar.python'
        main_procedure(config, 'cifar10', CIFAR_DATA_DIR, args, basemodel.genotype(), 36, 20, log)
        # Close the log on this early exit as well.
        log.close()
        return

    # Main loop
    start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
    for epoch in range(start_epoch, args.epochs):
        base_scheduler.step()

        need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr())), log)

        genotype = basemodel.genotype()
        print_log('genotype = {:}'.format(genotype), log)
        print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)

        # training
        train_acc1, train_obj, train_time \
            = train(train_loader, test_loader, model, args.n_way, base_optimizer, arch_optimizer, epoch, log)
        total_train_time += train_time
        # validation
        valid_acc1, valid_obj = infer(test_loader, model, epoch, args.n_way, log)

        print_log('META -> {:}-way {:}-shot : {:03d}/{:03d} : Train Acc : {:.2f}, Test Acc : {:.2f}'.format(args.n_way, args.k_shot, epoch, args.epochs, train_acc1, valid_acc1), log)
        # save genotype
        genotypes[epoch] = basemodel.genotype()

        # save checkpoint ('epoch' stores the next epoch to run)
        torch.save({'epoch' : epoch + 1,
                    'args' : deepcopy(args),
                    'state_dict': basemodel.state_dict(),
                    'genotypes' : genotypes,
                    'base_optimizer' : base_optimizer.state_dict(),
                    'arch_optimizer' : arch_optimizer.state_dict(),
                    'base_scheduler' : base_scheduler.state_dict()},
                   checkpoint_path)
        print_log('----> Save into {:}'.format(checkpoint_path), log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

    # train the searched genotype on both CIFAR datasets, clearing the GPU
    # cache before each run
    CIFAR_DATA_DIR = os.environ['TORCH_HOME'] + '/cifar.python'
    print_log('test for CIFAR-10', log)
    torch.cuda.empty_cache()
    main_procedure(config, 'cifar10' , CIFAR_DATA_DIR, args, basemodel.genotype(), 36, 20, log)
    print_log('test for CIFAR-100', log)
    torch.cuda.empty_cache()
    main_procedure(config, 'cifar100', CIFAR_DATA_DIR, args, basemodel.genotype(), 36, 20, log)
    log.close()
def euclidean_dist(A, B):
    """Return the matrix of squared Euclidean distances between rows of A and B.

    Args:
        A: 2-D tensor of shape (na, d).
        B: 2-D tensor of shape (nb, d).

    Returns:
        Tensor of shape (na, nb); entry (i, j) is ``sum((A[i] - B[j]) ** 2)``.

    Raises:
        AssertionError: if the feature dimensions of A and B differ.
    """
    na, da = A.size()
    nb, db = B.size()
    assert da == db, 'invalid feature dim : {:} vs. {:}'.format(da, db)
    # broadcast (na, 1, d) against (1, nb, d) to form every pairwise difference
    diff = A.unsqueeze(1) - B.unsqueeze(0)
    return (diff ** 2).sum(dim=2)
def get_loss(features, targets, n_way):
    """Compute the episode loss and top-1 accuracy from embedded samples.

    For every distinct class in ``targets`` the first ``shot`` rows are used as
    support and the remaining rows as queries, where
    ``shot = features.size(0) // n_way // 2``.  Each class is represented by
    the mean of its support features; a query's logits are its negative squared
    Euclidean distances to those means.

    Args:
        features: 2-D tensor with one feature row per sample.
        targets: 1-D tensor of class ids aligned with ``features``.
        n_way: number of classes per episode, used only to derive ``shot``.

    Returns:
        (loss, accuracy): the cross-entropy loss and the first element of the
        ``obtain_accuracy`` result for ``topk=(1,)``.
    """
    classes = torch.unique(targets)
    shot = features.size(0) // n_way // 2

    support_index, query_index, labels = [], [], []
    for idx, cls in enumerate( classes.tolist() ):
        indexs = (targets == cls).nonzero().view(-1).tolist()
        support_index.append(indexs[:shot])
        queries = indexs[shot:]
        query_index += queries
        # One label per collected query row, so labels and logits stay aligned
        # even when a class does not hold exactly 2 * shot samples.
        labels += [idx] * len(queries)
    query_features = features[query_index, :]
    # support_index is a list of equal-length lists -> (num_classes, shot, dim)
    support_features = features[support_index, :]
    support_features = torch.mean(support_features, dim=1)

    # Put the labels on the device of the features rather than on whichever
    # CUDA device happens to be current.
    labels = torch.tensor(labels, dtype=torch.long, device=features.device)
    logits = -euclidean_dist(query_features, support_features)
    loss = F.cross_entropy(logits, labels)
    accuracy = obtain_accuracy(logits.data, labels.data, topk=(1,))[0]
    return loss, accuracy
def train(train_queue, valid_queue, model, n_way, base_optimizer, arch_optimizer, epoch, log):
    """Alternate an architecture step and a weight step for every training episode.

    For each batch of ``train_queue`` one batch is drawn from ``valid_queue``
    (restarting that loader when it runs out) to step ``arch_optimizer``; the
    training batch then steps ``base_optimizer``.  The reported statistics
    cover the weight step only.

    Returns:
        (average accuracy, average loss, summed batch time)
    """
    data_time, batch_time = AverageMeter(), AverageMeter()
    objs, accuracies = AverageMeter(), AverageMeter()
    model.train()

    valid_iter = iter(valid_queue)
    end = time.time()
    for step, (inputs, targets) in enumerate(train_queue):
        batch, C, H, W = inputs.size()

        #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        #targets = targets.cuda(non_blocking=True)
        data_time.update(time.time() - end)

        # get the next minibatch from the search queue, cycling when exhausted.
        # Only exhaustion is handled: real loader errors and KeyboardInterrupt
        # must propagate instead of silently restarting the iterator.
        try:
            input_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            input_search, target_search = next(valid_iter)

        #target_search = target_search.cuda(non_blocking=True)

        # update the architecture
        arch_optimizer.zero_grad()
        feature_search = model(input_search)
        arch_loss, arch_accuracy = get_loss(feature_search, target_search, n_way)
        arch_loss.backward()
        arch_optimizer.step()

        # update the parameters
        base_optimizer.zero_grad()
        feature_model = model(inputs)
        model_loss, model_accuracy = get_loss(feature_model, targets, n_way)

        model_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
        base_optimizer.step()

        objs.update(model_loss.item() , batch)
        accuracies.update(model_accuracy.item(), batch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % args.print_freq == 0 or (step+1) == len(train_queue):
            Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
            Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f})'.format(loss=objs, top1=accuracies)
            Istr = 'I : {:}'.format( list(inputs.size()) )
            print_log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Istr, log)

    return accuracies.avg, objs.avg, batch_time.sum
def infer(valid_queue, model, epoch, n_way, log):
    """Evaluate ``model`` on ``valid_queue`` episodes with gradients disabled.

    Progress is logged every ``args.print_freq * 4`` steps and on the last step.

    Returns:
        (average accuracy, average loss)
    """
    objs, accuracies = AverageMeter(), AverageMeter()
    model.eval()

    with torch.no_grad():
        for step, (inputs, targets) in enumerate(valid_queue):
            batch, _c, _h, _w = inputs.size()
            #targets = targets.cuda(non_blocking=True)

            features = model(inputs)
            loss, accuracy = get_loss(features, targets, n_way)

            for meter, value in ((objs, loss), (accuracies, accuracy)):
                meter.update(value.item(), batch)

            if not (step % (args.print_freq*4) == 0 or (step+1) == len(valid_queue)):
                continue
            Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f})'.format(loss=objs, top1=accuracies)
            print_log(Sstr + ' ' + Lstr, log)

    return accuracies.avg, objs.avg
# Call main() only when this file is executed as a script.
if __name__ == '__main__':
    main()

96
exps-cnn/train_base.py Normal file
View File

@ -0,0 +1,96 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from nas import DARTS_V1, DARTS_V2, NASNet, PNASNet, AmoebaNet, ENASNet
from nas import DMS_V1, DMS_F1, GDAS_CC
from meta_nas import META_V1, META_V2
from train_utils import main_procedure
from train_utils_imagenet import main_procedure_imagenet
from scheduler import load_config
# Genotypes selectable through --arch, keyed by their public name.
models = {'DARTS_V1': DARTS_V1,
          'DARTS_V2': DARTS_V2,
          'NASNet' : NASNet,
          'PNASNet' : PNASNet,
          'ENASNet' : ENASNet,
          'DMS_V1' : DMS_V1,
          'DMS_F1' : DMS_F1,
          'GDAS_CC' : GDAS_CC,
          'META_V1' : META_V1,
          'META_V2' : META_V2,
          'AmoebaNet' : AmoebaNet}

parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--dataset', type=str, choices=['imagenet', 'cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
parser.add_argument('--arch', type=str, choices=models.keys(), help='the searched model.')
# training / model options
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str , help='the model configuration')
parser.add_argument('--init_channels', type=int , help='the initial number of channels')
parser.add_argument('--layers', type=int , help='the number of layers.')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text promises a default of 200; set it so the value handed to the
# training procedures through `args` is never None when the flag is omitted.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Draw a seed when none is given; main() writes it to the log.
if args.manualSeed is None:
    args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def main():
    """Train the architecture selected by ``--arch`` on the chosen dataset.

    Reads the module-level ``args``.  ``args.save_path`` is extended in place
    with a ``seed-<manualSeed>`` sub-directory, which is created if missing
    and receives the log file ``log-seed-<manualSeed>.txt``.  After logging
    the run configuration and library versions, the model configuration is
    loaded from ``args.model_config`` and training is delegated to
    ``main_procedure_imagenet`` (dataset ``imagenet``) or ``main_procedure``
    (any other dataset).

    The log file is managed by a ``with`` block so that it is closed even
    when training raises.
    """
    # Init logger
    args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(args.save_path, exist_ok=True)
    log_file = os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed))
    with open(log_file, 'w') as log:
        print_log('save path : {}'.format(args.save_path), log)
        state = {k: v for k, v in args._get_kwargs()}
        print_log(state, log)
        print_log("Random Seed: {}".format(args.manualSeed), log)
        print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
        print_log("Torch version : {}".format(torch.__version__), log)
        print_log("CUDA version : {}".format(torch.version.cuda), log)
        print_log("cuDNN version : {}".format(cudnn.version()), log)
        print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
        args.dataset = args.dataset.lower()

        config = load_config(args.model_config)
        genotype = models[args.arch]
        print_log('configuration : {:}'.format(config), log)
        print_log('genotype : {:}'.format(genotype), log)
        # clear GPU cache
        torch.cuda.empty_cache()
        if args.dataset == 'imagenet':
            main_procedure_imagenet(config, args.data_path, args, genotype, args.init_channels, args.layers, log)
        else:
            main_procedure(config, args.dataset, args.data_path, args, genotype, args.init_channels, args.layers, log)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
main()

312
exps-cnn/train_search.py Normal file
View File

@ -0,0 +1,312 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from datasets import TieredImageNet
from nas import return_alphas_str, Network, NetworkV1, NetworkF1
from train_utils import main_procedure
from scheduler import load_config
# Search networks selectable through --arch; the classes come from the
# project's `nas` module.
Networks = {'base': Network, 'share': NetworkV1, 'fix': NetworkF1}

# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser("CNN")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--dataset', type=str, choices=['cifar10', 'cifar100', 'tiered'], help='Choose between Cifar10/100 and TieredImageNet.')
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
parser.add_argument('--batch_size', type=int, help='the batch size')
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
parser.add_argument('--momentum', type=float, help='momentum')
parser.add_argument('--weight_decay', type=float, help='weight decay')
parser.add_argument('--epochs', type=int, help='num of training epochs')
# architecture learning rate
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
# network size
parser.add_argument('--init_channels', type=int, help='num of init channels')
parser.add_argument('--layers', type=int, help='total number of layers')
# regularisation and model configuration
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str , help='the model configuration')
# resume
parser.add_argument('--resume', type=str , help='the resume path')
parser.add_argument('--only_base',action='store_true', default=False, help='only train the searched model')
# split data
parser.add_argument('--validate', action='store_true', default=False, help='split train-data int train/val or not')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text always advertised a default of 200, but none was configured;
# leaving the value as None makes the `step % args.print_freq` test in
# train()/infer() raise a TypeError when the flag is omitted.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

# The search is only supported on CUDA devices.
assert torch.cuda.is_available(), 'torch.cuda is not available'

# Pick a random seed when none was given, then seed Python's and PyTorch's
# (CPU and all-GPU) random number generators with it.
if args.manualSeed is None:
    args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def main():
    """Run the architecture search and then train the discovered architecture.

    Reads the module-level ``args``.  ``args.save_path`` is extended in place
    with a ``seed-<manualSeed>`` sub-directory, which is created if missing
    and receives the log file ``log-seed-<manualSeed>.txt``.  The actual work
    happens in ``_run_search``.

    The log file is managed by a ``with`` block so that it is closed on every
    exit path, including the early return taken for ``--only_base`` and any
    exception raised during training.
    """
    # Init logger
    args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(args.save_path, exist_ok=True)
    log_file = os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed))
    with open(log_file, 'w') as log:
        _run_search(log)


def _run_search(log):
    """Build data/model/optimizers, run the search loop and the final training.

    Args:
        log: open, writable log file handed to ``print_log`` and to
            ``main_procedure``.  The caller owns it and closes it.

    Raises:
        TypeError: if ``args.dataset`` is not one of the supported datasets.
        ValueError: if the dataset has no test split and ``--validate`` was
            not given, so no validation loader can be built.
    """
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("CUDA version : {}".format(torch.version.cuda), log)
    print_log("cuDNN version : {}".format(cudnn.version()), log)
    print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
    args.dataset = args.dataset.lower()

    # Per-channel mean and standard deviation used for input normalisation.
    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    elif args.dataset == 'tiered':
        mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    else:
        raise TypeError("Unknow dataset : {:}".format(args.dataset))

    # Data augmentation: CIFAR images are cropped to 32x32, tiered images to
    # 80x80 (random crop for training, centre crop for testing).
    is_tiered = args.dataset == 'tiered'
    crop_size = 80 if is_tiered else 32
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(crop_size, padding=4),
             transforms.ToTensor(), transforms.Normalize(mean, std)]
    if args.cutout > 0:
        lists += [Cutout(args.cutout)]
    train_transform = transforms.Compose(lists)
    test_ops = [transforms.ToTensor(), transforms.Normalize(mean, std)]
    if is_tiered:
        test_ops.insert(0, transforms.CenterCrop(80))
    test_transform = transforms.Compose(test_ops)

    # Datasets
    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
        num_classes, head = 10, 'cifar'
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
        num_classes, head = 100, 'cifar'
    else:  # 'tiered' -- every other value was rejected above
        train_data = TieredImageNet(args.data_path, 'train-val', train_transform)
        test_data = None
        num_classes, head = train_data.n_classes, 'imagenet'

    # Data loaders.  With --validate the training set is split into two
    # disjoint random subsets: one updates the weights, the other updates the
    # architecture and is used for evaluation.
    if args.validate:
        indices = list(range(len(train_data)))
        split = int(args.train_portion * len(indices))
        random.shuffle(indices)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                                   sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
                                                   pin_memory=True, num_workers=args.workers)
        test_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                                  sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
                                                  pin_memory=True, num_workers=args.workers)
    else:
        if test_data is None:
            # The tiered dataset is loaded without a test split; fail with a
            # clear message instead of handing None to a DataLoader.
            raise ValueError('Dataset {:} has no separate test split; use --validate to hold out a part of the training data.'.format(args.dataset))
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)

    # network and criterion
    criterion = torch.nn.CrossEntropyLoss().cuda()
    basemodel = Networks[args.arch](args.init_channels, num_classes, args.layers, head=head)
    model = torch.nn.DataParallel(basemodel).cuda()
    print_log("Network : {:}".format(model), log)
    print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
    print_log("Train-transformation : {:}\nTest--transformation : {:}\nClass number : {:}".format(train_transform, test_transform, num_classes), log)

    # optimizer and LR-scheduler: SGD with cosine annealing for the network
    # weights, Adam for the architecture parameters.
    base_optimizer = torch.optim.SGD(basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
    base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

    # snapshot: an explicit --resume file takes precedence over the
    # checkpoint that a previous run left in the save directory.
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
    if args.resume is not None and os.path.isfile(args.resume):
        restore_kind, restore_path = 'resume', args.resume
    elif os.path.isfile(checkpoint_path):
        restore_kind, restore_path = 'checkpoint', checkpoint_path
    else:
        restore_kind, restore_path = None, None

    if restore_path is not None:
        checkpoint = torch.load(restore_path)
        start_epoch = checkpoint['epoch']
        basemodel.load_state_dict(checkpoint['state_dict'])
        base_optimizer.load_state_dict(checkpoint['base_optimizer'])
        arch_optimizer.load_state_dict(checkpoint['arch_optimizer'])
        base_scheduler.load_state_dict(checkpoint['base_scheduler'])
        genotypes = checkpoint['genotypes']
        print_log('Load {:} from {:} with start-epoch = {:}'.format(restore_kind, restore_path, start_epoch), log)
    else:
        start_epoch, genotypes = 0, {}
        print_log('Train model-search from scratch.', log)

    config = load_config(args.model_config)

    if args.only_base:
        # Skip the search and train the architecture currently encoded in
        # the (possibly restored) search network.
        print_log('---- Only Train the Searched Model ----', log)
        main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
        return

    # Main loop
    start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
    for epoch in range(start_epoch, args.epochs):
        base_scheduler.step()

        need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}] [Batch={:d}]'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr()), args.batch_size), log)

        genotype = basemodel.genotype()
        print_log('genotype = {:}'.format(genotype), log)
        print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)

        # training
        train_acc1, train_acc5, train_obj, train_time \
            = train(train_loader, test_loader, model, criterion, base_optimizer, arch_optimizer, epoch, log)
        total_train_time += train_time
        # validation
        valid_acc1, valid_acc5, valid_obj = infer(test_loader, model, criterion, epoch, log)
        print_log('Base-Search : {:03d}/{:03d} : Train-Acc={:.3f}, Test-Acc={:.3f}'.format(epoch, args.epochs, train_acc1, valid_acc1), log)
        # save genotype
        genotypes[epoch] = basemodel.genotype()
        # save checkpoint; 'epoch' stores the next epoch to run on restart
        torch.save({'epoch' : epoch + 1,
                    'args' : deepcopy(args),
                    'state_dict': basemodel.state_dict(),
                    'genotypes' : genotypes,
                    'base_optimizer' : base_optimizer.state_dict(),
                    'arch_optimizer' : arch_optimizer.state_dict(),
                    'base_scheduler' : base_scheduler.state_dict()},
                   checkpoint_path)
        print_log('----> Save into {:}'.format(checkpoint_path), log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

    # clear GPU cache
    torch.cuda.empty_cache()
    # Train the discovered architecture on CIFAR-10.
    # NOTE(review): this requires the TORCH_HOME environment variable to be
    # set; a missing variable raises KeyError only after the search finished.
    main_procedure(config, 'cifar10', os.environ['TORCH_HOME'] + '/cifar.python', args, basemodel.genotype(), 36, 20, log)
def train(train_queue, valid_queue, model, criterion, base_optimizer, arch_optimizer, epoch, log):
    """Run one search epoch, alternating architecture and weight updates.

    For every batch of ``train_queue`` one batch is drawn from
    ``valid_queue`` (restarting that loader whenever it is exhausted) and
    used for a step of ``arch_optimizer``; the training batch is then used
    for a step of ``base_optimizer`` with gradient-norm clipping of
    ``model.module.base_parameters()`` at ``args.grad_clip``.

    Args:
        train_queue: loader yielding ``(inputs, targets)`` for weight updates.
        valid_queue: loader yielding ``(inputs, targets)`` for architecture
            updates.
        model: the wrapped search network; ``model.module`` must expose
            ``base_parameters()``.
        criterion: loss function called as ``criterion(logits, targets)``.
        base_optimizer: optimizer of the network weights.
        arch_optimizer: optimizer of the architecture parameters.
        epoch: current epoch index, used for logging only.
        log: open log file passed to ``print_log``.

    Returns:
        Tuple ``(top1.avg, top5.avg, loss.avg, batch_time.sum)`` measured on
        the training batches.
    """
    data_time, batch_time = AverageMeter(), AverageMeter()
    objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    valid_iter = iter(valid_queue)

    end = time.time()
    for step, (inputs, targets) in enumerate(train_queue):
        batch = inputs.size(0)

        targets = targets.cuda(non_blocking=True)
        data_time.update(time.time() - end)

        # Draw the next search minibatch, starting a new pass over the
        # validation queue once the current one is exhausted.  Only
        # StopIteration is handled: a bare `except` would also swallow
        # KeyboardInterrupt and genuine data-loading errors.
        try:
            input_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            input_search, target_search = next(valid_iter)

        target_search = target_search.cuda(non_blocking=True)

        # update the architecture
        arch_optimizer.zero_grad()
        output_search = model(input_search)
        arch_loss = criterion(output_search, target_search)
        arch_loss.backward()
        arch_optimizer.step()

        # update the parameters
        base_optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
        base_optimizer.step()

        prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        objs.update(loss.item() , batch)
        top1.update(prec1.item(), batch)
        top5.update(prec5.item(), batch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % args.print_freq == 0 or (step+1) == len(train_queue):
            Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
            Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
            print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

    return top1.avg, top5.avg, objs.avg, batch_time.sum
def infer(valid_queue, model, criterion, epoch, log):
    """Evaluate ``model`` on ``valid_queue`` without tracking gradients.

    Args:
        valid_queue: loader yielding ``(inputs, targets)`` batches.
        model: network to evaluate; it is switched to eval mode.
        criterion: loss function called as ``criterion(logits, targets)``.
        epoch: current epoch index, used for logging only.
        log: open log file passed to ``print_log``.

    Returns:
        Tuple ``(top1.avg, top5.avg, loss.avg)`` over the whole queue.
    """
    loss_meter, acc1_meter, acc5_meter = AverageMeter(), AverageMeter(), AverageMeter()
    model.eval()
    with torch.no_grad():
        num_batches_seen = 0
        for step, (inputs, targets) in enumerate(valid_queue):
            num_batches_seen = step + 1
            # The four-way unpacking also requires a 4-dimensional input.
            batch, _channels, _height, _width = inputs.size()
            targets = targets.cuda(non_blocking=True)

            logits = model(inputs)
            loss = criterion(logits, targets)
            prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))

            loss_meter.update(loss.item() , batch)
            acc1_meter.update(prec1.item(), batch)
            acc5_meter.update(prec5.item(), batch)

            is_last_batch = num_batches_seen == len(valid_queue)
            if step % args.print_freq == 0 or is_last_batch:
                Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
                Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=loss_meter, top1=acc1_meter, top5=acc5_meter)
                print_log(Sstr + ' ' + Lstr, log)
    return acc1_meter.avg, acc5_meter.avg, loss_meter.avg
# Script entry point: run main() only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
main()

184
exps-cnn/train_utils.py Normal file
View File

@ -0,0 +1,184 @@
import os, sys, time
from copy import deepcopy
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms
from utils import print_log, obtain_accuracy, AverageMeter
from utils import time_string, convert_secs2time
from utils import count_parameters_in_MB
from utils import Cutout
from nas import NetworkCIFAR as Network
def obtain_best(accuracies):
    """Return the largest value stored in ``accuracies``.

    Args:
        accuracies: mapping whose values are mutually comparable; the
            training loop stores ``(top1, top5)`` tuples keyed by epoch, so
            the comparison is by top-1 first and top-5 second.

    Returns:
        The greatest value, or ``(0, 0)`` when the mapping is empty.
    """
    if not len(accuracies):
        return (0, 0)
    ranked = sorted(value for _, value in accuracies.items())
    return ranked[-1]
def main_procedure(config, dataset, data_path, args, genotype, init_channels, layers, log):
    """Train and evaluate a fixed architecture on CIFAR-10 or CIFAR-100.

    Args:
        config: training configuration; the attributes read here are
            ``cutout``, ``auxiliary``, ``batch_size``, ``LR``, ``momentum``,
            ``decay``, ``type``, ``epochs`` and ``drop_path_prob``.
        dataset: ``'cifar10'`` or ``'cifar100'``.
        data_path: root directory of the dataset (downloaded if absent).
        args: parsed command-line namespace; ``workers``, ``save_path`` and
            ``print_freq`` are read.
        genotype: architecture description handed to the network class.
        init_channels: initial number of channels of the network.
        layers: number of layers of the network.
        log: open log file passed to ``print_log``.

    Raises:
        TypeError: if ``dataset`` is not supported.
        ValueError: if ``config.type`` is not ``'cosine'``.

    A checkpoint named ``checkpoint-<dataset>-model.pth`` inside
    ``args.save_path`` is loaded when present and rewritten after each epoch.
    """
    # Dataset-specific settings: raw channel statistics (0-255 scale),
    # torchvision dataset class and number of classes.
    if dataset == 'cifar10':
        raw_mean, raw_std = [125.3, 123.0, 113.9], [63.0, 62.1, 66.7]
        dataset_cls, class_num = dset.CIFAR10, 10
    elif dataset == 'cifar100':
        raw_mean, raw_std = [129.3, 124.1, 112.4], [68.2, 65.4, 70.4]
        dataset_cls, class_num = dset.CIFAR100, 100
    else:
        raise TypeError("Unknow dataset : {:}".format(dataset))
    mean = [value / 255 for value in raw_mean]
    std = [value / 255 for value in raw_std]

    # Dataset transformation
    augmentations = [transforms.RandomHorizontalFlip(),
                     transforms.RandomCrop(32, padding=4),
                     transforms.ToTensor(),
                     transforms.Normalize(mean, std)]
    if config.cutout > 0:
        augmentations.append(Cutout(config.cutout))
    train_transform = transforms.Compose(augmentations)
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

    # Dataset definition
    train_data = dataset_cls(data_path, train=True, transform=train_transform, download=True)
    test_data = dataset_cls(data_path, train=False, transform=test_transform, download=True)

    print_log('-------------------------------------- main-procedure', log)
    print_log('config : {:}'.format(config), log)
    print_log('genotype : {:}'.format(genotype), log)
    print_log('init_channels : {:}'.format(init_channels), log)
    print_log('layers : {:}'.format(layers), log)
    print_log('class_num : {:}'.format(class_num), log)

    # Model: the bare network is kept for state-dict and drop-path access,
    # the DataParallel wrapper is what gets trained.
    basemodel = Network(init_channels, class_num, layers, config.auxiliary, genotype)
    model = torch.nn.DataParallel(basemodel).cuda()

    total_param = count_parameters_in_MB(basemodel)
    aux_param = count_parameters_in_MB(basemodel.auxiliary_param())
    print_log('Network =>\n{:}'.format(basemodel), log)
    print_log('Parameters : {:} - {:} = {:.3f} MB'.format(total_param, aux_param, total_param - aux_param), log)
    print_log('config : {:}'.format(config), log)
    print_log('genotype : {:}'.format(genotype), log)
    print_log('args : {:}'.format(args), log)
    print_log('Train-Dataset : {:}'.format(train_data), log)
    print_log('Train-Trans : {:}'.format(train_transform), log)
    print_log('Test--Dataset : {:}'.format(test_data ), log)
    print_log('Test--Trans : {:}'.format(test_transform ), log)

    loader_kwargs = dict(batch_size=config.batch_size, num_workers=args.workers, pin_memory=True)
    train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(test_data, shuffle=False, **loader_kwargs)

    criterion = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay)
    if config.type != 'cosine':
        raise ValueError('Can not find the schedular type : {:}'.format(config.type))
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(config.epochs))

    # Resume from the checkpoint of a previous run if there is one.
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-{:}-model.pth'.format(dataset))
    if not os.path.isfile(checkpoint_path):
        start_epoch, accuracies = 0, {}
        print_log('Train model from scratch without pre-trained model or snapshot', log)
    else:
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch']
        basemodel.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        accuracies = checkpoint['accuracies']
        print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)

    # Main loop
    epoch_time = AverageMeter()
    start_time = time.time()
    for epoch in range(start_epoch, config.epochs):
        scheduler.step()

        remaining_epochs = config.epochs - epoch
        need_time = convert_secs2time(epoch_time.val * remaining_epochs, True)
        print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} LR={:6.4f} ~ {:6.4f}, Batch={:d}".format(time_string(), epoch, config.epochs, need_time, min(scheduler.get_lr()), max(scheduler.get_lr()), config.batch_size), log)

        # The drop-path probability grows linearly with the epoch index.
        basemodel.update_drop_path(config.drop_path_prob * epoch / config.epochs)

        train_acc1, train_acc5, train_los = _train(train_loader, model, criterion, optimizer, 'train', epoch, config, args.print_freq, log)

        with torch.no_grad():
            valid_acc1, valid_acc5, valid_los = _train(test_loader, model, criterion, optimizer, 'test', epoch, config, args.print_freq, log)
        accuracies[epoch] = (valid_acc1, valid_acc5)

        # 'epoch' stores the next epoch to run after a restart.
        snapshot = {'epoch' : epoch + 1,
                    'args' : deepcopy(args),
                    'state_dict': basemodel.state_dict(),
                    'optimizer' : optimizer.state_dict(),
                    'scheduler' : scheduler.state_dict(),
                    'accuracies': accuracies}
        torch.save(snapshot, checkpoint_path)

        best_acc = obtain_best( accuracies )
        print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
        print_log('----> Save into {:}'.format(checkpoint_path), log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):
    """Run one pass over ``xloader`` in training or evaluation mode.

    Args:
        xloader: loader yielding ``(inputs, targets)`` batches.
        model: network to run; switched to train or eval mode by ``mode``.
        criterion: loss function called as ``criterion(logits, targets)``.
        optimizer: optimizer stepped in ``'train'`` mode; unused otherwise.
        mode: ``'train'`` or ``'test'``.
        epoch: current epoch index, used for logging only.
        config: configuration; ``auxiliary``, ``auxiliary_weight`` and
            ``grad_clip`` are read.
        print_freq: log a progress line every ``print_freq`` batches.
        log: open log file passed to ``print_log``.

    Returns:
        Tuple ``(top1.avg, top5.avg, loss.avg)`` over the pass.

    Raises:
        ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``.
    """
    data_time, batch_time, losses, top1, top5 = (AverageMeter() for _ in range(5))

    if mode == 'train':
        model.train()
    elif mode == 'test':
        model.eval()
    else:
        raise ValueError("The mode is not right : {:}".format(mode))
    updating = mode == 'train'

    end = time.time()
    for i, (inputs, targets) in enumerate(xloader):
        # measure data loading time
        data_time.update(time.time() - end)

        # calculate prediction and loss
        targets = targets.cuda(non_blocking=True)
        if updating:
            optimizer.zero_grad()

        # With an auxiliary head the model returns a second set of logits
        # while it is in training mode.
        with_aux = config.auxiliary and model.training
        if with_aux:
            logits, logits_aux = model(inputs)
        else:
            logits = model(inputs)

        loss = criterion(logits, targets)
        if with_aux:
            loss_aux = criterion(logits_aux, targets)
            loss += config.auxiliary_weight * loss_aux

        if updating:
            loss.backward()
            if config.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
            optimizer.step()

        # record
        num_samples = inputs.size(0)
        prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), num_samples)
        top1.update (prec1.item(), num_samples)
        top5.update (prec5.item(), num_samples)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 or (i+1) == len(xloader):
            Sstr = ' {:5s}'.format(mode) + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, i, len(xloader))
            Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
            print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

    # Summary line for the whole pass.
    print_log ('{TIME:} **{mode:}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(TIME=time_string(), mode=mode, top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg), log)
    return top1.avg, top5.avg, losses.avg

View File

@ -0,0 +1,207 @@
import os, sys, time
from copy import deepcopy
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as transforms
from utils import print_log, obtain_accuracy, AverageMeter
from utils import time_string, convert_secs2time
from utils import count_parameters_in_MB
from utils import print_FLOPs
from utils import Cutout
from nas import NetworkImageNet as Network
def obtain_best(accuracies):
    """Return the largest value stored in ``accuracies``.

    Args:
        accuracies: mapping whose values are mutually comparable; the
            training loop stores ``(top1, top5)`` tuples keyed by epoch, so
            the comparison is by top-1 first and top-5 second.

    Returns:
        The greatest value, or ``(0, 0)`` when the mapping is empty.
    """
    if not len(accuracies):
        return (0, 0)
    ranked = sorted(value for _, value in accuracies.items())
    return ranked[-1]
class CrossEntropyLabelSmooth(nn.Module):
    """Cross-entropy loss with label smoothing.

    The one-hot target of each sample is blended with a uniform distribution:
    ``(1 - epsilon) * one_hot + epsilon / num_classes``.
    """

    def __init__(self, num_classes, epsilon):
        """Store the number of classes and the smoothing factor ``epsilon``."""
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        """Return the smoothed cross-entropy averaged over the batch.

        Args:
            inputs: logits; log-softmax is taken along dimension 1.
            targets: class indices; a trailing dimension is added so they can
                index dimension 1 of ``inputs``.

        Returns:
            Scalar tensor: per-class mean over dimension 0, summed.
        """
        log_probs = self.logsoftmax(inputs)
        # One-hot encode the targets in a tensor shaped like the log-probs.
        class_index = targets.unsqueeze(1)
        one_hot = torch.zeros_like(log_probs)
        one_hot.scatter_(1, class_index, 1)
        uniform_share = self.epsilon / self.num_classes
        smoothed = (1 - self.epsilon) * one_hot + uniform_share
        weighted = -smoothed * log_probs
        return weighted.mean(0).sum()
def main_procedure_imagenet(config, data_path, args, genotype, init_channels, layers, log):
    """Train and evaluate a fixed architecture on an ImageNet-style folder.

    Args:
        config: training configuration; the attributes read here are
            ``batch_size``, ``auxiliary``, ``label_smooth``, ``LR``,
            ``momentum``, ``decay``, ``type``, ``epochs``, ``drop_path_prob``
            and, for the ``'steplr'`` schedule, ``decay_period`` and
            ``gamma``.
        data_path: directory containing the ``train`` and ``val`` image
            folders.
        args: parsed command-line namespace; ``workers``, ``save_path`` and
            ``print_freq`` are read.
        genotype: architecture description handed to the network class.
        init_channels: initial number of channels of the network.
        layers: number of layers of the network.
        log: open log file passed to ``print_log``.

    Raises:
        ValueError: if ``config.type`` is neither ``'cosine'`` nor
            ``'steplr'``.

    A checkpoint named ``checkpoint-imagenet-model.pth`` inside
    ``args.save_path`` is loaded when present and rewritten after each epoch.
    """
    # training data and testing data
    traindir = os.path.join(data_path, 'train')
    validdir = os.path.join(data_path, 'val')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    color_jitter = transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2)
    train_pipeline = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        color_jitter,
        transforms.ToTensor(),
        normalize,
    ])
    train_data = dset.ImageFolder(traindir, train_pipeline)
    valid_pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    valid_data = dset.ImageFolder(validdir, valid_pipeline)

    loader_kwargs = dict(batch_size=config.batch_size, pin_memory=True, num_workers=args.workers)
    train_queue = torch.utils.data.DataLoader(train_data, shuffle=True, **loader_kwargs)
    valid_queue = torch.utils.data.DataLoader(valid_data, shuffle=False, **loader_kwargs)

    class_num = 1000

    print_log('-------------------------------------- main-procedure', log)
    print_log('config : {:}'.format(config), log)
    print_log('genotype : {:}'.format(genotype), log)
    print_log('init_channels : {:}'.format(init_channels), log)
    print_log('layers : {:}'.format(layers), log)
    print_log('class_num : {:}'.format(class_num), log)

    # Model: the bare network is kept for state-dict and drop-path access,
    # the DataParallel wrapper is what gets trained.
    basemodel = Network(init_channels, class_num, layers, config.auxiliary, genotype)
    model = torch.nn.DataParallel(basemodel).cuda()

    total_param = count_parameters_in_MB(basemodel)
    aux_param = count_parameters_in_MB(basemodel.auxiliary_param())
    print_log('Network =>\n{:}'.format(basemodel), log)
    print_log('Parameters : {:} - {:} = {:.3f} MB'.format(total_param, aux_param, total_param - aux_param), log)
    print_log('config : {:}'.format(config), log)
    print_log('genotype : {:}'.format(genotype), log)
    print_log('Train-Dataset : {:}'.format(train_data), log)
    print_log('Valid--Dataset : {:}'.format(valid_data), log)
    print_log('Args : {:}'.format(args), log)

    # Plain cross-entropy for validation, label-smoothed loss for training.
    criterion = torch.nn.CrossEntropyLoss().cuda()
    criterion_smooth = CrossEntropyLabelSmooth(class_num, config.label_smooth).cuda()

    optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay)
    if config.type == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(config.epochs))
    elif config.type == 'steplr':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, config.decay_period, gamma=config.gamma)
    else:
        raise ValueError('Can not find the schedular type : {:}'.format(config.type))

    # Resume from the checkpoint of a previous run if there is one.
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-imagenet-model.pth')
    if not os.path.isfile(checkpoint_path):
        start_epoch, accuracies = 0, {}
        print_log('Train model from scratch without pre-trained model or snapshot', log)
    else:
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch']
        basemodel.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        accuracies = checkpoint['accuracies']
        print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)

    # Main loop
    epoch_time = AverageMeter()
    start_time = time.time()
    for epoch in range(start_epoch, config.epochs):
        scheduler.step()

        remaining_epochs = config.epochs - epoch
        need_time = convert_secs2time(epoch_time.val * remaining_epochs, True)
        print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} LR={:6.4f} ~ {:6.4f}, Batch={:d}".format(time_string(), epoch, config.epochs, need_time, min(scheduler.get_lr()), max(scheduler.get_lr()), config.batch_size), log)

        # The drop-path probability grows linearly with the epoch index.
        basemodel.update_drop_path(config.drop_path_prob * epoch / config.epochs)

        train_acc1, train_acc5, train_los = _train(train_queue, model, criterion_smooth, optimizer, 'train', epoch, config, args.print_freq, log)

        with torch.no_grad():
            valid_acc1, valid_acc5, valid_los = _train(valid_queue, model, criterion, None, 'test' , epoch, config, args.print_freq, log)
        accuracies[epoch] = (valid_acc1, valid_acc5)

        # 'epoch' stores the next epoch to run after a restart.
        snapshot = {'epoch' : epoch + 1,
                    'args' : deepcopy(args),
                    'state_dict': basemodel.state_dict(),
                    'optimizer' : optimizer.state_dict(),
                    'scheduler' : scheduler.state_dict(),
                    'accuracies': accuracies}
        torch.save(snapshot, checkpoint_path)

        best_acc = obtain_best( accuracies )
        print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
        print_log('----> Save into {:}'.format(checkpoint_path), log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):
    """Run one pass over ``xloader`` in training or evaluation mode.

    Args:
        xloader: loader yielding ``(inputs, targets)`` batches.
        model: network to run; switched to train or eval mode by ``mode``.
        criterion: loss function called as ``criterion(logits, targets)``.
        optimizer: optimizer stepped in ``'train'`` mode; it is not touched
            in ``'test'`` mode, so ``None`` may be passed there.
        mode: ``'train'`` or ``'test'``.
        epoch: current epoch index, used for logging only.
        config: configuration; ``auxiliary``, ``auxiliary_weight`` and
            ``grad_clip`` are read.
        print_freq: log a progress line every ``print_freq`` batches.
        log: open log file passed to ``print_log``.

    Returns:
        Tuple ``(top1.avg, top5.avg, loss.avg)`` over the pass.

    Raises:
        ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``.
    """
    data_time, batch_time, losses, top1, top5 = (AverageMeter() for _ in range(5))

    if mode == 'train':
        model.train()
    elif mode == 'test':
        model.eval()
    else:
        raise ValueError("The mode is not right : {:}".format(mode))
    updating = mode == 'train'

    end = time.time()
    for i, (inputs, targets) in enumerate(xloader):
        # measure data loading time
        data_time.update(time.time() - end)

        # calculate prediction and loss
        targets = targets.cuda(non_blocking=True)
        if updating:
            optimizer.zero_grad()

        # With an auxiliary head the model returns a second set of logits
        # while it is in training mode.
        with_aux = config.auxiliary and model.training
        if with_aux:
            logits, logits_aux = model(inputs)
        else:
            logits = model(inputs)

        loss = criterion(logits, targets)
        if with_aux:
            loss_aux = criterion(logits_aux, targets)
            loss += config.auxiliary_weight * loss_aux

        if updating:
            loss.backward()
            if config.grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
            optimizer.step()

        # record
        num_samples = inputs.size(0)
        prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), num_samples)
        top1.update (prec1.item(), num_samples)
        top5.update (prec5.item(), num_samples)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 or (i+1) == len(xloader):
            Sstr = ' {:5s}'.format(mode) + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, i, len(xloader))
            Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
            Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
            print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

    # Summary line for the whole pass.
    print_log ('{TIME:} **{mode:}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(TIME=time_string(), mode=mode, top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg), log)
    return top1.avg, top5.avg, losses.avg

69
exps-cnn/vis-arch.py Normal file
View File

@ -0,0 +1,69 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from graphviz import Digraph
parser = argparse.ArgumentParser("Visualize the Networks")
parser.add_argument('--checkpoint', type=str, help='The path to the checkpoint.')
parser.add_argument('--save_dir', type=str, help='The directory to save the network plot.')
args = parser.parse_args()
def plot(genotype, filename):
  """Draw one cell as a left-to-right graph and render it as a PDF.

  Args:
    genotype: indexable sequence of ``(op, j, weight)`` triples with an even
      length; entries ``2*i`` and ``2*i + 1`` are the two incoming edges of
      intermediate node ``i``. ``j`` equal to 0 or 1 selects the cell inputs
      ``c_{k-2}`` / ``c_{k-1}``; any other ``j`` refers to intermediate node
      ``j - 2``. ``weight`` is unpacked but not drawn.
    filename: forwarded unchanged to ``Digraph.render``.
  """
  edge_style = dict(fontsize='20', fontname="times")
  node_style = dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times")
  g = Digraph(format='pdf', edge_attr=edge_style, node_attr=node_style, engine='dot')
  g.body.extend(['rankdir=LR'])

  # The two cell inputs, keyed by the index used inside the genotype.
  input_names = {0: "c_{k-2}", 1: "c_{k-1}"}
  for slot in (0, 1):
    g.node(input_names[slot], fillcolor='darkseagreen2')

  assert len(genotype) % 2 == 0
  steps = len(genotype) // 2

  for node_id in range(steps):
    g.node(str(node_id), fillcolor='lightblue')

  # Walk the edges in genotype order; edge k ends at intermediate node k // 2.
  for k in range(2 * steps):
    op, j, weight = genotype[k]
    source = input_names[j] if j in input_names else str(j-2)
    g.edge(source, str(k // 2), label=op, fillcolor="gray")

  # Every intermediate node feeds the cell output.
  g.node("c_{k}", fillcolor='palegoldenrod')
  for node_id in range(steps):
    g.edge(str(node_id), "c_{k}", fillcolor="gray")

  g.render(filename, view=False)
# Script entry point: load the genotypes stored in a checkpoint and plot the
# `normal` and `reduce` attribute of each one into its own sub-directory.
if __name__ == '__main__':
  checkpoint = args.checkpoint
  assert os.path.isfile(checkpoint), 'Invalid path for checkpoint : {:}'.format(checkpoint)
  # map_location='cpu' remaps the stored tensors onto the CPU while loading.
  checkpoint = torch.load( checkpoint, map_location='cpu' )
  genotypes = checkpoint['genotypes']
  save_dir = Path(args.save_dir)
  subs = ['normal', 'reduce']
  for sub in subs:
    sub_dir = save_dir / sub
    if not sub_dir.exists():
      sub_dir.mkdir(parents=True, exist_ok=True)
  for key, network in genotypes.items():
    # Keys are converted with int(); a non-numeric key raises ValueError here.
    stem = 'epoch-{:03d}'.format( int(key) )
    for sub in subs:
      save_path = str(save_dir / sub / stem)
      print('save into {:}'.format(save_path))
      plot(getattr(network, sub), save_path)

276
exps-rnn/acc_rnn_search.py Normal file
View File

@ -0,0 +1,276 @@
import os, gc, sys, math, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import count_parameters_in_MB
from datasets import Corpus
from nas_rnn import batchify, get_batch, repackage_hidden
from nas_rnn import DARTSCellSearch, RNNModelSearch
from train_rnn_utils import main_procedure
from scheduler import load_config
# Command-line interface of the accelerated RNN architecture search.
parser = argparse.ArgumentParser("RNN")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--emsize', type=int, default=300, help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=300, help='number of hidden units per layer')
parser.add_argument('--nhidlast', type=int, default=300, help='number of hidden units for the last rnn layer')
parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
parser.add_argument('--batch_size', type=int, default=256, help='the batch size')
parser.add_argument('--eval_batch_size', type=int, default=10, help='the evaluation batch size')
parser.add_argument('--bptt', type=int, default=35, help='the sequence length')
# DropOut
parser.add_argument('--dropout', type=float, default=0.75, help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropouth', type=float, default=0.25, help='dropout for hidden nodes in rnn layers (0 = no dropout)')
parser.add_argument('--dropoutx', type=float, default=0.75, help='dropout for input nodes in rnn layers (0 = no dropout)')
parser.add_argument('--dropouti', type=float, default=0.2, help='dropout for input embedding layers (0 = no dropout)')
parser.add_argument('--dropoute', type=float, default=0, help='dropout to remove words from embedding layer (0 = no dropout)')
# Regularization
parser.add_argument('--lr', type=float, default=20, help='initial learning rate')
parser.add_argument('--alpha', type=float, default=0, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
parser.add_argument('--beta', type=float, default=1e-3, help='beta slowness regularization applied on RNN activiation (beta = 0 means no regularization)')
parser.add_argument('--wdecay', type=float, default=5e-7, help='weight decay applied to all weights')
# architecture learning rate
parser.add_argument('--arch_lr', type=float, default=3e-3, help='learning rate for arch encoding')
parser.add_argument('--arch_wdecay', type=float, default=1e-3, help='weight decay for arch encoding')
parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
# acceleration
# NOTE(review): --tau_max / --tau_min have no default, yet main() does
# arithmetic on them every epoch, so both must be given on the command line.
parser.add_argument('--tau_max', type=float, help='initial tau')
parser.add_argument('--tau_min', type=float, help='minimum tau')
# log
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text promises a default of 200; without it the option is None and
# `batch % args.print_freq` in train() raises TypeError.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Pick a random seed when none is given.
if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
# A negative --nhidlast means "use the embedding size".
if args.nhidlast < 0:
  args.nhidlast = args.emsize

random.seed(args.manualSeed)
# train() draws its sequence lengths from np.random, so NumPy has to be seeded
# as well; the modulo keeps the value inside the range np.random.seed accepts.
np.random.seed(args.manualSeed % (2 ** 32))
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def main():
  """Search an RNN cell, checkpoint after every epoch, then train the best one.

  Everything is driven by the module-level ``args``. ``args.save_path`` is
  rewritten in place to a per-seed sub-directory that receives the log file
  and ``checkpoint-search.pth``. When that checkpoint already exists, the
  model, both optimizers, the genotype history and the validation losses are
  restored and the loop continues from the stored epoch. After the last epoch
  ``genotypes['best']`` is handed to ``main_procedure``.
  """
  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  # exist_ok avoids the check-then-create race of a separate isdir() test.
  os.makedirs(args.save_path, exist_ok=True)
  log_path = os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed))
  # NOTE(review): mode 'w' truncates an existing log, including when the run
  # resumes from a checkpoint below -- confirm this is intended.
  # The context manager closes the log even when the search raises part-way.
  with open(log_path, 'w') as log:
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("CUDA version : {}".format(torch.version.cuda), log)
    print_log("cuDNN version : {}".format(cudnn.version()), log)
    print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)

    # Dataset: the validation split is batchified twice, once with the
    # training batch size (architecture updates) and once for evaluation.
    corpus = Corpus(args.data_path)
    train_data = batchify(corpus.train, args.batch_size, True)
    search_data = batchify(corpus.valid, args.batch_size, True)
    valid_data = batchify(corpus.valid, args.eval_batch_size, True)
    print_log("Train--Data Size : {:}".format(train_data.size()), log)
    print_log("Search-Data Size : {:}".format(search_data.size()), log)
    print_log("Valid--Data Size : {:}".format(valid_data.size()), log)

    ntokens = len(corpus.dictionary)
    model = RNNModelSearch(ntokens, args.emsize, args.nhid, args.nhidlast,
                           args.dropout, args.dropouth, args.dropoutx, args.dropouti, args.dropoute,
                           DARTSCellSearch, None)
    model = model.cuda()
    print_log('model ==>> : {:}'.format(model), log)
    print_log('Parameter size : {:} MB'.format(count_parameters_in_MB(model)), log)

    # Separate optimizers for the network weights and the architecture encoding.
    base_optimizer = torch.optim.SGD(model.base_parameters(), lr=args.lr, weight_decay=args.wdecay)
    arch_optimizer = torch.optim.Adam(model.arch_parameters(), lr=args.arch_lr, weight_decay=args.arch_wdecay)

    config = load_config(args.config_path)
    print_log('Load config from {:} ==>>\n {:}'.format(args.config_path, config), log)

    # snapshot
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
    if os.path.isfile(checkpoint_path):
      checkpoint = torch.load(checkpoint_path)
      start_epoch = checkpoint['epoch']
      model.load_state_dict( checkpoint['state_dict'] )
      base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
      arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
      genotypes = checkpoint['genotypes']
      valid_losses = checkpoint['valid_losses']
      print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
    else:
      # The -1 entry is a sentinel so that min() below always has a value.
      start_epoch, genotypes, valid_losses = 0, {}, {-1:1e8}
      print_log('Train model-search from scratch.', log)

    model.set_gumbel(True, False)

    # Main loop
    start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
    for epoch in range(start_epoch, args.epochs):
      # tau moves linearly from tau_max at epoch 0 towards tau_min.
      model.set_tau( args.tau_max - epoch*1.0/args.epochs*(args.tau_max-args.tau_min) )
      need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
      print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} || tau={:}'.format(time_string(), epoch, args.epochs, need_time, model.get_tau()), log)

      # training
      data_time, train_time = train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log)
      total_train_time += train_time

      # validation
      valid_loss = infer(model, corpus, valid_data, args.eval_batch_size)
      # Compare against earlier epochs before this epoch's loss is recorded.
      is_best = valid_loss < min( valid_losses.values() )
      print_log('-'*10 + ' [Epoch={:03d}/{:03d}] : is-best={:}, validation-loss={:}, validation-PPL={:}'.format(epoch, args.epochs, is_best, valid_loss, math.exp(valid_loss)), log)
      print_log('{:}'.format(F.softmax(model.arch_weights, dim=-1)), log)
      print_log('genotype : {:}'.format(model.genotype()), log)

      # save genotype
      valid_losses[epoch] = valid_loss
      genotypes[epoch] = model.genotype()
      print_log(' the {:}-th genotype = {:}'.format(epoch, genotypes[epoch]), log)
      if is_best:
        genotypes['best'] = model.genotype()

      # save checkpoint ('epoch' stores the next epoch to run on resume)
      torch.save({'epoch' : epoch + 1,
                  'args' : deepcopy(args),
                  'state_dict': model.state_dict(),
                  'genotypes' : genotypes,
                  'valid_losses' : valid_losses,
                  'base_optimizer' : base_optimizer.state_dict(),
                  'arch_optimizer' : arch_optimizer.state_dict()},
                 checkpoint_path)
      print_log('----> Save into {:}'.format(checkpoint_path), log)

      # measure elapsed time
      epoch_time.update(time.time() - start_time)
      start_time = time.time()

    print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

    # clear GPU cache
    torch.cuda.empty_cache()
    main_procedure(config, genotypes['best'], args.save_path, args.print_freq, log)
def train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log):
  """Run one search epoch, alternating an architecture step and a weight step.

  For every chunk of ``train_data`` the architecture optimizer takes one step
  on a batch taken from ``search_data``, then the base optimizer takes one
  step on the training chunk. Hyper-parameters come from the module-level
  ``args``.

  Args:
    model: search network; called as ``model(data, hidden, return_h=...)`` and
      expected to provide ``init_hidden`` and ``base_parameters``.
    base_optimizer: optimizer stepped with the training loss.
    arch_optimizer: optimizer stepped with the loss on ``search_data``.
    corpus: kept for interface compatibility; not used here.
    train_data: training data, walked along dimension 0.
    search_data: data for the architecture step, indexed modulo its length.
    epoch: epoch index, used only in log messages.
    log: log handle forwarded to ``print_log``.

  Returns:
    Tuple ``(data_time.sum, batch_time.sum)`` taken from the two timing meters.
  """
  data_time, batch_time = AverageMeter(), AverageMeter()
  # Loss accumulated since the last log line and the number of batches in it.
  total_loss, logged_batches = 0, 0
  start_time = time.time()
  hidden_train, hidden_valid = model.init_hidden(args.batch_size), model.init_hidden(args.batch_size)

  batch, i = 0, 0
  while i < train_data.size(0) - 1 - 1:
    # Use the full bptt window unless the draw is >= 0.95, then use half of it.
    seq_len = int( args.bptt if np.random.random() < 0.95 else args.bptt / 2. )

    # Scale the learning rate with the chunk length; undone after the step.
    lr_scale = float( seq_len / args.bptt )
    for param_group in base_optimizer.param_groups:
      param_group['lr'] *= lr_scale

    # Turn on training mode which enables dropout.
    model.train()

    data_valid, targets_valid = get_batch(search_data, i % (search_data.size(0) - 1), args.bptt)
    data_train, targets_train = get_batch(train_data , i, seq_len)

    hidden_train = repackage_hidden(hidden_train)
    hidden_valid = repackage_hidden(hidden_valid)

    data_time.update(time.time() - start_time)

    # architecture update on the search batch
    targets_valid = targets_valid.contiguous().view(-1)
    # Clear the gradients before the backward pass. This call used to be
    # `arch_optimizer.step()`, which never reset the gradients (so they kept
    # accumulating, including those added by the weight-update backward pass
    # below) and applied an additional update with stale gradients.
    arch_optimizer.zero_grad()
    log_prob, hidden_valid = model(data_valid, hidden_valid, return_h=False)
    arch_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_valid)
    arch_loss.backward()
    arch_optimizer.step()

    # model update
    base_optimizer.zero_grad()
    targets_train = targets_train.contiguous().view(-1)

    log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data_train, hidden_train, return_h=True)
    raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_train)

    loss = raw_loss
    # Activation Regularization
    if args.alpha > 0:
      loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
    # Temporal Activation Regularization (slowness)
    loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
    loss.backward()
    # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
    nn.utils.clip_grad_norm_(model.base_parameters(), args.clip)
    base_optimizer.step()

    for param_group in base_optimizer.param_groups:
      param_group['lr'] /= lr_scale

    # Only the un-regularized loss is reported.
    total_loss += raw_loss.item()
    logged_batches += 1
    gc.collect()

    batch_time.update(time.time() - start_time)
    start_time = time.time()
    batch, i = batch + 1, i + seq_len

    if batch % args.print_freq == 0 or i >= train_data.size(0) - 1 - 1:
      print_log(' || Epoch: {:03d} :: {:03d}/{:03d} '.format(epoch, batch, len(train_data) // args.bptt), log)
      # Average over the batches actually accumulated: the last window of an
      # epoch is usually shorter than print_freq.
      cur_loss = total_loss / logged_batches
      print_log(' [TRAIN] Time : data {:.3f} ({:.3f}) batch {:.3f} ({:.3f}) Loss : {:}, PPL : {:}'.format(data_time.val, data_time.avg, batch_time.val, batch_time.avg, cur_loss, math.exp(cur_loss)), log)
      total_loss, logged_batches = 0, 0

  return data_time.sum, batch_time.sum
def infer(model, corpus, data_source, batch_size):
  """Return the average NLL loss of ``model`` over ``data_source``.

  The data is walked in chunks of ``args.bptt`` rows with gradients disabled;
  each chunk's loss is weighted by ``len(data)`` of that chunk.

  Args:
    model: network called as ``model(data, hidden)``; switched to eval mode.
    corpus: kept for interface compatibility; not used here.
    data_source: data walked along dimension 0.
    batch_size: forwarded to ``model.init_hidden``.

  Returns:
    Weighted loss sum divided by the summed chunk lengths (0.0 when no chunk
    was evaluated).
  """
  model.eval()
  total_loss, total_length = 0, 0
  with torch.no_grad():
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
      data, targets = get_batch(data_source, i, args.bptt)
      targets = targets.view(-1)
      log_prob, hidden = model(data, hidden)
      loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
      total_loss += loss.item() * len(data)
      total_length += len(data)
      hidden = repackage_hidden(hidden)
  # Normalize by the weights that were actually summed rather than by
  # len(data_source), matching the other evaluation loops in this project.
  # max() keeps the previous 0.0 result when the loop body never ran.
  return total_loss / max(total_length, 1)
# Start the architecture search only when this file is executed as a script.
if __name__ == '__main__':
  main()

75
exps-rnn/debug_test.py Normal file
View File

@ -0,0 +1,75 @@
import os, gc, sys, time, math
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import print_log, obtain_accuracy, AverageMeter
from utils import time_string, convert_secs2time
from utils import count_parameters_in_MB
from datasets import Corpus
from nas_rnn import batchify, get_batch, repackage_hidden
from nas_rnn import DARTS
from nas_rnn import DARTSCell, RNNModel
from nas_rnn import basemodel as model
from scheduler import load_config
def main_procedure(config, genotype, print_freq, log):
  """Build the RNN for ``genotype`` and report its test and validation loss.

  No training happens here: the model is constructed, moved to the GPU and
  evaluated as is.

  Args:
    config: object exposing the data path, batch sizes, ``bptt`` and the model
      hyper-parameters read below.
    genotype: cell description forwarded to ``RNNModel``.
    print_freq: accepted but not used in this function.
    log: log handle forwarded to ``print_log``.
  """
  print_log('-'*90, log)
  print_log('genotype : {:}'.format(genotype), log)
  # Only the bptt field of the configuration is printed here.
  print_log('config : {:}'.format(config.bptt), log)

  corpus = Corpus(config.data_path)
  train_data = batchify(corpus.train, config.train_batch, True)
  valid_data = batchify(corpus.valid, config.eval_batch , True)
  test_data = batchify(corpus.test, config.test_batch , True)
  ntokens = len(corpus.dictionary)
  size_reports = (
    ("Train--Data Size : {:}", train_data),
    ("Valid--Data Size : {:}", valid_data),
    ("Test---Data Size : {:}", test_data),
  )
  for template, split in size_reports:
    print_log(template.format(split.size()), log)
  print_log("ntokens = {:}".format(ntokens), log)

  network = RNNModel(ntokens, config.emsize, config.nhid, config.nhidlast,
                     config.dropout, config.dropouth, config.dropoutx, config.dropouti, config.dropoute,
                     cell_cls=DARTSCell, genotype=genotype)
  network = network.cuda()
  print_log('Network =>\n{:}'.format(network), log)
  print_log('Genotype : {:}'.format(genotype), log)
  print_log('Parameters : {:.3f} MB'.format(count_parameters_in_MB(network)), log)

  print_log('--------------------- Finish Training ----------------', log)
  # Test split first, then the validation split.
  test_loss = evaluate(network, corpus, test_data , config.test_batch, config.bptt)
  print_log('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(test_loss, math.exp(test_loss)), log)
  vali_loss = evaluate(network, corpus, valid_data, config.eval_batch, config.bptt)
  print_log('| End of training | valid loss {:5.2f} | valid ppl {:8.2f}'.format(vali_loss, math.exp(vali_loss)), log)
def evaluate(model, corpus, data_source, batch_size, bptt):
  """Return the length-weighted average NLL loss over ``data_source``.

  Args:
    model: network called as ``model(data, hidden)``; put into eval mode.
    corpus: only ``corpus.dictionary`` is touched (its length is read).
    data_source: data walked along dimension 0 in steps of ``bptt``.
    batch_size: forwarded to ``model.init_hidden``.
    bptt: chunk length passed to ``get_batch``.

  Returns:
    Sum of ``loss * len(chunk)`` divided by the sum of ``len(chunk)``.
  """
  # Turn on evaluation mode which disables dropout.
  model.eval()
  weighted_sum, weight_total = 0.0, 0.0
  with torch.no_grad():
    ntokens = len(corpus.dictionary)  # read but not used afterwards
    state = model.init_hidden(batch_size)
    for offset in range(0, data_source.size(0) - 1, bptt):
      chunk, targets = get_batch(data_source, offset, bptt)
      flat_targets = targets.view(-1)
      log_prob, state = model(chunk, state)
      # Collapse all leading dimensions so nll_loss sees a 2-D input.
      flat_log_prob = log_prob.view(-1, log_prob.size(2))
      chunk_loss = nn.functional.nll_loss(flat_log_prob, flat_targets)
      chunk_len = len(chunk)
      weighted_sum += chunk_loss.item() * chunk_len
      weight_total += chunk_len
      state = repackage_hidden(state)
  return weighted_sum / weight_total
# Debug entry point: evaluate the DARTS genotype with a fixed configuration
# file, a print frequency of 10 and no log file.
if __name__ == '__main__':
  path = './configs/NAS-PTB-BASE.config'
  config = load_config(path)
  main_procedure(config=config, genotype=DARTS, print_freq=10, log=None)

View File

@ -0,0 +1,70 @@
import os, gc, sys, math, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, time_file_str, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import count_parameters_in_MB
from nas_rnn import DARTS_V1, DARTS_V2, GDAS
from train_rnn_utils import main_procedure
from scheduler import load_config
# Pre-defined genotypes that can be selected with --arch.
Networks = {'DARTS_V1': DARTS_V1,
            'DARTS_V2': DARTS_V2,
            'GDAS' : GDAS}

parser = argparse.ArgumentParser("RNN")
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='the network architecture')
parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
# log
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text promises a default of 200; without it the option is None when
# the flag is omitted, and that None is what main() forwards to main_procedure.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
parser.add_argument('--threads', type=int, default=10, help='the number of threads')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Pick a random seed when none is given.
if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
torch.set_num_threads(args.threads)
def main():
  """Train the genotype selected by ``--arch`` with the given configuration.

  ``args.save_path`` is rewritten in place to a per-seed sub-directory; a
  time-stamped log file is created there and passed, together with the loaded
  configuration and the genotype, to ``main_procedure``.
  """
  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  # exist_ok avoids the check-then-create race of a separate isdir() test.
  os.makedirs(args.save_path, exist_ok=True)
  log_path = os.path.join(args.save_path, 'log-seed-{:}-{:}.txt'.format(args.manualSeed, time_file_str()))
  # The context manager closes the log even when training raises part-way.
  with open(log_path, 'w') as log:
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("CUDA version : {}".format(torch.version.cuda), log)
    print_log("cuDNN version : {}".format(cudnn.version()), log)
    print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)

    config = load_config( args.config_path )
    genotype = Networks[ args.arch ]

    main_procedure(config, genotype, args.save_path, args.print_freq, log)
# Start training only when this file is executed as a script.
if __name__ == '__main__':
  main()

View File

@ -0,0 +1,267 @@
import os, gc, sys, math, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import count_parameters_in_MB
from datasets import Corpus
from nas_rnn import batchify, get_batch, repackage_hidden
from nas_rnn import DARTSCellSearch, RNNModelSearch
from train_rnn_utils import main_procedure
from scheduler import load_config
# Command-line interface of the RNN architecture search.
parser = argparse.ArgumentParser("RNN")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--emsize', type=int, default=300, help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=300, help='number of hidden units per layer')
parser.add_argument('--nhidlast', type=int, default=300, help='number of hidden units for the last rnn layer')
parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
parser.add_argument('--batch_size', type=int, default=256, help='the batch size')
parser.add_argument('--eval_batch_size', type=int, default=10, help='the evaluation batch size')
parser.add_argument('--bptt', type=int, default=35, help='the sequence length')
# DropOut
parser.add_argument('--dropout', type=float, default=0.75, help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropouth', type=float, default=0.25, help='dropout for hidden nodes in rnn layers (0 = no dropout)')
parser.add_argument('--dropoutx', type=float, default=0.75, help='dropout for input nodes in rnn layers (0 = no dropout)')
parser.add_argument('--dropouti', type=float, default=0.2, help='dropout for input embedding layers (0 = no dropout)')
parser.add_argument('--dropoute', type=float, default=0, help='dropout to remove words from embedding layer (0 = no dropout)')
# Regularization
parser.add_argument('--lr', type=float, default=20, help='initial learning rate')
parser.add_argument('--alpha', type=float, default=0, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
parser.add_argument('--beta', type=float, default=1e-3, help='beta slowness regularization applied on RNN activiation (beta = 0 means no regularization)')
parser.add_argument('--wdecay', type=float, default=5e-7, help='weight decay applied to all weights')
# architecture learning rate
parser.add_argument('--arch_lr', type=float, default=3e-3, help='learning rate for arch encoding')
parser.add_argument('--arch_wdecay', type=float, default=1e-3, help='weight decay for arch encoding')
parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
# log
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
# The help text promises a default of 200; without it the option is None and
# `batch % args.print_freq` in train() raises TypeError.
parser.add_argument('--print_freq', type=int, default=200, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

# Pick a random seed when none is given.
if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
# A negative --nhidlast means "use the embedding size".
if args.nhidlast < 0:
  args.nhidlast = args.emsize

random.seed(args.manualSeed)
# train() draws its sequence lengths from np.random, so NumPy has to be seeded
# as well; the modulo keeps the value inside the range np.random.seed accepts.
np.random.seed(args.manualSeed % (2 ** 32))
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)
def main():
  """Search an RNN cell, checkpoint after every epoch, then train the best one.

  Everything is driven by the module-level ``args``. ``args.save_path`` is
  rewritten in place to a per-seed sub-directory that receives the log file
  and ``checkpoint-search.pth``. When that checkpoint already exists, the
  model, both optimizers, the genotype history and the validation losses are
  restored and the loop continues from the stored epoch. After the last epoch
  ``genotypes['best']`` is handed to ``main_procedure``.
  """
  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  # exist_ok avoids the check-then-create race of a separate isdir() test.
  os.makedirs(args.save_path, exist_ok=True)
  log_path = os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed))
  # NOTE(review): mode 'w' truncates an existing log, including when the run
  # resumes from a checkpoint below -- confirm this is intended.
  # The context manager closes the log even when the search raises part-way.
  with open(log_path, 'w') as log:
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("CUDA version : {}".format(torch.version.cuda), log)
    print_log("cuDNN version : {}".format(cudnn.version()), log)
    print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)

    # Dataset: the validation split is batchified twice, once with the
    # training batch size (architecture updates) and once for evaluation.
    corpus = Corpus(args.data_path)
    train_data = batchify(corpus.train, args.batch_size, True)
    search_data = batchify(corpus.valid, args.batch_size, True)
    valid_data = batchify(corpus.valid, args.eval_batch_size, True)
    print_log("Train--Data Size : {:}".format(train_data.size()), log)
    print_log("Search-Data Size : {:}".format(search_data.size()), log)
    print_log("Valid--Data Size : {:}".format(valid_data.size()), log)

    ntokens = len(corpus.dictionary)
    model = RNNModelSearch(ntokens, args.emsize, args.nhid, args.nhidlast,
                           args.dropout, args.dropouth, args.dropoutx, args.dropouti, args.dropoute,
                           DARTSCellSearch, None)
    model = model.cuda()
    print_log('model ==>> : {:}'.format(model), log)
    print_log('Parameter size : {:} MB'.format(count_parameters_in_MB(model)), log)

    # Separate optimizers for the network weights and the architecture encoding.
    base_optimizer = torch.optim.SGD(model.base_parameters(), lr=args.lr, weight_decay=args.wdecay)
    arch_optimizer = torch.optim.Adam(model.arch_parameters(), lr=args.arch_lr, weight_decay=args.arch_wdecay)

    config = load_config(args.config_path)
    print_log('Load config from {:} ==>>\n {:}'.format(args.config_path, config), log)

    # snapshot
    checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
    if os.path.isfile(checkpoint_path):
      checkpoint = torch.load(checkpoint_path)
      start_epoch = checkpoint['epoch']
      model.load_state_dict( checkpoint['state_dict'] )
      base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
      arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
      genotypes = checkpoint['genotypes']
      valid_losses = checkpoint['valid_losses']
      print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
    else:
      # The -1 entry is a sentinel so that min() below always has a value.
      start_epoch, genotypes, valid_losses = 0, {}, {-1:1e8}
      print_log('Train model-search from scratch.', log)

    # Main loop
    start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
    for epoch in range(start_epoch, args.epochs):
      need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
      print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s}'.format(time_string(), epoch, args.epochs, need_time), log)

      # training
      data_time, train_time = train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log)
      total_train_time += train_time

      # validation
      valid_loss = infer(model, corpus, valid_data, args.eval_batch_size)
      # Compare against earlier epochs before this epoch's loss is recorded.
      is_best = valid_loss < min( valid_losses.values() )
      print_log('-'*10 + ' [Epoch={:03d}/{:03d}] : is-best={:}, validation-loss={:}, validation-PPL={:}'.format(epoch, args.epochs, is_best, valid_loss, math.exp(valid_loss)), log)

      # save genotype
      valid_losses[epoch] = valid_loss
      genotypes[epoch] = model.genotype()
      print_log(' the {:}-th genotype = {:}'.format(epoch, genotypes[epoch]), log)
      if is_best:
        genotypes['best'] = model.genotype()

      # save checkpoint ('epoch' stores the next epoch to run on resume)
      torch.save({'epoch' : epoch + 1,
                  'args' : deepcopy(args),
                  'state_dict': model.state_dict(),
                  'genotypes' : genotypes,
                  'valid_losses' : valid_losses,
                  'base_optimizer' : base_optimizer.state_dict(),
                  'arch_optimizer' : arch_optimizer.state_dict()},
                 checkpoint_path)
      print_log('----> Save into {:}'.format(checkpoint_path), log)

      # measure elapsed time
      epoch_time.update(time.time() - start_time)
      start_time = time.time()

    print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

    # clear GPU cache
    torch.cuda.empty_cache()
    main_procedure(config, genotypes['best'], args.save_path, args.print_freq, log)
def train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log):
  """Run one search epoch, alternating an architecture step and a weight step.

  For every chunk of ``train_data`` the architecture optimizer takes one step
  on a batch taken from ``search_data``, then the base optimizer takes one
  step on the training chunk. Hyper-parameters come from the module-level
  ``args``.

  Args:
    model: search network; called as ``model(data, hidden, return_h=...)`` and
      expected to provide ``init_hidden``, ``base_parameters`` and
      ``arch_weights``.
    base_optimizer: optimizer stepped with the training loss.
    arch_optimizer: optimizer stepped with the loss on ``search_data``.
    corpus: kept for interface compatibility; not used here.
    train_data: training data, walked along dimension 0.
    search_data: data for the architecture step, indexed modulo its length.
    epoch: epoch index, used only in log messages.
    log: log handle forwarded to ``print_log``.

  Returns:
    Tuple ``(data_time.sum, batch_time.sum)`` taken from the two timing meters.
  """
  data_time, batch_time = AverageMeter(), AverageMeter()
  # Loss accumulated since the last log line and the number of batches in it.
  total_loss, logged_batches = 0, 0
  start_time = time.time()
  hidden_train, hidden_valid = model.init_hidden(args.batch_size), model.init_hidden(args.batch_size)

  batch, i = 0, 0
  while i < train_data.size(0) - 1 - 1:
    # Use the full bptt window unless the draw is >= 0.95, then use half of it.
    seq_len = int( args.bptt if np.random.random() < 0.95 else args.bptt / 2. )

    # Scale the learning rate with the chunk length; undone after the step.
    lr_scale = float( seq_len / args.bptt )
    for param_group in base_optimizer.param_groups:
      param_group['lr'] *= lr_scale

    # Turn on training mode which enables dropout.
    model.train()

    data_valid, targets_valid = get_batch(search_data, i % (search_data.size(0) - 1), args.bptt)
    data_train, targets_train = get_batch(train_data , i, seq_len)

    hidden_train = repackage_hidden(hidden_train)
    hidden_valid = repackage_hidden(hidden_valid)

    data_time.update(time.time() - start_time)

    # architecture update on the search batch
    targets_valid = targets_valid.contiguous().view(-1)
    # Clear the gradients before the backward pass. This call used to be
    # `arch_optimizer.step()`, which never reset the gradients (so they kept
    # accumulating, including those added by the weight-update backward pass
    # below) and applied an additional update with stale gradients.
    arch_optimizer.zero_grad()
    log_prob, hidden_valid = model(data_valid, hidden_valid, return_h=False)
    arch_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_valid)
    arch_loss.backward()
    arch_optimizer.step()

    # model update
    base_optimizer.zero_grad()
    targets_train = targets_train.contiguous().view(-1)

    log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data_train, hidden_train, return_h=True)
    raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_train)

    loss = raw_loss
    # Activation Regularization
    if args.alpha > 0:
      loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
    # Temporal Activation Regularization (slowness)
    loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
    loss.backward()
    # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
    nn.utils.clip_grad_norm_(model.base_parameters(), args.clip)
    base_optimizer.step()

    for param_group in base_optimizer.param_groups:
      param_group['lr'] /= lr_scale

    # Only the un-regularized loss is reported.
    total_loss += raw_loss.item()
    logged_batches += 1
    gc.collect()

    batch_time.update(time.time() - start_time)
    start_time = time.time()
    batch, i = batch + 1, i + seq_len

    if batch % args.print_freq == 0 or i >= train_data.size(0) - 1 - 1:
      print_log(' || Epoch: {:03d} :: {:03d}/{:03d}'.format(epoch, batch, len(train_data) // args.bptt), log)
      # Average over the batches actually accumulated: the last window of an
      # epoch is usually shorter than print_freq.
      cur_loss = total_loss / logged_batches
      print_log(' ---> Time : data {:.3f} ({:.3f}) batch {:.3f} ({:.3f}) Loss : {:}, PPL : {:}'.format(data_time.val, data_time.avg, batch_time.val, batch_time.avg, cur_loss, math.exp(cur_loss)), log)
      # Current softmax over the architecture weights, printed to stdout only.
      print(F.softmax(model.arch_weights, dim=-1))
      total_loss, logged_batches = 0, 0

  return data_time.sum, batch_time.sum
def infer(model, corpus, data_source, batch_size):
  """Return the length-weighted average NLL loss over ``data_source``.

  The data is walked in chunks of ``args.bptt`` rows with gradients disabled.

  Args:
    model: network called as ``model(data, hidden)``; put into eval mode.
    corpus: only ``corpus.dictionary`` is touched (its length is read).
    data_source: data walked along dimension 0.
    batch_size: forwarded to ``model.init_hidden``.

  Returns:
    Sum of ``loss * len(chunk)`` divided by the sum of ``len(chunk)``.
  """
  model.eval()
  with torch.no_grad():
    weighted_sum = 0
    weight_total = 0
    ntokens = len(corpus.dictionary)  # read but not used afterwards
    state = model.init_hidden(batch_size)
    for offset in range(0, data_source.size(0) - 1, args.bptt):
      chunk, targets = get_batch(data_source, offset, args.bptt)
      flat_targets = targets.view(-1)
      log_prob, state = model(chunk, state)
      # Collapse all leading dimensions so nll_loss sees a 2-D input.
      flat_log_prob = log_prob.view(-1, log_prob.size(2))
      chunk_loss = nn.functional.nll_loss(flat_log_prob, flat_targets)
      chunk_len = len(chunk)
      weighted_sum += chunk_loss.item() * chunk_len
      weight_total += chunk_len
      state = repackage_hidden(state)
  return weighted_sum / weight_total
# Start the architecture search only when this file is executed as a script.
if __name__ == '__main__':
  main()

220
exps-rnn/train_rnn_utils.py Normal file
View File

@ -0,0 +1,220 @@
import os, gc, sys, time, math
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn
from utils import print_log, obtain_accuracy, AverageMeter
from utils import time_string, convert_secs2time
from utils import count_parameters_in_MB
from datasets import Corpus
from nas_rnn import batchify, get_batch, repackage_hidden
from nas_rnn import DARTSCell, RNNModel
def obtain_best(accuracies):
  """Return the largest value stored in ``accuracies``.

  Args:
    accuracies: mapping whose values are mutually comparable; keys are ignored.

  Returns:
    The last value after an ascending sort, or ``(0, 0)`` for an empty mapping.
  """
  if not len(accuracies):
    return (0, 0)
  ranked = list(accuracies.values())
  ranked.sort()
  return ranked[-1]
def _load_training_state(checkpoint_path, model, Soptimizer, Aoptimizer):
  """Load `checkpoint_path` into the model and both optimizers.

  Returns:
    (epoch, use_asgd, valid_loss_from_sgd) as stored in the checkpoint; the
    loss history defaults to an empty list when the key is absent.
  """
  checkpoint = torch.load(checkpoint_path)
  model.load_state_dict( checkpoint['state_dict'] )
  Soptimizer.load_state_dict( checkpoint['SGD_optimizer'] )
  Aoptimizer.load_state_dict( checkpoint['ASGD_optimizer'] )
  return checkpoint['epoch'], checkpoint['use_asgd'], checkpoint.get('valid_loss_from_sgd', [])


def _swap_in_averaged_weights(model, Aoptimizer):
  """Replace each parameter by its ASGD average (`state['ax']`) and return a backup.

  Parameters for which the optimizer holds no 'ax' entry yet (ASGD has not
  taken a step on them) are left untouched and are not part of the backup.
  """
  backup = {}
  for prm in model.parameters():
    state = Aoptimizer.state.get(prm)
    if not state or 'ax' not in state:
      continue
    backup[prm] = prm.data.clone()
    prm.data = state['ax'].clone()
  return backup


def _restore_weights(backup):
  """Undo `_swap_in_averaged_weights` using the backup it returned."""
  for prm, weights in backup.items():
    prm.data = weights


def main_procedure(config, genotype, save_dir, print_freq, log):
  """Train an RNN language model built from `genotype` and report test/valid loss.

  Training starts with SGD. Once more than `config.nonmono` SGD validation
  losses have been recorded and the current one is worse than the best of
  them, all following epochs use ASGD and are evaluated with the averaged
  weights. The checkpoint with the best validation loss is kept in
  `save_dir` and reloaded for the final evaluation.

  Args:
    config: settings object; the fields read here are data_path, data_name,
      train_batch, eval_batch, test_batch, emsize, nhid, nhidlast, the
      dropout* values, LR, wdecay, epochs, nonmono and bptt.
    genotype: cell description forwarded to `RNNModel`.
    save_dir: directory holding `checkpoint-<data_name>.pth`.
    print_freq: logging period (in batches) forwarded to `train`.
    log: log handle forwarded to `print_log`.
  """
  print_log('-'*90, log)
  print_log('save-dir : {:}'.format(save_dir), log)
  print_log('genotype : {:}'.format(genotype), log)
  print_log('config   : {:}'.format(config), log)

  corpus = Corpus(config.data_path)
  train_data = batchify(corpus.train, config.train_batch, True)
  valid_data = batchify(corpus.valid, config.eval_batch , True)
  test_data  = batchify(corpus.test,  config.test_batch , True)
  ntokens = len(corpus.dictionary)
  print_log("Train--Data Size : {:}".format(train_data.size()), log)
  print_log("Valid--Data Size : {:}".format(valid_data.size()), log)
  print_log("Test---Data Size : {:}".format( test_data.size()), log)
  print_log("ntokens = {:}".format(ntokens), log)

  model = RNNModel(ntokens, config.emsize, config.nhid, config.nhidlast,
                   config.dropout, config.dropouth, config.dropoutx, config.dropouti, config.dropoute,
                   cell_cls=DARTSCell, genotype=genotype)
  model = model.cuda()
  print_log('Network =>\n{:}'.format(model), log)
  print_log('Genotype : {:}'.format(genotype), log)
  print_log('Parameters : {:.3f} MB'.format(count_parameters_in_MB(model)), log)

  checkpoint_path = os.path.join(save_dir, 'checkpoint-{:}.pth'.format(config.data_name))

  Soptimizer = torch.optim.SGD (model.parameters(), lr=config.LR, weight_decay=config.wdecay)
  Aoptimizer = torch.optim.ASGD(model.parameters(), lr=config.LR, t0=0, lambd=0., weight_decay=config.wdecay)
  if os.path.isfile(checkpoint_path):
    # Resume exactly like the in-loop rollback does, including the SGD loss
    # history that drives the SGD -> ASGD switch.
    epoch, use_asgd, valid_loss_from_sgd = _load_training_state(checkpoint_path, model, Soptimizer, Aoptimizer)
    print_log('load checkpoint from {:} and start train from {:}'.format(checkpoint_path, epoch), log)
  else:
    epoch, use_asgd, valid_loss_from_sgd = 0, False, []

  start_time, epoch_time = time.time(), AverageMeter()
  losses = {-1 : 1e9}
  while epoch < config.epochs:
    need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
    print_log("\n==>>{:s} [Epoch={:04d}/{:04d}] {:}".format(time_string(), epoch, config.epochs, need_time), log)
    optimizer = Aoptimizer if use_asgd else Soptimizer

    try:
      train(model, optimizer, corpus, train_data, config, epoch, print_freq, log)
    except Exception as err:
      # Roll back to the last checkpoint and retry (e.g. after the NaN assert
      # in `train` or a CUDA error). KeyboardInterrupt/SystemExit are no longer
      # swallowed, and without a checkpoint the original error is re-raised.
      # NOTE(review): a deterministic failure will still retry forever.
      torch.cuda.empty_cache()
      if not os.path.isfile(checkpoint_path):
        raise
      print_log('training failed at epoch {:} with {:}, roll back to {:}'.format(epoch, repr(err), checkpoint_path), log)
      epoch, use_asgd, valid_loss_from_sgd = _load_training_state(checkpoint_path, model, Soptimizer, Aoptimizer)
      continue

    if use_asgd:
      backup = _swap_in_averaged_weights(model, Aoptimizer)
      val_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
      _restore_weights(backup)
    else:
      val_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
      if len(valid_loss_from_sgd) > config.nonmono and val_loss > min(valid_loss_from_sgd):
        use_asgd = True
      valid_loss_from_sgd.append( val_loss )

    print_log('{:} end of epoch {:3d} with {:} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(time_string(), epoch, 'ASGD' if use_asgd else 'SGD', val_loss, math.exp(val_loss)), log)

    # From here on `use_asgd` may have just been switched on while ASGD has not
    # stepped yet; the swap helper then finds no 'ax' state and keeps the raw
    # weights instead of raising KeyError.
    if val_loss < min(losses.values()):
      backup = _swap_in_averaged_weights(model, Aoptimizer) if use_asgd else {}
      # 'epoch' is stored before it is incremented, so a resume repeats this epoch.
      torch.save({'epoch'     : epoch,
                  'use_asgd'  : use_asgd,
                  'valid_loss_from_sgd': valid_loss_from_sgd,
                  'state_dict': model.state_dict(),
                  'SGD_optimizer' : Soptimizer.state_dict(),
                  'ASGD_optimizer': Aoptimizer.state_dict()},
                 checkpoint_path)
      _restore_weights(backup)
      print_log('save into {:}'.format(checkpoint_path), log)

    backup = _swap_in_averaged_weights(model, Aoptimizer) if use_asgd else {}
    test_loss = evaluate(model, corpus, test_data, config.test_batch, config.bptt)
    _restore_weights(backup)
    print_log('| epoch={:03d} | test loss {:5.2f} | test ppl {:8.2f}'.format(epoch, test_loss, math.exp(test_loss)), log)
    losses[epoch] = val_loss
    epoch = epoch + 1
    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

  print_log('--------------------- Finish Training ----------------', log)
  # Evaluate the best checkpoint; fall back to the in-memory weights when no
  # checkpoint was ever written (e.g. config.epochs == 0).
  if os.path.isfile(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict( checkpoint['state_dict'] )
  test_loss = evaluate(model, corpus, test_data , config.test_batch, config.bptt)
  print_log('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(test_loss, math.exp(test_loss)), log)
  vali_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
  print_log('| End of training | valid loss {:5.2f} | valid ppl {:8.2f}'.format(vali_loss, math.exp(vali_loss)), log)
def evaluate(model, corpus, data_source, batch_size, bptt):
  """Return the length-weighted average NLL of `model` on `data_source`.

  Args:
    model: network providing `init_hidden(batch_size)` and
      `model(data, hidden) -> (log_prob, hidden)`.
    corpus: object with a `dictionary` attribute (only its length is read).
    data_source: tensor walked along dimension 0 in windows of `bptt` rows.
    batch_size: forwarded to `model.init_hidden`.
    bptt: window length passed to `get_batch`.

  Returns:
    Sum over windows of (window loss * len(data)) divided by the sum of
    len(data).
  """
  # Evaluation mode (the original comment notes this disables dropout).
  model.eval()
  loss_sum, length_sum = 0.0, 0.0
  with torch.no_grad():
    ntokens = len(corpus.dictionary)  # computed as in the original; not used below
    state = model.init_hidden(batch_size)
    for offset in range(0, data_source.size(0) - 1, bptt):
      data, targets = get_batch(data_source, offset, bptt)
      targets = targets.view(-1)
      log_prob, state = model(data, state)
      flat_log_prob = log_prob.view(-1, log_prob.size(2))
      window_loss = nn.functional.nll_loss(flat_log_prob, targets)
      window_len = len(data)
      loss_sum += window_loss.item() * window_len
      length_sum += window_len
      state = repackage_hidden(state)
  return loss_sum / length_sum
def train(model, optimizer, corpus, train_data, config, epoch, print_freq, log):
  """Run one training epoch with randomly varied window lengths.

  For every step a window length is drawn around `config.bptt` (occasionally
  around half of it), the learning rate of the first parameter group is
  scaled by `seq_len / config.bptt` for that step only, and the loss is the
  NLL plus activation regularisation terms weighted by `config.alpha` and
  `config.beta`.

  Returns:
    (data_time.sum, batch_time.sum): the `sum` fields of the two meters fed
    with per-step data-preparation time and full step time.
  """
  data_time, batch_time = AverageMeter(), AverageMeter()
  total_loss = 0
  tic = time.time()
  ntokens = len(corpus.dictionary)  # computed as in the original; not used below
  hidden_train = model.init_hidden(config.train_batch)
  batch, i = 0, 0
  while i < train_data.size(0) - 1 - 1:
    # Usually centre the draw on config.bptt, otherwise on half of it.
    if np.random.random() < 0.95:
      bptt = config.bptt
    else:
      bptt = config.bptt / 2.
    drawn = int(np.random.normal(bptt, 5))
    # Clamp: at least 5 steps, and never so long that it risks running out of memory.
    seq_len = min(max(5, drawn), config.bptt + config.max_seq_len_delta)

    # Temporarily rescale the learning rate in proportion to the window length.
    base_lr = optimizer.param_groups[0]['lr']
    optimizer.param_groups[0]['lr'] = base_lr * seq_len / config.bptt

    # Training mode (the original comment notes this enables dropout).
    model.train()
    data, targets = get_batch(train_data, i, seq_len)
    targets = targets.contiguous().view(-1)
    # count data preparation time
    data_time.update(time.time() - tic)

    optimizer.zero_grad()
    hidden_train = repackage_hidden(hidden_train)
    log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data, hidden_train, return_h=True)
    raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)

    loss = raw_loss
    # Activation regularisation on the last entry of dropped_rnn_hs
    if config.alpha > 0:
      loss = loss + sum(config.alpha * h.pow(2).mean() for h in dropped_rnn_hs[-1:])
    # Temporal activation regularisation (slowness) on the last entry of rnn_hs
    loss = loss + sum(config.beta * (h[1:] - h[:-1]).pow(2).mean() for h in rnn_hs[-1:])

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
    optimizer.step()
    gc.collect()

    # Put the unscaled learning rate back before the next step.
    optimizer.param_groups[0]['lr'] = base_lr
    total_loss += raw_loss.item()
    assert torch.isnan(loss) == False, '--- Epoch={:04d} :: {:03d}/{:03d} Get Loss = Nan'.format(epoch, batch, len(train_data)//config.bptt)

    batch_time.update(time.time() - tic)
    tic = time.time()
    batch, i = batch + 1, i + seq_len

    if batch % print_freq == 0:
      cur_loss = total_loss / print_freq
      print_log('  >> Epoch: {:04d} :: {:03d}/{:03d} || loss = {:5.2f}, ppl = {:8.2f}'.format(epoch, batch, len(train_data) // config.bptt, cur_loss, math.exp(cur_loss)), log)
      total_loss = 0
  return data_time.sum, batch_time.sum

View File

@ -0,0 +1,122 @@
import os
import torch
from collections import Counter
class Dictionary(object):
  """Vocabulary mapping words to consecutive integer ids, with usage counts.

  Attributes (all plain Python containers):
    word2idx: dict word -> id.
    idx2word: list where position `id` holds the word.
    counter:  Counter id -> number of times the word was added.
    total:    total number of `add_word` calls.
  """

  def __init__(self):
    self.word2idx = {}
    self.idx2word = []
    self.counter = Counter()
    self.total = 0

  def add_word(self, word):
    """Register one occurrence of `word` and return its id (new words get the next free id)."""
    index = self.word2idx.get(word)
    if index is None:
      index = len(self.idx2word)
      self.idx2word.append(word)
      self.word2idx[word] = index
    self.counter[index] += 1
    self.total += 1
    return index

  def __len__(self):
    """Number of distinct words registered so far."""
    return len(self.idx2word)
class Corpus(object):
  """Token-id tensors for the train/valid/test splits found under one directory.

  All three files (`train.txt`, `valid.txt`, `test.txt`) share a single
  `Dictionary`, filled in that order.
  """

  def __init__(self, path):
    self.dictionary = Dictionary()
    self.train, self.valid, self.test = [
      self.tokenize(os.path.join(path, name))
      for name in ('train.txt', 'valid.txt', 'test.txt')
    ]

  def tokenize(self, path):
    """Tokenizes a text file.

    Every line is split on whitespace and terminated with '<eos>'. Returns a
    1-D LongTensor holding the ids of all tokens in file order.
    """
    assert os.path.exists(path)
    vocab = self.dictionary
    # Pass 1: register every token and count them so the tensor can be preallocated.
    n_tokens = 0
    with open(path, 'r', encoding='utf-8') as handle:
      for line in handle:
        for word in line.split() + ['<eos>']:
          vocab.add_word(word)
          n_tokens += 1
    # Pass 2: write the ids into the preallocated tensor.
    ids = torch.LongTensor(n_tokens)
    cursor = 0
    with open(path, 'r', encoding='utf-8') as handle:
      for line in handle:
        for word in line.split() + ['<eos>']:
          ids[cursor] = vocab.word2idx[word]
          cursor += 1
    return ids
class SentCorpus(object):
  """Sentence-level corpus: each split is a list of 1-D LongTensors, one per line.

  All three files (`train.txt`, `valid.txt`, `test.txt`) share a single
  `Dictionary`.
  """

  def __init__(self, path):
    self.dictionary = Dictionary()
    self.train = self.tokenize(os.path.join(path, 'train.txt'))
    self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
    self.test = self.tokenize(os.path.join(path, 'test.txt'))

  def tokenize(self, path):
    """Tokenizes a text file.

    Every line is split on whitespace and terminated with '<eos>'; the words
    are registered in `self.dictionary` and the line becomes one LongTensor of
    ids. A blank line therefore yields a sentence holding only '<eos>'.

    The file is read once: `add_word` returns the id of the word it registers,
    so the separate counting pass, its unused token counter and the `if not
    line` check (which could never trigger, because an iterated line always
    contains at least its newline) are not needed.
    """
    assert os.path.exists(path)
    sents = []
    with open(path, 'r', encoding='utf-8') as f:
      for line in f:
        words = line.split() + ['<eos>']
        sents.append(torch.LongTensor([self.dictionary.add_word(word) for word in words]))
    return sents
class BatchSentLoader(object):
  """Iterate over sentences in padded batches, shortest sentences first.

  Each batch is a LongTensor of shape (max_len, batch_size): sentence `i` of
  the batch occupies column `i` from the top, and the rest of the column is
  filled with `pad_id`. The last batch may hold fewer than `batch_size`
  sentences.

  Args:
    sents: sequence of 1-D LongTensors.
    batch_size: maximum number of sentences per batch.
    pad_id: fill value for positions past the end of a sentence.
    cuda: when True, every batch is moved with `.cuda()`.
    volatile: stored on the instance but not used by this class.
  """

  def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False):
    self.sents = sents
    self.batch_size = batch_size
    self.sort_sents = sorted(sents, key=lambda x: x.size(0))
    self.cuda = cuda
    self.volatile = volatile
    self.pad_id = pad_id
    # Start position in sort_sents. Set here as well as in __iter__ so that
    # next(loader) works on a fresh loader instead of raising AttributeError.
    self.idx = 0

  def __next__(self):
    if self.idx >= len(self.sort_sents):
      raise StopIteration
    batch_size = min(self.batch_size, len(self.sort_sents)-self.idx)
    batch = self.sort_sents[self.idx:self.idx+batch_size]
    max_len = max(s.size(0) for s in batch)
    tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id)
    for i, s in enumerate(batch):
      tensor[:s.size(0),i].copy_(s)
    if self.cuda:
      tensor = tensor.cuda()
    self.idx += batch_size
    return tensor

  # Python 2 spelling of the iterator protocol.
  next = __next__

  def __iter__(self):
    # Restart from the shortest sentence on every new iteration.
    self.idx = 0
    return self

View File

@ -0,0 +1,65 @@
# coding=utf-8
import numpy as np
import torch
class MetaBatchSampler(object):
  """Batch sampler that yields class-balanced episodes of dataset indices."""

  def __init__(self, labels, classes_per_it, num_samples, iterations):
    '''
    Initialize MetaBatchSampler
    Args:
    - labels: an array of labels (it must support `.copy()` and its elements
              `.item()`); sample indices are the positions in this array.
              The distinct labels must be exactly 0 .. K-1.
    - classes_per_it: number of random classes in each episode (must be < K)
    - num_samples: number of samples drawn per class in each episode
    - iterations: number of episodes per epoch
    '''
    super(MetaBatchSampler, self).__init__()
    self.labels = labels.copy()
    self.classes_per_it = classes_per_it
    self.sample_per_class = num_samples
    self.iterations = iterations

    self.classes, self.counts = np.unique(self.labels, return_counts=True)
    assert len(self.classes) == np.max(self.classes) + 1 and np.min(self.classes) == 0
    assert classes_per_it < len(self.classes), '{:} vs. {:}'.format(classes_per_it, len(self.classes))
    self.classes = torch.LongTensor(self.classes)

    # self.indexes maps every class id to a LongTensor with the positions of
    # its samples, in increasing order.
    members = { c.item() : [] for c in self.classes }
    for position, label in enumerate(self.labels):
      members[ label.item() ].append( position )
    self.indexes = { c : torch.LongTensor( positions ) for c, positions in members.items() }

  def __iter__(self):
    # yield one shuffled batch of indices per episode
    spc = self.sample_per_class
    cpi = self.classes_per_it

    for _ in range(self.iterations):
      batch = torch.LongTensor(spc * cpi)
      assert cpi < len(self.classes), '{:} vs. {:}'.format(cpi, len(self.classes))
      # pick the classes of this episode
      chosen = self.classes[ torch.randperm(len(self.classes))[:cpi] ]
      for slot, cls in enumerate(chosen):
        candidates = self.indexes[ cls.item() ]
        num = candidates.nelement()
        assert spc < num, '{:} vs. {:}'.format(spc, num)
        # pick spc distinct samples of this class
        picks = torch.randperm( num )[:spc]
        batch[slot * spc : (slot + 1) * spc] = candidates[picks]
      # shuffle so that classes are interleaved inside the batch
      yield batch[torch.randperm(len(batch))]

  def __len__(self):
    # returns the number of iterations (episodes) per epoch
    return self.iterations

View File

@ -0,0 +1,84 @@
from __future__ import print_function
import numpy as np
from PIL import Image
import pickle as pkl
import os, cv2, csv, glob
import torch
import torch.utils.data as data
class TieredImageNet(data.Dataset):
  """Dataset over one or several tiered-ImageNet splits stored under `root_dir`.

  Args:
    root_dir: directory holding `<split>_labels.pkl` and `<split>_images.npz`
      (or `<split>_images_png.pkl`, from which the npz is generated on first use).
    split: one split name or several joined by '-' (e.g. 'val-test'). The
      specific labels of each following split are shifted so that label ranges
      of different splits do not overlap.
    transform: optional callable applied to the PIL image in `__getitem__`.
  """

  def __init__(self, root_dir, split, transform=None):
    self.split = split
    self.root_dir = root_dir
    self.transform = transform
    images, labels, last = [], [], 0
    for name in split.split('-'):
      labels_name = '{:}/{:}_labels.pkl'.format(self.root_dir, name)
      images_name = '{:}/{:}_images.npz'.format(self.root_dir, name)
      # build the npz from the png pickle if the npz does not exist yet
      if not os.path.exists(images_name):
        png_pkl = images_name[:-4] + '_png.pkl'
        if os.path.exists(png_pkl):
          decompress(images_name, png_pkl)
        else:
          raise ValueError('png_pkl {:} not exits'.format( png_pkl ))
      assert os.path.exists(images_name) and os.path.exists(labels_name), '{:} & {:}'.format(images_name, labels_name)
      print ("Prepare {:} done".format(images_name))
      try:
        # Python 2 style load (text mode, str keys).
        with open(labels_name) as f:
          label_dict = pkl.load(f)
          label_specific = label_dict["label_specific"]
      except Exception:
        # Python 3: reload in binary mode. `except Exception` (rather than a
        # bare except) lets KeyboardInterrupt/SystemExit propagate.
        with open(labels_name, 'rb') as f:
          label_dict = pkl.load(f, encoding='bytes')
        # Pickles written by Python 2 expose bytes keys here; accept str keys too.
        key = b'label_specific' if b'label_specific' in label_dict else 'label_specific'
        label_specific = label_dict[key]
      # `archive` instead of `data`, to avoid shadowing the torch.utils.data alias
      with np.load(images_name, mmap_mode="r", encoding='latin1') as archive:
        image_data = archive["images"]
      images.append( image_data )
      # shift the labels of this split past those of the previous splits
      label_specific = label_specific + last
      labels.append( label_specific )
      last = np.max(label_specific) + 1
      print ("Load {:} done, with image shape = {:}, label shape = {:}, [{:} ~ {:}]".format(images_name, image_data.shape, label_specific.shape, np.min(label_specific), np.max(label_specific)))
    images, labels = np.concatenate(images), np.concatenate(labels)

    self.images = images
    self.labels = labels
    self.n_classes = int( np.max(labels) + 1 )
    # class id -> array with the positions of its samples
    self.dict_index_label = {}
    for cls in range(self.n_classes):
      self.dict_index_label[cls] = np.where(labels==cls)[0]
    self.length = len(labels)
    print ("There are {:} images, {:} labels [{:} ~ {:}]".format(images.shape, labels.shape, np.min(labels), np.max(labels)))

  def __repr__(self):
    return ('{name}(length={length}, classes={n_classes})'.format(name=self.__class__.__name__, **self.__dict__))

  def __len__(self):
    return self.length

  def __getitem__(self, index):
    """Return (image, label) for a non-negative `index` below `len(self)`."""
    assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
    image = self.images[index].copy()
    label = int(self.labels[index])
    # reverse the channel axis before handing the array to PIL as RGB
    image = Image.fromarray(image[:,:,::-1].astype('uint8'), 'RGB')
    if self.transform is not None:
      image = self.transform( image )
    return image, label
def decompress(path, output):
  """Decode the pickled encoded images in `output` and save them as an npz at `path`.

  Note the argument roles: `output` is the file that is READ (a pickle holding
  a sequence of encoded image buffers), `path` is the file that is WRITTEN
  (an npz with one uint8 array named 'images' of shape (N, 84, 84, 3)).
  """
  with open(output, 'rb') as handle:
    encoded = pkl.load(handle, encoding='bytes')
  decoded = np.zeros([len(encoded), 84, 84, 3], dtype=np.uint8)
  for slot, buf in enumerate(encoded):
    # flag 1 asks OpenCV for a 3-channel colour image
    decoded[slot] = cv2.imdecode(buf, 1)
  np.savez(path, images=decoded)

3
lib/datasets/__init__.py Normal file
View File

@ -0,0 +1,3 @@
from .MetaBatchSampler import MetaBatchSampler
from .TieredImageNet import TieredImageNet
from .LanguageDataset import Corpus

10
lib/datasets/test_NLP.py Normal file
View File

@ -0,0 +1,10 @@
import os, sys, torch
from LanguageDataset import SentCorpus, BatchSentLoader
if __name__ == '__main__':
  # Manual smoke test: batch the Penn test split sentence-wise and print the
  # size of every padded batch.
  path = '../../data/data/penn'
  corpus = SentCorpus( path )
  loader = BatchSentLoader(corpus.test, 10)
  for batch_index, batch in enumerate(loader):
    print('{:} :: {:}'.format(batch_index, batch.size()))

View File

@ -0,0 +1,33 @@
import os, sys, torch
import torchvision.transforms as transforms
from TieredImageNet import TieredImageNet
from MetaBatchSampler import MetaBatchSampler
# Manual smoke test for TieredImageNet + MetaBatchSampler.
# Expects the data under $TORCH_HOME/tiered-imagenet.
root_dir = os.environ['TORCH_HOME'] + '/tiered-imagenet'
print ('root : {:}'.format(root_dir))

# Augmentation + normalisation pipeline applied to every image.
means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
lists = [
  transforms.RandomHorizontalFlip(),
  transforms.RandomCrop(84, padding=8),
  transforms.ToTensor(),
  transforms.Normalize(means, stds),
]
transform = transforms.Compose(lists)

# Single-sample check.
dataset = TieredImageNet(root_dir, 'val-test', transform)
image, label = dataset[111]
print ('image shape = {:}, label = {:}'.format(image.size(), label))
print ('image : min = {:}, max = {:}    ||| label : {:}'.format(image.min(), image.max(), label))

# Episode loader: 10 episodes of 250 classes x 100 samples.
sampler = MetaBatchSampler(dataset.labels, 250, 100, 10)
dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)

print ('the length of dataset : {:}'.format( len(dataset) ))
print ('the length of loader  : {:}'.format( len(dataloader) ))

for images, labels in dataloader:
  print ('images : {:}'.format( images.size() ))
  print ('labels : {:}'.format( labels.size() ))
  # per-channel statistics of the batch
  for i in range(3):
    channel = images[:,i]
    print ('image-value-[{:}] : {:} ~ {:}, mean={:}, std={:}'.format(i, channel.min(), channel.max(), channel.mean(), channel.std()))
  print('-----')

4
lib/move.sh Normal file
View File

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Refresh ./xnn with a copy of torch/nn taken from the upstream PyTorch repo.
# Usage: run from the directory that should contain xnn.
set -euo pipefail

# Remove the temporary clone on every exit path, including failures.
cleanup() { rm -rf -- pytorch; }
trap cleanup EXIT

rm -rf -- pytorch
# Only the current tree is needed, so a shallow clone is enough.
git clone --depth 1 https://github.com/pytorch/pytorch.git
# NOTE: if xnn already exists, cp places the copy at xnn/nn (unchanged behavior).
cp -r -- ./pytorch/torch/nn xnn

89
lib/nas/CifarNet.py Normal file
View File

@ -0,0 +1,89 @@
import torch
import torch.nn as nn
from .construct_utils import Cell, Transition
class AuxiliaryHeadCIFAR(nn.Module):
  """Auxiliary classifier attached to an intermediate feature map (CIFAR variant)."""

  def __init__(self, C, num_classes):
    """assuming input size 8x8"""
    super(AuxiliaryHeadCIFAR, self).__init__()
    layers = [
      nn.ReLU(inplace=True),
      # 8x8 input -> 2x2 after this pooling
      nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
      nn.Conv2d(C, 128, 1, bias=False),
      nn.BatchNorm2d(128),
      nn.ReLU(inplace=True),
      # 2x2 kernel collapses the 2x2 map to 1x1 with 768 channels
      nn.Conv2d(128, 768, 2, bias=False),
      nn.BatchNorm2d(768),
      nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*layers)
    self.classifier = nn.Linear(768, num_classes)

  def forward(self, x):
    feats = self.features(x)
    flat = feats.view(feats.size(0), -1)
    return self.classifier(flat)
class NetworkCIFAR(nn.Module):
  """Stack of `layers` cells built from `genotype`, with an optional auxiliary head.

  Cells at positions layers//3 and 2*layers//3 are reduction cells and double
  the channel count. When `genotype.reduce` is None a fixed `Transition`
  module is used for them instead of a genotype-defined `Cell`.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkCIFAR, self).__init__()
    self._layers = layers

    stem_multiplier = 3
    C_stem = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_stem, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_stem)
    )

    C_prev_prev, C_prev, C_curr = C_stem, C_stem, C
    self.cells = nn.ModuleList()
    reduction_prev = False
    for i in range(layers):
      reduction = i in [layers//3, 2*layers//3]
      if reduction:
        C_curr *= 2
      if reduction and genotype.reduce is None:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
      else:
        cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells.append( cell )
      C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
      # remember the width at the point where the auxiliary head taps in
      if i == 2*layers//3:
        C_to_auxiliary = C_prev

    self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes) if auxiliary else None
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.drop_path_prob = -1

  def update_drop_path(self, drop_path_prob):
    """Set the drop-path probability handed to every cell in `forward`."""
    self.drop_path_prob = drop_path_prob

  def auxiliary_param(self):
    """Parameters of the auxiliary head as a list (empty when there is no head)."""
    if self.auxiliary_head is None:
      return []
    return list( self.auxiliary_head.parameters() )

  def forward(self, inputs):
    """Return `logits`, or `(logits, logits_aux)` in training mode with an auxiliary head."""
    s0 = s1 = self.stem(inputs)
    aux_position = 2*self._layers//3
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == aux_position:
        if self.auxiliary_head and self.training:
          logits_aux = self.auxiliary_head(s1)
    pooled = self.global_pooling(s1)
    logits = self.classifier(pooled.view(pooled.size(0), -1))
    if self.auxiliary_head and self.training:
      return logits, logits_aux
    return logits

101
lib/nas/ImageNet.py Normal file
View File

@ -0,0 +1,101 @@
import torch
import torch.nn as nn
from .construct_utils import Cell, Transition
class AuxiliaryHeadImageNet(nn.Module):
  """Auxiliary classifier attached to an intermediate feature map (ImageNet variant)."""

  def __init__(self, C, num_classes):
    """Build the head for a `C`-channel input.

    The classifier expects exactly 768 features, i.e. a 1x1 map after the 2x2
    convolution. A 7x7 input gives that: pooling (5, stride 2) -> 2x2, then
    conv 2x2 -> 1x1. The original docstring said "assuming input size 14x14",
    which would leave a 4x4 map here.
    """
    super(AuxiliaryHeadImageNet, self).__init__()
    layers = [
      nn.ReLU(inplace=True),
      nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
      nn.Conv2d(C, 128, 1, bias=False),
      nn.BatchNorm2d(128),
      nn.ReLU(inplace=True),
      nn.Conv2d(128, 768, 2, bias=False),
      # NOTE: no BatchNorm2d(768) here. It was left out of an earlier
      # implementation by mistake and stays out for consistency with the
      # experiments in the paper.
      nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*layers)
    self.classifier = nn.Linear(768, num_classes)

  def forward(self, x):
    feats = self.features(x)
    flat = feats.view(feats.size(0), -1)
    return self.classifier(flat)
class NetworkImageNet(nn.Module):
  """ImageNet-style network: two strided stems followed by `layers` cells.

  Cells at positions layers//3 and 2*layers//3 are reduction cells and double
  the channel count. When `genotype.reduce` is None a fixed `Transition`
  module is used for them instead of a genotype-defined `Cell`.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkImageNet, self).__init__()
    self._layers = layers

    half = C // 2
    # stem0: two stride-2 convolutions
    self.stem0 = nn.Sequential(
      nn.Conv2d(3, half, kernel_size=3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(half),
      nn.ReLU(inplace=True),
      nn.Conv2d(half, C, 3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C),
    )
    # stem1: one more stride-2 convolution
    self.stem1 = nn.Sequential(
      nn.ReLU(inplace=True),
      nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C),
    )

    C_prev_prev, C_prev, C_curr = C, C, C
    self.cells = nn.ModuleList()
    # True from the start: s0 (stem0 output) and s1 (stem1 output) differ in resolution
    reduction_prev = True
    for i in range(layers):
      reduction = i in [layers // 3, 2 * layers // 3]
      if reduction:
        C_curr *= 2
      if reduction and genotype.reduce is None:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
      else:
        cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells += [cell]
      C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
      # remember the width at the point where the auxiliary head taps in
      if i == 2 * layers // 3:
        C_to_auxiliary = C_prev

    self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes) if auxiliary else None
    self.global_pooling = nn.AvgPool2d(7)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.drop_path_prob = -1

  def update_drop_path(self, drop_path_prob):
    """Set the drop-path probability handed to every cell in `forward`."""
    self.drop_path_prob = drop_path_prob

  def auxiliary_param(self):
    """Parameters of the auxiliary head as a list (empty when there is no head)."""
    if self.auxiliary_head is None:
      return []
    return list( self.auxiliary_head.parameters() )

  def forward(self, input):
    """Return `logits`, or `(logits, logits_aux)` in training mode with an auxiliary head."""
    s0 = self.stem0(input)
    s1 = self.stem1(s0)
    aux_position = 2 * self._layers // 3
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == aux_position:
        if self.auxiliary_head and self.training:
          logits_aux = self.auxiliary_head(s1)
    pooled = self.global_pooling(s1)
    logits = self.classifier(pooled.view(pooled.size(0), -1))
    if self.auxiliary_head and self.training:
      return logits, logits_aux
    return logits

27
lib/nas/SE_Module.py Normal file
View File

@ -0,0 +1,27 @@
import torch
import torch.nn as nn
# Squeeze and Excitation module
class SqEx(nn.Module):
  """Squeeze-and-Excitation block: rescales every channel by a learned gate.

  Args:
    n_features: number of input channels; must be divisible by `reduction`.
    reduction: bottleneck ratio of the two-layer gating network.

  Raises:
    ValueError: if `n_features` is not a multiple of `reduction`.
  """

  def __init__(self, n_features, reduction=16):
    super(SqEx, self).__init__()

    if n_features % reduction != 0:
      raise ValueError('n_features must be divisible by reduction (default = 16)')

    self.linear1 = nn.Linear(n_features, n_features // reduction, bias=True)
    self.nonlin1 = nn.ReLU(inplace=True)
    self.linear2 = nn.Linear(n_features // reduction, n_features, bias=True)
    self.nonlin2 = nn.Sigmoid()

  def forward(self, x):
    """Return `x` (4-D, channels on dim 1) multiplied by its per-channel gate."""
    # Squeeze: global average over the two spatial dims -> (N, C, 1, 1).
    # Reached through `nn.functional`; the file never imports the `F` alias the
    # original line used, so that line raised NameError.
    y = nn.functional.avg_pool2d(x, kernel_size=x.size()[2:4])
    # Move channels last so the Linear layers act on them: (N, 1, 1, C)
    y = y.permute(0, 2, 3, 1)
    y = self.nonlin1(self.linear1(y))
    y = self.nonlin2(self.linear2(y))
    # Back to (N, C, 1, 1) so the gate broadcasts over the spatial dims
    y = y.permute(0, 3, 1, 2)
    y = x * y
    return y

18
lib/nas/__init__.py Normal file
View File

@ -0,0 +1,18 @@
from .model_search import Network
from .model_search_v1 import NetworkV1
from .model_search_f1 import NetworkF1
# acceleration model
from .model_search_f1_acc2 import NetworkFACC1
from .model_search_acc2 import NetworkACC2
from .model_search_v3 import NetworkV3
from .model_search_v4 import NetworkV4
from .model_search_v5 import NetworkV5
from .CifarNet import NetworkCIFAR
from .ImageNet import NetworkImageNet
# genotypes
from .genotypes import DARTS_V1, DARTS_V2
from .genotypes import NASNet, PNASNet, AmoebaNet, ENASNet
from .genotypes import DMS_V1, DMS_F1, GDAS_CC
from .construct_utils import return_alphas_str

151
lib/nas/construct_utils.py Normal file
View File

@ -0,0 +1,151 @@
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from .operations import OPS, FactorizedReduce, ReLUConvBN, Identity
def random_select(length, ratio):
  """Return a 0/1 list of `length` entries with at least one 1.

  One position is drawn uniformly and always set to 1; every other position
  is set to 1 when a fresh `random.random()` draw is below `ratio`.
  """
  forced = random.randint(0, length-1)
  # `or` short-circuits, so no random number is drawn for the forced position
  return [1 if (i == forced or random.random() < ratio) else 0 for i in range(length)]
def all_select(length):
  """Return a list of `length` ones."""
  return [1] * length
def drop_path(x, drop_prob):
  """Zero whole samples of `x` with probability `drop_prob`, rescaling the rest.

  Works IN PLACE on `x` (4-D, samples on dim 0) and returns the same tensor.
  Kept samples are divided by `1 - drop_prob`. Nothing happens when
  `drop_prob` is not positive.
  """
  if not drop_prob > 0.:
    return x
  keep_prob = 1. - drop_prob
  # one Bernoulli(keep_prob) draw per sample, broadcast over the other dims
  mask = x.new_zeros(x.size(0), 1, 1, 1).bernoulli_(keep_prob)
  x.div_(keep_prob)
  x.mul_(mask)
  return x
def return_alphas_str(basemodel):
  """Format the softmax (over the last dim) of a model's architecture weights.

  Always reports `basemodel.alphas_normal`; a second line for
  `basemodel.alphas_reduce` is appended when that attribute exists.
  """
  parts = ['normal : {:}'.format( F.softmax(basemodel.alphas_normal, dim=-1) )]
  if hasattr(basemodel, 'alphas_reduce'):
    parts.append('\nreduce : {:}'.format( F.softmax(basemodel.alphas_reduce, dim=-1) ))
  return ''.join(parts)
class Cell(nn.Module):
  """Fixed cell assembled from a genotype.

  A genotype entry is `(op_name, input_index, value)`. Entries are consumed in
  pairs: step `i` applies ops `2*i` and `2*i+1` to the states they index and
  adds the results, producing a new state. States 0 and 1 are the two
  preprocessed cell inputs. The output concatenates the states listed in the
  genotype's `*_concat` field along dim 1.
  """

  def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    print(C_prev_prev, C_prev, C)

    # the input from two cells back needs FactorizedReduce when the previous cell reduced
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)

    if reduction:
      edges, concat = genotype.reduce, genotype.reduce_concat
    else:
      edges, concat = genotype.normal, genotype.normal_concat
    op_names, indices, values = zip(*edges)
    self._compile(C, op_names, indices, values, concat, reduction)

  def _compile(self, C, op_names, indices, values, concat, reduction):
    """Instantiate one op per genotype entry and record the wiring."""
    assert len(op_names) == len(indices)
    self._steps = len(op_names) // 2
    self._concat = concat
    self.multiplier = len(concat)

    self._ops = nn.ModuleList()
    for name, index in zip(op_names, indices):
      # in a reduction cell only ops reading the two cell inputs are strided
      stride = 2 if reduction and index < 2 else 1
      self._ops.append( OPS[name](C, stride, True) )
    self._indices = indices
    self._values = values

  def forward(self, s0, s1, drop_prob):
    """Run the cell on the two previous outputs; `drop_prob` is used only in training."""
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    states = [s0, s1]
    for step in range(self._steps):
      first, second = 2*step, 2*step+1
      h1 = states[self._indices[first]]
      h2 = states[self._indices[second]]
      op1, op2 = self._ops[first], self._ops[second]
      h1 = op1(h1)
      h2 = op2(h2)
      if self.training and drop_prob > 0.:
        # outputs of Identity ops are never dropped
        if not isinstance(op1, Identity):
          h1 = drop_path(h1, drop_prob)
        if not isinstance(op2, Identity):
          h2 = drop_path(h2, drop_prob)
      states.append(h1 + h2)
    return torch.cat([states[i] for i in self._concat], dim=1)
class Transition(nn.Module):
  """Hand-designed reduction cell with the same call interface as `Cell`.

  Produces four branches of `C` channels each, concatenated along dim 1 in
  the order X0, X1, X2, X3.
  """

  def __init__(self, C_prev_prev, C_prev, C, reduction_prev, multiplier=4):
    super(Transition, self).__init__()
    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
    self.multiplier = multiplier
    self.reduction = True

    def conv_branch():
      # ReLU, then a 1x3 and a 3x1 grouped convolution (each striding one
      # spatial dim by 2), BN, ReLU, 1x1 convolution, BN
      return nn.Sequential(
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
        nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
        nn.BatchNorm2d(C, affine=True),
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(C, affine=True))

    def pool_branch(stride):
      # 3x3 max pooling with the given stride, followed by BN
      return nn.Sequential(
        nn.MaxPool2d(3, stride=stride, padding=1),
        nn.BatchNorm2d(C, affine=True))

    self.ops1 = nn.ModuleList([conv_branch(), conv_branch()])
    self.ops2 = nn.ModuleList([pool_branch(1), pool_branch(2)])

  def forward(self, s0, s1, drop_prob = -1):
    """Return the concatenation of the four branches; drop-path applies only in training."""
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    # strided convolution branches, one per input
    X0 = self.ops1[0] (s0)
    X1 = self.ops1[1] (s1)
    if self.training and drop_prob > 0.:
      X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)

    # pooling branches: stride 1 on the already reduced sum, stride 2 on s1
    X2 = self.ops2[0] (X0+X1)
    X3 = self.ops2[1] (s1)
    if self.training and drop_prob > 0.:
      X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
    return torch.cat([X0, X1, X2, X3], dim=1)

203
lib/nas/genotypes.py Normal file
View File

@ -0,0 +1,203 @@
from collections import namedtuple
# A Genotype describes one architecture:
#   normal / reduce : list of (op_name, input_state_index, value) entries,
#                     consumed two per step when a cell is built
#   normal_concat / reduce_concat : indices of the states concatenated into
#                     the cell output
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
# Candidate operation names; each one is used as a key into OPS when a mixed
# (searchable) op is built.
PRIMITIVES = [
'none',
'max_pool_3x3',
'avg_pool_3x3',
'skip_connect',
'sep_conv_3x3',
'sep_conv_5x5',
'dil_conv_3x3',
'dil_conv_5x5'
]
# NASNet genotype: 10 entries (5 steps) per cell, every value fixed to 1.0.
# NOTE(review): 'sep_conv_7x7' is not listed in PRIMITIVES - confirm that OPS defines it.
NASNet = Genotype(
normal = [
('sep_conv_5x5', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('sep_conv_5x5', 0, 1.0),
('sep_conv_3x3', 0, 1.0),
('avg_pool_3x3', 1, 1.0),
('skip_connect', 0, 1.0),
('avg_pool_3x3', 0, 1.0),
('avg_pool_3x3', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('skip_connect', 1, 1.0),
],
normal_concat = [2, 3, 4, 5, 6],
reduce = [
('sep_conv_5x5', 1, 1.0),
('sep_conv_7x7', 0, 1.0),
('max_pool_3x3', 1, 1.0),
('sep_conv_7x7', 0, 1.0),
('avg_pool_3x3', 1, 1.0),
('sep_conv_5x5', 0, 1.0),
('skip_connect', 3, 1.0),
('avg_pool_3x3', 2, 1.0),
('sep_conv_3x3', 2, 1.0),
('max_pool_3x3', 1, 1.0),
],
reduce_concat = [4, 5, 6],
)
# AmoebaNet genotype: 10 entries (5 steps) per cell, every value fixed to 1.0.
# Only a subset of the states is concatenated (normal: 4, 5, 6; reduce: 3, 4, 6).
# NOTE(review): 'sep_conv_7x7' and 'conv_7x1_1x7' are not listed in PRIMITIVES -
# confirm that OPS defines them.
AmoebaNet = Genotype(
normal = [
('avg_pool_3x3', 0, 1.0),
('max_pool_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('sep_conv_5x5', 2, 1.0),
('sep_conv_3x3', 0, 1.0),
('avg_pool_3x3', 3, 1.0),
('sep_conv_3x3', 1, 1.0),
('skip_connect', 1, 1.0),
('skip_connect', 0, 1.0),
('avg_pool_3x3', 1, 1.0),
],
normal_concat = [4, 5, 6],
reduce = [
('avg_pool_3x3', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('max_pool_3x3', 0, 1.0),
('sep_conv_7x7', 2, 1.0),
('sep_conv_7x7', 0, 1.0),
('avg_pool_3x3', 1, 1.0),
('max_pool_3x3', 0, 1.0),
('max_pool_3x3', 1, 1.0),
('conv_7x1_1x7', 0, 1.0),
('sep_conv_3x3', 5, 1.0),
],
reduce_concat = [3, 4, 6]
)
# DARTS_V1 genotype: 8 entries (4 steps) per cell, every value fixed to 1.0;
# both cells concatenate states 2-5.
DARTS_V1 = Genotype(
normal=[
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('skip_connect', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('skip_connect', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('skip_connect', 2, 1.0)],
normal_concat=[2, 3, 4, 5],
reduce=[
('max_pool_3x3', 0, 1.0),
('max_pool_3x3', 1, 1.0),
('skip_connect', 2, 1.0),
('max_pool_3x3', 0, 1.0),
('max_pool_3x3', 0, 1.0),
('skip_connect', 2, 1.0),
('skip_connect', 2, 1.0),
('avg_pool_3x3', 0, 1.0)],
reduce_concat=[2, 3, 4, 5]
)
# DARTS_V2 genotype: 8 entries (4 steps) per cell, every value fixed to 1.0;
# both cells concatenate states 2-5.
DARTS_V2 = Genotype(
normal=[
('sep_conv_3x3', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 1, 1.0),
('skip_connect', 0, 1.0),
('skip_connect', 0, 1.0),
('dil_conv_3x3', 2, 1.0)],
normal_concat=[2, 3, 4, 5],
reduce=[
('max_pool_3x3', 0, 1.0),
('max_pool_3x3', 1, 1.0),
('skip_connect', 2, 1.0),
('max_pool_3x3', 1, 1.0),
('max_pool_3x3', 0, 1.0),
('skip_connect', 2, 1.0),
('skip_connect', 2, 1.0),
('max_pool_3x3', 1, 1.0)],
reduce_concat=[2, 3, 4, 5]
)
# PNASNet genotype: 10 entries (5 steps) per cell, every value fixed to 1.0.
# The normal and reduce cells use the same entry list; both concatenate states 2-6.
# NOTE(review): 'sep_conv_7x7' is not listed in PRIMITIVES - confirm that OPS defines it.
PNASNet = Genotype(
normal = [
('sep_conv_5x5', 0, 1.0),
('max_pool_3x3', 0, 1.0),
('sep_conv_7x7', 1, 1.0),
('max_pool_3x3', 1, 1.0),
('sep_conv_5x5', 1, 1.0),
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 4, 1.0),
('max_pool_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('skip_connect', 1, 1.0),
],
normal_concat = [2, 3, 4, 5, 6],
reduce = [
('sep_conv_5x5', 0, 1.0),
('max_pool_3x3', 0, 1.0),
('sep_conv_7x7', 1, 1.0),
('max_pool_3x3', 1, 1.0),
('sep_conv_5x5', 1, 1.0),
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 4, 1.0),
('max_pool_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('skip_connect', 1, 1.0),
],
reduce_concat = [2, 3, 4, 5, 6],
)
# ENASNet genotype: 10 entries (5 steps) per cell, every value fixed to 1.0;
# both cells concatenate states 2-6. In `reduce`, the trailing "# k" marks the
# index of the state produced by the pair of entries ending on that line.
# Source: https://arxiv.org/pdf/1802.03268.pdf
ENASNet = Genotype(
normal = [
('sep_conv_3x3', 1, 1.0),
('skip_connect', 1, 1.0),
('sep_conv_5x5', 1, 1.0),
('skip_connect', 0, 1.0),
('avg_pool_3x3', 0, 1.0),
('sep_conv_3x3', 1, 1.0),
('sep_conv_3x3', 0, 1.0),
('avg_pool_3x3', 1, 1.0),
('sep_conv_5x5', 1, 1.0),
('avg_pool_3x3', 0, 1.0),
],
normal_concat = [2, 3, 4, 5, 6],
reduce = [
('sep_conv_5x5', 0, 1.0),
('sep_conv_3x3', 1, 1.0), # 2
('sep_conv_3x3', 1, 1.0),
('avg_pool_3x3', 1, 1.0), # 3
('sep_conv_3x3', 1, 1.0),
('avg_pool_3x3', 1, 1.0), # 4
('avg_pool_3x3', 1, 1.0),
('sep_conv_5x5', 4, 1.0), # 5
('sep_conv_3x3', 5, 1.0),
('sep_conv_5x5', 0, 1.0),
],
reduce_concat = [2, 3, 4, 5, 6],
)
# `DARTS` is an alias for the second DARTS genotype.
DARTS = DARTS_V2
# Search by normal and reduce
# Each entry is (op_name, input_state_index, value), with the value written out
# as found by the search. The concat fields are plain lists like in every other
# genotype of this file: a bare `range(2, 6)` is a lazy range object on
# Python 3 and prints as "range(2, 6)".
DMS_V1 = Genotype(
  normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
  normal_concat=[2, 3, 4, 5],
  reduce=[('sep_conv_5x5', 0, 0.12862831354141235), ('sep_conv_3x3', 1, 0.12783904373645782), ('sep_conv_5x5', 2, 0.12725995481014252), ('sep_conv_5x5', 1, 0.12705285847187042), ('dil_conv_5x5', 2, 0.12797553837299347), ('sep_conv_3x3', 1, 0.12737272679805756), ('sep_conv_5x5', 0, 0.12833961844444275), ('sep_conv_5x5', 1, 0.12758426368236542)],
  reduce_concat=[2, 3, 4, 5]
)
# Search by normal and fixing reduction
# `reduce` is None: the network classes in this package then build a fixed
# Transition module for reduction layers instead of a genotype-defined Cell.
DMS_F1 = Genotype(
normal=[('skip_connect', 0, 0.16), ('skip_connect', 1, 0.13), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.16), ('sep_conv_3x3', 2, 0.15)],
normal_concat=[2, 3, 4, 5],
reduce=None,
reduce_concat=[2, 3, 4, 5],
)
# Combine DMS_V1 and DMS_F1
# Uses the normal cell of DMS_V1 together with `reduce=None` as in DMS_F1.
# The concat fields are plain lists like in every other genotype of this file:
# a bare `range(2, 6)` is a lazy range object on Python 3 and prints as
# "range(2, 6)".
GDAS_CC = Genotype(
  normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
  normal_concat=[2, 3, 4, 5],
  reduce=None,
  reduce_concat=[2, 3, 4, 5]
)

19
lib/nas/head_utils.py Normal file
View File

@ -0,0 +1,19 @@
import torch
import torch.nn as nn
class ImageNetHEAD(nn.Sequential):
  """Two-convolution stem: conv1 -> bn1 -> relu1 -> conv2 -> bn2.

  Args:
    C: number of output channels; the first convolution produces ``C // 2``.
    stride: stride of the second convolution (the first always uses stride 2).
  """

  def __init__(self, C, stride=2):
    super(ImageNetHEAD, self).__init__()
    half = C // 2
    # Modules are created in registration order so weight initialisation
    # consumes the random generator in the same sequence.
    stages = [
      ('conv1', nn.Conv2d(3, half, kernel_size=3, stride=2, padding=1, bias=False)),
      ('bn1', nn.BatchNorm2d(half)),
      ('relu1', nn.ReLU(inplace=True)),
      ('conv2', nn.Conv2d(half, C, kernel_size=3, stride=stride, padding=1, bias=False)),
      ('bn2', nn.BatchNorm2d(C)),
    ]
    for name, module in stages:
      self.add_module(name, module)
class CifarHEAD(nn.Sequential):
  """Single 3x3 convolution (3 -> C channels, padding 1, no bias) followed by batch norm.

  Args:
    C: number of output channels.
  """

  def __init__(self, C):
    super(CifarHEAD, self).__init__()
    stages = [
      ('conv', nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False)),
      ('bn', nn.BatchNorm2d(C)),
    ]
    for name, module in stages:
      self.add_module(name, module)

166
lib/nas/model_search.py Normal file
View File

@ -0,0 +1,166 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .head_utils import CifarHEAD, ImageNetHEAD
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
class MixedOp(nn.Module):
  """Weighted sum of every candidate operation listed in PRIMITIVES.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights):
    """Return sum_k weights[k] * op_k(x), pairing weights with operations in PRIMITIVES order."""
    total = 0
    for weight, op in zip(weights, self._ops):
      total = total + weight * op(x)
    return total
class Cell(nn.Module):
  """Search cell: every intermediate node sums a MixedOp applied to each earlier state.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; `weights[e]` holds the operation weights of edge `e`."""
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    offset = 0
    for _ in range(self._steps):
      fan_in = len(states)
      node = sum(self._ops[offset + idx](h, weights[offset + idx]) for idx, h in enumerate(states))
      states.append(node)
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class Network(nn.Module):
  """Search network with separate architecture weights for normal and reduction cells.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; cells at ``layers//3`` and ``2*layers//3``
      are reduction cells and double the channel count.
    steps: intermediate nodes per cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
    head: 'cifar' or 'imagenet'; any other value raises ValueError.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3, head='cifar'):
    super(Network, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    stem_channels = stem_multiplier * C
    if head == 'imagenet':
      self.stem = ImageNetHEAD(stem_channels, stride=1)
    elif head == 'cifar':
      self.stem = nn.Sequential(
        nn.Conv2d(3, stem_channels, 3, padding=1, bias=False),
        nn.BatchNorm2d(stem_channels)
      )
    else:
      raise ValueError('Invalid head : {:}'.format(head))

    reduction_layers = (layers // 3, 2 * layers // 3)
    C_prev_prev, C_prev, C_curr = stem_channels, stem_channels, C
    reduction_prev = False
    self.cells = nn.ModuleList()
    for index in range(layers):
      reduction = index in reduction_layers
      if reduction:
        C_curr *= 2
      self.cells.append(Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev))
      reduction_prev = reduction
      C_prev_prev, C_prev = C_prev, multiplier * C_curr

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # Architecture parameters: one row per cell edge, one column per primitive.
    num_edges = sum(2 + node for node in range(self._steps))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(num_edges, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(num_edges, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    """No temperature is used by this network; the argument is ignored and -1 is returned."""
    return -1

  def get_tau(self):
    """Always -1: this network has no temperature."""
    return -1

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal, alphas_reduce]."""
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    params = []
    for module in (self.stem, self.cells, self.global_pooling, self.classifier):
      params.extend(module.parameters())
    return params

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch."""
    batch, _, _, _ = inputs.size()
    s0 = s1 = self.stem(inputs)
    for cell in self.cells:
      alphas = self.alphas_reduce if cell.reduction else self.alphas_normal
      s0, s1 = s1, cell(s0, s1, F.softmax(alphas, dim=-1))
    pooled = self.global_pooling(s1).view(batch, -1)
    return self.classifier(pooled)

  def genotype(self):
    """Discretise the architecture: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, start = [], 0
      for step in range(self._steps):
        fan_in = step + 2
        W = weights[start:start + fan_in].copy()

        def strongest(edge):
          # largest weight on this edge among the non-'none' operations
          return max(p for k, p in enumerate(W[edge]) if k != none_index)

        for j in sorted(range(fan_in), key=lambda e: -strongest(e))[:2]:
          k_best = None
          for k, p in enumerate(W[j]):
            if k == none_index:
              continue
            if k_best is None or p > W[j][k_best]:
              k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start += fan_in
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2 + self._steps - self._multiplier, self._steps + 2)
      result = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return result

View File

@ -0,0 +1,180 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
class MixedOp(nn.Module):
  """Mixture of all PRIMITIVES that skips operations whose weight is (numerically) zero.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights, cpu_weights):
    """Weighted sum of operations.

    `cpu_weights` is a plain-Python copy of `weights`, used only to decide
    which operations to evaluate. With more than three non-negligible
    weights every operation is evaluated; otherwise only the non-negligible
    ones are. At least one weight must be non-negligible.
    """
    active = [idx for idx, value in enumerate(cpu_weights) if abs(value) > 1e-10]
    if len(active) > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    assert len(active) > 0, 'invalid length : {:}'.format(cpu_weights)
    return sum(weights[idx] * self._ops[idx](x) for idx in active)
class Cell(nn.Module):
  """Search cell whose MixedOps also receive a plain-Python copy of their weights.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; `weights[e]` holds the operation weights of edge `e`."""
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    # Converted once so each MixedOp can test for zero weights without a tensor sync per edge.
    host_weights = weights.tolist()
    offset = 0
    for _ in range(self._steps):
      fan_in = len(states)
      node = sum(
        self._ops[offset + idx](h, weights[offset + idx], host_weights[offset + idx])
        for idx, h in enumerate(states)
      )
      states.append(node)
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class NetworkACC2(nn.Module):
  """Search network whose cell weights come from Gumbel-softmax (hard) or plain softmax.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; cells at ``layers//3`` and ``2*layers//3``
      are reduction cells and double the channel count.
    steps: intermediate nodes per cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkACC2, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    stem_channels = stem_multiplier * C
    self.stem = nn.Sequential(
      nn.Conv2d(3, stem_channels, 3, padding=1, bias=False),
      nn.BatchNorm2d(stem_channels)
    )

    reduction_layers = (layers // 3, 2 * layers // 3)
    C_prev_prev, C_prev, C_curr = stem_channels, stem_channels, C
    reduction_prev = False
    self.cells = nn.ModuleList()
    for index in range(layers):
      reduction = index in reduction_layers
      if reduction:
        C_curr *= 2
      self.cells.append(Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev))
      reduction_prev = reduction
      C_prev_prev, C_prev = C_prev, multiplier * C_curr

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # Sampling temperature and sampling switch (Gumbel-softmax is on by default).
    self.tau = 5
    self.use_gumbel = True

    # Architecture parameters: one row per cell edge, one column per primitive.
    num_edges = sum(2 + node for node in range(self._steps))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(num_edges, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(num_edges, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    """Choose between Gumbel-softmax sampling (True) and plain softmax (False) in forward."""
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    """Set the Gumbel-softmax temperature."""
    self.tau = tau

  def get_tau(self):
    """Return the current Gumbel-softmax temperature."""
    return self.tau

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal, alphas_reduce]."""
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    params = []
    for module in (self.stem, self.cells, self.global_pooling, self.classifier):
      params.extend(module.parameters())
    return params

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch; weights are re-drawn for every cell."""
    batch, _, _, _ = inputs.size()
    s0 = s1 = self.stem(inputs)
    for cell in self.cells:
      alphas = self.alphas_reduce if cell.reduction else self.alphas_normal
      if self.use_gumbel:
        # third positional argument is `hard=True`
        weights = F.gumbel_softmax(alphas, self.tau, True)
      else:
        weights = F.softmax(alphas, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    pooled = self.global_pooling(s1).view(batch, -1)
    return self.classifier(pooled)

  def genotype(self):
    """Discretise the architecture: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, start = [], 0
      for step in range(self._steps):
        fan_in = step + 2
        W = weights[start:start + fan_in].copy()

        def strongest(edge):
          # largest weight on this edge among the non-'none' operations
          return max(p for k, p in enumerate(W[edge]) if k != none_index)

        for j in sorted(range(fan_in), key=lambda e: -strongest(e))[:2]:
          k_best = None
          for k, p in enumerate(W[j]):
            if k == none_index:
              continue
            if k_best is None or p > W[j][k_best]:
              k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start += fan_in
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2 + self._steps - self._multiplier, self._steps + 2)
      result = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return result

167
lib/nas/model_search_f1.py Normal file
View File

@ -0,0 +1,167 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype
class MixedOp(nn.Module):
  """Weighted sum of every candidate operation listed in PRIMITIVES.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights):
    """Return sum_k weights[k] * op_k(x), pairing weights with operations in PRIMITIVES order."""
    total = 0
    for weight, op in zip(weights, self._ops):
      total = total + weight * op(x)
    return total
class Cell(nn.Module):
  """Search cell: every intermediate node sums a MixedOp applied to each earlier state.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; `weights[e]` holds the operation weights of edge `e`."""
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    offset = 0
    for _ in range(self._steps):
      fan_in = len(states)
      node = sum(self._ops[offset + idx](h, weights[offset + idx]) for idx, h in enumerate(states))
      states.append(node)
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class NetworkF1(nn.Module):
  """Search network that learns only the normal cell; reduction layers are Transition modules.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; layers ``layers//3`` and ``2*layers//3``
      are Transition modules and double the channel count.
    steps: intermediate nodes per searched cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkF1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    stem_channels = stem_multiplier * C
    self.stem = nn.Sequential(
      nn.Conv2d(3, stem_channels, 3, padding=1, bias=False),
      nn.BatchNorm2d(stem_channels)
    )

    reduction_layers = (layers // 3, 2 * layers // 3)
    C_prev_prev, C_prev, C_curr = stem_channels, stem_channels, C
    reduction_prev = False
    self.cells = nn.ModuleList()
    for index in range(layers):
      reduction = index in reduction_layers
      if reduction:
        C_curr *= 2
        block = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        block = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      self.cells.append(block)
      reduction_prev = reduction
      C_prev_prev, C_prev = C_prev, multiplier * C_curr

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # Architecture parameters for the normal cell only:
    # one row per cell edge, one column per primitive.
    num_edges = sum(2 + node for node in range(self._steps))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(num_edges, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)

  def set_tau(self, tau):
    """No temperature is used by this network; the argument is ignored and -1 is returned."""
    return -1

  def get_tau(self):
    """Always -1: this network has no temperature."""
    return -1

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal]."""
    return [self.alphas_normal]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    params = []
    for module in (self.stem, self.cells, self.global_pooling, self.classifier):
      params.extend(module.parameters())
    return params

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch."""
    batch, _, _, _ = inputs.size()
    s0 = s1 = self.stem(inputs)
    for cell in self.cells:
      if cell.reduction:
        # reduction layers take no architecture weights
        s0, s1 = s1, cell(s0, s1)
        continue
      s0, s1 = s1, cell(s0, s1, F.softmax(self.alphas_normal, dim=-1))
    pooled = self.global_pooling(s1).view(batch, -1)
    return self.classifier(pooled)

  def genotype(self):
    """Discretise the normal cell: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'. `reduce` is None."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, start = [], 0
      for step in range(self._steps):
        fan_in = step + 2
        W = weights[start:start + fan_in].copy()

        def strongest(edge):
          # largest weight on this edge among the non-'none' operations
          return max(p for k, p in enumerate(W[edge]) if k != none_index)

        for j in sorted(range(fan_in), key=lambda e: -strongest(e))[:2]:
          k_best = None
          for k, p in enumerate(W[j]):
            if k == none_index:
              continue
            if k_best is None or p > W[j][k_best]:
              k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start += fan_in
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      concat = range(2 + self._steps - self._multiplier, self._steps + 2)
      result = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=None, reduce_concat=concat
      )
    return result

View File

@ -0,0 +1,183 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype
class MixedOp(nn.Module):
  """Mixture of all PRIMITIVES that skips operations whose weight is (numerically) zero.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights, cpu_weights):
    """Weighted sum of operations.

    `cpu_weights` is a plain-Python copy of `weights`, used only to decide
    which operations to evaluate. With more than three non-negligible
    weights every operation is evaluated; otherwise only the non-negligible
    ones are. At least one weight must be non-negligible.
    """
    active = [idx for idx, value in enumerate(cpu_weights) if abs(value) > 1e-10]
    if len(active) > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    assert len(active) > 0, 'invalid length : {:}'.format(cpu_weights)
    return sum(weights[idx] * self._ops[idx](x) for idx in active)
class Cell(nn.Module):
  """Search cell whose MixedOps also receive a plain-Python copy of their weights.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; `weights[e]` holds the operation weights of edge `e`."""
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    # Converted once so each MixedOp can test for zero weights without a tensor sync per edge.
    host_weights = weights.tolist()
    offset = 0
    for _ in range(self._steps):
      fan_in = len(states)
      node = sum(
        self._ops[offset + idx](h, weights[offset + idx], host_weights[offset + idx])
        for idx, h in enumerate(states)
      )
      states.append(node)
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class NetworkFACC1(nn.Module):
  """Search network with Transition reduction layers and Gumbel-softmax / softmax normal-cell weights.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; layers ``layers//3`` and ``2*layers//3``
      are Transition modules and double the channel count.
    steps: intermediate nodes per searched cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkFACC1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    # Sampling temperature and sampling switch (Gumbel-softmax is on by default).
    self.tau = 5
    self.use_gumbel = True

    stem_channels = stem_multiplier * C
    self.stem = nn.Sequential(
      nn.Conv2d(3, stem_channels, 3, padding=1, bias=False),
      nn.BatchNorm2d(stem_channels)
    )

    reduction_layers = (layers // 3, 2 * layers // 3)
    C_prev_prev, C_prev, C_curr = stem_channels, stem_channels, C
    reduction_prev = False
    self.cells = nn.ModuleList()
    for index in range(layers):
      reduction = index in reduction_layers
      if reduction:
        C_curr *= 2
        block = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        block = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      self.cells.append(block)
      reduction_prev = reduction
      C_prev_prev, C_prev = C_prev, multiplier * C_curr

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # Architecture parameters for the normal cell only:
    # one row per cell edge, one column per primitive.
    num_edges = sum(2 + node for node in range(self._steps))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(num_edges, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    """Choose between Gumbel-softmax sampling (True) and plain softmax (False) in forward."""
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    """Set the Gumbel-softmax temperature."""
    self.tau = tau

  def get_tau(self):
    """Return the current Gumbel-softmax temperature."""
    return self.tau

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal]."""
    return [self.alphas_normal]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    params = []
    for module in (self.stem, self.cells, self.global_pooling, self.classifier):
      params.extend(module.parameters())
    return params

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch; weights are re-drawn for every normal cell."""
    batch, _, _, _ = inputs.size()
    s0 = s1 = self.stem(inputs)
    for cell in self.cells:
      if cell.reduction:
        # reduction layers take no architecture weights
        s0, s1 = s1, cell(s0, s1)
        continue
      if self.use_gumbel:
        # third positional argument is `hard=True`
        weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    pooled = self.global_pooling(s1).view(batch, -1)
    return self.classifier(pooled)

  def genotype(self):
    """Discretise the normal cell: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'. `reduce` is None."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, start = [], 0
      for step in range(self._steps):
        fan_in = step + 2
        W = weights[start:start + fan_in].copy()

        def strongest(edge):
          # largest weight on this edge among the non-'none' operations
          return max(p for k, p in enumerate(W[edge]) if k != none_index)

        for j in sorted(range(fan_in), key=lambda e: -strongest(e))[:2]:
          k_best = None
          for k, p in enumerate(W[j]):
            if k == none_index:
              continue
            if k_best is None or p > W[j][k_best]:
              k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start += fan_in
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      concat = range(2 + self._steps - self._multiplier, self._steps + 2)
      result = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=None, reduce_concat=concat
      )
    return result

161
lib/nas/model_search_v1.py Normal file
View File

@ -0,0 +1,161 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
class MixedOp(nn.Module):
  """Weighted sum of every candidate operation listed in PRIMITIVES.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights):
    """Return sum_k weights[k] * op_k(x), pairing weights with operations in PRIMITIVES order."""
    total = 0
    for weight, op in zip(weights, self._ops):
      total = total + weight * op(x)
    return total
class Cell(nn.Module):
  """Search cell: every intermediate node sums a MixedOp applied to each earlier state.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; `weights[e]` holds the operation weights of edge `e`."""
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    offset = 0
    for _ in range(self._steps):
      fan_in = len(states)
      node = sum(self._ops[offset + idx](h, weights[offset + idx]) for idx, h in enumerate(states))
      states.append(node)
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class NetworkV1(nn.Module):
  """Search network in which normal and reduction cells share one set of architecture weights.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; cells at ``layers//3`` and ``2*layers//3``
      are reduction cells and double the channel count.
    steps: intermediate nodes per cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    stem_channels = stem_multiplier * C
    self.stem = nn.Sequential(
      nn.Conv2d(3, stem_channels, 3, padding=1, bias=False),
      nn.BatchNorm2d(stem_channels)
    )

    reduction_layers = (layers // 3, 2 * layers // 3)
    C_prev_prev, C_prev, C_curr = stem_channels, stem_channels, C
    reduction_prev = False
    self.cells = nn.ModuleList()
    for index in range(layers):
      reduction = index in reduction_layers
      if reduction:
        C_curr *= 2
      self.cells.append(Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev))
      reduction_prev = reduction
      C_prev_prev, C_prev = C_prev, multiplier * C_curr

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # A single set of architecture parameters used by every cell:
    # one row per cell edge, one column per primitive.
    num_edges = sum(2 + node for node in range(self._steps))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(num_edges, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)

  def set_tau(self, tau):
    """No temperature is used by this network; the argument is ignored and -1 is returned."""
    return -1

  def get_tau(self):
    """Always -1: this network has no temperature."""
    return -1

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal]."""
    return [self.alphas_normal]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    params = []
    for module in (self.stem, self.cells, self.global_pooling, self.classifier):
      params.extend(module.parameters())
    return params

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch."""
    batch, _, _, _ = inputs.size()
    s0 = s1 = self.stem(inputs)
    for cell in self.cells:
      # Normal and reduction cells both read the shared `alphas_normal`.
      weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    pooled = self.global_pooling(s1).view(batch, -1)
    return self.classifier(pooled)

  def genotype(self):
    """Discretise the architecture: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'. Both `normal` and
    `reduce` are parsed from the shared `alphas_normal`."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, start = [], 0
      for step in range(self._steps):
        fan_in = step + 2
        W = weights[start:start + fan_in].copy()

        def strongest(edge):
          # largest weight on this edge among the non-'none' operations
          return max(p for k, p in enumerate(W[edge]) if k != none_index)

        for j in sorted(range(fan_in), key=lambda e: -strongest(e))[:2]:
          k_best = None
          for k, p in enumerate(W[j]):
            if k == none_index:
              continue
            if k_best is None or p > W[j][k_best]:
              k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start += fan_in
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      concat = range(2 + self._steps - self._multiplier, self._steps + 2)
      result = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return result

171
lib/nas/model_search_v3.py Normal file
View File

@ -0,0 +1,171 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select
class MixedOp(nn.Module):
  """Weighted sum of every candidate operation listed in PRIMITIVES.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights, cpu_weights):
    """Return sum_k weights[k] * op_k(x).

    `cpu_weights` is accepted for call compatibility with the cell but is not used.
    """
    total = 0
    for weight, op in zip(weights, self._ops):
      total = total + weight * op(x)
    return total
class Cell(nn.Module):
  """Search cell that randomly drops incoming edges of every node after the first.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; each node is the mean of its selected incoming edges.

    The first node uses `all_select`, later nodes use `random_select(n, 0.5)`.
    NOTE(review): if `random_select` can return an all-zero mask, the division
    below raises ZeroDivisionError -- confirm against construct_utils.
    """
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    host_weights = weights.tolist()
    offset = 0
    for step in range(self._steps):
      fan_in = len(states)
      mask = all_select(fan_in) if step == 0 else random_select(fan_in, 0.5)
      picked = [
        self._ops[offset + idx](h, weights[offset + idx], host_weights[offset + idx])
        for idx, h in enumerate(states)
        if mask[idx] != 0
      ]
      states.append(sum(picked) / sum(mask))
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class NetworkV3(nn.Module):
  """Search network (random edge selection) with separate normal / reduction architecture weights.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; cells at ``layers//3`` and ``2*layers//3``
      are reduction cells and double the channel count.
    steps: intermediate nodes per cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV3, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # Stored and returned by set_tau / get_tau; not read by forward.
    self.tau = 5

    # Architecture parameters: one row per cell edge, one column per primitive.
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    """Store the temperature value."""
    self.tau = tau

  def get_tau(self):
    """Return the stored temperature value."""
    return self.tau

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal, alphas_reduce]."""
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch."""
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        # Fixed: normal cells previously also read `alphas_reduce`, so
        # `alphas_normal` never received a gradient although genotype() reads it.
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):
    """Discretise the architecture: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        # rank incoming edges by their largest non-'none' weight and keep the top two
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != none_index))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != none_index:
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype

176
lib/nas/model_search_v4.py Normal file
View File

@ -0,0 +1,176 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select
class MixedOp(nn.Module):
  """Mixture over a random subset of PRIMITIVES, renormalised by the selected weights.

  Args:
    C: channel count handed to each operation factory.
    stride: stride handed to each operation factory.
  """

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    # The third factory argument is always False (presumably the `affine` flag -- confirm in operations.py).
    self._ops = nn.ModuleList([OPS[name](C, stride, False) for name in PRIMITIVES])

  def forward(self, x, weights, cpu_weights):
    """Return sum(w_k * op_k(x)) / sum(w_k) over the operations picked by `random_select`.

    `cpu_weights` is used only for its length.
    NOTE(review): if `random_select` can pick nothing, the division below is
    0 / 0 and raises ZeroDivisionError -- confirm against construct_utils.
    """
    mask = random_select(len(cpu_weights), 0.5)
    chosen = [(w, op) for w, keep, op in zip(weights, mask, self._ops) if keep]
    numerator = sum(w * op(x) for w, op in chosen)
    denominator = sum(w for w, _ in chosen)
    return numerator / denominator
class Cell(nn.Module):
  """Search cell that randomly drops incoming edges of every node after the first.

  Args:
    steps: number of intermediate nodes.
    multiplier: how many of the last states are concatenated into the output.
    C_prev_prev, C_prev: channel counts of the two incoming states.
    C: channel count used inside the cell.
    reduction: when true, edges leaving the two input states use stride 2.
    reduction_prev: when true, the first input is preprocessed with
      FactorizedReduce instead of ReLUConvBN.
  """

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    self.preprocess0 = (
      FactorizedReduce(C_prev_prev, C, affine=False)
      if reduction_prev
      else ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    )
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    # One MixedOp per (node, source state) edge, stored in node-major order.
    self._ops = nn.ModuleList()
    for node in range(self._steps):
      for source in range(node + 2):
        # Only edges coming from the two cell inputs are strided in a reduction cell.
        self._ops.append(MixedOp(C, 2 if (reduction and source < 2) else 1))

  def forward(self, s0, s1, weights):
    """Run the cell; each node is the mean of its selected incoming edges.

    The first node uses `all_select`, later nodes use `random_select(n, 0.5)`.
    NOTE(review): if `random_select` can return an all-zero mask, the division
    below raises ZeroDivisionError -- confirm against construct_utils.
    """
    states = [self.preprocess0(s0), self.preprocess1(s1)]
    host_weights = weights.tolist()
    offset = 0
    for step in range(self._steps):
      fan_in = len(states)
      mask = all_select(fan_in) if step == 0 else random_select(fan_in, 0.5)
      picked = [
        self._ops[offset + idx](h, weights[offset + idx], host_weights[offset + idx])
        for idx, h in enumerate(states)
        if mask[idx] != 0
      ]
      states.append(sum(picked) / sum(mask))
      offset += fan_in
    return torch.cat(states[-self._multiplier:], dim=1)
class NetworkV4(nn.Module):
  """Search network (random edge and operation selection) with separate normal / reduction weights.

  Args:
    C: base channel count.
    num_classes: size of the classifier output.
    layers: number of stacked cells; cells at ``layers//3`` and ``2*layers//3``
      are reduction cells and double the channel count.
    steps: intermediate nodes per cell.
    multiplier: number of trailing cell states concatenated as the cell output.
    stem_multiplier: the stem outputs ``stem_multiplier * C`` channels.
  """

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV4, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # Stored and returned by set_tau / get_tau; not read by forward.
    self.tau = 5

    # Architecture parameters: one row per cell edge, one column per primitive.
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)
    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    """Store the temperature value."""
    self.tau = tau

  def get_tau(self):
    """Return the stored temperature value."""
    return self.tau

  def arch_parameters(self):
    """Return the architecture parameters as [alphas_normal, alphas_reduce]."""
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    """Return the parameters of stem, cells, pooling and classifier (not the alphas)."""
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    """Compute class logits for a 4-D input batch."""
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        # Fixed: normal cells previously also read `alphas_reduce`, so
        # `alphas_normal` never received a gradient although genotype() reads it.
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):
    """Discretise the architecture: for each node keep the two strongest incoming edges,
    each with its strongest operation other than 'none'."""
    def _parse(weights):
      none_index = PRIMITIVES.index('none')
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        # rank incoming edges by their largest non-'none' weight and keep the top two
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != none_index))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != none_index:
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
      concat = range(2+self._steps-self._multiplier, self._steps+2)
      genotype = Genotype(
        normal=gene_normal, normal_concat=concat,
        reduce=gene_reduce, reduce_concat=concat
      )
    return genotype

174
lib/nas/model_search_v5.py Normal file
View File

@ -0,0 +1,174 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select
class MixedOp(nn.Module):
    """One candidate operation per entry of `PRIMITIVES`, combined by a weighted sum.

    Args:
        C: channel count handed to every operation factory.
        stride: stride handed to every operation factory.
    """

    def __init__(self, C, stride):
        super(MixedOp, self).__init__()
        # Every factory is called with affine=False.
        self._ops = nn.ModuleList(
            [OPS[primitive](C, stride, False) for primitive in PRIMITIVES]
        )

    def forward(self, x, weights, cpu_weights):
        """Apply the operations whose weight is non-zero and sum the weighted results.

        Args:
            x: input tensor.
            weights: per-operation weights used for the multiplication.
            cpu_weights: the same weights as plain numbers; only these are inspected
                to decide which operations run (|w| > 1e-10).
        """
        terms = [
            weights[idx] * self._ops[idx](x)
            for idx, value in enumerate(cpu_weights)
            if abs(value) > 1e-10
        ]
        assert len(terms) > 0, 'invalid length : {:}'.format(cpu_weights)
        return terms[0] if len(terms) == 1 else sum(terms)
class Cell(nn.Module):
    """Search cell: `steps` nodes, each fed by a `MixedOp` from every earlier state.

    Args:
        steps: number of nodes built inside the cell.
        multiplier: number of trailing node outputs concatenated as the cell output.
        C_prev_prev: channels of the output of the cell two positions back.
        C_prev: channels of the previous cell's output.
        C: channels used inside this cell.
        reduction: when true, edges coming from the two cell inputs use stride 2.
        reduction_prev: when true, the first input goes through `FactorizedReduce`
            instead of a 1x1 `ReLUConvBN`.
    """

    def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
        super(Cell, self).__init__()
        self.reduction = reduction

        if reduction_prev:
            self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
        else:
            self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
        self._steps = steps
        self._multiplier = multiplier

        # Node `step` has `step + 2` incoming edges; edges are stored node after node.
        edge_ops = []
        for step in range(self._steps):
            for src in range(step + 2):
                edge_stride = 2 if (reduction and src < 2) else 1
                edge_ops.append(MixedOp(C, edge_stride))
        self._ops = nn.ModuleList(edge_ops)

    def forward(self, s0, s1, weights):
        """Build all nodes and return the last `multiplier` of them concatenated on dim 1.

        `weights` carries one row of operation weights per edge.
        """
        s0 = self.preprocess0(s0)
        s1 = self.preprocess1(s1)
        weights_as_lists = weights.tolist()

        nodes = [s0, s1]
        base = 0  # index of the first edge of the node being built
        for step in range(self._steps):
            num_inputs = len(nodes)
            # First node: `all_select`; later nodes: `random_select(..., 0.6)`.
            if step == 0:
                indicator = all_select(num_inputs)
            else:
                indicator = random_select(num_inputs, 0.6)
            # Inputs whose indicator entry is 0 are skipped.
            picked = [
                self._ops[base + j](h, weights[base + j], weights_as_lists[base + j])
                for j, h in enumerate(nodes)
                if indicator[j] != 0
            ]
            nodes.append(sum(picked))
            base += num_inputs

        return torch.cat(nodes[-self._multiplier:], dim=1)
class NetworkV5(nn.Module):
    """Search network whose cells receive Gumbel-softmax samples of the architecture tensors.

    Layout: stem -> `layers` cells -> global average pooling -> linear classifier.
    The cells at indices `layers//3` and `2*layers//3` are reduction cells and double
    the channel count.  `alphas_normal` / `alphas_reduce` hold one row per cell edge and
    one column per entry of `PRIMITIVES`.
    """

    def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
        super(NetworkV5, self).__init__()
        self._C = C
        self._num_classes = num_classes
        self._layers = layers
        self._steps = steps
        self._multiplier = multiplier

        stem_channels = stem_multiplier * C
        self.stem = nn.Sequential(
            nn.Conv2d(3, stem_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(stem_channels),
        )

        C_prev_prev, C_prev, C_curr = stem_channels, stem_channels, C
        reduction_layers = [layers // 3, 2 * layers // 3]
        reduction_prev = False
        stack = []
        for idx in range(layers):
            reduction = idx in reduction_layers
            if reduction:
                C_curr *= 2
            stack.append(Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev))
            reduction_prev = reduction
            # A cell outputs `multiplier` concatenated nodes of C_curr channels each.
            C_prev_prev, C_prev = C_prev, multiplier * C_curr
        self.cells = nn.ModuleList(stack)

        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(C_prev, num_classes)
        self.tau = 5

        # Architecture parameters: node i has 2 + i incoming edges.
        num_edges = sum(2 + step for step in range(self._steps))
        num_ops = len(PRIMITIVES)
        self.alphas_normal = Parameter(torch.Tensor(num_edges, num_ops))
        self.alphas_reduce = Parameter(torch.Tensor(num_edges, num_ops))
        for alphas in (self.alphas_normal, self.alphas_reduce):
            nn.init.normal_(alphas, 0, 0.001)

    def set_tau(self, tau):
        """Set the temperature passed to `F.gumbel_softmax`."""
        self.tau = tau

    def get_tau(self):
        """Return the temperature passed to `F.gumbel_softmax`."""
        return self.tau

    def arch_parameters(self):
        """Return `[alphas_normal, alphas_reduce]`."""
        return [self.alphas_normal, self.alphas_reduce]

    def base_parameters(self):
        """Return the parameters of stem, cells, pooling and classifier, in that order."""
        groups = (self.stem, self.cells, self.global_pooling, self.classifier)
        return [param for module in groups for param in module.parameters()]

    def forward(self, inputs):
        """Compute class logits for a 4-D input batch; a fresh sample is drawn per cell."""
        batch, _, _, _ = inputs.size()
        features = self.stem(inputs)
        prev_prev, prev = features, features
        for cell in self.cells:
            alphas = self.alphas_reduce if cell.reduction else self.alphas_normal
            # Third positional argument is `hard`.
            sampled = F.gumbel_softmax(alphas, self.tau, True)
            prev_prev, prev = prev, cell(prev_prev, prev, sampled)
        pooled = self.global_pooling(prev).view(batch, -1)
        return self.classifier(pooled)

    def genotype(self):
        """Derive a discrete `Genotype` from the softmax of the architecture tensors.

        Per node, the two inputs with the largest non-'none' weight are kept and each
        contributes `(primitive, input index, weight)`.
        """

        def _parse(weights):
            gene, start = [], 0
            for step in range(self._steps):
                none_idx = PRIMITIVES.index('none')
                fan_in = step + 2
                W = weights[start:start + fan_in].copy()

                def strength(row):
                    # Negated so that an ascending sort puts the strongest edge first.
                    return -max(W[row][k] for k in range(len(W[row])) if k != none_idx)

                for j in sorted(range(fan_in), key=strength)[:2]:
                    k_best = None
                    for k in range(len(W[j])):
                        if k == none_idx:
                            continue
                        if k_best is None or W[j][k] > W[j][k_best]:
                            k_best = k
                    gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
                start += fan_in
            return gene

        with torch.no_grad():
            gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
            gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

            concat = range(2 + self._steps - self._multiplier, self._steps + 2)
            result = Genotype(
                normal=gene_normal, normal_concat=concat,
                reduce=gene_reduce, reduce_concat=concat,
            )
        return result

122
lib/nas/operations.py Normal file
View File

@ -0,0 +1,122 @@
import torch
import torch.nn as nn
# Registry of candidate operations.  Every factory takes (C, stride, affine) and
# returns a module that maps C channels to C channels.
OPS = {
    # Outputs zeros (spatially subsampled when stride > 1).
    'none': lambda C, stride, affine: Zero(stride),
    # Pooling followed by batch norm; the norm is always built with affine=False.
    'avg_pool_3x3': lambda C, stride, affine: nn.Sequential(
        nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
        nn.BatchNorm2d(C, affine=False),
    ),
    'max_pool_3x3': lambda C, stride, affine: nn.Sequential(
        nn.MaxPool2d(3, stride=stride, padding=1),
        nn.BatchNorm2d(C, affine=False),
    ),
    # Identity at stride 1, otherwise a FactorizedReduce.
    'skip_connect': lambda C, stride, affine: (
        Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine)
    ),
    # Separable convolutions: (C_in, C_out, kernel, stride, padding).
    'sep_conv_3x3': lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
    'sep_conv_5x5': lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
    'sep_conv_7x7': lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
    # Dilated convolutions: (C_in, C_out, kernel, stride, padding, dilation).
    'dil_conv_3x3': lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
    'dil_conv_5x5': lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
    # 1x7 convolution followed by a 7x1 convolution.
    'conv_7x1_1x7': lambda C, stride, affine: Conv717(C, C, stride, affine),
}
class Conv717(nn.Module):
    """ReLU -> 1x7 conv -> 7x1 conv -> BatchNorm.

    The 1x7 convolution strides along the width and the 7x1 convolution along the
    height, so together they subsample both spatial dimensions by `stride`.
    """

    def __init__(self, C_in, C_out, stride, affine):
        super(Conv717, self).__init__()
        layers = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_out, (1, 7), stride=(1, stride), padding=(0, 3), bias=False),
            nn.Conv2d(C_out, C_out, (7, 1), stride=(stride, 1), padding=(3, 0), bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to `x`."""
        out = self.op(x)
        return out
class ReLUConvBN(nn.Module):
    """ReLU -> bias-free convolution -> BatchNorm."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super(ReLUConvBN, self).__init__()
        layers = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to `x`."""
        out = self.op(x)
        return out
class DilConv(nn.Module):
    """ReLU -> dilated depthwise conv -> 1x1 pointwise conv -> BatchNorm."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
        super(DilConv, self).__init__()
        # groups=C_in makes the first convolution act on each channel separately.
        depthwise = nn.Conv2d(
            C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=C_in, bias=False,
        )
        pointwise = nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False)
        self.op = nn.Sequential(
            nn.ReLU(inplace=False),
            depthwise,
            pointwise,
            nn.BatchNorm2d(C_out, affine=affine),
        )

    def forward(self, x):
        """Apply the layer stack to `x`."""
        out = self.op(x)
        return out
class SepConv(nn.Module):
    """Two stacked (ReLU -> depthwise conv -> 1x1 conv -> BatchNorm) stages.

    Only the first stage applies `stride`; the second always uses stride 1 and is the
    one that changes the channel count from `C_in` to `C_out`.
    """

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super(SepConv, self).__init__()
        first_stage = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_in, affine=affine),
        ]
        second_stage = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*(first_stage + second_stage))

    def forward(self, x):
        """Apply both stages to `x`."""
        out = self.op(x)
        return out
class Identity(nn.Module):
    """Module that returns its input unchanged."""

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        """Return `x` itself (no copy)."""
        passthrough = x
        return passthrough
class Zero(nn.Module):
    """Module that outputs zeros, subsampling the two trailing dims by `stride`."""

    def __init__(self, stride):
        super(Zero, self).__init__()
        self.stride = stride

    def forward(self, x):
        """Return `x` (strided on dims 2 and 3 when stride != 1) multiplied by 0."""
        if self.stride != 1:
            x = x[:, :, ::self.stride, ::self.stride]
        return x.mul(0.)
class FactorizedReduce(nn.Module):
    """Halve the spatial size with two stride-2 1x1 convolutions on shifted inputs.

    Each convolution produces `C_out // 2` channels (so `C_out` must be even); the
    second one sees the input shifted by one pixel in both spatial dimensions.  The two
    halves are concatenated on the channel axis and batch-normalised.
    """

    def __init__(self, C_in, C_out, affine=True):
        super(FactorizedReduce, self).__init__()
        assert C_out % 2 == 0
        half = C_out // 2
        self.relu = nn.ReLU(inplace=False)
        self.conv_1 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
        self.conv_2 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)
        # One zero row/column at the bottom/right, so the shifted view keeps its size.
        self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)

    def forward(self, x):
        """Return the reduced, normalised feature map."""
        activated = self.relu(x)
        shifted = self.pad(activated)[:, :, 1:, 1:]
        halves = [self.conv_1(activated), self.conv_2(shifted)]
        return self.bn(torch.cat(halves, dim=1))

9
lib/nas_rnn/__init__.py Normal file
View File

@ -0,0 +1,9 @@
# utils
from .utils import batchify, get_batch, repackage_hidden
# models
from .model_search import RNNModelSearch
from .model_search import DARTSCellSearch
from .basemodel import DARTSCell, RNNModel
# architecture
from .genotypes import DARTS_V1, DARTS_V2
from .genotypes import GDAS

181
lib/nas_rnn/basemodel.py Normal file
View File

@ -0,0 +1,181 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from .genotypes import STEPS
from .utils import mask2d, LockedDropout, embedded_dropout
# Half-width of the symmetric uniform range used by this module's weight initialisers,
# which call uniform_(-INITRANGE, INITRANGE).
INITRANGE = 0.04
def none_func(x):
    """Activation used for the 'none' primitive: returns `x` multiplied by 0."""
    zeroed = x * 0
    return zeroed
class DARTSCell(nn.Module):
    """Recurrent cell whose internal wiring is described by a genotype.

    Args:
        ninp: input feature size.
        nhid: hidden state size.
        dropouth: drop probability for the hidden-state mask (training only).
        dropoutx: drop probability for the input mask (training only).
        genotype: object with `recurrent` (list of `(activation name, predecessor)`)
            and `concat` (indices of the states averaged into the output); `None`
            while searching, in which case `STEPS` weight matrices are allocated.
    """

    def __init__(self, ninp, nhid, dropouth, dropoutx, genotype):
        super(DARTSCell, self).__init__()
        self.nhid = nhid
        self.dropouth = dropouth
        self.dropoutx = dropoutx
        self.genotype = genotype

        # genotype is None when doing arch search
        steps = len(self.genotype.recurrent) if self.genotype is not None else STEPS

        def _uniform_weight(rows):
            # Every matrix yields a gate half and a candidate half, hence 2*nhid columns.
            return nn.Parameter(torch.Tensor(rows, 2 * nhid).uniform_(-INITRANGE, INITRANGE))

        self._W0 = _uniform_weight(ninp + nhid)
        self._Ws = nn.ParameterList([_uniform_weight(nhid) for _ in range(steps)])

    def forward(self, inputs, hidden, arch_probs):
        """Unroll the cell over dim 0 of `inputs`.

        Returns:
            `(hiddens, last)`: all per-step outputs stacked on dim 0, and the final one
            with a leading singleton dimension.
        """
        T, B = inputs.size(0), inputs.size(1)

        # The same masks are reused for every time step.
        x_mask = h_mask = None
        if self.training:
            x_mask = mask2d(B, inputs.size(2), keep_prob=1. - self.dropoutx)
            h_mask = mask2d(B, hidden.size(2), keep_prob=1. - self.dropouth)

        state = hidden[0]
        outputs = []
        for t in range(T):
            state = self.cell(inputs[t], state, x_mask, h_mask, arch_probs)
            outputs.append(state)
        hiddens = torch.stack(outputs)
        return hiddens, hiddens[-1].unsqueeze(0)

    def _compute_init_state(self, x, h_prev, x_mask, h_mask):
        """Gated blend of the previous hidden state and a candidate from `[x, h_prev]`."""
        if self.training:
            parts = [x * x_mask, h_prev * h_mask]
        else:
            parts = [x, h_prev]
        projected = torch.cat(parts, dim=-1).mm(self._W0)
        c0, h0 = torch.split(projected, self.nhid, dim=-1)
        c0 = c0.sigmoid()
        h0 = h0.tanh()
        return h_prev + c0 * (h0 - h_prev)

    def _get_activation(self, name):
        """Map a primitive name to its callable; unknown names raise NotImplementedError."""
        if name == 'identity':
            return lambda x: x
        table = {
            'tanh': torch.tanh,
            'relu': torch.relu,
            'sigmoid': torch.sigmoid,
            'none': none_func,
        }
        if name in table:
            return table[name]
        raise NotImplementedError

    def cell(self, x, h_prev, x_mask, h_mask, _):
        """One time step following `self.genotype`; the last argument is unused here."""
        states = [self._compute_init_state(x, h_prev, x_mask, h_mask)]
        for i, (name, pred) in enumerate(self.genotype.recurrent):
            s_prev = states[pred]
            gate_input = s_prev * h_mask if self.training else s_prev
            c, h = torch.split(gate_input.mm(self._Ws[i]), self.nhid, dim=-1)
            c = c.sigmoid()
            h = self._get_activation(name)(h)
            states.append(s_prev + c * (h - s_prev))
        selected = [states[i] for i in self.genotype.concat]
        return torch.mean(torch.stack(selected, -1), -1)
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder.

    The decoder shares its weight matrix with the embedding encoder, which is why
    `ninp`, `nhid` and `nhidlast` must all be equal.

    Args:
        ntoken: vocabulary size.
        ninp, nhid, nhidlast: embedding / hidden / last-hidden sizes (must be equal).
        dropout: locked dropout applied to the recurrent output.
        dropouth, dropoutx: drop probabilities forwarded to the cell.
        dropouti: locked dropout applied to the embedded input.
        dropoute: embedding dropout, only active in training mode.
        cell_cls: class of the recurrent cell; `DARTSCell` additionally needs `genotype`.
        genotype: architecture description, required iff `cell_cls` is `DARTSCell`.
    """

    def __init__(self, ntoken, ninp, nhid, nhidlast,
                 dropout=0.5, dropouth=0.5, dropoutx=0.5, dropouti=0.5, dropoute=0.1,
                 cell_cls=None, genotype=None):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.encoder = nn.Embedding(ntoken, ninp)

        assert ninp == nhid == nhidlast
        if cell_cls == DARTSCell:
            assert genotype is not None
            rnns = [cell_cls(ninp, nhid, dropouth, dropoutx, genotype)]
        else:
            assert genotype is None
            rnns = [cell_cls(ninp, nhid, dropouth, dropoutx)]

        self.rnns = torch.nn.ModuleList(rnns)
        self.decoder = nn.Linear(ninp, ntoken)
        # Weight tying: decoder and encoder share one matrix.
        self.decoder.weight = self.encoder.weight
        self.init_weights()
        # Subclasses may replace this with a learnable tensor; None means "no search".
        self.arch_weights = None

        self.ninp = ninp
        self.nhid = nhid
        self.nhidlast = nhidlast
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropoute = dropoute
        self.ntoken = ntoken
        self.cell_cls = cell_cls
        # acceleration
        self.tau = None
        self.use_gumbel = False

    def set_gumbel(self, use_gumbel, set_check):
        """Toggle Gumbel-softmax sampling and forward `set_check` to every cell.

        Each cell must provide a `set_check` method.
        """
        self.use_gumbel = use_gumbel
        for i, rnn in enumerate(self.rnns):
            rnn.set_check(set_check)

    def set_tau(self, tau):
        """Set the temperature passed to `F.gumbel_softmax`."""
        self.tau = tau

    def get_tau(self):
        """Return the temperature passed to `F.gumbel_softmax`."""
        return self.tau

    def init_weights(self):
        """Uniformly initialise the (tied) embedding/decoder weight and zero the bias."""
        self.encoder.weight.data.uniform_(-INITRANGE, INITRANGE)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-INITRANGE, INITRANGE)

    def forward(self, input, hidden, return_h=False):
        """Return log-probabilities of shape `(-1, batch, ntoken)` and the new hidden list.

        With `return_h=True` the raw and the dropped-out recurrent outputs are returned
        as two extra lists.
        """
        batch_size = input.size(1)

        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        raw_outputs = []
        outputs = []
        if self.arch_weights is None:
            arch_probs = None
        elif self.use_gumbel:
            arch_probs = F.gumbel_softmax(self.arch_weights, self.tau, False)
        else:
            arch_probs = F.softmax(self.arch_weights, dim=-1)

        for l, rnn in enumerate(self.rnns):
            raw_output, new_h = rnn(raw_output, hidden[l], arch_probs)
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        logit = self.decoder(output.view(-1, self.ninp))
        log_prob = F.log_softmax(logit, dim=-1)
        model_output = log_prob.view(-1, batch_size, self.ntoken)

        if return_h:
            return model_output, hidden, raw_outputs, outputs
        return model_output, hidden

    def init_hidden(self, bsz):
        """Return `[zeros(1, bsz, nhid)]` on the device/dtype of the model parameters."""
        # `new_zeros` only needs the reference tensor's dtype and device, so there is no
        # reason to clone a whole parameter first.
        weight = next(self.parameters())
        return [weight.new_zeros((1, bsz, self.nhid))]

55
lib/nas_rnn/genotypes.py Normal file
View File

@ -0,0 +1,55 @@
from collections import namedtuple
# A recurrent architecture: `recurrent` lists one (activation name, predecessor index)
# pair per step, `concat` the state indices that form the cell output.
Genotype = namedtuple('Genotype', ['recurrent', 'concat'])

# Candidate activations of the search space.
PRIMITIVES = ['none', 'tanh', 'relu', 'sigmoid', 'identity']

# Steps per searched cell, and how many trailing states are averaged into the output.
STEPS = 8
CONCAT = 8

# Fixed architecture with 11 steps (more than STEPS).
ENAS = Genotype(
    recurrent=[
        ('tanh', 0), ('tanh', 1), ('relu', 1), ('tanh', 3),
        ('tanh', 3), ('relu', 3), ('relu', 4), ('relu', 7),
        ('relu', 8), ('relu', 8), ('relu', 8),
    ],
    concat=[2, 5, 6, 9, 10, 11],
)

DARTS_V1 = Genotype(
    recurrent=[
        ('relu', 0), ('relu', 1), ('tanh', 2), ('relu', 3),
        ('relu', 4), ('identity', 1), ('relu', 5), ('relu', 1),
    ],
    concat=range(1, 9),
)

DARTS_V2 = Genotype(
    recurrent=[
        ('sigmoid', 0), ('relu', 1), ('relu', 1), ('identity', 1),
        ('tanh', 2), ('sigmoid', 5), ('tanh', 3), ('relu', 5),
    ],
    concat=range(1, 9),
)

GDAS = Genotype(
    recurrent=[
        ('relu', 0), ('relu', 0), ('identity', 1), ('relu', 1),
        ('tanh', 0), ('relu', 2), ('identity', 4), ('identity', 2),
    ],
    concat=range(1, 9),
)

104
lib/nas_rnn/model_search.py Normal file
View File

@ -0,0 +1,104 @@
import copy, torch
import torch.nn as nn
import torch.nn.functional as F
from collections import namedtuple
from .genotypes import PRIMITIVES, STEPS, CONCAT, Genotype
from .basemodel import DARTSCell, RNNModel
class DARTSCellSearch(DARTSCell):
    """Search-time cell: every step mixes all activations over all earlier states.

    `check_zero` enables a path that skips (edge, activation) pairs whose probability
    is not positive.
    """

    def __init__(self, ninp, nhid, dropouth, dropoutx):
        super(DARTSCellSearch, self).__init__(ninp, nhid, dropouth, dropoutx, genotype=None)
        self.bn = nn.BatchNorm1d(nhid, affine=False)
        self.check_zero = False

    def set_check(self, check_zero):
        """Enable or disable skipping of zero-probability entries in `cell`."""
        self.check_zero = check_zero

    def cell(self, x, h_prev, x_mask, h_mask, arch_probs):
        """One time step of the mixed cell.

        `arch_probs` has one row per (step, predecessor) pair, laid out step after
        step, and one column per entry of `PRIMITIVES`.
        """
        s0 = self.bn(self._compute_init_state(x, h_prev, x_mask, h_mask))
        if self.check_zero:
            probs_as_lists = arch_probs.cpu().tolist()

        row_start = 0  # first row of arch_probs belonging to the current step
        states = s0.unsqueeze(0)
        for i in range(STEPS):
            fan_in = i + 1
            masked_states = states * h_mask.unsqueeze(0) if self.training else states
            ch = masked_states.view(-1, self.nhid).mm(self._Ws[i]).view(fan_in, -1, 2*self.nhid)
            c, h = torch.split(ch, self.nhid, dim=-1)
            c = c.sigmoid()

            s = torch.zeros_like(s0)
            for k, name in enumerate(PRIMITIVES):
                if name == 'none':
                    continue
                fn = self._get_activation(name)
                unweighted = states + c * (fn(h) - states)
                if not self.check_zero:
                    probs = arch_probs[row_start:row_start+fan_in, k]
                    s += torch.sum(probs.unsqueeze(-1).unsqueeze(-1) * unweighted, dim=0)
                    continue
                # Keep only the predecessors with a strictly positive probability.
                rows = [jj for jj in range(row_start, row_start+fan_in) if probs_as_lists[jj][k] > 0]
                if len(rows) == 0:
                    continue
                local = [jj - row_start for jj in rows]
                s += torch.sum(arch_probs[rows, k].unsqueeze(-1).unsqueeze(-1) * unweighted[local, :, :], dim=0)
            s = self.bn(s)
            states = torch.cat([states, s.unsqueeze(0)], 0)
            row_start += fan_in
        return torch.mean(states[-CONCAT:], dim=0)
class RNNModelSearch(RNNModel):
    """`RNNModel` extended with a learnable architecture tensor `arch_weights`.

    The tensor has one row per (step, predecessor) pair and one column per primitive.
    """

    def __init__(self, *args):
        super(RNNModelSearch, self).__init__(*args)
        self._args = copy.deepcopy( args )

        # Step i (1-based) has i predecessors, hence 1 + 2 + ... + STEPS rows.
        num_edges = sum(range(1, STEPS + 1))
        self.arch_weights = nn.Parameter(torch.Tensor(num_edges, len(PRIMITIVES)))
        nn.init.normal_(self.arch_weights, 0, 0.001)

    def base_parameters(self):
        """Return the parameters of lockdrop, encoder, rnns and decoder, in that order."""
        parts = (self.lockdrop, self.encoder, self.rnns, self.decoder)
        return [param for module in parts for param in module.parameters()]

    def arch_parameters(self):
        """Return `[arch_weights]`."""
        return [self.arch_weights]

    def genotype(self):
        """Derive a `Genotype` from the softmax of `arch_weights`.

        Per step, the predecessor whose best primitive has the highest probability is
        chosen; 'none' is a valid choice here.
        """

        def _parse(probs):
            gene, start = [], 0
            for i in range(STEPS):
                fan_in = i + 1
                W = probs[start:start + fan_in].copy()

                def neg_peak(row):
                    # Negated so that an ascending sort puts the strongest row first.
                    return -max(W[row][k] for k in range(len(W[row])))

                j = sorted(range(fan_in), key=neg_peak)[0]
                k_best = None
                for k, prob in enumerate(W[j]):
                    if k_best is None or prob > W[j][k_best]:
                        k_best = k
                gene.append((PRIMITIVES[k_best], j))
                start += fan_in
            return gene

        with torch.no_grad():
            gene = _parse(F.softmax(self.arch_weights, dim=-1).cpu().numpy())
        return Genotype(recurrent=gene, concat=list(range(STEPS+1)[-CONCAT:]))

66
lib/nas_rnn/utils.py Normal file
View File

@ -0,0 +1,66 @@
import torch
import torch.nn as nn
import os, shutil
import numpy as np
def repackage_hidden(h):
    """Detach hidden state(s) from their autograd history.

    A tensor is returned detached; any other iterable is processed recursively and
    returned as a tuple.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(repackage_hidden(item) for item in h)
    return h.detach()
def batchify(data, bsz, use_cuda):
    """Reshape a 1-D sequence into `bsz` parallel columns.

    Trailing elements that do not fill a whole row of `bsz` are dropped.  The result
    has shape `(len // bsz, bsz)` and is moved to CUDA when `use_cuda` is true.
    """
    usable = (data.size(0) // bsz) * bsz
    columns = data.narrow(0, 0, usable).view(bsz, -1).t().contiguous()
    if use_cuda:
        return columns.cuda()
    return columns
def get_batch(source, i, seq_len):
    """Return `(data, target)` starting at row `i`.

    `target` is `data` shifted by one row; the window is shortened near the end of
    `source` so that the target never runs past it.  Both are copies.
    """
    span = min(seq_len, len(source) - 1 - i)
    stop = i + span
    return source[i:stop].clone(), source[i + 1:stop + 1].clone()
def embedded_dropout(embed, words, dropout=0.1, scale=None):
    """Look up `words` in `embed` after dropping whole embedding rows.

    Args:
        embed: an `nn.Embedding`-like module (its `weight`, `padding_idx`, `max_norm`,
            `norm_type`, `scale_grad_by_freq` and `sparse` attributes are read).
        words: index tensor.
        dropout: probability of zeroing a vocabulary row; kept rows are rescaled by
            `1 / (1 - dropout)`.  A falsy value disables the mask.
        scale: optional tensor, expanded to the weight's shape and multiplied in.

    Returns:
        The embedded `words`.
    """
    weight = embed.weight
    if dropout:
        keep_prob = 1 - dropout
        # One Bernoulli draw per vocabulary row.  The mask is a constant, so it is
        # not marked as requiring a gradient.
        row_mask = weight.new_empty((weight.size(0), 1)).bernoulli_(keep_prob) / keep_prob
        masked_embed_weight = row_mask.expand_as(weight) * weight
    else:
        masked_embed_weight = weight
    # `is not None` instead of truthiness: truth-testing a multi-element tensor raises.
    if scale is not None:
        masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight

    # Pass `padding_idx` through unchanged.  Replacing None by -1 makes
    # `F.embedding` treat the LAST vocabulary row as padding, which silently blocks its
    # gradient.
    return torch.nn.functional.embedding(
        words, masked_embed_weight,
        embed.padding_idx, embed.max_norm, embed.norm_type,
        embed.scale_grad_by_freq, embed.sparse)
class LockedDropout(nn.Module):
    """Dropout that reuses one mask for every index along dim 0 of the input."""

    def __init__(self):
        super(LockedDropout, self).__init__()

    def forward(self, x, dropout=0.5):
        """Return `x` masked and rescaled; a no-op in eval mode or when `dropout` is falsy.

        The mask has shape `(1, x.size(1), x.size(2))` and is broadcast over dim 0.
        """
        if self.training and dropout:
            keep = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
            scaled = keep.div_(1 - dropout).detach()
            return scaled.expand_as(x) * x
        return x
def mask2d(B, D, keep_prob, cuda=True):
    """Return a `(B, D)` dropout mask with entries 0 or `1 / keep_prob`.

    The mask is moved to CUDA unless `cuda` is false.
    """
    # floor(u + keep_prob) is 1 exactly when the uniform sample u >= 1 - keep_prob.
    mask = torch.rand(B, D).add(keep_prob).floor().div(keep_prob)
    return mask.cuda() if cuda else mask

View File

@ -0,0 +1,2 @@
from .utils import load_config
from .scheduler import MultiStepLR, obtain_scheduler

View File

@ -0,0 +1,29 @@
import torch
from bisect import bisect_right
class MultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """Multi-step schedule with an individual decay factor per milestone.

    Once `last_epoch` reaches `milestones[i]`, the learning rate is additionally
    multiplied by `gammas[i]`.

    Args:
        optimizer: wrapped optimizer.
        milestones: increasing epoch indices.
        gammas: one multiplicative factor per milestone.
        last_epoch: index of the last epoch, forwarded to the base class.

    Raises:
        ValueError: if `milestones` is not sorted in increasing order.
        AssertionError: if `milestones` and `gammas` differ in length.
    """

    def __init__(self, optimizer, milestones, gammas, last_epoch=-1):
        if not list(milestones) == sorted(milestones):
            # The message used to be passed as ('...{:}', milestones), i.e. it was never
            # formatted and the placeholder was shown verbatim.
            raise ValueError('Milestones should be a list of'
                             ' increasing integers. Got {:}'.format(milestones))
        assert len(milestones) == len(gammas), '{:} vs {:}'.format(milestones, gammas)
        # Must be set before the base constructor runs: it calls get_lr() once.
        self.milestones = milestones
        self.gammas = gammas
        super(MultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return each base learning rate times the product of all gammas passed so far."""
        LR = 1
        passed = bisect_right(self.milestones, self.last_epoch)
        for x in self.gammas[:passed]:
            LR = LR * x
        return [base_lr * LR for base_lr in self.base_lrs]
def obtain_scheduler(config, optimizer):
    """Build the learning-rate scheduler named by `config.type`.

    'multistep' reads `config.milestones` and `config.gammas`; 'cosine' reads
    `config.epochs`.  Any other type raises ValueError.
    """
    kind = config.type
    if kind == 'multistep':
        return MultiStepLR(optimizer, milestones=config.milestones, gammas=config.gammas)
    if kind == 'cosine':
        return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
    raise ValueError('Unknown learning rate scheduler type : {:}'.format(config.type))

46
lib/scheduler/utils.py Normal file
View File

@ -0,0 +1,46 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import os, sys, json
from pathlib import Path
from collections import namedtuple
# Type tags accepted as the first element of a config entry.
support_types = ('str', 'int', 'bool', 'float')


def convert_param(original_lists):
    """Convert a `[type_tag, value]` config entry to the tagged Python type.

    `value` may be a scalar or a list; a list is converted element-wise and returned
    as a list.  'bool' values go through `int` first, so "0"/"1" strings work.
    """
    assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists)
    ctype, value = original_lists[0], original_lists[1]
    assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types)

    casters = {
        'int': int,
        'str': str,
        'bool': lambda raw: bool(int(raw)),
        'float': float,
    }

    def cast(raw):
        if ctype not in casters:
            raise TypeError('Does not know this type : {:}'.format(ctype))
        return casters[ctype](raw)

    if isinstance(value, list):
        return [cast(item) for item in value]
    return cast(value)
def load_config(path):
    """Load a JSON config file into an immutable `Configure` namedtuple.

    Every top-level JSON value is converted with `convert_param`; the JSON keys become
    the field names.
    """
    path = str(path)
    assert os.path.exists(path), 'Can not find {:}'.format(path)
    # The context manager closes the file on exit.
    with open(path, 'r') as handle:
        raw = json.load(handle)
    fields = {key: convert_param(entry) for key, entry in raw.items()}
    Configure = namedtuple('Configure', ' '.join(fields.keys()))
    return Configure(**fields)

14
lib/utils/__init__.py Normal file
View File

@ -0,0 +1,14 @@
from .utils import AverageMeter, RecorderMeter, convert_secs2time
from .utils import time_file_str, time_string
from .utils import test_imagenet_data
from .utils import print_log
from .evaluation_utils import obtain_accuracy
from .draw_pts import draw_points
from .fb_transform import ApplyOffset
from .gpu_manager import GPUManager
from .save_meta import Save_Meta
from .model_utils import count_parameters_in_MB
from .model_utils import Cutout
from .flop_benchmark import print_FLOPs

41
lib/utils/draw_pts.py Normal file
View File

@ -0,0 +1,41 @@
import os, sys, time
import numpy as np
import matplotlib
import random
matplotlib.use('agg')
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Scatter-plot 2-D points, one colour per distinct label, and optionally save the figure.
#   points    : indexed as points[mask, :]; columns 0 and 1 are used as x and y.
#   labels    : compared element-wise against each distinct value to build the mask
#               (presumably a numpy array aligned with `points` -- TODO confirm).
#               The values must be integers: they are used with `% 2` and '{:02d}'.
#   save_path : if not None, the figure is written there.
# NOTE(review): leading indentation is not recoverable in this view; confirm against
# the original file whether the final plt.close(fig) runs unconditionally.
def draw_points(points, labels, save_path):
title = 'the visualized features'
dpi = 100
width, height = 1000, 1000
# legend_fontsize is never read below; the legend call hard-codes fontsize=8.
legend_fontsize = 10
# Figure size in inches for a width x height pixel canvas at the given dpi.
figsize = width / float(dpi), height / float(dpi)
fig = plt.figure(figsize=figsize)
classes = np.unique(labels).tolist()
# One rainbow colour per distinct label.
colors = cm.rainbow(np.linspace(0, 1, len(classes)))
legends = []
legendnames = []
for cls, c in zip(classes, colors):
indexes = labels == cls
ptss = points[indexes, :]
x = ptss[:,0]
y = ptss[:,1]
# Alternate the marker by label parity.
if cls % 2 == 0: marker = 'x'
else: marker = 'o'
legend = plt.scatter(x, y, color=c, s=1, marker=marker)
# Legend entries are 1-based, zero-padded label numbers.
legendname = '{:02d}'.format(cls+1)
legends.append( legend )
legendnames.append( legendname )
plt.legend(legends, legendnames, scatterpoints=1, ncol=5, fontsize=8)
if save_path is not None:
fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
print ('---- save figure {} into {}'.format(title, save_path))
plt.close(fig)

View File

@ -0,0 +1,16 @@
import torch
def obtain_accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: `(batch, classes)` score tensor.
        target: `(batch,)` tensor of ground-truth class indices.
        topk: iterable of k values.

    Returns:
        A list with one 1-element tensor per k, holding the percentage of samples whose
        target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()  # (maxk, batch): row r holds every sample's (r+1)-th best class
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` may be non-contiguous because `pred` is a transposed view, and
        # `.view(-1)` then fails for k > 1; `reshape` copies only when needed.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

14
lib/utils/fb_transform.py Normal file
View File

@ -0,0 +1,14 @@
import torch
import random
import numpy as np
class ApplyOffset(object):
    """Callable that adds a fixed integer offset to its argument."""

    def __init__(self, offset):
        assert isinstance(offset, int), 'The offset is not right : {}'.format(offset)
        self.offset = offset

    def __call__(self, x):
        """Return `x + offset`.

        uint8 arrays are converted to int before the addition; arrays holding exactly
        one element are converted to a Python int.
        """
        if isinstance(x, np.ndarray):
            if x.dtype == 'uint8':
                x = x.astype(int)
            if x.size == 1:
                x = int(x)
        return x + self.offset

113
lib/utils/flop_benchmark.py Normal file
View File

@ -0,0 +1,113 @@
# modified from https://github.com/warmspringwinds/pytorch-segmentation-detection/blob/master/pytorch_segmentation_detection/utils/flops_benchmark.py
import copy, torch
def print_FLOPs(model, shape, logs):
    """Log the average per-sample cost of `model` for a zero input of shape `shape`.

    The measurement runs on a deep copy moved to CUDA in eval mode, so the caller's
    model is left untouched.  `logs` is a `(print_log, log)` pair; the message is
    emitted through `print_log(message, log)`.
    """
    print_log, log = logs
    counted = add_flops_counting_methods(copy.deepcopy(model)).cuda()
    counted.eval()

    dummy = torch.zeros(*shape).cuda()
    _ = counted(dummy)

    cost = compute_average_flops_cost(counted) / 1e6
    print_log('FLOPs : {:} MB'.format(cost), log)
    torch.cuda.empty_cache()
# ---- Public functions
def add_flops_counting_methods(model):
    """Instrument `model` in place for FLOP counting and return it.

    Resets the batch counter, installs the batch-counting hook on the model itself, and
    then visits every sub-module twice: first to create/reset the per-module counters,
    then to install the per-module counting hooks.
    """
    model.__batch_counter__ = 0
    add_batch_counter_hook_function(model)
    for visitor in (add_flops_counter_variable_or_reset, add_flops_counter_hook_function):
        model.apply(visitor)
    return model
def compute_average_flops_cost(model):
    """Return the mean cost per sample recorded on an instrumented model.

    Only `Conv2d` and `Linear` modules contribute to the total; the sum of their
    `__flops__` counters is divided by the model's `__batch_counter__`.
    """
    samples_seen = model.__batch_counter__
    counted_types = (torch.nn.Conv2d, torch.nn.Linear)
    total = sum(
        module.__flops__
        for module in model.modules()
        if isinstance(module, counted_types)
    )
    return total / samples_seen
# ---- Internal functions
def pool_flops_counter_hook(pool_module, inputs, output):
    """Forward hook: add the cost of one pooling call to `pool_module.__flops__`.

    The cost is one operation per kernel element per output element; `kernel_size` is
    multiplied by itself, so it is expected to be a single number.
    """
    first_input = inputs[0]
    batch_size = first_input.size(0)
    kernel = pool_module.kernel_size
    channels, out_h, out_w = output.shape[1:]
    assert channels == first_input.size(1), '{:} vs. {:}'.format(channels, first_input.size())
    pool_module.__flops__ += batch_size * channels * out_h * out_w * kernel * kernel
def fc_flops_counter_hook(fc_module, inputs, output):
    """Forward hook: add the cost of one linear-layer call to `fc_module.__flops__`.

    Counts `in_features * out_features` per sample, plus `out_features` per sample when
    the layer has a bias.  Input and output must be 2-D with matching feature sizes.
    """
    batch_size = inputs[0].size(0)
    xin = fc_module.in_features
    xout = fc_module.out_features
    assert xin == inputs[0].size(1) and xout == output.size(1), 'IO=({:}, {:})'.format(xin, xout)
    cost = batch_size * xin * xout
    if fc_module.bias is not None:
        cost += batch_size * xout
    fc_module.__flops__ += cost
def conv_flops_counter_hook(conv_module, inputs, output):
    """Forward hook: add the cost of one convolution call to `conv_module.__flops__`.

    Per output position the cost is `kh * kw * in_channels * out_channels / groups`
    (a float, because of the true division), plus `out_channels` when there is a bias.
    """
    batch_size = inputs[0].size(0)
    out_h, out_w = output.shape[2:]
    kernel_h, kernel_w = conv_module.kernel_size
    out_channels = conv_module.out_channels

    per_position = kernel_h * kernel_w * conv_module.in_channels * out_channels / conv_module.groups
    positions = batch_size * out_h * out_w
    cost = per_position * positions
    if conv_module.bias is not None:
        cost += out_channels * positions
    conv_module.__flops__ += cost
def batch_counter_hook(module, inputs, output):
    """Forward hook: add the batch size of the first input to `module.__batch_counter__`."""
    # A module can receive several inputs; only the first one is inspected.
    first = inputs[0]
    module.__batch_counter__ += first.shape[0]
def add_batch_counter_hook_function(module):
    """Install `batch_counter_hook` on `module` once; repeated calls are no-ops."""
    if hasattr(module, '__batch_counter_handle__'):
        return
    module.__batch_counter_handle__ = module.register_forward_hook(batch_counter_hook)
def add_flops_counter_variable_or_reset(module):
    """Create or reset `__flops__` to 0 on Conv2d, Linear, AvgPool2d and MaxPool2d modules."""
    counted_types = (
        torch.nn.Conv2d,
        torch.nn.Linear,
        torch.nn.AvgPool2d,
        torch.nn.MaxPool2d,
    )
    if isinstance(module, counted_types):
        module.__flops__ = 0
def add_flops_counter_hook_function(module):
    """Install the matching FLOP-counting forward hook on `module`, at most once.

    Conv2d, Linear and Avg/MaxPool2d modules get their dedicated hook; every other
    module type is left untouched.
    """
    if isinstance(module, torch.nn.Conv2d):
        hook = conv_flops_counter_hook
    elif isinstance(module, torch.nn.Linear):
        hook = fc_flops_counter_hook
    elif isinstance(module, (torch.nn.AvgPool2d, torch.nn.MaxPool2d)):
        hook = pool_flops_counter_hook
    else:
        return
    # The stored handle doubles as the "already installed" marker.
    if not hasattr(module, '__flops_handle__'):
        module.__flops_handle__ = module.register_forward_hook(hook)

70
lib/utils/gpu_manager.py Normal file
View File

@ -0,0 +1,70 @@
import os
class GPUManager():
    """Query GPU properties through `nvidia-smi` and pick devices by free memory."""

    # Fields requested from `nvidia-smi --query-gpu=...`, one call per field.
    queries = ('index', 'gpu_name', 'memory.free', 'memory.used', 'memory.total', 'power.draw', 'power.limit')

    def __init__(self):
        # Runs one query at construction time; the result is not kept.
        self.query_gpu(False)

    def get_info(self, ctype):
        """Return the `nvidia-smi` output lines (one per GPU) for the field `ctype`."""
        cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(ctype)
        return [line.strip('\n') for line in os.popen(cmd).readlines()]

    def query_gpu(self, show=True):
        """Collect all `queries` fields for every GPU.

        When `CUDA_VISIBLE_DEVICES` is set, only the listed devices are kept and their
        'index' is rewritten to the position in that list.

        Returns:
            A formatted multi-line string when `show` is true, otherwise a list with
            one dict per GPU.
        """
        num_gpus = len(self.get_info('index'))
        all_gpus = [{} for _ in range(num_gpus)]
        for query in self.queries:
            for idx, info in enumerate(self.get_info(query)):
                all_gpus[idx][query] = info

        if 'CUDA_VISIBLE_DEVICES' in os.environ:
            visible = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
            selected_gpus = []
            for position, device in enumerate(visible):
                find = False
                for gpu in all_gpus:
                    if gpu['index'] != device:
                        continue
                    assert find==False, 'Duplicate cuda device index : {}'.format(device)
                    find = True
                    renumbered = gpu.copy()
                    renumbered['index'] = '{}'.format(position)
                    selected_gpus.append(renumbered)
                assert find, 'Does not find the device : {}'.format(device)
            all_gpus = selected_gpus

        if not show:
            return all_gpus

        rows = []
        for gpu in all_gpus:
            cells = []
            for query in self.queries:
                # Memory fields are right-aligned to 9 characters.
                if query.find('memory') == 0:
                    value = '{:>9}'.format(gpu[query])
                else:
                    value = gpu[query]
                cells.append(query + ' : ' + value + ' | ')
            rows.append('| ' + ''.join(cells) + '\n')
        return ''.join(rows)

    def select_by_memory(self, numbers=1):
        """Return the sorted indices of the `numbers` GPUs with the most free memory."""
        all_gpus = self.query_gpu(False)
        assert numbers <= len(all_gpus), 'Require {} gpus more than you have'.format(numbers)
        # 'memory.free' looks like "<number> <unit>"; only the number is compared.
        ranked = sorted(
            ((int(gpu['memory.free'].split(' ')[0]), gpu['index']) for gpu in all_gpus),
            reverse=True,
        )
        chosen = [int(ranked[i][1]) for i in range(numbers)]
        return sorted(chosen)
"""
if __name__ == '__main__':
manager = GPUManager()
manager.query_gpu(True)
indexes = manager.select_by_memory(3)
print (indexes)
"""

34
lib/utils/model_utils.py Normal file
View File

@ -0,0 +1,34 @@
import torch
import torch.nn as nn
import numpy as np
def count_parameters_in_MB(model):
    """Return the number of scalar parameters divided by 1e6.

    Args:
        model: either an ``nn.Module`` (its ``parameters()`` are counted) or
            any iterable of objects exposing ``.size()``.
    """
    tensors = model.parameters() if isinstance(model, nn.Module) else model
    # The builtin sum is used because passing a generator to np.sum is deprecated.
    return sum(np.prod(v.size()) for v in tensors) / 1e6
class Cutout(object):
    """Zero out one randomly placed square patch of an image tensor, in place.

    Args:
        length: side length of the patch; the patch is clipped at the image
            borders, so it can be smaller near an edge.
    """

    def __init__(self, length):
        self.length = length

    def __repr__(self):
        return ('{name}(length={length})'.format(name=self.__class__.__name__, **self.__dict__))

    def __call__(self, img):
        """Apply the cutout to ``img`` and return the same (modified) tensor.

        ``img`` is a 3-D tensor whose dimensions 1 and 2 are used as height
        and width; the same patch is cleared across dimension 0.
        """
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)
        # Draw the patch centre: y first, then x (order matters for seeded runs).
        y = np.random.randint(h)
        x = np.random.randint(w)
        half = self.length // 2
        y1, y2 = np.clip(y - half, 0, h), np.clip(y + half, 0, h)
        x1, x2 = np.clip(x - half, 0, w), np.clip(x + half, 0, w)
        mask[y1: y2, x1: x2] = 0.
        # Match the image's dtype and device so the in-place multiply also
        # works for non-float32 or non-CPU tensors.
        mask = torch.from_numpy(mask).to(device=img.device, dtype=img.dtype)
        img *= mask.expand_as(img)
        return img

50
lib/utils/save_meta.py Normal file
View File

@ -0,0 +1,50 @@
import torch
import os, sys
import os.path as osp
import numpy as np
def tensor2np(x):
    """Convert ``x`` to a numpy array.

    A numpy array is returned unchanged. Anything else is treated as a torch
    tensor: it is detached from the autograd graph (``.numpy()`` raises on a
    tensor that requires grad), moved to the CPU if needed, and converted.
    """
    if isinstance(x, np.ndarray):
        return x
    return x.detach().cpu().numpy()
class Save_Meta():
    """Accumulate batches of predictions and ground-truth labels and persist them.

    ``predictions`` and ``groundtruth`` are parallel lists holding one numpy
    array per appended batch.
    """

    def __init__(self):
        self.reset()

    def __repr__(self):
        return ('{name}'.format(name=self.__class__.__name__)+'(number of data = {})'.format(len(self)))

    def reset(self):
        """Drop every stored batch."""
        self.predictions = []
        self.groundtruth = []

    def __len__(self):
        # Number of appended batches, not the number of samples.
        return len(self.predictions)

    def append(self, _pred, _ground):
        """Store one batch.

        Args:
            _pred: 2-D array/tensor with one row per sample.
            _ground: 1-D array/tensor with one label per sample.

        Raises:
            AssertionError: if the shapes do not satisfy the above.
        """
        _pred, _ground = tensor2np(_pred), tensor2np(_ground)
        assert _ground.shape[0] == _pred.shape[0] and len(_pred.shape) == 2 and len(_ground.shape) == 1, 'The shapes are wrong : {} & {}'.format(_pred.shape, _ground.shape)
        self.predictions.append(_pred)
        self.groundtruth.append(_ground)

    def save(self, save_dir, filename, test=True):
        """Write the stored batches to ``save_dir/filename`` with ``torch.save``.

        When ``test`` is true and at least one batch is stored, the top-1
        accuracy (argmax over axis 1 against the labels, in percent) is
        computed and printed; otherwise ``None`` is printed as the accuracy.
        """
        meta = {'predictions': self.predictions,
                'groundtruth': self.groundtruth}
        filename = osp.join(save_dir, filename)
        torch.save(meta, filename)
        # np.concatenate raises on an empty list, so skip the evaluation then.
        if test and len(self.predictions) > 0:
            predictions = np.argmax(np.concatenate(self.predictions), axis=1)
            groundtruth = np.concatenate(self.groundtruth)
            accuracy = np.sum(groundtruth==predictions) * 100.0 / predictions.size
        else:
            accuracy = None
        print ('save save_meta into {} with accuracy = {}'.format(filename, accuracy))

    def load(self, filename):
        """Replace the stored batches with the content of a file written by ``save``.

        Raises:
            AssertionError: if ``filename`` is not an existing file.
        """
        assert os.path.isfile(filename), '{} is not a file'.format(filename)
        # The file holds pickled numpy arrays, which the weights_only=True
        # default of recent PyTorch refuses to load.
        # NOTE(review): full unpickling executes arbitrary code; only load trusted files.
        try:
            checkpoint = torch.load(filename, weights_only=False)
        except TypeError:
            # Older PyTorch releases do not know the weights_only keyword.
            checkpoint = torch.load(filename)
        self.predictions = checkpoint['predictions']
        self.groundtruth = checkpoint['groundtruth']

137
lib/utils/utils.py Normal file
View File

@ -0,0 +1,137 @@
import os, sys, time
import numpy as np
import matplotlib
import random
matplotlib.use('agg')
import matplotlib.pyplot as plt
class AverageMeter(object):
    """Keeps the most recent value together with a running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the last value, mean, total and sample count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        weighted = val * n
        self.val = val
        self.sum = self.sum + weighted
        self.count = self.count + n
        self.avg = self.sum / self.count
class RecorderMeter(object):
    """Stores per-epoch train/val loss and accuracy and can plot them.

    ``epoch_losses`` and ``epoch_accuracy`` are float32 arrays of shape
    ``(total_epoch, 2)``; column 0 is the training value and column 1 the
    validation value. Losses start at -1 and accuracies at 0.
    """

    def __init__(self, total_epoch):
        self.reset(total_epoch)

    def reset(self, total_epoch):
        """Re-allocate the records for ``total_epoch`` epochs (must be > 0)."""
        assert total_epoch > 0
        self.total_epoch = total_epoch
        self.current_epoch = 0
        self.epoch_losses = np.zeros((self.total_epoch, 2), dtype=np.float32) - 1  # [epoch, train/val]
        self.epoch_accuracy = np.zeros((self.total_epoch, 2), dtype=np.float32)  # [epoch, train/val]

    def update(self, idx, train_loss, train_acc, val_loss, val_acc):
        """Record the results of epoch ``idx``.

        Returns:
            True-like when the validation accuracy just stored equals the best
            validation accuracy over epochs ``0..idx``.

        Raises:
            AssertionError: if ``idx`` is outside ``[0, total_epoch)``.
        """
        assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx)
        self.epoch_losses[idx, 0] = train_loss
        self.epoch_losses[idx, 1] = val_loss
        self.epoch_accuracy[idx, 0] = train_acc
        self.epoch_accuracy[idx, 1] = val_acc
        self.current_epoch = idx + 1
        return self.max_accuracy(False) == self.epoch_accuracy[idx, 1]

    def max_accuracy(self, istrain):
        """Best train (``istrain`` true) or val accuracy so far; 0 before any update."""
        if self.current_epoch <= 0:
            return 0
        column = 0 if istrain else 1
        return self.epoch_accuracy[:self.current_epoch, column].max()

    def plot_curve(self, save_path):
        """Draw accuracy and 50x-scaled loss curves; save to ``save_path`` unless it is None."""
        title = 'the accuracy/loss curve of train/val'
        dpi = 100
        width, height = 1600, 1000
        legend_fontsize = 10
        figsize = width / float(dpi), height / float(dpi)

        fig = plt.figure(figsize=figsize)
        try:
            x_axis = np.arange(self.total_epoch)  # epochs
            plt.xlim(0, self.total_epoch)
            plt.ylim(0, 100)
            interval_y = 5
            interval_x = 5
            plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
            plt.yticks(np.arange(0, 100 + interval_y, interval_y))
            plt.grid()
            plt.title(title, fontsize=20)
            plt.xlabel('the training epoch', fontsize=16)
            plt.ylabel('accuracy', fontsize=16)

            # Curves are converted to float64 before plotting/scaling.
            train_acc = self.epoch_accuracy[:, 0].astype(np.float64)
            valid_acc = self.epoch_accuracy[:, 1].astype(np.float64)
            train_loss = self.epoch_losses[:, 0].astype(np.float64)
            valid_loss = self.epoch_losses[:, 1].astype(np.float64)
            plt.plot(x_axis, train_acc, color='g', linestyle='-', label='train-accuracy', lw=2)
            plt.plot(x_axis, valid_acc, color='y', linestyle='-', label='valid-accuracy', lw=2)
            # Losses are multiplied by 50 so they share the 0-100 axis.
            plt.plot(x_axis, train_loss*50, color='g', linestyle=':', label='train-loss-x50', lw=2)
            plt.plot(x_axis, valid_loss*50, color='y', linestyle=':', label='valid-loss-x50', lw=2)
            # One legend call after all curves exist is enough.
            plt.legend(loc=4, fontsize=legend_fontsize)

            if save_path is not None:
                fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
                print ('---- save figure {} into {}'.format(title, save_path))
        finally:
            # Always release the figure, even when saving fails.
            plt.close(fig)
def print_log(print_string, log):
    """Print a message and, when ``log`` is not None, also write it (plus newline) to ``log`` and flush."""
    message = "{}".format(print_string)
    print(message)
    if log is None:
        return
    log.write(message + '\n')
    log.flush()
def time_file_str():
    """Return the current UTC date (YYYY-MM-DD) followed by '-' and a random integer in [1, 10000]."""
    date_part = time.strftime('%Y-%m-%d', time.gmtime(time.time()))
    suffix = random.randint(1, 10000)
    return '{}'.format(date_part) + '-{}'.format(suffix)
def time_string():
    """Return the current UTC time formatted as '%Y-%m-%d-%X', wrapped in square brackets."""
    now = time.gmtime(time.time())
    return '[{}]'.format(time.strftime('%Y-%m-%d-%X', now))
def convert_secs2time(epoch_time, return_str=False):
    """Split a duration in seconds into whole hours, minutes and seconds.

    Returns the tuple ``(hours, minutes, seconds)`` when ``return_str`` equals
    False, otherwise the string ``'[Need: HH:MM:SS]'``.
    """
    hours = int(epoch_time / 3600)
    minutes = int((epoch_time - 3600*hours) / 60)
    seconds = int(epoch_time - 3600*hours - 60*minutes)
    if return_str != False:
        return '[Need: {:02d}:{:02d}:{:02d}]'.format(hours, minutes, seconds)
    return hours, minutes, seconds
def test_imagenet_data(imagenet):
# Sanity-check an ImageNet-style dataset object; raises AssertionError on the first violation
# and prints a confirmation line when every check passes.
# `imagenet` must support len() and expose `.imgs`, an indexable sequence of (path, target) pairs.
# NOTE(review): the indentation of this block is not visible here; the nesting of the statements
# below (in particular whether the last assert runs for every image) must be confirmed.
total_length = len(imagenet)
# Only two dataset sizes are accepted (presumably the ILSVRC2012 train and val splits — confirm).
assert total_length == 1281166 or total_length == 50000, 'The length of ImageNet is wrong : {}'.format(total_length)
# Maps each class-folder name to the first target seen for it.
map_id = {}
for index in range(total_length):
path, target = imagenet.imgs[index]
# Split ".../<folder>/<image_name>" into the last directory component and the file name.
folder, image_name = os.path.split(path)
_, folder = os.path.split(folder)
if folder not in map_id:
map_id[folder] = target
else:
# A folder that was already seen must keep the same target.
assert map_id[folder] == target, 'Class : {} is not {}'.format(folder, target)
# The file name must begin with the name of its folder.
assert image_name.find(folder) == 0, '{} is wrong.'.format(path)
print ('Check ImageNet Dataset OK')

View File

@ -0,0 +1,30 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 with the fixed architecture name "acc2".
#
# Usage: sh <this-script> GPUS EPOCHS CUTOUT
#   GPUS    value assigned to CUDA_VISIBLE_DEVICES for the python process
#   EPOCHS  forwarded as --epochs
#   CUTOUT  forwarded as --cutout
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 3 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 3 parameters for the GPUs and the epochs and the cutout" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=acc2
cutout=$3
dataset=cifar10
epoch=$2
# NOTE(review): the directory name always ends in "-E600", whatever EPOCHS is.
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.05 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --tau_max 10 --tau_min 4 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,30 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v3.py on CIFAR-10 with architecture name "acc2" and cutout 0.
#
# Usage: sh <this-script> GPUS EPOCHS
#   GPUS    value assigned to CUDA_VISIBLE_DEVICES for the python process
#   EPOCHS  forwarded as --epochs
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the epochs" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=acc2
cutout=0
dataset=cifar10
epoch=$2
# NOTE(review): the directory name always ends in "-E600", whatever EPOCHS is.
SAVED=./snapshots/NAS/ACC-V3-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v3.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.01 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --tau_max 10 --tau_min 1 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

57
scripts-cnn/README.md Normal file
View File

@ -0,0 +1,57 @@
# Neural-Architecture-Search
### Baseline
```
bash ./scripts-nas/search.sh 1 base cifar10
bash ./scripts-nas/search.sh 1 share
bash ./scripts-nas/batch-base-search.sh 1
bash ./scripts-nas/batch-base-model.sh 1
```
### Meta
```
bash ./scripts-nas/meta-search.sh 0 meta 20 5
```
### Acceleration
```
bash ./scripts-nas/search-acc-v2.sh 3 acc2
bash ./scripts-nas/DMS-V-Train.sh 0
bash ./scripts-nas/search-acc-simple.sh 3 NetworkV2
```
### Base Model Training
```
bash ./scripts-nas/train-model.sh 3 AmoebaNet
bash ./scripts-nas/train-model.sh 3 NASNet
bash ./scripts-nas/train-model.sh 3 DARTS_V1
bash ./scripts-nas/train-model-simple.sh 3 AmoebaNet
bash ./scripts-nas/train-imagenet.sh 3 DARTS_V2 50 14
bash scripts-nas/TRAIN-BASE.sh 0 PNASNet cifar10 nocut 48 11
bash scripts-nas/TRAIN-BASE.sh 0 AmoebaNet cifar10 nocut 36 20
bash scripts-nas/TRAIN-BASE.sh 0 NASNet cifar10 nocut 33 20
bash scripts-nas/TRAIN-BASE.sh 0 DMS_F1 cifar10 nocut 36 20
bash scripts-nas/TRAIN-BASE.sh 0 DMS_V1 cifar10 nocut 36 20
bash scripts-nas/TRAIN-BASE.sh 0 GDAS_CC cifar10 nocut 36 20
bash scripts-nas/train-imagenet.sh 3 DMS_F1 52 14
bash scripts-nas/train-imagenet.sh 3 DMS_V1 50 14
bash scripts-nas/TRAIN-BASE.sh 0 DMS_V1 cifar10 nocut 36 20
```
### Visualization
```
python ./exps-nas/vis-arch.py --checkpoint <path/to/checkpoint-search.pth> --save_dir <output-dir>
python ./exps-nas/cvpr-vis.py --save_dir ./snapshots/NAS-VIS/
```
### Test datasets
```
cd ./lib/datasets/
python test_NLP.py
```

30
scripts-cnn/TRAIN-BASE.sh Normal file
View File

@ -0,0 +1,30 @@
#!/usr/bin/env sh
# Train one architecture with ./exps-nas/train_base.py.
#
# Usage: bash scripts-nas/TRAIN-BASE.sh 0 DMS_V1 cifar10 nocut init-channel layers
#   $1 GPUS     value assigned to CUDA_VISIBLE_DEVICES for the python process
#   $2 ARCH     forwarded as --arch
#   $3 DATASET  forwarded as --dataset
#   $4 CONFIG   suffix selecting ./configs/nas-cifar-cos-<CONFIG>.config
#   $5 C        forwarded as --init_channels
#   $6 N        forwarded as --layers
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 6 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 6 parameters for the GPUs, the architecture, the dataset, the config, the initial channel, and the number of layers" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=$3
config=$4
C=$5
N=$6
SAVED=./snapshots/NAS/${arch}-${C}-${N}-${dataset}-${config}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/train_base.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --dataset "${dataset}" --arch "${arch}" \
  --save_path "${SAVED}" \
  --grad_clip 5 \
  --init_channels "${C}" --layers "${N}" \
  --model_config "./configs/nas-cifar-cos-${config}.config" \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,23 @@
#!/usr/bin/env sh
# Train AmoebaNet, NASNet, DARTS_V1 and DARTS_V2 one after another by calling
# ./scripts-nas/train-model.sh; stops at the first failing run (set -e).
#
# Usage: sh <this-script> GPUS
# Requires TORCH_HOME to be set.
set -e
if [ "$#" -ne 1 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 1 parameters for the GPUs" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1

# NOTE(review): three arguments are forwarded (GPUS, architecture, 0); confirm
# that ./scripts-nas/train-model.sh accepts a third positional argument.
for arch in AmoebaNet NASNet DARTS_V1 DARTS_V2; do
  bash ./scripts-nas/train-model.sh "${gpus}" "${arch}" 0
done

View File

@ -0,0 +1,19 @@
#!/usr/bin/env sh
# Run ./scripts-nas/search.sh three times in a row on the given GPUs.
#
# Usage: sh <this-script> GPUS
# Requires TORCH_HOME to be set.
if [ "$#" -ne 1 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 1 parameters for the GPUs" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1

# NOTE(review): only GPUS is forwarded; confirm that ./scripts-nas/search.sh
# can be called with a single argument.
# The loop variable is unused; it only counts the three repetitions.
for run in 1 2 3; do
  bash ./scripts-nas/search.sh "${gpus}"
done

View File

@ -0,0 +1,30 @@
#!/usr/bin/env sh
# Run ./exps-nas/meta_search.py for 60 epochs with cutout 16.
#
# Usage: sh <this-script> GPUS ARCH N_WAY K_SHOT
#   GPUS    value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH    forwarded as --arch
#   N_WAY   forwarded as --n_way
#   K_SHOT  forwarded as --k_shot
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/tiered-imagenet.
if [ "$#" -ne 4 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 4 parameters for the GPUs and the network and N-way and K-shot" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
n_way=$3
k_shot=$4
cutout=16
epoch=60
SAVED=./snapshots/NAS/Meta-Search-${arch}-N${n_way}-K${k_shot}-cut${cutout}-${epoch}

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/meta_search.py \
  --data_path "$TORCH_HOME/tiered-imagenet" \
  --arch "${arch}" --n_way "${n_way}" --k_shot "${k_shot}" \
  --save_path "${SAVED}" \
  --learning_rate_max 0.001 --learning_rate_min 0.0001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos-cut.config \
  --print_freq 200 --workers 16

View File

@ -0,0 +1,29 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 for 100 epochs with the
# nas-cifar-cos-simple configuration and cutout 0.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the network" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=100
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E100

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos-simple.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,29 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 for 150 epochs with cutout 0.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the network" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=150
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,29 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 for 200 epochs with cutout 0.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the network" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=200
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,29 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 for 300 epochs with cutout 0.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the network" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=300
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,29 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 for 50 epochs with cutout 0.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the network" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=50
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,29 @@
#!/usr/bin/env sh
# Run ./exps-nas/acc_search_v2.py on CIFAR-10 for 100 epochs with cutout 0.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the network" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=100
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/acc_search_v2.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --arch "${arch}" --dataset "${dataset}" --batch_size 128 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

45
scripts-cnn/search.sh Normal file
View File

@ -0,0 +1,45 @@
#!/usr/bin/env sh
# Run ./exps-nas/train_search.py for 50 epochs with cutout 0.
#
# Usage: sh <this-script> GPUS ARCH DATASET
#   GPUS     value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH     forwarded as --arch
#   DATASET  one of: cifar10, cifar100, tiered
# Requires TORCH_HOME to be set; the data directory is derived from it.
if [ "$#" -ne 3 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 3 parameters for the GPUs and the network and the dataset" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=$3
epoch=50
SAVED=./snapshots/NAS/Search-${arch}-${dataset}-cut${cutout}-${epoch}

# A case statement replaces the former `[ ... == ... ]` tests: `==` is not a
# POSIX test operator and fails under shells such as dash.
case "${dataset}" in
  cifar10|cifar100)
    dataset_root=$TORCH_HOME/cifar.python
    print_freq=100
    ;;
  tiered)
    dataset_root=$TORCH_HOME/tiered-imagenet
    print_freq=500
    ;;
  *)
    echo 'invalid dataset-name :'"${dataset}" >&2
    exit 1
    ;;
esac

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/train_search.py \
  --data_path "${dataset_root}" \
  --arch "${arch}" \
  --dataset "${dataset}" --batch_size 64 \
  --save_path "${SAVED}" \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs "${epoch}" --cutout "${cutout}" --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --manualSeed 3858 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq "${print_freq}" --workers 8

View File

@ -0,0 +1,26 @@
#!/usr/bin/env sh
# Train one architecture on CIFAR-100 with ./exps-nas/train_base.py
# (36 initial channels, 20 layers).
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs, the architecture" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=cifar100
SAVED=./snapshots/NAS/${arch}-${dataset}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/train_base.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --dataset "${dataset}" --arch "${arch}" \
  --save_path "${SAVED}" \
  --grad_clip 5 \
  --init_channels 36 --layers 20 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,28 @@
#!/usr/bin/env sh
# Train one architecture on ImageNet with ./exps-nas/train_base.py.
#
# Usage: sh <this-script> GPUS ARCH CHANNELS LAYERS
#   GPUS      value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH      forwarded as --arch
#   CHANNELS  forwarded as --init_channels
#   LAYERS    forwarded as --layers
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/ILSVRC2012.
if [ "$#" -ne 4 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 4 parameters for the GPUs, the architecture, and the channel and the layers" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=imagenet
channels=$3
layers=$4
SAVED=./snapshots/NAS/${arch}-${dataset}-C${channels}-L${layers}-E250

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/train_base.py \
  --data_path "$TORCH_HOME/ILSVRC2012" \
  --dataset "${dataset}" --arch "${arch}" \
  --save_path "${SAVED}" \
  --grad_clip 5 \
  --init_channels "${channels}" --layers "${layers}" \
  --model_config ./configs/nas-imagenet.config \
  --print_freq 200 --workers 20

View File

@ -0,0 +1,25 @@
#!/usr/bin/env sh
# Train one architecture on CIFAR-10 with ./exps-nas/train_base.py using the
# nas-cifar-cos-simple configuration.
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs and the architecture" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=cifar10
SAVED=./snapshots/NAS/${arch}-${dataset}-E100

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/train_base.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --dataset "${dataset}" --arch "${arch}" \
  --save_path "${SAVED}" \
  --grad_clip 5 \
  --model_config ./configs/nas-cifar-cos-simple.config \
  --print_freq 100 --workers 8

View File

@ -0,0 +1,26 @@
#!/usr/bin/env sh
# Train one architecture on CIFAR-10 with ./exps-nas/train_base.py
# (36 initial channels, 20 layers).
#
# Usage: sh <this-script> GPUS ARCH
#   GPUS  value assigned to CUDA_VISIBLE_DEVICES for the python process
#   ARCH  forwarded as --arch
# Requires TORCH_HOME to be set; data is read from $TORCH_HOME/cifar.python.
if [ "$#" -ne 2 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 2 parameters for the GPUs, the architecture" >&2
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set TORCH_HOME envoriment variable for data dir saving" >&2
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=cifar10
SAVED=./snapshots/NAS/${arch}-${dataset}-E600

# All expansions are quoted so values containing spaces or glob characters
# reach python as single, unmodified arguments.
CUDA_VISIBLE_DEVICES="${gpus}" python ./exps-nas/train_base.py \
  --data_path "$TORCH_HOME/cifar.python" \
  --dataset "${dataset}" --arch "${arch}" \
  --save_path "${SAVED}" \
  --grad_clip 5 \
  --init_channels 36 --layers 20 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8

9
scripts-cnn/vis.sh Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env sh
# Render the searched architecture of three fixed seeds with ./exps-nas/vis-arch.py.
# For each seed the checkpoint is read from
# ./snapshots/NAS/Search-cifar10-cut16-100/<seed>/checkpoint-search.pth and the
# output goes to ./snapshots/NAS-VIS/Search-cut16-100/<seed>.
src_root=./snapshots/NAS/Search-cifar10-cut16-100
dst_root=./snapshots/NAS-VIS/Search-cut16-100

for seed in seed-8167 seed-908 seed-9242; do
  python ./exps-nas/vis-arch.py \
    --checkpoint "${src_root}/${seed}/checkpoint-search.pth" \
    --save_dir "${dst_root}/${seed}"
done

12
scripts-rnn/README.md Normal file
View File

@ -0,0 +1,12 @@
# Search RNN cell
```
bash scripts-nas-rnn/search-baseline.sh 3
bash scripts-nas-rnn/search-accelerate.sh 0 200 10 1
```
# Train the Searched Model
```
bash scripts-nas-rnn/train-PTB.sh 3 DARTS_V1
bash scripts-nas-rnn/train-WT2.sh 3 DARTS_V1
bash scripts-nas-rnn/train-PTB.sh 3 DARTS_V2
```

Some files were not shown because too many files have changed in this diff Show More