commit 13e908f4df: init
.gitignore (new file, vendored, executable)
@@ -0,0 +1,104 @@
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# Pycharm project
.idea
snapshots
*.pytorch
*.tar.bz
data
.*.swp
main_main.py
*.pdf
*/*.pdf

# Device
scripts-nas/.nfs00*
*/.nfs00*
LICENSE (new executable file)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Xuanyi Dong

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md (new file)
@@ -0,0 +1,17 @@
# GDAS
By Xuanyi Dong and Yi Yang

University of Technology Sydney

## Requirements
- PyTorch 1.0
- Python 3.6
```
conda install pytorch torchvision cuda100 -c pytorch
```

## Algorithm

Searching CNNs
```
```
configs/NAS-PTB-BASE.config (new file)
@@ -0,0 +1,27 @@
{
  "data_name" : ["str", "PTB"],
  "data_path" : ["str", "./data/data/penn"],
  "emsize"    : ["int", 850],
  "nhid"      : ["int", 850],
  "nhidlast"  : ["int", 850],
  "LR"        : ["float", 20],
  "clip"      : ["float", 0.25],
  "epochs"    : ["int", 3000],
  "train_batch": ["int", 64],
  "eval_batch" : ["int", 10],
  "test_batch" : ["int", 1],
  "bptt"      : ["int", 35],

  "dropout"   : ["float", 0.75],
  "dropouth"  : ["float", 0.25],
  "dropoutx"  : ["float", 0.75],
  "dropouti"  : ["float", 0.2],
  "dropoute"  : ["float", 0.1],

  "nonmono"   : ["int", 5],
  "alpha"     : ["float", 0],
  "beta"      : ["float", 1e-3],
  "wdecay"    : ["float", 8e-7],

  "max_seq_len_delta" : ["int", 20]
}
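Every field in these config files is stored as a typed pair, `"name" : ["type", value]`. The search scripts later in this commit read them through `load_config` from `lib/scheduler`, which is not included here; the following is a minimal, hypothetical sketch of what such a loader has to do. The helper name `load_typed_config` and its casting rules are assumptions for illustration, not the repository's actual implementation.

```python
import json
from collections import namedtuple

# Hypothetical loader for the ["type", value] convention used in ./configs.
_CASTS = {"str": str, "int": int, "float": float, "bool": bool}

def load_typed_config(path):
  with open(path, "r") as f:
    raw = json.load(f)
  parsed = {}
  for key, (type_name, value) in raw.items():
    cast = _CASTS[type_name]
    # list-valued fields (e.g. "milestones", "gammas") cast each element
    parsed[key] = [cast(v) for v in value] if isinstance(value, list) else cast(value)
  Config = namedtuple("Config", parsed.keys())
  return Config(**parsed)

# Example: cfg = load_typed_config('configs/NAS-PTB-BASE.config'); cfg.emsize == 850
```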
configs/NAS-WT2-BASE.config (new file)
@@ -0,0 +1,27 @@
{
  "data_name" : ["str", "WT2"],
  "data_path" : ["str", "./data/data/wikitext-2"],
  "emsize"    : ["int", 700],
  "nhid"      : ["int", 700],
  "nhidlast"  : ["int", 700],
  "LR"        : ["float", 20],
  "clip"      : ["float", 0.25],
  "epochs"    : ["int", 3000],
  "train_batch": ["int", 64],
  "eval_batch" : ["int", 10],
  "test_batch" : ["int", 1],
  "bptt"      : ["int", 35],

  "dropout"   : ["float", 0.75],
  "dropouth"  : ["float", 0.15],
  "dropoutx"  : ["float", 0.75],
  "dropouti"  : ["float", 0.2],
  "dropoute"  : ["float", 0.1],

  "nonmono"   : ["int", 5],
  "alpha"     : ["float", 0],
  "beta"      : ["float", 1e-3],
  "wdecay"    : ["float", 5e-7],

  "max_seq_len_delta" : ["int", 20]
}
configs/cos1800.config (new file)
@@ -0,0 +1,8 @@
{
  "type"      : ["str", "cosine"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 1800],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0001],
  "LR"        : ["float", 0.2]
}
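The cosine configs only specify the initial `LR` and the number of `epochs`; the decay curve itself comes from cosine annealing. The search scripts in this commit use `torch.optim.lr_scheduler.CosineAnnealingLR`, and assuming the `"type": "cosine"` configs map onto the same standard schedule, the per-epoch learning rate is approximately:

```python
import math

def cosine_lr(base_lr, epoch, total_epochs, eta_min=0.0):
  # Standard cosine annealing: eta_min + 0.5 * (base_lr - eta_min) * (1 + cos(pi * t / T)),
  # which is the form used by torch.optim.lr_scheduler.CosineAnnealingLR.
  return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * epoch / total_epochs))

# With cos1800.config: cosine_lr(0.2, 0, 1800) -> 0.2, cosine_lr(0.2, 900, 1800) -> 0.1,
# cosine_lr(0.2, 1800, 1800) -> 0.0
```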
configs/cos600.config (new file)
@@ -0,0 +1,8 @@
{
  "type"      : ["str", "cosine"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 600],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0005],
  "LR"        : ["float", 0.2]
}
configs/nas-cifar-cos-cut-v1.config (new file)
@@ -0,0 +1,13 @@
{
  "type"      : ["str", "cosine"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 600],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0005],
  "LR"        : ["float", 0.025],
  "auxiliary" : ["bool", 1],
  "auxiliary_weight" : ["float", 0.4],
  "grad_clip" : ["float", 5],
  "cutout"    : ["int", 16],
  "drop_path_prob" : ["float", 0.2]
}
configs/nas-cifar-cos-cut.config (new file)
@@ -0,0 +1,13 @@
{
  "type"      : ["str", "cosine"],
  "batch_size": ["int", 96],
  "epochs"    : ["int", 600],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0003],
  "LR"        : ["float", 0.025],
  "auxiliary" : ["bool", 1],
  "auxiliary_weight" : ["float", 0.4],
  "grad_clip" : ["float", 5],
  "cutout"    : ["int", 16],
  "drop_path_prob" : ["float", 0.2]
}
configs/nas-cifar-cos-nocut.config (new file)
@@ -0,0 +1,13 @@
{
  "type"      : ["str", "cosine"],
  "batch_size": ["int", 96],
  "epochs"    : ["int", 600],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0003],
  "LR"        : ["float", 0.025],
  "auxiliary" : ["bool", 1],
  "auxiliary_weight" : ["float", 0.4],
  "grad_clip" : ["float", 5],
  "cutout"    : ["int", 0],
  "drop_path_prob" : ["float", 0.3]
}
configs/nas-imagenet.config (new file)
@@ -0,0 +1,15 @@
{
  "type"        : ["str", "steplr"],
  "batch_size"  : ["int", 128],
  "epochs"      : ["int", 250],
  "decay_period": ["int", 1],
  "gamma"       : ["float", 0.97],
  "momentum"    : ["float", 0.9],
  "decay"       : ["float", 0.00003],
  "LR"          : ["float", 0.1],
  "label_smooth": ["float", 0.1],
  "auxiliary"   : ["bool", 1],
  "auxiliary_weight" : ["float", 0.4],
  "grad_clip"   : ["float", 5],
  "drop_path_prob" : ["float", 0]
}
configs/pyramidC10.config (new file)
@@ -0,0 +1,10 @@
{
  "type"      : ["str", "multistep"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 300],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0001],
  "LR"        : ["float", 0.1],
  "milestones": ["int", [150, 225]],
  "gammas"    : ["float", [0.1, 0.1]]
}
configs/pyramidC100.config (new file)
@@ -0,0 +1,10 @@
{
  "type"      : ["str", "multistep"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 300],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0001],
  "LR"        : ["float", 0.5],
  "milestones": ["int", [150, 225]],
  "gammas"    : ["float", [0.1, 0.1]]
}
configs/resnet165.config (new file)
@@ -0,0 +1,10 @@
{
  "type"      : ["str", "multistep"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 165],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0001],
  "LR"        : ["float", 0.01],
  "milestones": ["int", [1, 83, 124]],
  "gammas"    : ["float", [10, 0.1, 0.1]]
}
configs/resnet200.config (new file)
@@ -0,0 +1,10 @@
{
  "type"      : ["str", "multistep"],
  "batch_size": ["int", 128],
  "epochs"    : ["int", 200],
  "momentum"  : ["float", 0.9],
  "decay"     : ["float", 0.0005],
  "LR"        : ["float", 0.01],
  "milestones": ["int", [1, 60, 120, 160]],
  "gammas"    : ["float", [10, 0.2, 0.2, 0.2]]
}
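In the `multistep` configs, the learning rate starts at `LR` and is multiplied by the corresponding entry of `gammas` each time training reaches a `milestones` epoch; the `gamma = 10` at epoch 1 in the two ResNet configs therefore acts as a one-epoch warm-up from 0.01 to 0.1. A small sketch of that rule, assuming this is how the training code interprets `milestones` and `gammas`:

```python
def multistep_lr(base_lr, epoch, milestones, gammas):
  # Multiply the learning rate by gammas[i] once the epoch reaches milestones[i].
  lr = base_lr
  for milestone, gamma in zip(milestones, gammas):
    if epoch >= milestone:
      lr *= gamma
  return lr

# resnet200.config: epoch 0 -> 0.01, epoch 1 -> 0.1 (warm-up),
# epoch 60 -> 0.02, epoch 120 -> 0.004, epoch 160 -> 0.0008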
data/Get-PTB-WT2.sh (new file)
@@ -0,0 +1,49 @@
# https://github.com/salesforce/awd-lstm-lm
echo "=== Acquiring datasets ==="
echo "---"
mkdir -p save

mkdir -p data
cd data

echo "- Downloading WikiText-2 (WT2)"
wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
unzip -q wikitext-2-v1.zip
cd wikitext-2
mv wiki.train.tokens train.txt
mv wiki.valid.tokens valid.txt
mv wiki.test.tokens test.txt
cd ..

echo "- Downloading WikiText-103 (WT103)"
wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
unzip -q wikitext-103-v1.zip
cd wikitext-103
mv wiki.train.tokens train.txt
mv wiki.valid.tokens valid.txt
mv wiki.test.tokens test.txt
cd ..

echo "- Downloading Penn Treebank (PTB)"
wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
tar -xzf simple-examples.tgz

mkdir -p penn
cd penn
mv ../simple-examples/data/ptb.train.txt train.txt
mv ../simple-examples/data/ptb.test.txt test.txt
mv ../simple-examples/data/ptb.valid.txt valid.txt
cd ..

echo "- Downloading Penn Treebank (Character)"
mkdir -p pennchar
cd pennchar
mv ../simple-examples/data/ptb.char.train.txt train.txt
mv ../simple-examples/data/ptb.char.test.txt test.txt
mv ../simple-examples/data/ptb.char.valid.txt valid.txt
cd ..

rm -rf simple-examples/

echo "---"
echo "Happy language modeling :)"
data/README.BACK (new executable file)
@@ -0,0 +1,90 @@
# EraseReLU: A Simple Way to Ease the Training of Deep Convolution Neural Networks

This project implements [this paper](https://arxiv.org/abs/1709.07634) in [PyTorch](https://pytorch.org). The implementation is based on [ResNeXt-DenseNet](https://github.com/D-X-Y/ResNeXt-DenseNet).

## Usage
All the model definitions are located in the directory `models`.

All the training scripts are located in the directories `scripts` and `Xscripts`.

To train ResNet-110 with EraseReLU on CIFAR-10:
```bash
sh scripts/warmup_train_2gpu.sh resnet110_erase cifar10
```

To train the original ResNet-110 on CIFAR-10:
```bash
sh scripts/warmup_train_2gpu.sh resnet110 cifar10
```

### MiniImageNet for PatchShuffle
```
sh scripts-shuffle/train_resnet_00000.sh ResNet18
sh scripts-shuffle/train_resnet_10000.sh ResNet18
sh scripts-shuffle/train_resnet_11000.sh ResNet18
```

```
sh scripts-shuffle/train_pmd_00000.sh PMDNet18_300
sh scripts-shuffle/train_pmd_00000.sh PMDNet34_300
sh scripts-shuffle/train_pmd_00000.sh PMDNet50_300

sh scripts-shuffle/train_pmd_11000.sh PMDNet18_300
sh scripts-shuffle/train_pmd_11000.sh PMDNet34_300
sh scripts-shuffle/train_pmd_11000.sh PMDNet50_300
```

### ImageNet
- Use the script `train_imagenet.sh` to train models in PyTorch.
- Or use the code in `extra_torch` to train models in Torch.

#### Group Normalization
```
sh Xscripts/train_vgg_gn.sh 0,1,2,3,4,5,6,7 vgg16_gn 256
sh Xscripts/train_vgg_gn.sh 0,1,2,3,4,5,6,7 vgg16_gn 64
sh Xscripts/train_vgg_gn.sh 0,1,2,3,4,5,6,7 vgg16_gn 16
sh Xscripts/train_res_gn.sh 0,1,2,3,4,5,6,7 resnext50_32_4_gn 16
```

| Model    | Batch Size | Top-1 Error | Top-5 Error |
|:--------:|:----------:|:-----------:|:-----------:|
| VGG16-GN | 256        | 28.82       | 9.64        |

## Results

| Model          | Error on CIFAR-10 | Error on CIFAR-100 |
|:--------------:|:-----------------:|:------------------:|
| ResNet-56      | 6.97              | 30.60              |
| ResNet-56 (ER) | 6.23              | 28.56              |

## Citation
If you find this project helps your research, please consider citing the paper:
```
@article{dong2017eraserelu,
  title={EraseReLU: A Simple Way to Ease the Training of Deep Convolution Neural Networks},
  author={Dong, Xuanyi and Kang, Guoliang and Zhan, Kun and Yang, Yi},
  journal={arXiv preprint arXiv:1709.07634},
  year={2017}
}
```

## Download the ImageNet dataset
The ImageNet Large Scale Visual Recognition Challenge (ILSVRC) dataset has 1000 categories and 1.2 million images. The images do not need to be preprocessed or packaged in any database, but the validation images need to be moved into appropriate subfolders.

1. Download the images from http://image-net.org/download-images

2. Extract the training data:
```bash
mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
cd ..
```

3. Extract the validation data and move images to subfolders:
```bash
mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xvf ILSVRC2012_img_val.tar
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
```
data/README.md (new file)
@@ -0,0 +1,5 @@
# Tiny-ImageNet
The official website is [here](https://tiny-imagenet.herokuapp.com/). Please run `python tiny-imagenet.py` to generate the correct format of Tiny-ImageNet for training.

# PTB and WT2
Run `bash Get-PTB-WT2.sh` to download and prepare the Penn Treebank and WikiText-2 corpora.
data/classes.txt (new file, 1000 lines): diff suppressed because the file is too large
data/data/penn/test.txt (new file, 3761 lines): diff suppressed because the file is too large
data/data/penn/train.txt (new file, 42068 lines): diff suppressed because the file is too large
data/data/penn/valid.txt (new file, 3370 lines): diff suppressed because the file is too large
data/data/wikitext-2/test.txt (new file, 4358 lines): diff suppressed because the file is too large
data/data/wikitext-2/train.txt (new file, 36718 lines): diff suppressed because the file is too large
data/data/wikitext-2/valid.txt (new file, 3760 lines): diff suppressed because the file is too large
data/tiny-imagenet.py (new file)
@@ -0,0 +1,53 @@
import os, sys
from pathlib import Path

url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"

def load_val():
  # Parse the validation annotation file into parallel (image name, class label) lists.
  path = 'tiny-imagenet-200/val/val_annotations.txt'
  cfile = open(path, 'r')
  content = cfile.readlines()
  content = [x.strip().split('\t') for x in content]
  cfile.close()
  images = [x[0] for x in content]
  labels = [x[1] for x in content]
  return images, labels

def main():
  # Download and unpack Tiny-ImageNet, then regroup the validation images into
  # one sub-directory per class (the ImageFolder layout used for training).
  os.system("wget {:}".format(url))
  os.system("rm -rf tiny-imagenet-200")
  os.system("unzip -o tiny-imagenet-200.zip")
  images, labels = load_val()
  savedir = 'tiny-imagenet-200/new_val'
  if not os.path.exists(savedir): os.makedirs(savedir)
  for image, label in zip(images, labels):
    cdir = savedir + '/' + label
    if not os.path.exists(cdir): os.makedirs(cdir)
    ori_path = 'tiny-imagenet-200/val/images/' + image
    os.system("cp {:} {:}".format(ori_path, cdir))
  os.system("rm -rf tiny-imagenet-200/val")
  os.system("mv {:} tiny-imagenet-200/val".format(savedir))

def generate_salt_pepper():
  # Create a noisy copy of the validation set with salt-and-pepper corruption.
  targetdir = Path('tiny-imagenet-200/val')
  noisedir = Path('tiny-imagenet-200/val-noise')
  assert targetdir.exists(), '{:} does not exist'.format(targetdir)
  from imgaug import augmenters as iaa
  import cv2
  aug = iaa.SaltAndPepper(p=0.2)

  for sub in targetdir.iterdir():
    if not sub.is_dir(): continue
    subdir = noisedir / sub.name
    if not subdir.exists(): os.makedirs('{:}'.format(subdir))
    images = sub.glob('*.JPEG')
    for image in images:
      I = cv2.imread(str(image))
      Inoise = aug.augment_image(I)
      savepath = subdir / image.name
      cv2.imwrite(str(savepath), Inoise)
    print('{:} done'.format(sub))

if __name__ == "__main__":
  #main()
  generate_salt_pepper()
exps-cnn/acc_search_v2.py (new file)
@@ -0,0 +1,310 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from nas import Network, NetworkACC2, NetworkV3, NetworkV4, NetworkV5, NetworkFACC1
from nas import return_alphas_str
from train_utils import main_procedure
from scheduler import load_config

Networks = {'base': Network, 'acc2': NetworkACC2, 'facc1': NetworkFACC1, 'NetworkV3': NetworkV3, 'NetworkV4': NetworkV4, 'NetworkV5': NetworkV5}


parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--dataset', type=str, choices=['cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
parser.add_argument('--batch_size', type=int, help='the batch size')
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
parser.add_argument('--tau_max', type=float, help='initial tau')
parser.add_argument('--tau_min', type=float, help='minimum tau')
parser.add_argument('--momentum', type=float, help='momentum')
parser.add_argument('--weight_decay', type=float, help='weight decay')
parser.add_argument('--epochs', type=int, help='num of training epochs')
# architecture learning rate
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
#
parser.add_argument('--init_channels', type=int, help='num of init channels')
parser.add_argument('--layers', type=int, help='total number of layers')
#
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str, help='the model configuration')

# resume
parser.add_argument('--resume', type=str, help='the resume path')
parser.add_argument('--only_base', action='store_true', default=False, help='only train the searched model')
# split data
parser.add_argument('--validate', action='store_true', default=False, help='split train-data into train/val or not')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)


def main():

  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  if not os.path.isdir(args.save_path):
    os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("Torch version : {}".format(torch.__version__), log)
  print_log("CUDA version : {}".format(torch.version.cuda), log)
  print_log("cuDNN version : {}".format(cudnn.version()), log)
  print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
  args.dataset = args.dataset.lower()

  # Mean + Std
  if args.dataset == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif args.dataset == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  else:
    raise TypeError("Unknown dataset : {:}".format(args.dataset))
  # Data Augmentation
  if args.dataset == 'cifar10' or args.dataset == 'cifar100':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
             transforms.Normalize(mean, std)]
    if args.cutout > 0: lists += [Cutout(args.cutout)]
    train_transform = transforms.Compose(lists)
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
  else:
    raise TypeError("Unknown dataset : {:}".format(args.dataset))
  # Datasets
  if args.dataset == 'cifar10':
    train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'cifar100':
    train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 100
  else:
    raise TypeError("Unknown dataset : {:}".format(args.dataset))
  # Data Loader
  if args.validate:
    indices = list(range(len(train_data)))
    split = int(args.train_portion * len(indices))
    random.shuffle(indices)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                       sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
                       pin_memory=True, num_workers=args.workers)
    test_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                       sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
                       pin_memory=True, num_workers=args.workers)
  else:
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)

  # network and criterion
  criterion = torch.nn.CrossEntropyLoss().cuda()
  basemodel = Networks[args.arch](args.init_channels, num_classes, args.layers)
  model = torch.nn.DataParallel(basemodel).cuda()
  print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
  print_log("Train-transformation : {:}\nTest--transformation : {:}".format(train_transform, test_transform), log)

  # optimizer and LR-scheduler
  base_optimizer = torch.optim.SGD(basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
  #base_optimizer = torch.optim.Adam(basemodel.base_parameters(), lr=args.learning_rate_max, betas=(0.5, 0.999), weight_decay=args.weight_decay)
  base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
  arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

  # snapshot
  checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
  if args.resume is not None and os.path.isfile(args.resume):
    checkpoint = torch.load(args.resume)
    start_epoch = checkpoint['epoch']
    basemodel.load_state_dict( checkpoint['state_dict'] )
    base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
    arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
    base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
    genotypes = checkpoint['genotypes']
    print_log('Load resume from {:} with start-epoch = {:}'.format(args.resume, start_epoch), log)
  elif os.path.isfile(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    start_epoch = checkpoint['epoch']
    basemodel.load_state_dict( checkpoint['state_dict'] )
    base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
    arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
    base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
    genotypes = checkpoint['genotypes']
    print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
  else:
    start_epoch, genotypes = 0, {}
    print_log('Train model-search from scratch.', log)

  config = load_config(args.model_config)

  if args.only_base:
    print_log('---- Only Train the Searched Model ----', log)
    main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
    return

  # Main loop
  start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
  for epoch in range(start_epoch, args.epochs):
    base_scheduler.step()

    basemodel.set_tau( args.tau_max - epoch*1.0/args.epochs*(args.tau_max-args.tau_min) )
    #if epoch + 2 == args.epochs:
    #  torch.cuda.empty_cache()
    #  basemodel.set_gumbel(False)

    need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}] [Batch={:d}], tau={:}'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr()), args.batch_size, basemodel.get_tau()), log)

    genotype = basemodel.genotype()
    print_log('genotype = {:}'.format(genotype), log)

    print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)

    # training
    train_acc1, train_acc5, train_obj, train_time \
                  = train(train_loader, test_loader, model, criterion, base_optimizer, arch_optimizer, epoch, log)
    total_train_time += train_time
    # validation
    valid_acc1, valid_acc5, valid_obj = infer(test_loader, model, criterion, epoch, log)
    print_log('{:03d}/{:03d}, Train-Accuracy = {:.2f}, Test-Accuracy = {:.2f}'.format(epoch, args.epochs, train_acc1, valid_acc1), log)
    # save genotype
    genotypes[epoch] = basemodel.genotype()
    # save checkpoint
    torch.save({'epoch' : epoch + 1,
                'args'  : deepcopy(args),
                'state_dict': basemodel.state_dict(),
                'genotypes' : genotypes,
                'base_optimizer' : base_optimizer.state_dict(),
                'arch_optimizer' : arch_optimizer.state_dict(),
                'base_scheduler' : base_scheduler.state_dict()},
                checkpoint_path)
    print_log('----> Save into {:}'.format(checkpoint_path), log)

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

  print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

  # clear GPU cache
  torch.cuda.empty_cache()
  main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
  log.close()


def train(train_queue, valid_queue, model, criterion, base_optimizer, arch_optimizer, epoch, log):
  data_time, batch_time = AverageMeter(), AverageMeter()
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
  model.train()

  valid_iter = iter(valid_queue)
  end = time.time()
  for step, (inputs, targets) in enumerate(train_queue):
    batch, C, H, W = inputs.size()

    #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # get a random minibatch from the search queue with replacement
    try:
      input_search, target_search = next(valid_iter)
    except:
      valid_iter = iter(valid_queue)
      input_search, target_search = next(valid_iter)

    target_search = target_search.cuda(non_blocking=True)

    # update the architecture
    arch_optimizer.zero_grad()
    output_search = model(input_search)
    arch_loss = criterion(output_search, target_search)
    arch_loss.backward()
    arch_optimizer.step()

    # update the parameters
    base_optimizer.zero_grad()
    logits = model(inputs)
    loss = criterion(logits, targets)

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
    base_optimizer.step()

    prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
    objs.update(loss.item(), batch)
    top1.update(prec1.item(), batch)
    top5.update(prec5.item(), batch)

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

    if step % args.print_freq == 0 or (step+1) == len(train_queue):
      Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
      Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
      Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
      print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg, batch_time.sum


def infer(valid_queue, model, criterion, epoch, log):
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()

  model.eval()
  with torch.no_grad():
    for step, (inputs, targets) in enumerate(valid_queue):
      batch, C, H, W = inputs.size()
      targets = targets.cuda(non_blocking=True)

      logits = model(inputs)
      loss = criterion(logits, targets)

      prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
      objs.update(loss.item(), batch)
      top1.update(prec1.item(), batch)
      top5.update(prec5.item(), batch)

      if step % args.print_freq == 0 or (step+1) == len(valid_queue):
        Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
        Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
        print_log(Sstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg


if __name__ == '__main__':
  main()
exps-cnn/acc_search_v3.py (new file)
@@ -0,0 +1,397 @@
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from pathlib import Path
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
from utils import AverageMeter, time_string, convert_secs2time
from utils import print_log, obtain_accuracy
from utils import Cutout, count_parameters_in_MB
from nas import Network, NetworkACC2, NetworkV3, NetworkV4, NetworkV5, NetworkFACC1
from nas import return_alphas_str
from train_utils import main_procedure
from scheduler import load_config

Networks = {'base': Network, 'acc2': NetworkACC2, 'facc1': NetworkFACC1, 'NetworkV3': NetworkV3, 'NetworkV4': NetworkV4, 'NetworkV5': NetworkV5}


parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data_path', type=str, help='Path to dataset')
parser.add_argument('--dataset', type=str, choices=['cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
parser.add_argument('--batch_size', type=int, help='the batch size')
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
parser.add_argument('--tau_max', type=float, help='initial tau')
parser.add_argument('--tau_min', type=float, help='minimum tau')
parser.add_argument('--momentum', type=float, help='momentum')
parser.add_argument('--weight_decay', type=float, help='weight decay')
parser.add_argument('--epochs', type=int, help='num of training epochs')
# architecture learning rate
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
#
parser.add_argument('--init_channels', type=int, help='num of init channels')
parser.add_argument('--layers', type=int, help='total number of layers')
#
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
parser.add_argument('--model_config', type=str, help='the model configuration')

# resume
parser.add_argument('--resume', type=str, help='the resume path')
parser.add_argument('--only_base', action='store_true', default=False, help='only train the searched model')
# split data
parser.add_argument('--validate', action='store_true', default=False, help='split train-data into train/val or not')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
parser.add_argument('--manualSeed', type=int, help='manual seed')
args = parser.parse_args()

assert torch.cuda.is_available(), 'torch.cuda is not available'

if args.manualSeed is None:
  args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
cudnn.benchmark = True
cudnn.enabled = True
torch.manual_seed(args.manualSeed)
torch.cuda.manual_seed_all(args.manualSeed)


def main():

  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  if not os.path.isdir(args.save_path):
    os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("Torch version : {}".format(torch.__version__), log)
  print_log("CUDA version : {}".format(torch.version.cuda), log)
  print_log("cuDNN version : {}".format(cudnn.version()), log)
  print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
  args.dataset = args.dataset.lower()

  # Mean + Std
  if args.dataset == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif args.dataset == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  else:
    raise TypeError("Unknown dataset : {:}".format(args.dataset))
  # Data Augmentation
  if args.dataset == 'cifar10' or args.dataset == 'cifar100':
    lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
             transforms.Normalize(mean, std)]
    if args.cutout > 0: lists += [Cutout(args.cutout)]
    train_transform = transforms.Compose(lists)
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
  else:
    raise TypeError("Unknown dataset : {:}".format(args.dataset))
  # Datasets
  if args.dataset == 'cifar10':
    train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'cifar100':
    train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 100
  else:
    raise TypeError("Unknown dataset : {:}".format(args.dataset))
  # Data Loader
  if args.validate:
    indices = list(range(len(train_data)))
    split = int(args.train_portion * len(indices))
    random.shuffle(indices)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                       sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
                       pin_memory=True, num_workers=args.workers)
    test_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                       sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
                       pin_memory=True, num_workers=args.workers)
  else:
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)

  # network and criterion
  criterion = torch.nn.CrossEntropyLoss().cuda()
  basemodel = Networks[args.arch](args.init_channels, num_classes, args.layers)
  model = torch.nn.DataParallel(basemodel).cuda()
  print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
  print_log("Train-transformation : {:}\nTest--transformation : {:}".format(train_transform, test_transform), log)

  # optimizer and LR-scheduler
  base_optimizer = torch.optim.SGD(basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
  #base_optimizer = torch.optim.Adam(basemodel.base_parameters(), lr=args.learning_rate_max, betas=(0.5, 0.999), weight_decay=args.weight_decay)
  base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
  arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

  # snapshot
  checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
  if args.resume is not None and os.path.isfile(args.resume):
    checkpoint = torch.load(args.resume)
    start_epoch = checkpoint['epoch']
    basemodel.load_state_dict( checkpoint['state_dict'] )
    base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
    arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
    base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
    genotypes = checkpoint['genotypes']
    print_log('Load resume from {:} with start-epoch = {:}'.format(args.resume, start_epoch), log)
  elif os.path.isfile(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    start_epoch = checkpoint['epoch']
    basemodel.load_state_dict( checkpoint['state_dict'] )
    base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
    arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
    base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
    genotypes = checkpoint['genotypes']
    print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
  else:
    start_epoch, genotypes = 0, {}
    print_log('Train model-search from scratch.', log)

  config = load_config(args.model_config)

  if args.only_base:
    print_log('---- Only Train the Searched Model ----', log)
    main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
    return

  # Main loop
  start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
  for epoch in range(start_epoch, args.epochs):
    base_scheduler.step()

    basemodel.set_tau( args.tau_max - epoch*1.0/args.epochs*(args.tau_max-args.tau_min) )
    #if epoch + 1 == args.epochs:
    #  torch.cuda.empty_cache()
    #  basemodel.set_gumbel(False)

    need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}] [Batch={:d}], tau={:}'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr()), args.batch_size, basemodel.get_tau()), log)

    genotype = basemodel.genotype()
    print_log('genotype = {:}'.format(genotype), log)

    print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)

    # training
    if epoch + 1 == args.epochs:
      train_acc1, train_acc5, train_obj, train_time \
                    = train_joint(train_loader, test_loader, model, criterion, base_optimizer, arch_optimizer, epoch, log)
      total_train_time += train_time
    else:
      train_acc1, train_acc5, train_obj, train_time \
                    = train_base(train_loader, None, model, criterion, base_optimizer, None, epoch, log)
      total_train_time += train_time
      Arch__acc1, Arch__acc5, Arch__obj, train_time \
                    = train_arch(None, test_loader, model, criterion, None, arch_optimizer, epoch, log)
      total_train_time += train_time
    # validation
    valid_acc1, valid_acc5, valid_obj = infer(test_loader, model, criterion, epoch, log)
    print_log('{:03d}/{:03d}, Train-Accuracy = {:.2f}, Arch-Accuracy = {:.2f}, Test-Accuracy = {:.2f}'.format(epoch, args.epochs, train_acc1, Arch__acc1, valid_acc1), log)

    # save genotype
    genotypes[epoch] = basemodel.genotype()
    # save checkpoint
    torch.save({'epoch' : epoch + 1,
                'args'  : deepcopy(args),
                'state_dict': basemodel.state_dict(),
                'genotypes' : genotypes,
                'base_optimizer' : base_optimizer.state_dict(),
                'arch_optimizer' : arch_optimizer.state_dict(),
                'base_scheduler' : base_scheduler.state_dict()},
                checkpoint_path)
    print_log('----> Save into {:}'.format(checkpoint_path), log)

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

  print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)

  # clear GPU cache
  torch.cuda.empty_cache()
  main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
  log.close()


def train_base(train_queue, _, model, criterion, base_optimizer, __, epoch, log):
  data_time, batch_time = AverageMeter(), AverageMeter()
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
  model.train()

  end = time.time()
  for step, (inputs, targets) in enumerate(train_queue):
    batch, C, H, W = inputs.size()

    #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # update the parameters
    base_optimizer.zero_grad()
    logits = model(inputs)
    loss = criterion(logits, targets)

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
    base_optimizer.step()

    prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
    objs.update(loss.item(), batch)
    top1.update(prec1.item(), batch)
    top5.update(prec5.item(), batch)

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

    if step % args.print_freq == 0 or (step+1) == len(train_queue):
      Sstr = ' TRAIN-BASE ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
      Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
      Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
      print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg, batch_time.sum


def train_arch(_, valid_queue, model, criterion, __, arch_optimizer, epoch, log):
  data_time, batch_time = AverageMeter(), AverageMeter()
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
  model.train()

  end = time.time()
  for step, (inputs, targets) in enumerate(valid_queue):
    batch, C, H, W = inputs.size()

    #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # update the architecture
    arch_optimizer.zero_grad()
    outputs = model(inputs)
    arch_loss = criterion(outputs, targets)
    arch_loss.backward()
    arch_optimizer.step()

    prec1, prec5 = obtain_accuracy(outputs.data, targets.data, topk=(1, 5))
    objs.update(arch_loss.item(), batch)
    top1.update(prec1.item(), batch)
    top5.update(prec5.item(), batch)

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

    if step % args.print_freq == 0 or (step+1) == len(valid_queue):
      Sstr = ' TRAIN-ARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
      Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
      Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
      print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg, batch_time.sum


def train_joint(train_queue, valid_queue, model, criterion, base_optimizer, arch_optimizer, epoch, log):
  data_time, batch_time = AverageMeter(), AverageMeter()
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
  model.train()

  valid_iter = iter(valid_queue)
  end = time.time()
  for step, (inputs, targets) in enumerate(train_queue):
    batch, C, H, W = inputs.size()

    #inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # get a random minibatch from the search queue with replacement
    try:
      input_search, target_search = next(valid_iter)
    except:
      valid_iter = iter(valid_queue)
      input_search, target_search = next(valid_iter)

    target_search = target_search.cuda(non_blocking=True)

    # update the architecture
    arch_optimizer.zero_grad()
    output_search = model(input_search)
    arch_loss = criterion(output_search, target_search)
    arch_loss.backward()
    arch_optimizer.step()

    # update the parameters
    base_optimizer.zero_grad()
    logits = model(inputs)
    loss = criterion(logits, targets)

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
    base_optimizer.step()

    prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
    objs.update(loss.item(), batch)
    top1.update(prec1.item(), batch)
    top5.update(prec5.item(), batch)

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

    if step % args.print_freq == 0 or (step+1) == len(train_queue):
      Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
      Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
      Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
      print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg, batch_time.sum


def infer(valid_queue, model, criterion, epoch, log):
  objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()

  model.eval()
  with torch.no_grad():
    for step, (inputs, targets) in enumerate(valid_queue):
      batch, C, H, W = inputs.size()
      targets = targets.cuda(non_blocking=True)

      logits = model(inputs)
      loss = criterion(logits, targets)

      prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
      objs.update(loss.item(), batch)
      top1.update(prec1.item(), batch)
      top5.update(prec5.item(), batch)

      if step % args.print_freq == 0 or (step+1) == len(valid_queue):
        Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
        Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
        print_log(Sstr + ' ' + Lstr, log)

  return top1.avg, top5.avg, objs.avg


if __name__ == '__main__':
  main()
94
exps-cnn/cvpr-vis.py
Normal file
94
exps-cnn/cvpr-vis.py
Normal file
@ -0,0 +1,94 @@
|
||||
# python ./exps-nas/cvpr-vis.py --save_dir ./snapshots/NAS-VIS/
|
||||
import os, sys, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from nas import DMS_V1, DMS_F1
|
||||
from nas_rnn import DARTS_V2, GDAS
|
||||
from graphviz import Digraph
|
||||
|
||||
parser = argparse.ArgumentParser("Visualize the Networks")
|
||||
parser.add_argument('--save_dir', type=str, help='The directory to save the network plot.')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def plot_cnn(genotype, filename):
|
||||
g = Digraph(
|
||||
format='pdf',
|
||||
edge_attr=dict(fontsize='20', fontname="times"),
|
||||
node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
|
||||
engine='dot')
|
||||
g.body.extend(['rankdir=LR'])
|
||||
|
||||
g.node("c_{k-2}", fillcolor='darkseagreen2')
|
||||
g.node("c_{k-1}", fillcolor='darkseagreen2')
|
||||
assert len(genotype) % 2 == 0, '{:}'.format(genotype)
|
||||
steps = len(genotype) // 2
|
||||
|
||||
for i in range(steps):
|
||||
g.node(str(i), fillcolor='lightblue')
|
||||
|
||||
for i in range(steps):
|
||||
for k in [2*i, 2*i + 1]:
|
||||
op, j, weight = genotype[k]
|
||||
if j == 0:
|
||||
u = "c_{k-2}"
|
||||
elif j == 1:
|
||||
u = "c_{k-1}"
|
||||
else:
|
||||
u = str(j-2)
|
||||
v = str(i)
|
||||
g.edge(u, v, label=op, fillcolor="gray")
|
||||
|
||||
g.node("c_{k}", fillcolor='palegoldenrod')
|
||||
for i in range(steps):
|
||||
g.edge(str(i), "c_{k}", fillcolor="gray")
|
||||
|
||||
g.render(filename, view=False)
|
||||
|
||||
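# Render a recurrent-cell genotype: x_{t} and h_{t-1} feed node 0, each later node is connected to the
# predecessor named in the genotype, and every node feeds the output h_{t}.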
def plot_rnn(genotype, filename):
|
||||
g = Digraph(
|
||||
format='pdf',
|
||||
edge_attr=dict(fontsize='20', fontname="times"),
|
||||
node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
|
||||
engine='dot')
|
||||
g.body.extend(['rankdir=LR'])
|
||||
|
||||
g.node("x_{t}", fillcolor='darkseagreen2')
|
||||
g.node("h_{t-1}", fillcolor='darkseagreen2')
|
||||
g.node("0", fillcolor='lightblue')
|
||||
g.edge("x_{t}", "0", fillcolor="gray")
|
||||
g.edge("h_{t-1}", "0", fillcolor="gray")
|
||||
steps = len(genotype)
|
||||
|
||||
for i in range(1, steps + 1):
|
||||
g.node(str(i), fillcolor='lightblue')
|
||||
|
||||
for i, (op, j) in enumerate(genotype):
|
||||
g.edge(str(j), str(i + 1), label=op, fillcolor="gray")
|
||||
|
||||
g.node("h_{t}", fillcolor='palegoldenrod')
|
||||
for i in range(1, steps + 1):
|
||||
g.edge(str(i), "h_{t}", fillcolor="gray")
|
||||
|
||||
g.render(filename, view=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
save_dir = Path(args.save_dir)
|
||||
|
||||
save_path = str(save_dir / 'DMS_V1-normal')
|
||||
plot_cnn(DMS_V1.normal, save_path)
|
||||
save_path = str(save_dir / 'DMS_V1-reduce')
|
||||
plot_cnn(DMS_V1.reduce, save_path)
|
||||
save_path = str(save_dir / 'DMS_F1-normal')
|
||||
plot_cnn(DMS_F1.normal, save_path)
|
||||
|
||||
save_path = str(save_dir / 'DARTS-V2-RNN')
|
||||
plot_rnn(DARTS_V2.recurrent, save_path)
|
||||
|
||||
save_path = str(save_dir / 'GDAS-V1-RNN')
|
||||
plot_rnn(GDAS.recurrent, save_path)
|
312
exps-cnn/meta_search.py
Normal file
@ -0,0 +1,312 @@
|
||||
import os, sys, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torchvision.transforms as transforms
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from datasets import TieredImageNet, MetaBatchSampler
|
||||
from utils import AverageMeter, time_string, convert_secs2time
|
||||
from utils import print_log, obtain_accuracy
|
||||
from utils import Cutout, count_parameters_in_MB
|
||||
from meta_nas import return_alphas_str, MetaNetwork
|
||||
from train_utils import main_procedure
|
||||
from scheduler import load_config
|
||||
|
||||
Networks = {'meta': MetaNetwork}
|
||||
|
||||
parser = argparse.ArgumentParser("cifar")
|
||||
parser.add_argument('--data_path', type=str, help='Path to dataset')
|
||||
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
|
||||
parser.add_argument('--n_way', type=int, help='N-WAY.')
|
||||
parser.add_argument('--k_shot', type=int, help='K-SHOT.')
|
||||
# Learning Parameters
|
||||
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
|
||||
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
|
||||
parser.add_argument('--momentum', type=float, help='momentum')
|
||||
parser.add_argument('--weight_decay', type=float, help='weight decay')
|
||||
parser.add_argument('--epochs', type=int, help='num of training epochs')
|
||||
# architecture learning rate
|
||||
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
|
||||
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
|
||||
#
|
||||
parser.add_argument('--init_channels', type=int, help='num of init channels')
|
||||
parser.add_argument('--layers', type=int, help='total number of layers')
|
||||
#
|
||||
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
|
||||
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
|
||||
parser.add_argument('--model_config', type=str , help='the model configuration')
|
||||
|
||||
# resume
|
||||
parser.add_argument('--resume', type=str , help='the resume path')
|
||||
parser.add_argument('--only_base',action='store_true', default=False, help='only train the searched model')
|
||||
# split data
|
||||
parser.add_argument('--validate', action='store_true', default=False, help='split the training data into train/val or not')
|
||||
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
|
||||
# log
|
||||
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
|
||||
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
|
||||
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
|
||||
parser.add_argument('--manualSeed', type=int, help='manual seed')
|
||||
args = parser.parse_args()
|
||||
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
if args.manualSeed is None:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
cudnn.enabled = True
|
||||
torch.manual_seed(args.manualSeed)
|
||||
torch.cuda.manual_seed_all(args.manualSeed)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
|
||||
print_log('save path : {}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
print_log("Random Seed: {}".format(args.manualSeed), log)
|
||||
print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
|
||||
print_log("Torch version : {}".format(torch.__version__), log)
|
||||
print_log("CUDA version : {}".format(torch.version.cuda), log)
|
||||
print_log("cuDNN version : {}".format(cudnn.version()), log)
|
||||
print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
|
||||
|
||||
# Mean + Std
|
||||
means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
|
||||
# Data Augmentation
|
||||
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(),
|
||||
transforms.Normalize(means, stds)]
|
||||
if args.cutout > 0 : lists += [Cutout(args.cutout)]
|
||||
train_transform = transforms.Compose(lists)
|
||||
test_transform = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(means, stds)])
|
||||
|
||||
train_data = TieredImageNet(args.data_path, 'train', train_transform)
|
||||
test_data = TieredImageNet(args.data_path, 'val' , test_transform )
|
||||
|
||||
train_sampler = MetaBatchSampler(train_data.labels, args.n_way, args.k_shot * 2, len(train_data) // (args.n_way*args.k_shot))
|
||||
test_sampler = MetaBatchSampler( test_data.labels, args.n_way, args.k_shot * 2, len( test_data) // (args.n_way*args.k_shot))
|
||||
|
||||
train_loader = torch.utils.data.DataLoader(train_data, batch_sampler=train_sampler)
|
||||
test_loader = torch.utils.data.DataLoader( test_data, batch_sampler= test_sampler)
|
||||
|
||||
# network
|
||||
basemodel = Networks[args.arch](args.init_channels, args.layers, head='imagenet')
|
||||
model = torch.nn.DataParallel(basemodel).cuda()
|
||||
print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
|
||||
print_log("Train-transformation : {:}\nTest--transformation : {:}".format(train_transform, test_transform), log)
|
||||
|
||||
# optimizer and LR-scheduler
|
||||
#base_optimizer = torch.optim.SGD (basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
base_optimizer = torch.optim.Adam(basemodel.base_parameters(), lr=args.learning_rate_max, betas=(0.5, 0.999), weight_decay=args.weight_decay)
|
||||
base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
|
||||
arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
|
||||
|
||||
# snapshot
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-meta-search.pth')
|
||||
if args.resume is not None and os.path.isfile(args.resume):
|
||||
checkpoint = torch.load(args.resume)
|
||||
start_epoch = checkpoint['epoch']
|
||||
basemodel.load_state_dict( checkpoint['state_dict'] )
|
||||
base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
|
||||
arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
|
||||
base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
|
||||
genotypes = checkpoint['genotypes']
|
||||
print_log('Load resume from {:} with start-epoch = {:}'.format(args.resume, start_epoch), log)
|
||||
elif os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
start_epoch = checkpoint['epoch']
|
||||
basemodel.load_state_dict( checkpoint['state_dict'] )
|
||||
base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
|
||||
arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
|
||||
base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
|
||||
genotypes = checkpoint['genotypes']
|
||||
print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
|
||||
else:
|
||||
start_epoch, genotypes = 0, {}
|
||||
print_log('Train model-search from scratch.', log)
|
||||
|
||||
config = load_config(args.model_config)
|
||||
|
||||
if args.only_base:
|
||||
print_log('---- Only Train the Searched Model ----', log)
|
||||
CIFAR_DATA_DIR = os.environ['TORCH_HOME'] + '/cifar.python'
|
||||
main_procedure(config, 'cifar10', CIFAR_DATA_DIR, args, basemodel.genotype(), 36, 20, log)
|
||||
return
|
||||
|
||||
# Main loop
|
||||
start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
|
||||
for epoch in range(start_epoch, args.epochs):
|
||||
base_scheduler.step()
|
||||
|
||||
need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
|
||||
print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr())), log)
|
||||
|
||||
genotype = basemodel.genotype()
|
||||
print_log('genotype = {:}'.format(genotype), log)
|
||||
print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)
|
||||
|
||||
# training
|
||||
train_acc1, train_obj, train_time \
|
||||
= train(train_loader, test_loader, model, args.n_way, base_optimizer, arch_optimizer, epoch, log)
|
||||
total_train_time += train_time
|
||||
# validation
|
||||
valid_acc1, valid_obj = infer(test_loader, model, epoch, args.n_way, log)
|
||||
|
||||
print_log('META -> {:}-way {:}-shot : {:03d}/{:03d} : Train Acc : {:.2f}, Test Acc : {:.2f}'.format(args.n_way, args.k_shot, epoch, args.epochs, train_acc1, valid_acc1), log)
|
||||
# save genotype
|
||||
genotypes[epoch] = basemodel.genotype()
|
||||
|
||||
# save checkpoint
|
||||
torch.save({'epoch' : epoch + 1,
|
||||
'args' : deepcopy(args),
|
||||
'state_dict': basemodel.state_dict(),
|
||||
'genotypes' : genotypes,
|
||||
'base_optimizer' : base_optimizer.state_dict(),
|
||||
'arch_optimizer' : arch_optimizer.state_dict(),
|
||||
'base_scheduler' : base_scheduler.state_dict()},
|
||||
checkpoint_path)
|
||||
print_log('----> Save into {:}'.format(checkpoint_path), log)
|
||||
|
||||
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)
|
||||
|
||||
# clear GPU cache
|
||||
CIFAR_DATA_DIR = os.environ['TORCH_HOME'] + '/cifar.python'
|
||||
print_log('test for CIFAR-10', log)
|
||||
torch.cuda.empty_cache()
|
||||
main_procedure(config, 'cifar10' , CIFAR_DATA_DIR, args, basemodel.genotype(), 36, 20, log)
|
||||
print_log('test for CIFAR-100', log)
|
||||
torch.cuda.empty_cache()
|
||||
main_procedure(config, 'cifar100', CIFAR_DATA_DIR, args, basemodel.genotype(), 36, 20, log)
|
||||
log.close()
|
||||
|
||||
|
||||
|
||||
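# Pairwise squared Euclidean distances between the rows of A (na x d) and B (nb x d),
# returned as an (na x nb) matrix via broadcasting.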
def euclidean_dist(A, B):
|
||||
na, da = A.size()
|
||||
nb, db = B.size()
|
||||
assert da == db, 'invalid feature dim : {:} vs. {:}'.format(da, db)
|
||||
X, Y = A.view(na, 1, da), B.view(1, nb, db)
|
||||
return torch.pow(X-Y, 2).sum(2)
|
||||
|
||||
|
||||
|
||||
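# Few-shot episode loss in the style of prototypical networks (an interpretation of this code): each of the
# n_way classes contributes 2*shot samples, the first `shot` form the class prototype (mean support feature)
# and the rest are queries classified by negative squared distance to the prototypes.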
def get_loss(features, targets, n_way):
|
||||
classes = torch.unique(targets)
|
||||
shot = features.size(0) // n_way // 2
|
||||
|
||||
support_index, query_index, labels = [], [], []
|
||||
for idx, cls in enumerate( classes.tolist() ):
|
||||
indexs = (targets == cls).nonzero().view(-1).tolist()
|
||||
support_index.append(indexs[:shot])
|
||||
query_index += indexs[shot:]
|
||||
labels += [idx] * shot
|
||||
query_features = features[query_index, :]
|
||||
support_features = features[support_index, :]
|
||||
support_features = torch.mean(support_features, dim=1)
|
||||
|
||||
labels = torch.LongTensor(labels).cuda(non_blocking=True)
|
||||
logits = -euclidean_dist(query_features, support_features)
|
||||
loss = F.cross_entropy(logits, labels)
|
||||
accuracy = obtain_accuracy(logits.data, labels.data, topk=(1,))[0]
|
||||
return loss, accuracy
|
||||
|
||||
|
||||
|
||||
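# One search epoch: alternately update the architecture parameters on a batch drawn from valid_queue and
# the network weights on a batch from train_queue, both through the episodic get_loss defined above.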
def train(train_queue, valid_queue, model, n_way, base_optimizer, arch_optimizer, epoch, log):
|
||||
data_time, batch_time = AverageMeter(), AverageMeter()
|
||||
objs, accuracies = AverageMeter(), AverageMeter()
|
||||
model.train()
|
||||
|
||||
valid_iter = iter(valid_queue)
|
||||
end = time.time()
|
||||
for step, (inputs, targets) in enumerate(train_queue):
|
||||
batch, C, H, W = inputs.size()
|
||||
|
||||
#inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
|
||||
#targets = targets.cuda(non_blocking=True)
|
||||
data_time.update(time.time() - end)
|
||||
|
||||
# get a random minibatch from the search queue with replacement
|
||||
try:
|
||||
input_search, target_search = next(valid_iter)
|
||||
except StopIteration:
|
||||
valid_iter = iter(valid_queue)
|
||||
input_search, target_search = next(valid_iter)
|
||||
|
||||
#target_search = target_search.cuda(non_blocking=True)
|
||||
|
||||
# update the architecture
|
||||
arch_optimizer.zero_grad()
|
||||
feature_search = model(input_search)
|
||||
arch_loss, arch_accuracy = get_loss(feature_search, target_search, n_way)
|
||||
arch_loss.backward()
|
||||
arch_optimizer.step()
|
||||
|
||||
# update the parameters
|
||||
base_optimizer.zero_grad()
|
||||
feature_model = model(inputs)
|
||||
model_loss, model_accuracy = get_loss(feature_model, targets, n_way)
|
||||
|
||||
model_loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
|
||||
base_optimizer.step()
|
||||
|
||||
objs.update(model_loss.item() , batch)
|
||||
accuracies.update(model_accuracy.item(), batch)
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if step % args.print_freq == 0 or (step+1) == len(train_queue):
|
||||
Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
|
||||
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
|
||||
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f})'.format(loss=objs, top1=accuracies)
|
||||
Istr = 'I : {:}'.format( list(inputs.size()) )
|
||||
print_log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Istr, log)
|
||||
|
||||
return accuracies.avg, objs.avg, batch_time.sum
|
||||
|
||||
|
||||
|
||||
def infer(valid_queue, model, epoch, n_way, log):
|
||||
objs, accuracies = AverageMeter(), AverageMeter()
|
||||
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
for step, (inputs, targets) in enumerate(valid_queue):
|
||||
batch, C, H, W = inputs.size()
|
||||
#targets = targets.cuda(non_blocking=True)
|
||||
|
||||
features = model(inputs)
|
||||
loss, accuracy = get_loss(features, targets, n_way)
|
||||
|
||||
objs.update(loss.item() , batch)
|
||||
accuracies.update(accuracy.item(), batch)
|
||||
|
||||
if step % (args.print_freq*4) == 0 or (step+1) == len(valid_queue):
|
||||
Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
|
||||
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f})'.format(loss=objs, top1=accuracies)
|
||||
print_log(Sstr + ' ' + Lstr, log)
|
||||
|
||||
return accuracies.avg, objs.avg
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
96
exps-cnn/train_base.py
Normal file
@ -0,0 +1,96 @@
|
||||
import os, sys, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision.datasets as dset
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torchvision.transforms as transforms
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from utils import AverageMeter, time_string, convert_secs2time
|
||||
from utils import print_log, obtain_accuracy
|
||||
from utils import Cutout, count_parameters_in_MB
|
||||
from nas import DARTS_V1, DARTS_V2, NASNet, PNASNet, AmoebaNet, ENASNet
|
||||
from nas import DMS_V1, DMS_F1, GDAS_CC
|
||||
from meta_nas import META_V1, META_V2
|
||||
from train_utils import main_procedure
|
||||
from train_utils_imagenet import main_procedure_imagenet
|
||||
from scheduler import load_config
|
||||
|
||||
models = {'DARTS_V1': DARTS_V1,
|
||||
'DARTS_V2': DARTS_V2,
|
||||
'NASNet' : NASNet,
|
||||
'PNASNet' : PNASNet,
|
||||
'ENASNet' : ENASNet,
|
||||
'DMS_V1' : DMS_V1,
|
||||
'DMS_F1' : DMS_F1,
|
||||
'GDAS_CC' : GDAS_CC,
|
||||
'META_V1' : META_V1,
|
||||
'META_V2' : META_V2,
|
||||
'AmoebaNet' : AmoebaNet}
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser("cifar")
|
||||
parser.add_argument('--data_path', type=str, help='Path to dataset')
|
||||
parser.add_argument('--dataset', type=str, choices=['imagenet', 'cifar10', 'cifar100'], help='Choose between CIFAR-10/100 and ImageNet.')
|
||||
parser.add_argument('--arch', type=str, choices=models.keys(), help='the searched model.')
|
||||
#
|
||||
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
|
||||
parser.add_argument('--model_config', type=str , help='the model configuration')
|
||||
parser.add_argument('--init_channels', type=int , help='the initial number of channels')
|
||||
parser.add_argument('--layers', type=int , help='the number of layers.')
|
||||
|
||||
# log
|
||||
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
|
||||
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
|
||||
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
|
||||
parser.add_argument('--manualSeed', type=int, help='manual seed')
|
||||
args = parser.parse_args()
|
||||
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
if args.manualSeed is None:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
cudnn.enabled = True
|
||||
torch.manual_seed(args.manualSeed)
|
||||
torch.cuda.manual_seed_all(args.manualSeed)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
|
||||
print_log('save path : {}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
print_log("Random Seed: {}".format(args.manualSeed), log)
|
||||
print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
|
||||
print_log("Torch version : {}".format(torch.__version__), log)
|
||||
print_log("CUDA version : {}".format(torch.version.cuda), log)
|
||||
print_log("cuDNN version : {}".format(cudnn.version()), log)
|
||||
print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
|
||||
args.dataset = args.dataset.lower()
|
||||
|
||||
config = load_config(args.model_config)
|
||||
genotype = models[args.arch]
|
||||
print_log('configuration : {:}'.format(config), log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
# clear GPU cache
|
||||
torch.cuda.empty_cache()
|
||||
if args.dataset == 'imagenet':
|
||||
main_procedure_imagenet(config, args.data_path, args, genotype, args.init_channels, args.layers, log)
|
||||
else:
|
||||
main_procedure(config, args.dataset, args.data_path, args, genotype, args.init_channels, args.layers, log)
|
||||
log.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
312
exps-cnn/train_search.py
Normal file
@ -0,0 +1,312 @@
|
||||
import os, sys, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torchvision.datasets as dset
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torchvision.transforms as transforms
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from utils import AverageMeter, time_string, convert_secs2time
|
||||
from utils import print_log, obtain_accuracy
|
||||
from utils import Cutout, count_parameters_in_MB
|
||||
from datasets import TieredImageNet
|
||||
from nas import return_alphas_str, Network, NetworkV1, NetworkF1
|
||||
from train_utils import main_procedure
|
||||
from scheduler import load_config
|
||||
|
||||
Networks = {'base': Network, 'share': NetworkV1, 'fix': NetworkF1}
|
||||
|
||||
parser = argparse.ArgumentParser("CNN")
|
||||
parser.add_argument('--data_path', type=str, help='Path to dataset')
|
||||
parser.add_argument('--dataset', type=str, choices=['cifar10', 'cifar100', 'tiered'], help='Choose between CIFAR-10/100 and TieredImageNet.')
|
||||
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='Choose networks.')
|
||||
parser.add_argument('--batch_size', type=int, help='the batch size')
|
||||
parser.add_argument('--learning_rate_max', type=float, help='initial learning rate')
|
||||
parser.add_argument('--learning_rate_min', type=float, help='minimum learning rate')
|
||||
parser.add_argument('--momentum', type=float, help='momentum')
|
||||
parser.add_argument('--weight_decay', type=float, help='weight decay')
|
||||
parser.add_argument('--epochs', type=int, help='num of training epochs')
|
||||
# architecture learning rate
|
||||
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
|
||||
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
|
||||
#
|
||||
parser.add_argument('--init_channels', type=int, help='num of init channels')
|
||||
parser.add_argument('--layers', type=int, help='total number of layers')
|
||||
#
|
||||
parser.add_argument('--cutout', type=int, help='cutout length, negative means no cutout')
|
||||
parser.add_argument('--grad_clip', type=float, help='gradient clipping')
|
||||
parser.add_argument('--model_config', type=str , help='the model configuration')
|
||||
|
||||
# resume
|
||||
parser.add_argument('--resume', type=str , help='the resume path')
|
||||
parser.add_argument('--only_base',action='store_true', default=False, help='only train the searched model')
|
||||
# split data
|
||||
parser.add_argument('--validate', action='store_true', default=False, help='split the training data into train/val or not')
|
||||
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
|
||||
# log
|
||||
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
|
||||
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
|
||||
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
|
||||
parser.add_argument('--manualSeed', type=int, help='manual seed')
|
||||
args = parser.parse_args()
|
||||
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
if args.manualSeed is None:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
cudnn.enabled = True
|
||||
torch.manual_seed(args.manualSeed)
|
||||
torch.cuda.manual_seed_all(args.manualSeed)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
|
||||
print_log('save path : {}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
print_log("Random Seed: {}".format(args.manualSeed), log)
|
||||
print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
|
||||
print_log("Torch version : {}".format(torch.__version__), log)
|
||||
print_log("CUDA version : {}".format(torch.version.cuda), log)
|
||||
print_log("cuDNN version : {}".format(cudnn.version()), log)
|
||||
print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
|
||||
args.dataset = args.dataset.lower()
|
||||
|
||||
# Mean + Std
|
||||
if args.dataset == 'cifar10':
|
||||
mean = [x / 255 for x in [125.3, 123.0, 113.9]]
|
||||
std = [x / 255 for x in [63.0, 62.1, 66.7]]
|
||||
elif args.dataset == 'cifar100':
|
||||
mean = [x / 255 for x in [129.3, 124.1, 112.4]]
|
||||
std = [x / 255 for x in [68.2, 65.4, 70.4]]
|
||||
elif args.dataset == 'tiered':
|
||||
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
|
||||
else:
|
||||
raise TypeError("Unknow dataset : {:}".format(args.dataset))
|
||||
# Data Augmentation
|
||||
if args.dataset == 'cifar10' or args.dataset == 'cifar100':
|
||||
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
|
||||
transforms.Normalize(mean, std)]
|
||||
if args.cutout > 0 : lists += [Cutout(args.cutout)]
|
||||
train_transform = transforms.Compose(lists)
|
||||
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
|
||||
elif args.dataset == 'tiered':
|
||||
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
|
||||
if args.cutout > 0 : lists += [Cutout(args.cutout)]
|
||||
train_transform = transforms.Compose(lists)
|
||||
test_transform = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(mean, std)])
|
||||
else:
|
||||
raise TypeError("Unknow dataset : {:}".format(args.dataset))
|
||||
# Datasets
|
||||
if args.dataset == 'cifar10':
|
||||
train_data = dset.CIFAR10(args.data_path, train= True, transform=train_transform, download=True)
|
||||
test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform , download=True)
|
||||
num_classes, head = 10, 'cifar'
|
||||
elif args.dataset == 'cifar100':
|
||||
train_data = dset.CIFAR100(args.data_path, train= True, transform=train_transform, download=True)
|
||||
test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform , download=True)
|
||||
num_classes, head = 100, 'cifar'
|
||||
elif args.dataset == 'tiered':
|
||||
train_data = TieredImageNet(args.data_path, 'train-val', train_transform)
|
||||
test_data = None
|
||||
num_classes, head = train_data.n_classes, 'imagenet'
|
||||
else:
|
||||
raise TypeError("Unknow dataset : {:}".format(args.dataset))
|
||||
# Data Loader
|
||||
if args.validate:
|
||||
indices = list(range(len(train_data)))
|
||||
split = int(args.train_portion * len(indices))
|
||||
random.shuffle(indices)
|
||||
train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
|
||||
sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
|
||||
pin_memory=True, num_workers=args.workers)
|
||||
test_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
|
||||
sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
|
||||
pin_memory=True, num_workers=args.workers)
|
||||
else:
|
||||
train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
|
||||
test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
|
||||
|
||||
# network and criterion
|
||||
criterion = torch.nn.CrossEntropyLoss().cuda()
|
||||
basemodel = Networks[args.arch](args.init_channels, num_classes, args.layers, head=head)
|
||||
model = torch.nn.DataParallel(basemodel).cuda()
|
||||
print_log("Network : {:}".format(model), log)
|
||||
print_log("Parameter size = {:.3f} MB".format(count_parameters_in_MB(basemodel.base_parameters())), log)
|
||||
print_log("Train-transformation : {:}\nTest--transformation : {:}\nClass number : {:}".format(train_transform, test_transform, num_classes), log)
|
||||
|
||||
# optimizer and LR-scheduler
|
||||
base_optimizer = torch.optim.SGD (basemodel.base_parameters(), args.learning_rate_max, momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(base_optimizer, float(args.epochs), eta_min=args.learning_rate_min)
|
||||
arch_optimizer = torch.optim.Adam(basemodel.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
|
||||
|
||||
# snapshot
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
|
||||
if args.resume is not None and os.path.isfile(args.resume):
|
||||
checkpoint = torch.load(args.resume)
|
||||
start_epoch = checkpoint['epoch']
|
||||
basemodel.load_state_dict( checkpoint['state_dict'] )
|
||||
base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
|
||||
arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
|
||||
base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
|
||||
genotypes = checkpoint['genotypes']
|
||||
print_log('Load resume from {:} with start-epoch = {:}'.format(args.resume, start_epoch), log)
|
||||
elif os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
start_epoch = checkpoint['epoch']
|
||||
basemodel.load_state_dict( checkpoint['state_dict'] )
|
||||
base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
|
||||
arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
|
||||
base_scheduler.load_state_dict( checkpoint['base_scheduler'] )
|
||||
genotypes = checkpoint['genotypes']
|
||||
print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
|
||||
else:
|
||||
start_epoch, genotypes = 0, {}
|
||||
print_log('Train model-search from scratch.', log)
|
||||
|
||||
config = load_config(args.model_config)
|
||||
|
||||
if args.only_base:
|
||||
print_log('---- Only Train the Searched Model ----', log)
|
||||
main_procedure(config, args.dataset, args.data_path, args, basemodel.genotype(), 36, 20, log)
|
||||
return
|
||||
|
||||
# Main loop
|
||||
start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
|
||||
for epoch in range(start_epoch, args.epochs):
|
||||
base_scheduler.step()
|
||||
|
||||
need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
|
||||
print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f} ~ {:6.4f}] [Batch={:d}]'.format(time_string(), epoch, args.epochs, need_time, min(base_scheduler.get_lr()), max(base_scheduler.get_lr()), args.batch_size), log)
|
||||
|
||||
genotype = basemodel.genotype()
|
||||
print_log('genotype = {:}'.format(genotype), log)
|
||||
|
||||
print_log('{:03d}/{:03d} alphas :\n{:}'.format(epoch, args.epochs, return_alphas_str(basemodel)), log)
|
||||
|
||||
# training
|
||||
train_acc1, train_acc5, train_obj, train_time \
|
||||
= train(train_loader, test_loader, model, criterion, base_optimizer, arch_optimizer, epoch, log)
|
||||
total_train_time += train_time
|
||||
# validation
|
||||
valid_acc1, valid_acc5, valid_obj = infer(test_loader, model, criterion, epoch, log)
|
||||
print_log('Base-Search : {:03d}/{:03d} : Train-Acc={:.3f}, Test-Acc={:.3f}'.format(epoch, args.epochs, train_acc1, valid_acc1), log)
|
||||
# save genotype
|
||||
genotypes[epoch] = basemodel.genotype()
|
||||
# save checkpoint
|
||||
torch.save({'epoch' : epoch + 1,
|
||||
'args' : deepcopy(args),
|
||||
'state_dict': basemodel.state_dict(),
|
||||
'genotypes' : genotypes,
|
||||
'base_optimizer' : base_optimizer.state_dict(),
|
||||
'arch_optimizer' : arch_optimizer.state_dict(),
|
||||
'base_scheduler' : base_scheduler.state_dict()},
|
||||
checkpoint_path)
|
||||
print_log('----> Save into {:}'.format(checkpoint_path), log)
|
||||
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)
|
||||
|
||||
# clear GPU cache
|
||||
torch.cuda.empty_cache()
|
||||
main_procedure(config, 'cifar10', os.environ['TORCH_HOME'] + '/cifar.python', args, basemodel.genotype(), 36, 20, log)
|
||||
log.close()
|
||||
|
||||
|
||||
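# One search epoch of the bi-level scheme: update the architecture parameters on a validation batch, then
# update the network weights on a training batch, with gradient clipping applied to the base parameters.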
def train(train_queue, valid_queue, model, criterion, base_optimizer, arch_optimizer, epoch, log):
|
||||
data_time, batch_time = AverageMeter(), AverageMeter()
|
||||
objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
|
||||
model.train()
|
||||
|
||||
valid_iter = iter(valid_queue)
|
||||
end = time.time()
|
||||
for step, (inputs, targets) in enumerate(train_queue):
|
||||
batch, C, H, W = inputs.size()
|
||||
|
||||
#inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
|
||||
targets = targets.cuda(non_blocking=True)
|
||||
data_time.update(time.time() - end)
|
||||
|
||||
# get a random minibatch from the search queue with replacement
|
||||
try:
|
||||
input_search, target_search = next(valid_iter)
|
||||
except StopIteration:
|
||||
valid_iter = iter(valid_queue)
|
||||
input_search, target_search = next(valid_iter)
|
||||
|
||||
target_search = target_search.cuda(non_blocking=True)
|
||||
|
||||
# update the architecture
|
||||
arch_optimizer.zero_grad()
|
||||
output_search = model(input_search)
|
||||
arch_loss = criterion(output_search, target_search)
|
||||
arch_loss.backward()
|
||||
arch_optimizer.step()
|
||||
|
||||
# update the parameters
|
||||
base_optimizer.zero_grad()
|
||||
logits = model(inputs)
|
||||
loss = criterion(logits, targets)
|
||||
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(model.module.base_parameters(), args.grad_clip)
|
||||
base_optimizer.step()
|
||||
|
||||
prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
|
||||
objs.update(loss.item() , batch)
|
||||
top1.update(prec1.item(), batch)
|
||||
top5.update(prec5.item(), batch)
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if step % args.print_freq == 0 or (step+1) == len(train_queue):
|
||||
Sstr = ' TRAIN-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(train_queue))
|
||||
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
|
||||
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
|
||||
print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)
|
||||
|
||||
return top1.avg, top5.avg, objs.avg, batch_time.sum
|
||||
|
||||
|
||||
def infer(valid_queue, model, criterion, epoch, log):
|
||||
objs, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
|
||||
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
for step, (inputs, targets) in enumerate(valid_queue):
|
||||
batch, C, H, W = inputs.size()
|
||||
targets = targets.cuda(non_blocking=True)
|
||||
|
||||
logits = model(inputs)
|
||||
loss = criterion(logits, targets)
|
||||
|
||||
prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
|
||||
objs.update(loss.item() , batch)
|
||||
top1.update(prec1.item(), batch)
|
||||
top5.update(prec5.item(), batch)
|
||||
|
||||
if step % args.print_freq == 0 or (step+1) == len(valid_queue):
|
||||
Sstr = ' VALID-SEARCH ' + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, step, len(valid_queue))
|
||||
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=objs, top1=top1, top5=top5)
|
||||
print_log(Sstr + ' ' + Lstr, log)
|
||||
|
||||
return top1.avg, top5.avg, objs.avg
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
184
exps-cnn/train_utils.py
Normal file
@ -0,0 +1,184 @@
|
||||
import os, sys, time
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torchvision.datasets as dset
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
from utils import print_log, obtain_accuracy, AverageMeter
|
||||
from utils import time_string, convert_secs2time
|
||||
from utils import count_parameters_in_MB
|
||||
from utils import Cutout
|
||||
from nas import NetworkCIFAR as Network
|
||||
|
||||
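# Return the best (top-1, top-5) pair recorded so far; sorting the tuples ranks primarily by top-1 accuracy.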
def obtain_best(accuracies):
|
||||
if len(accuracies) == 0: return (0, 0)
|
||||
tops = [value for key, value in accuracies.items()]
|
||||
s2b = sorted( tops )
|
||||
return s2b[-1]
|
||||
|
||||
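# Train the discovered genotype from scratch on CIFAR-10/100: build transforms and loaders, instantiate
# NetworkCIFAR, run SGD with a cosine schedule, and checkpoint the model every epoch.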
def main_procedure(config, dataset, data_path, args, genotype, init_channels, layers, log):
|
||||
|
||||
# Mean + Std
|
||||
if dataset == 'cifar10':
|
||||
mean = [x / 255 for x in [125.3, 123.0, 113.9]]
|
||||
std = [x / 255 for x in [63.0, 62.1, 66.7]]
|
||||
elif dataset == 'cifar100':
|
||||
mean = [x / 255 for x in [129.3, 124.1, 112.4]]
|
||||
std = [x / 255 for x in [68.2, 65.4, 70.4]]
|
||||
else:
|
||||
raise TypeError("Unknow dataset : {:}".format(dataset))
|
||||
# Dataset Transformation
|
||||
if dataset == 'cifar10' or dataset == 'cifar100':
|
||||
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
|
||||
transforms.Normalize(mean, std)]
|
||||
if config.cutout > 0 : lists += [Cutout(config.cutout)]
|
||||
train_transform = transforms.Compose(lists)
|
||||
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
|
||||
else:
|
||||
raise TypeError("Unknow dataset : {:}".format(dataset))
|
||||
# Dataset Definition
|
||||
if dataset == 'cifar10':
|
||||
train_data = dset.CIFAR10(data_path, train= True, transform=train_transform, download=True)
|
||||
test_data = dset.CIFAR10(data_path, train=False, transform=test_transform , download=True)
|
||||
class_num = 10
|
||||
elif dataset == 'cifar100':
|
||||
train_data = dset.CIFAR100(data_path, train= True, transform=train_transform, download=True)
|
||||
test_data = dset.CIFAR100(data_path, train=False, transform=test_transform , download=True)
|
||||
class_num = 100
|
||||
else:
|
||||
raise TypeError("Unknow dataset : {:}".format(dataset))
|
||||
|
||||
|
||||
print_log('-------------------------------------- main-procedure', log)
|
||||
print_log('config : {:}'.format(config), log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
print_log('init_channels : {:}'.format(init_channels), log)
|
||||
print_log('layers : {:}'.format(layers), log)
|
||||
print_log('class_num : {:}'.format(class_num), log)
|
||||
basemodel = Network(init_channels, class_num, layers, config.auxiliary, genotype)
|
||||
model = torch.nn.DataParallel(basemodel).cuda()
|
||||
|
||||
total_param, aux_param = count_parameters_in_MB(basemodel), count_parameters_in_MB(basemodel.auxiliary_param())
|
||||
print_log('Network =>\n{:}'.format(basemodel), log)
|
||||
print_log('Parameters : {:} - {:} = {:.3f} MB'.format(total_param, aux_param, total_param - aux_param), log)
|
||||
print_log('config : {:}'.format(config), log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
print_log('args : {:}'.format(args), log)
|
||||
print_log('Train-Dataset : {:}'.format(train_data), log)
|
||||
print_log('Train-Trans : {:}'.format(train_transform), log)
|
||||
print_log('Test--Dataset : {:}'.format(test_data ), log)
|
||||
print_log('Test--Trans : {:}'.format(test_transform ), log)
|
||||
|
||||
|
||||
train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, shuffle=True,
|
||||
num_workers=args.workers, pin_memory=True)
|
||||
test_loader = torch.utils.data.DataLoader(test_data , batch_size=config.batch_size, shuffle=False,
|
||||
num_workers=args.workers, pin_memory=True)
|
||||
|
||||
criterion = torch.nn.CrossEntropyLoss().cuda()
|
||||
|
||||
optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay)
|
||||
#optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay, nesterov=True)
|
||||
if config.type == 'cosine':
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(config.epochs))
|
||||
else:
|
||||
raise ValueError('Cannot find the scheduler type : {:}'.format(config.type))
|
||||
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-{:}-model.pth'.format(dataset))
|
||||
if os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load( checkpoint_path )
|
||||
|
||||
start_epoch = checkpoint['epoch']
|
||||
basemodel.load_state_dict(checkpoint['state_dict'])
|
||||
optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
scheduler.load_state_dict(checkpoint['scheduler'])
|
||||
accuracies = checkpoint['accuracies']
|
||||
print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
|
||||
else:
|
||||
start_epoch, accuracies = 0, {}
|
||||
print_log('Train model from scratch without pre-trained model or snapshot', log)
|
||||
|
||||
|
||||
# Main loop
|
||||
start_time, epoch_time = time.time(), AverageMeter()
|
||||
for epoch in range(start_epoch, config.epochs):
|
||||
scheduler.step()
|
||||
|
||||
need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
|
||||
print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} LR={:6.4f} ~ {:6.4f}, Batch={:d}".format(time_string(), epoch, config.epochs, need_time, min(scheduler.get_lr()), max(scheduler.get_lr()), config.batch_size), log)
|
||||
|
||||
basemodel.update_drop_path(config.drop_path_prob * epoch / config.epochs)
|
||||
|
||||
train_acc1, train_acc5, train_los = _train(train_loader, model, criterion, optimizer, 'train', epoch, config, args.print_freq, log)
|
||||
|
||||
with torch.no_grad():
|
||||
valid_acc1, valid_acc5, valid_los = _train(test_loader, model, criterion, optimizer, 'test', epoch, config, args.print_freq, log)
|
||||
accuracies[epoch] = (valid_acc1, valid_acc5)
|
||||
|
||||
torch.save({'epoch' : epoch + 1,
|
||||
'args' : deepcopy(args),
|
||||
'state_dict': basemodel.state_dict(),
|
||||
'optimizer' : optimizer.state_dict(),
|
||||
'scheduler' : scheduler.state_dict(),
|
||||
'accuracies': accuracies},
|
||||
checkpoint_path)
|
||||
best_acc = obtain_best( accuracies )
|
||||
print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
|
||||
print_log('----> Save into {:}'.format(checkpoint_path), log)
|
||||
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
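# Shared train/eval loop: in 'train' mode it back-propagates (with optional auxiliary head and gradient
# clipping); in 'test' mode it only evaluates. Returns top-1/top-5 accuracy and the average loss.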
def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):
|
||||
data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
|
||||
if mode == 'train':
|
||||
model.train()
|
||||
elif mode == 'test':
|
||||
model.eval()
|
||||
else: raise ValueError("The mode is not right : {:}".format(mode))
|
||||
|
||||
end = time.time()
|
||||
for i, (inputs, targets) in enumerate(xloader):
|
||||
# measure data loading time
|
||||
data_time.update(time.time() - end)
|
||||
# calculate prediction and loss
|
||||
targets = targets.cuda(non_blocking=True)
|
||||
|
||||
if mode == 'train': optimizer.zero_grad()
|
||||
|
||||
if config.auxiliary and model.training:
|
||||
logits, logits_aux = model(inputs)
|
||||
else:
|
||||
logits = model(inputs)
|
||||
|
||||
loss = criterion(logits, targets)
|
||||
if config.auxiliary and model.training:
|
||||
loss_aux = criterion(logits_aux, targets)
|
||||
loss += config.auxiliary_weight * loss_aux
|
||||
|
||||
if mode == 'train':
|
||||
loss.backward()
|
||||
if config.grad_clip > 0:
|
||||
torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
|
||||
optimizer.step()
|
||||
# record
|
||||
prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
|
||||
losses.update(loss.item(), inputs.size(0))
|
||||
top1.update (prec1.item(), inputs.size(0))
|
||||
top5.update (prec5.item(), inputs.size(0))
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if i % print_freq == 0 or (i+1) == len(xloader):
|
||||
Sstr = ' {:5s}'.format(mode) + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, i, len(xloader))
|
||||
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
|
||||
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
|
||||
print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)
|
||||
|
||||
print_log ('{TIME:} **{mode:}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(TIME=time_string(), mode=mode, top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg), log)
|
||||
return top1.avg, top5.avg, losses.avg
|
207
exps-cnn/train_utils_imagenet.py
Normal file
@ -0,0 +1,207 @@
|
||||
import os, sys, time
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torchvision.datasets as dset
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
from utils import print_log, obtain_accuracy, AverageMeter
|
||||
from utils import time_string, convert_secs2time
|
||||
from utils import count_parameters_in_MB
|
||||
from utils import print_FLOPs
|
||||
from utils import Cutout
|
||||
from nas import NetworkImageNet as Network
|
||||
|
||||
|
||||
def obtain_best(accuracies):
|
||||
if len(accuracies) == 0: return (0, 0)
|
||||
tops = [value for key, value in accuracies.items()]
|
||||
s2b = sorted( tops )
|
||||
return s2b[-1]
|
||||
|
||||
|
||||
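# Cross-entropy with label smoothing: the one-hot target is mixed with a uniform distribution, keeping
# 1 - epsilon on the true class and spreading epsilon evenly over all classes.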
class CrossEntropyLabelSmooth(nn.Module):
|
||||
|
||||
def __init__(self, num_classes, epsilon):
|
||||
super(CrossEntropyLabelSmooth, self).__init__()
|
||||
self.num_classes = num_classes
|
||||
self.epsilon = epsilon
|
||||
self.logsoftmax = nn.LogSoftmax(dim=1)
|
||||
|
||||
def forward(self, inputs, targets):
|
||||
log_probs = self.logsoftmax(inputs)
|
||||
targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
|
||||
targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
|
||||
loss = (-targets * log_probs).mean(0).sum()
|
||||
return loss
|
||||
|
||||
|
||||
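# Train the discovered genotype on ImageNet: standard ImageFolder pipelines, label-smoothed loss for
# training, plain cross-entropy for evaluation, and a cosine or step LR schedule from the config.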
def main_procedure_imagenet(config, data_path, args, genotype, init_channels, layers, log):
|
||||
|
||||
# training data and testing data
|
||||
traindir = os.path.join(data_path, 'train')
|
||||
validdir = os.path.join(data_path, 'val')
|
||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
||||
train_data = dset.ImageFolder(
|
||||
traindir,
|
||||
transforms.Compose([
|
||||
transforms.RandomResizedCrop(224),
|
||||
transforms.RandomHorizontalFlip(),
|
||||
transforms.ColorJitter(
|
||||
brightness=0.4,
|
||||
contrast=0.4,
|
||||
saturation=0.4,
|
||||
hue=0.2),
|
||||
transforms.ToTensor(),
|
||||
normalize,
|
||||
]))
|
||||
valid_data = dset.ImageFolder(
|
||||
validdir,
|
||||
transforms.Compose([
|
||||
transforms.Resize(256),
|
||||
transforms.CenterCrop(224),
|
||||
transforms.ToTensor(),
|
||||
normalize,
|
||||
]))
|
||||
|
||||
train_queue = torch.utils.data.DataLoader(
|
||||
train_data, batch_size=config.batch_size, shuffle= True, pin_memory=True, num_workers=args.workers)
|
||||
|
||||
valid_queue = torch.utils.data.DataLoader(
|
||||
valid_data, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers)
|
||||
|
||||
class_num = 1000
|
||||
|
||||
|
||||
print_log('-------------------------------------- main-procedure', log)
|
||||
print_log('config : {:}'.format(config), log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
print_log('init_channels : {:}'.format(init_channels), log)
|
||||
print_log('layers : {:}'.format(layers), log)
|
||||
print_log('class_num : {:}'.format(class_num), log)
|
||||
basemodel = Network(init_channels, class_num, layers, config.auxiliary, genotype)
|
||||
model = torch.nn.DataParallel(basemodel).cuda()
|
||||
|
||||
total_param, aux_param = count_parameters_in_MB(basemodel), count_parameters_in_MB(basemodel.auxiliary_param())
|
||||
print_log('Network =>\n{:}'.format(basemodel), log)
|
||||
#print_FLOPs(basemodel, (1,3,224,224), [print_log, log])
|
||||
print_log('Parameters : {:} - {:} = {:.3f} MB'.format(total_param, aux_param, total_param - aux_param), log)
|
||||
print_log('config : {:}'.format(config), log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
print_log('Train-Dataset : {:}'.format(train_data), log)
|
||||
print_log('Valid--Dataset : {:}'.format(valid_data), log)
|
||||
print_log('Args : {:}'.format(args), log)
|
||||
|
||||
|
||||
criterion = torch.nn.CrossEntropyLoss().cuda()
|
||||
criterion_smooth = CrossEntropyLabelSmooth(class_num, config.label_smooth).cuda()
|
||||
|
||||
|
||||
optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay)
|
||||
#optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay, nesterov=True)
|
||||
if config.type == 'cosine':
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(config.epochs))
|
||||
elif config.type == 'steplr':
|
||||
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, config.decay_period, gamma=config.gamma)
|
||||
else:
|
||||
raise ValueError('Cannot find the scheduler type : {:}'.format(config.type))
|
||||
|
||||
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-imagenet-model.pth')
|
||||
if os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load( checkpoint_path )
|
||||
|
||||
start_epoch = checkpoint['epoch']
|
||||
basemodel.load_state_dict(checkpoint['state_dict'])
|
||||
optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
scheduler.load_state_dict(checkpoint['scheduler'])
|
||||
accuracies = checkpoint['accuracies']
|
||||
print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
|
||||
else:
|
||||
start_epoch, accuracies = 0, {}
|
||||
print_log('Train model from scratch without pre-trained model or snapshot', log)
|
||||
|
||||
|
||||
# Main loop
|
||||
start_time, epoch_time = time.time(), AverageMeter()
|
||||
for epoch in range(start_epoch, config.epochs):
|
||||
scheduler.step()
|
||||
|
||||
need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
|
||||
print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} LR={:6.4f} ~ {:6.4f}, Batch={:d}".format(time_string(), epoch, config.epochs, need_time, min(scheduler.get_lr()), max(scheduler.get_lr()), config.batch_size), log)
|
||||
|
||||
basemodel.update_drop_path(config.drop_path_prob * epoch / config.epochs)
|
||||
|
||||
train_acc1, train_acc5, train_los = _train(train_queue, model, criterion_smooth, optimizer, 'train', epoch, config, args.print_freq, log)
|
||||
|
||||
with torch.no_grad():
|
||||
valid_acc1, valid_acc5, valid_los = _train(valid_queue, model, criterion, None, 'test' , epoch, config, args.print_freq, log)
|
||||
accuracies[epoch] = (valid_acc1, valid_acc5)
|
||||
|
||||
torch.save({'epoch' : epoch + 1,
|
||||
'args' : deepcopy(args),
|
||||
'state_dict': basemodel.state_dict(),
|
||||
'optimizer' : optimizer.state_dict(),
|
||||
'scheduler' : scheduler.state_dict(),
|
||||
'accuracies': accuracies},
|
||||
checkpoint_path)
|
||||
best_acc = obtain_best( accuracies )
|
||||
print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
|
||||
print_log('----> Save into {:}'.format(checkpoint_path), log)
|
||||
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
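# Same shared train/eval loop as in train_utils.py, reused here for the ImageNet setting.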
def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):
|
||||
data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
|
||||
if mode == 'train':
|
||||
model.train()
|
||||
elif mode == 'test':
|
||||
model.eval()
|
||||
else: raise ValueError("The mode is not right : {:}".format(mode))
|
||||
|
||||
end = time.time()
|
||||
for i, (inputs, targets) in enumerate(xloader):
|
||||
# measure data loading time
|
||||
data_time.update(time.time() - end)
|
||||
# calculate prediction and loss
|
||||
targets = targets.cuda(non_blocking=True)
|
||||
|
||||
if mode == 'train': optimizer.zero_grad()
|
||||
|
||||
if config.auxiliary and model.training:
|
||||
logits, logits_aux = model(inputs)
|
||||
else:
|
||||
logits = model(inputs)
|
||||
|
||||
loss = criterion(logits, targets)
|
||||
if config.auxiliary and model.training:
|
||||
loss_aux = criterion(logits_aux, targets)
|
||||
loss += config.auxiliary_weight * loss_aux
|
||||
|
||||
if mode == 'train':
|
||||
loss.backward()
|
||||
if config.grad_clip > 0:
|
||||
torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
|
||||
optimizer.step()
|
||||
# record
|
||||
prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
|
||||
losses.update(loss.item(), inputs.size(0))
|
||||
top1.update (prec1.item(), inputs.size(0))
|
||||
top5.update (prec5.item(), inputs.size(0))
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if i % print_freq == 0 or (i+1) == len(xloader):
|
||||
Sstr = ' {:5s}'.format(mode) + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, i, len(xloader))
|
||||
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
|
||||
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
|
||||
print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)
|
||||
|
||||
print_log ('{TIME:} **{mode:}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(TIME=time_string(), mode=mode, top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg), log)
|
||||
return top1.avg, top5.avg, losses.avg
|
69
exps-cnn/vis-arch.py
Normal file
@ -0,0 +1,69 @@
|
||||
import os, sys, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from graphviz import Digraph
|
||||
|
||||
parser = argparse.ArgumentParser("Visualize the Networks")
|
||||
parser.add_argument('--checkpoint', type=str, help='The path to the checkpoint.')
|
||||
parser.add_argument('--save_dir', type=str, help='The directory to save the network plot.')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def plot(genotype, filename):
|
||||
g = Digraph(
|
||||
format='pdf',
|
||||
edge_attr=dict(fontsize='20', fontname="times"),
|
||||
node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
|
||||
engine='dot')
|
||||
g.body.extend(['rankdir=LR'])
|
||||
|
||||
g.node("c_{k-2}", fillcolor='darkseagreen2')
|
||||
g.node("c_{k-1}", fillcolor='darkseagreen2')
|
||||
assert len(genotype) % 2 == 0
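# each intermediate node receives exactly two input edges, so the genotype stores two entries per node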
|
||||
steps = len(genotype) // 2
|
||||
|
||||
for i in range(steps):
|
||||
g.node(str(i), fillcolor='lightblue')
|
||||
|
||||
for i in range(steps):
|
||||
for k in [2*i, 2*i + 1]:
|
||||
op, j, weight = genotype[k]
|
||||
if j == 0:
|
||||
u = "c_{k-2}"
|
||||
elif j == 1:
|
||||
u = "c_{k-1}"
|
||||
else:
|
||||
u = str(j-2)
|
||||
v = str(i)
|
||||
g.edge(u, v, label=op, fillcolor="gray")
|
||||
|
||||
g.node("c_{k}", fillcolor='palegoldenrod')
|
||||
for i in range(steps):
|
||||
g.edge(str(i), "c_{k}", fillcolor="gray")
|
||||
|
||||
g.render(filename, view=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
checkpoint = args.checkpoint
|
||||
assert os.path.isfile(checkpoint), 'Invalid path for checkpoint : {:}'.format(checkpoint)
|
||||
checkpoint = torch.load( checkpoint, map_location='cpu' )
|
||||
genotypes = checkpoint['genotypes']
|
||||
save_dir = Path(args.save_dir)
|
||||
subs = ['normal', 'reduce']
|
||||
for sub in subs:
|
||||
if not (save_dir / sub).exists():
|
||||
(save_dir / sub).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for key, network in genotypes.items():
|
||||
save_path = str(save_dir / 'normal' / 'epoch-{:03d}'.format( int(key) ))
|
||||
print('save into {:}'.format(save_path))
|
||||
plot(network.normal, save_path)
|
||||
|
||||
save_path = str(save_dir / 'reduce' / 'epoch-{:03d}'.format( int(key) ))
|
||||
print('save into {:}'.format(save_path))
|
||||
plot(network.reduce, save_path)
|
276
exps-rnn/acc_rnn_search.py
Normal file
@ -0,0 +1,276 @@
|
||||
import os, gc, sys, math, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision.datasets as dset
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torchvision.transforms as transforms
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from utils import AverageMeter, time_string, convert_secs2time
|
||||
from utils import print_log, obtain_accuracy
|
||||
from utils import count_parameters_in_MB
|
||||
from datasets import Corpus
|
||||
from nas_rnn import batchify, get_batch, repackage_hidden
|
||||
from nas_rnn import DARTSCellSearch, RNNModelSearch
|
||||
from train_rnn_utils import main_procedure
|
||||
from scheduler import load_config
|
||||
|
||||
parser = argparse.ArgumentParser("RNN")
|
||||
parser.add_argument('--data_path', type=str, help='Path to dataset')
|
||||
parser.add_argument('--emsize', type=int, default=300, help='size of word embeddings')
|
||||
parser.add_argument('--nhid', type=int, default=300, help='number of hidden units per layer')
|
||||
parser.add_argument('--nhidlast', type=int, default=300, help='number of hidden units for the last rnn layer')
|
||||
parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
|
||||
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
|
||||
parser.add_argument('--batch_size', type=int, default=256, help='the batch size')
|
||||
parser.add_argument('--eval_batch_size', type=int, default=10, help='the evaluation batch size')
|
||||
parser.add_argument('--bptt', type=int, default=35, help='the sequence length')
|
||||
# DropOut
|
||||
parser.add_argument('--dropout', type=float, default=0.75, help='dropout applied to layers (0 = no dropout)')
|
||||
parser.add_argument('--dropouth', type=float, default=0.25, help='dropout for hidden nodes in rnn layers (0 = no dropout)')
|
||||
parser.add_argument('--dropoutx', type=float, default=0.75, help='dropout for input nodes in rnn layers (0 = no dropout)')
|
||||
parser.add_argument('--dropouti', type=float, default=0.2, help='dropout for input embedding layers (0 = no dropout)')
|
||||
parser.add_argument('--dropoute', type=float, default=0, help='dropout to remove words from embedding layer (0 = no dropout)')
|
||||
# Regularization
|
||||
parser.add_argument('--lr', type=float, default=20, help='initial learning rate')
|
||||
parser.add_argument('--alpha', type=float, default=0, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
|
||||
parser.add_argument('--beta', type=float, default=1e-3, help='beta slowness regularization applied on RNN activation (beta = 0 means no regularization)')
|
||||
parser.add_argument('--wdecay', type=float, default=5e-7, help='weight decay applied to all weights')
|
||||
# architecture learning rate
|
||||
parser.add_argument('--arch_lr', type=float, default=3e-3, help='learning rate for arch encoding')
|
||||
parser.add_argument('--arch_wdecay', type=float, default=1e-3, help='weight decay for arch encoding')
|
||||
parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
|
||||
# acceleration
|
||||
parser.add_argument('--tau_max', type=float, help='initial tau')
|
||||
parser.add_argument('--tau_min', type=float, help='minimum tau')
|
||||
# log
|
||||
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
|
||||
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
|
||||
parser.add_argument('--manualSeed', type=int, help='manual seed')
|
||||
args = parser.parse_args()
|
||||
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
if args.manualSeed is None:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
if args.nhidlast < 0:
|
||||
args.nhidlast = args.emsize
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
cudnn.enabled = True
|
||||
torch.manual_seed(args.manualSeed)
|
||||
torch.cuda.manual_seed_all(args.manualSeed)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
|
||||
print_log('save path : {}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
print_log("Random Seed: {}".format(args.manualSeed), log)
|
||||
print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
|
||||
print_log("Torch version : {}".format(torch.__version__), log)
|
||||
print_log("CUDA version : {}".format(torch.version.cuda), log)
|
||||
print_log("cuDNN version : {}".format(cudnn.version()), log)
|
||||
print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
|
||||
|
||||
# Dataset
|
||||
corpus = Corpus(args.data_path)
|
||||
train_data = batchify(corpus.train, args.batch_size, True)
|
||||
search_data = batchify(corpus.valid, args.batch_size, True)
|
||||
valid_data = batchify(corpus.valid, args.eval_batch_size, True)
|
||||
print_log("Train--Data Size : {:}".format(train_data.size()), log)
|
||||
print_log("Search-Data Size : {:}".format(search_data.size()), log)
|
||||
print_log("Valid--Data Size : {:}".format(valid_data.size()), log)
|
||||
|
||||
ntokens = len(corpus.dictionary)
|
||||
model = RNNModelSearch(ntokens, args.emsize, args.nhid, args.nhidlast,
|
||||
args.dropout, args.dropouth, args.dropoutx, args.dropouti, args.dropoute,
|
||||
DARTSCellSearch, None)
|
||||
model = model.cuda()
|
||||
print_log('model ==>> : {:}'.format(model), log)
|
||||
print_log('Parameter size : {:} MB'.format(count_parameters_in_MB(model)), log)
|
||||
|
||||
base_optimizer = torch.optim.SGD(model.base_parameters(), lr=args.lr, weight_decay=args.wdecay)
|
||||
arch_optimizer = torch.optim.Adam(model.arch_parameters(), lr=args.arch_lr, weight_decay=args.arch_wdecay)
|
||||
|
||||
config = load_config(args.config_path)
|
||||
print_log('Load config from {:} ==>>\n {:}'.format(args.config_path, config), log)
|
||||
|
||||
# snapshot
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
|
||||
if os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
start_epoch = checkpoint['epoch']
|
||||
model.load_state_dict( checkpoint['state_dict'] )
|
||||
base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
|
||||
arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
|
||||
genotypes = checkpoint['genotypes']
|
||||
valid_losses = checkpoint['valid_losses']
|
||||
print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
|
||||
else:
|
||||
start_epoch, genotypes, valid_losses = 0, {}, {-1:1e8}
|
||||
print_log('Train model-search from scratch.', log)
|
||||
|
||||
model.set_gumbel(True, False)
|
||||
|
||||
# Main loop
|
||||
start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
|
||||
for epoch in range(start_epoch, args.epochs):
|
||||
|
||||
model.set_tau( args.tau_max - epoch*1.0/args.epochs*(args.tau_max-args.tau_min) )
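# anneal the Gumbel-softmax temperature linearly from tau_max down to tau_min over the epochs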
|
||||
need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
|
||||
print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} || tau={:}'.format(time_string(), epoch, args.epochs, need_time, model.get_tau()), log)
|
||||
|
||||
# training
|
||||
data_time, train_time = train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log)
|
||||
total_train_time += train_time
|
||||
# evaluation
|
||||
|
||||
# validation
|
||||
valid_loss = infer(model, corpus, valid_data, args.eval_batch_size)
|
||||
# save genotype
|
||||
if valid_loss < min( valid_losses.values() ): is_best = True
|
||||
else : is_best = False
|
||||
print_log('-'*10 + ' [Epoch={:03d}/{:03d}] : is-best={:}, validation-loss={:}, validation-PPL={:}'.format(epoch, args.epochs, is_best, valid_loss, math.exp(valid_loss)), log)
|
||||
print_log('{:}'.format(F.softmax(model.arch_weights, dim=-1)), log)
|
||||
print_log('genotype : {:}'.format(model.genotype()), log)
|
||||
|
||||
valid_losses[epoch] = valid_loss
|
||||
genotypes[epoch] = model.genotype()
|
||||
print_log(' the {:}-th genotype = {:}'.format(epoch, genotypes[epoch]), log)
|
||||
# save checkpoint
|
||||
if is_best:
|
||||
genotypes['best'] = model.genotype()
|
||||
torch.save({'epoch' : epoch + 1,
|
||||
'args' : deepcopy(args),
|
||||
'state_dict': model.state_dict(),
|
||||
'genotypes' : genotypes,
|
||||
'valid_losses' : valid_losses,
|
||||
'base_optimizer' : base_optimizer.state_dict(),
|
||||
'arch_optimizer' : arch_optimizer.state_dict()},
|
||||
checkpoint_path)
|
||||
print_log('----> Save into {:}'.format(checkpoint_path), log)
|
||||
|
||||
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)
|
||||
|
||||
# clear GPU cache
|
||||
torch.cuda.empty_cache()
|
||||
main_procedure(config, genotypes['best'], args.save_path, args.print_freq, log)
|
||||
log.close()
|
||||
|
||||
|
||||
def train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log):
|
||||
|
||||
data_time, batch_time = AverageMeter(), AverageMeter()
|
||||
# Turn on training mode which enables dropout.
|
||||
total_loss = 0
|
||||
start_time = time.time()
|
||||
ntokens = len(corpus.dictionary)
|
||||
hidden_train, hidden_valid = model.init_hidden(args.batch_size), model.init_hidden(args.batch_size)
|
||||
|
||||
batch, i = 0, 0
|
||||
|
||||
while i < train_data.size(0) - 1 - 1:
|
||||
seq_len = int( args.bptt if np.random.random() < 0.95 else args.bptt / 2. )
|
||||
# Prevent excessively small or negative sequence lengths
|
||||
# seq_len = max(5, int(np.random.normal(bptt, 5)))
|
||||
# # There's a very small chance that it could select a very long sequence length resulting in OOM
|
||||
# seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)
|
||||
for param_group in base_optimizer.param_groups:
|
||||
param_group['lr'] *= float( seq_len / args.bptt )
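# scale the learning rate in proportion to the sampled sequence length; it is divided back after the update below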
|
||||
|
||||
model.train()
|
||||
|
||||
data_valid, targets_valid = get_batch(search_data, i % (search_data.size(0) - 1), args.bptt)
|
||||
data_train, targets_train = get_batch(train_data , i, seq_len)
|
||||
|
||||
hidden_train = repackage_hidden(hidden_train)
|
||||
hidden_valid = repackage_hidden(hidden_valid)
|
||||
|
||||
data_time.update(time.time() - start_time)
|
||||
|
||||
# validation loss
|
||||
targets_valid = targets_valid.contiguous().view(-1)
|
||||
|
||||
arch_optimizer.zero_grad()
|
||||
log_prob, hidden_valid = model(data_valid, hidden_valid, return_h=False)
|
||||
arch_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_valid)
|
||||
arch_loss.backward()
|
||||
arch_optimizer.step()
|
||||
|
||||
# model update
|
||||
base_optimizer.zero_grad()
|
||||
targets_train = targets_train.contiguous().view(-1)
|
||||
|
||||
log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data_train, hidden_train, return_h=True)
|
||||
raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_train)
|
||||
|
||||
loss = raw_loss
|
||||
# Activation Regularization
|
||||
if args.alpha > 0:
|
||||
loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
|
||||
# Temporal Activation Regularization (slowness)
|
||||
loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
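# alpha penalizes large activations (AR) and beta penalizes differences between consecutive hidden states (TAR)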
|
||||
loss.backward()
|
||||
# `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
|
||||
nn.utils.clip_grad_norm_(model.base_parameters(), args.clip)
|
||||
base_optimizer.step()
|
||||
|
||||
for param_group in base_optimizer.param_groups:
|
||||
param_group['lr'] /= float( seq_len / args.bptt )
|
||||
|
||||
total_loss += raw_loss.item()
|
||||
gc.collect()
|
||||
|
||||
batch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
batch, i = batch + 1, i + seq_len
|
||||
|
||||
if batch % args.print_freq == 0 or i >= train_data.size(0) - 1 - 1:
|
||||
print_log(' || Epoch: {:03d} :: {:03d}/{:03d} '.format(epoch, batch, len(train_data) // args.bptt), log)
|
||||
#print_log(' || Epoch: {:03d} :: {:03d}/{:03d} = {:}'.format(epoch, batch, len(train_data) // args.bptt, model.genotype()), log)
|
||||
cur_loss = total_loss / args.print_freq
|
||||
print_log(' [TRAIN] Time : data {:.3f} ({:.3f}) batch {:.3f} ({:.3f}) Loss : {:}, PPL : {:}'.format(data_time.val, data_time.avg, batch_time.val, batch_time.avg, cur_loss, math.exp(cur_loss)), log)
|
||||
#print(F.softmax(model.arch_weights, dim=-1))
|
||||
total_loss = 0
|
||||
|
||||
return data_time.sum, batch_time.sum
|
||||
|
||||
|
||||
def infer(model, corpus, data_source, batch_size):
|
||||
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
total_loss = 0
|
||||
ntokens = len(corpus.dictionary)
|
||||
hidden = model.init_hidden(batch_size)
|
||||
for i in range(0, data_source.size(0) - 1, args.bptt):
|
||||
data, targets = get_batch(data_source, i, args.bptt)
|
||||
targets = targets.view(-1)
|
||||
|
||||
log_prob, hidden = model(data, hidden)
|
||||
loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
|
||||
|
||||
total_loss += loss.item() * len(data)
|
||||
|
||||
hidden = repackage_hidden(hidden)
|
||||
return total_loss / len(data_source)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
75
exps-rnn/debug_test.py
Normal file
@ -0,0 +1,75 @@
|
||||
import os, gc, sys, time, math
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from utils import print_log, obtain_accuracy, AverageMeter
|
||||
from utils import time_string, convert_secs2time
|
||||
from utils import count_parameters_in_MB
|
||||
from datasets import Corpus
|
||||
from nas_rnn import batchify, get_batch, repackage_hidden
|
||||
from nas_rnn import DARTS
|
||||
from nas_rnn import DARTSCell, RNNModel
|
||||
from nas_rnn import basemodel as model
|
||||
from scheduler import load_config
|
||||
|
||||
|
||||
def main_procedure(config, genotype, print_freq, log):
|
||||
|
||||
print_log('-'*90, log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
print_log('config : {:}'.format(config.bptt), log)
|
||||
|
||||
corpus = Corpus(config.data_path)
|
||||
train_data = batchify(corpus.train, config.train_batch, True)
|
||||
valid_data = batchify(corpus.valid, config.eval_batch , True)
|
||||
test_data = batchify(corpus.test, config.test_batch , True)
|
||||
ntokens = len(corpus.dictionary)
|
||||
print_log("Train--Data Size : {:}".format(train_data.size()), log)
|
||||
print_log("Valid--Data Size : {:}".format(valid_data.size()), log)
|
||||
print_log("Test---Data Size : {:}".format( test_data.size()), log)
|
||||
print_log("ntokens = {:}".format(ntokens), log)
|
||||
|
||||
model = RNNModel(ntokens, config.emsize, config.nhid, config.nhidlast,
|
||||
config.dropout, config.dropouth, config.dropoutx, config.dropouti, config.dropoute,
|
||||
cell_cls=DARTSCell, genotype=genotype)
|
||||
model = model.cuda()
|
||||
print_log('Network =>\n{:}'.format(model), log)
|
||||
print_log('Genotype : {:}'.format(genotype), log)
|
||||
print_log('Parameters : {:.3f} MB'.format(count_parameters_in_MB(model)), log)
|
||||
|
||||
|
||||
print_log('--------------------- Finish Training ----------------', log)
|
||||
test_loss = evaluate(model, corpus, test_data , config.test_batch, config.bptt)
|
||||
print_log('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(test_loss, math.exp(test_loss)), log)
|
||||
vali_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
|
||||
print_log('| End of training | valid loss {:5.2f} | valid ppl {:8.2f}'.format(vali_loss, math.exp(vali_loss)), log)
|
||||
|
||||
|
||||
|
||||
def evaluate(model, corpus, data_source, batch_size, bptt):
|
||||
# Turn on evaluation mode which disables dropout.
|
||||
model.eval()
|
||||
total_loss, total_length = 0.0, 0.0
|
||||
with torch.no_grad():
|
||||
ntokens = len(corpus.dictionary)
|
||||
hidden = model.init_hidden(batch_size)
|
||||
for i in range(0, data_source.size(0) - 1, bptt):
|
||||
data, targets = get_batch(data_source, i, bptt)
|
||||
targets = targets.view(-1)
|
||||
|
||||
log_prob, hidden = model(data, hidden)
|
||||
loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
|
||||
|
||||
total_loss += loss.item() * len(data)
|
||||
total_length += len(data)
|
||||
hidden = repackage_hidden(hidden)
|
||||
return total_loss / total_length
|
||||
|
||||
if __name__ == '__main__':
|
||||
path = './configs/NAS-PTB-BASE.config'
|
||||
config = load_config(path)
|
||||
main_procedure(config, DARTS, 10, None)
|
70
exps-rnn/train_rnn_base.py
Normal file
@ -0,0 +1,70 @@
|
||||
import os, gc, sys, math, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision.datasets as dset
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torchvision.transforms as transforms
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from utils import AverageMeter, time_string, time_file_str, convert_secs2time
|
||||
from utils import print_log, obtain_accuracy
|
||||
from utils import count_parameters_in_MB
|
||||
from nas_rnn import DARTS_V1, DARTS_V2, GDAS
|
||||
from train_rnn_utils import main_procedure
|
||||
from scheduler import load_config
|
||||
|
||||
Networks = {'DARTS_V1': DARTS_V1,
|
||||
'DARTS_V2': DARTS_V2,
|
||||
'GDAS' : GDAS}
|
||||
|
||||
parser = argparse.ArgumentParser("RNN")
|
||||
parser.add_argument('--arch', type=str, choices=Networks.keys(), help='the network architecture')
|
||||
parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
|
||||
# log
|
||||
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
|
||||
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
|
||||
parser.add_argument('--manualSeed', type=int, help='manual seed')
|
||||
parser.add_argument('--threads', type=int, default=10, help='the number of threads')
|
||||
args = parser.parse_args()
|
||||
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
if args.manualSeed is None:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
cudnn.enabled = True
|
||||
torch.manual_seed(args.manualSeed)
|
||||
torch.cuda.manual_seed_all(args.manualSeed)
|
||||
torch.set_num_threads(args.threads)
|
||||
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}-{:}.txt'.format(args.manualSeed, time_file_str())), 'w')
|
||||
print_log('save path : {}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
print_log("Random Seed: {}".format(args.manualSeed), log)
|
||||
print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
|
||||
print_log("Torch version : {}".format(torch.__version__), log)
|
||||
print_log("CUDA version : {}".format(torch.version.cuda), log)
|
||||
print_log("cuDNN version : {}".format(cudnn.version()), log)
|
||||
print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
|
||||
|
||||
config = load_config( args.config_path )
|
||||
genotype = Networks[ args.arch ]
|
||||
|
||||
main_procedure(config, genotype, args.save_path, args.print_freq, log)
|
||||
log.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
267
exps-rnn/train_rnn_search.py
Normal file
@ -0,0 +1,267 @@
|
||||
import os, gc, sys, math, time, glob, random, argparse
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision.datasets as dset
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torchvision.transforms as transforms
|
||||
from pathlib import Path
|
||||
lib_dir = (Path(__file__).parent / '..' / '..' / 'lib').resolve()
|
||||
if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
|
||||
from utils import AverageMeter, time_string, convert_secs2time
|
||||
from utils import print_log, obtain_accuracy
|
||||
from utils import count_parameters_in_MB
|
||||
from datasets import Corpus
|
||||
from nas_rnn import batchify, get_batch, repackage_hidden
|
||||
from nas_rnn import DARTSCellSearch, RNNModelSearch
|
||||
from train_rnn_utils import main_procedure
|
||||
from scheduler import load_config
|
||||
|
||||
parser = argparse.ArgumentParser("RNN")
|
||||
parser.add_argument('--data_path', type=str, help='Path to dataset')
|
||||
parser.add_argument('--emsize', type=int, default=300, help='size of word embeddings')
|
||||
parser.add_argument('--nhid', type=int, default=300, help='number of hidden units per layer')
|
||||
parser.add_argument('--nhidlast', type=int, default=300, help='number of hidden units for the last rnn layer')
|
||||
parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
|
||||
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
|
||||
parser.add_argument('--batch_size', type=int, default=256, help='the batch size')
|
||||
parser.add_argument('--eval_batch_size', type=int, default=10, help='the evaluation batch size')
|
||||
parser.add_argument('--bptt', type=int, default=35, help='the sequence length')
|
||||
# DropOut
|
||||
parser.add_argument('--dropout', type=float, default=0.75, help='dropout applied to layers (0 = no dropout)')
|
||||
parser.add_argument('--dropouth', type=float, default=0.25, help='dropout for hidden nodes in rnn layers (0 = no dropout)')
|
||||
parser.add_argument('--dropoutx', type=float, default=0.75, help='dropout for input nodes in rnn layers (0 = no dropout)')
|
||||
parser.add_argument('--dropouti', type=float, default=0.2, help='dropout for input embedding layers (0 = no dropout)')
|
||||
parser.add_argument('--dropoute', type=float, default=0, help='dropout to remove words from embedding layer (0 = no dropout)')
|
||||
# Regularization
|
||||
parser.add_argument('--lr', type=float, default=20, help='initial learning rate')
|
||||
parser.add_argument('--alpha', type=float, default=0, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
|
||||
parser.add_argument('--beta', type=float, default=1e-3, help='beta slowness regularization applied on RNN activation (beta = 0 means no regularization)')
|
||||
parser.add_argument('--wdecay', type=float, default=5e-7, help='weight decay applied to all weights')
|
||||
# architecture learning rate
|
||||
parser.add_argument('--arch_lr', type=float, default=3e-3, help='learning rate for arch encoding')
|
||||
parser.add_argument('--arch_wdecay', type=float, default=1e-3, help='weight decay for arch encoding')
|
||||
parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
|
||||
# log
|
||||
parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
|
||||
parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
|
||||
parser.add_argument('--manualSeed', type=int, help='manual seed')
|
||||
args = parser.parse_args()
|
||||
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
if args.manualSeed is None:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
if args.nhidlast < 0:
|
||||
args.nhidlast = args.emsize
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
cudnn.enabled = True
|
||||
torch.manual_seed(args.manualSeed)
|
||||
torch.cuda.manual_seed_all(args.manualSeed)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
|
||||
print_log('save path : {}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
print_log("Random Seed: {}".format(args.manualSeed), log)
|
||||
print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
|
||||
print_log("Torch version : {}".format(torch.__version__), log)
|
||||
print_log("CUDA version : {}".format(torch.version.cuda), log)
|
||||
print_log("cuDNN version : {}".format(cudnn.version()), log)
|
||||
print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
|
||||
|
||||
# Dataset
|
||||
corpus = Corpus(args.data_path)
|
||||
train_data = batchify(corpus.train, args.batch_size, True)
|
||||
search_data = batchify(corpus.valid, args.batch_size, True)
|
||||
valid_data = batchify(corpus.valid, args.eval_batch_size, True)
|
||||
print_log("Train--Data Size : {:}".format(train_data.size()), log)
|
||||
print_log("Search-Data Size : {:}".format(search_data.size()), log)
|
||||
print_log("Valid--Data Size : {:}".format(valid_data.size()), log)
|
||||
|
||||
ntokens = len(corpus.dictionary)
|
||||
model = RNNModelSearch(ntokens, args.emsize, args.nhid, args.nhidlast,
|
||||
args.dropout, args.dropouth, args.dropoutx, args.dropouti, args.dropoute,
|
||||
DARTSCellSearch, None)
|
||||
model = model.cuda()
|
||||
print_log('model ==>> : {:}'.format(model), log)
|
||||
print_log('Parameter size : {:} MB'.format(count_parameters_in_MB(model)), log)
|
||||
|
||||
base_optimizer = torch.optim.SGD(model.base_parameters(), lr=args.lr, weight_decay=args.wdecay)
|
||||
arch_optimizer = torch.optim.Adam(model.arch_parameters(), lr=args.arch_lr, weight_decay=args.arch_wdecay)
|
||||
|
||||
config = load_config(args.config_path)
|
||||
print_log('Load config from {:} ==>>\n {:}'.format(args.config_path, config), log)
|
||||
|
||||
# snapshot
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-search.pth')
|
||||
if os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
start_epoch = checkpoint['epoch']
|
||||
model.load_state_dict( checkpoint['state_dict'] )
|
||||
base_optimizer.load_state_dict( checkpoint['base_optimizer'] )
|
||||
arch_optimizer.load_state_dict( checkpoint['arch_optimizer'] )
|
||||
genotypes = checkpoint['genotypes']
|
||||
valid_losses = checkpoint['valid_losses']
|
||||
print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
|
||||
else:
|
||||
start_epoch, genotypes, valid_losses = 0, {}, {-1:1e8}
|
||||
print_log('Train model-search from scratch.', log)
|
||||
|
||||
# Main loop
|
||||
start_time, epoch_time, total_train_time = time.time(), AverageMeter(), 0
|
||||
for epoch in range(start_epoch, args.epochs):
|
||||
|
||||
need_time = convert_secs2time(epoch_time.val * (args.epochs-epoch), True)
|
||||
print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s}'.format(time_string(), epoch, args.epochs, need_time), log)
|
||||
# training
|
||||
data_time, train_time = train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log)
|
||||
total_train_time += train_time
|
||||
# evaluation
|
||||
|
||||
# validation
|
||||
valid_loss = infer(model, corpus, valid_data, args.eval_batch_size)
|
||||
# save genotype
|
||||
if valid_loss < min( valid_losses.values() ): is_best = True
|
||||
else : is_best = False
|
||||
print_log('-'*10 + ' [Epoch={:03d}/{:03d}] : is-best={:}, validation-loss={:}, validation-PPL={:}'.format(epoch, args.epochs, is_best, valid_loss, math.exp(valid_loss)), log)
|
||||
|
||||
valid_losses[epoch] = valid_loss
|
||||
genotypes[epoch] = model.genotype()
|
||||
print_log(' the {:}-th genotype = {:}'.format(epoch, genotypes[epoch]), log)
|
||||
# save checkpoint
|
||||
if is_best:
|
||||
genotypes['best'] = model.genotype()
|
||||
torch.save({'epoch' : epoch + 1,
|
||||
'args' : deepcopy(args),
|
||||
'state_dict': model.state_dict(),
|
||||
'genotypes' : genotypes,
|
||||
'valid_losses' : valid_losses,
|
||||
'base_optimizer' : base_optimizer.state_dict(),
|
||||
'arch_optimizer' : arch_optimizer.state_dict()},
|
||||
checkpoint_path)
|
||||
print_log('----> Save into {:}'.format(checkpoint_path), log)
|
||||
|
||||
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
print_log('Finish with training time = {:}'.format( convert_secs2time(total_train_time, True) ), log)
|
||||
|
||||
# clear GPU cache
|
||||
torch.cuda.empty_cache()
|
||||
main_procedure(config, genotypes['best'], args.save_path, args.print_freq, log)
|
||||
log.close()
|
||||
|
||||
|
||||
def train(model, base_optimizer, arch_optimizer, corpus, train_data, search_data, epoch, log):
|
||||
|
||||
data_time, batch_time = AverageMeter(), AverageMeter()
|
||||
# Turn on training mode which enables dropout.
|
||||
total_loss = 0
|
||||
start_time = time.time()
|
||||
ntokens = len(corpus.dictionary)
|
||||
hidden_train, hidden_valid = model.init_hidden(args.batch_size), model.init_hidden(args.batch_size)
|
||||
|
||||
batch, i = 0, 0
|
||||
|
||||
while i < train_data.size(0) - 1 - 1:
|
||||
seq_len = int( args.bptt if np.random.random() < 0.95 else args.bptt / 2. )
|
||||
# Prevent excessively small or negative sequence lengths
|
||||
# seq_len = max(5, int(np.random.normal(bptt, 5)))
|
||||
# # There's a very small chance that it could select a very long sequence length resulting in OOM
|
||||
# seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)
|
||||
for param_group in base_optimizer.param_groups:
|
||||
param_group['lr'] *= float( seq_len / args.bptt )
|
||||
|
||||
model.train()
|
||||
|
||||
data_valid, targets_valid = get_batch(search_data, i % (search_data.size(0) - 1), args.bptt)
|
||||
data_train, targets_train = get_batch(train_data , i, seq_len)
|
||||
|
||||
hidden_train = repackage_hidden(hidden_train)
|
||||
hidden_valid = repackage_hidden(hidden_valid)
|
||||
|
||||
data_time.update(time.time() - start_time)
|
||||
|
||||
# validation loss
|
||||
targets_valid = targets_valid.contiguous().view(-1)
|
||||
|
||||
arch_optimizer.zero_grad()
|
||||
log_prob, hidden_valid = model(data_valid, hidden_valid, return_h=False)
|
||||
arch_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_valid)
|
||||
arch_loss.backward()
|
||||
arch_optimizer.step()
|
||||
|
||||
# model update
|
||||
base_optimizer.zero_grad()
|
||||
targets_train = targets_train.contiguous().view(-1)
|
||||
|
||||
log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data_train, hidden_train, return_h=True)
|
||||
raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets_train)
|
||||
|
||||
loss = raw_loss
|
||||
# Activation Regularization
|
||||
if args.alpha > 0:
|
||||
loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
|
||||
# Temporal Activation Regularization (slowness)
|
||||
loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
|
||||
loss.backward()
|
||||
# `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
|
||||
nn.utils.clip_grad_norm_(model.base_parameters(), args.clip)
|
||||
base_optimizer.step()
|
||||
|
||||
for param_group in base_optimizer.param_groups:
|
||||
param_group['lr'] /= float( seq_len / args.bptt )
|
||||
|
||||
total_loss += raw_loss.item()
|
||||
gc.collect()
|
||||
|
||||
batch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
batch, i = batch + 1, i + seq_len
|
||||
|
||||
if batch % args.print_freq == 0 or i >= train_data.size(0) - 1 - 1:
|
||||
print_log(' || Epoch: {:03d} :: {:03d}/{:03d}'.format(epoch, batch, len(train_data) // args.bptt), log)
|
||||
#print_log(' || Epoch: {:03d} :: {:03d}/{:03d} = {:}'.format(epoch, batch, len(train_data) // args.bptt, model.genotype()), log)
|
||||
cur_loss = total_loss / args.print_freq
|
||||
print_log(' ---> Time : data {:.3f} ({:.3f}) batch {:.3f} ({:.3f}) Loss : {:}, PPL : {:}'.format(data_time.val, data_time.avg, batch_time.val, batch_time.avg, cur_loss, math.exp(cur_loss)), log)
|
||||
print(F.softmax(model.arch_weights, dim=-1))
|
||||
total_loss = 0
|
||||
|
||||
return data_time.sum, batch_time.sum
|
||||
|
||||
|
||||
def infer(model, corpus, data_source, batch_size):
|
||||
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
total_loss, total_length = 0, 0
|
||||
ntokens = len(corpus.dictionary)
|
||||
hidden = model.init_hidden(batch_size)
|
||||
for i in range(0, data_source.size(0) - 1, args.bptt):
|
||||
data, targets = get_batch(data_source, i, args.bptt)
|
||||
targets = targets.view(-1)
|
||||
|
||||
log_prob, hidden = model(data, hidden)
|
||||
loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
|
||||
|
||||
total_loss += loss.item() * len(data)
|
||||
total_length += len(data)
|
||||
hidden = repackage_hidden(hidden)
|
||||
return total_loss / total_length
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
220
exps-rnn/train_rnn_utils.py
Normal file
@ -0,0 +1,220 @@
|
||||
import os, gc, sys, time, math
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from utils import print_log, obtain_accuracy, AverageMeter
|
||||
from utils import time_string, convert_secs2time
|
||||
from utils import count_parameters_in_MB
|
||||
from datasets import Corpus
|
||||
from nas_rnn import batchify, get_batch, repackage_hidden
|
||||
from nas_rnn import DARTSCell, RNNModel
|
||||
|
||||
|
||||
def obtain_best(accuracies):
|
||||
if len(accuracies) == 0: return (0, 0)
|
||||
tops = [value for key, value in accuracies.items()]
|
||||
s2b = sorted( tops )
|
||||
return s2b[-1]
|
||||
|
||||
|
||||
def main_procedure(config, genotype, save_dir, print_freq, log):
|
||||
|
||||
print_log('-'*90, log)
|
||||
print_log('save-dir : {:}'.format(save_dir), log)
|
||||
print_log('genotype : {:}'.format(genotype), log)
|
||||
print_log('config : {:}'.format(config), log)
|
||||
|
||||
corpus = Corpus(config.data_path)
|
||||
train_data = batchify(corpus.train, config.train_batch, True)
|
||||
valid_data = batchify(corpus.valid, config.eval_batch , True)
|
||||
test_data = batchify(corpus.test, config.test_batch , True)
|
||||
ntokens = len(corpus.dictionary)
|
||||
print_log("Train--Data Size : {:}".format(train_data.size()), log)
|
||||
print_log("Valid--Data Size : {:}".format(valid_data.size()), log)
|
||||
print_log("Test---Data Size : {:}".format( test_data.size()), log)
|
||||
print_log("ntokens = {:}".format(ntokens), log)
|
||||
|
||||
model = RNNModel(ntokens, config.emsize, config.nhid, config.nhidlast,
|
||||
config.dropout, config.dropouth, config.dropoutx, config.dropouti, config.dropoute,
|
||||
cell_cls=DARTSCell, genotype=genotype)
|
||||
model = model.cuda()
|
||||
print_log('Network =>\n{:}'.format(model), log)
|
||||
print_log('Genotype : {:}'.format(genotype), log)
|
||||
print_log('Parameters : {:.3f} MB'.format(count_parameters_in_MB(model)), log)
|
||||
|
||||
checkpoint_path = os.path.join(save_dir, 'checkpoint-{:}.pth'.format(config.data_name))
|
||||
|
||||
Soptimizer = torch.optim.SGD (model.parameters(), lr=config.LR, weight_decay=config.wdecay)
|
||||
Aoptimizer = torch.optim.ASGD(model.parameters(), lr=config.LR, t0=0, lambd=0., weight_decay=config.wdecay)
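# both optimizers are created up front so training can switch from SGD to averaged SGD (ASGD) and both states can be checkpointed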
|
||||
if os.path.isfile(checkpoint_path):
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
model.load_state_dict( checkpoint['state_dict'] )
|
||||
Soptimizer.load_state_dict( checkpoint['SGD_optimizer'] )
|
||||
Aoptimizer.load_state_dict( checkpoint['ASGD_optimizer'] )
|
||||
epoch = checkpoint['epoch']
|
||||
use_asgd = checkpoint['use_asgd']
|
||||
print_log('load checkpoint from {:} and start training from epoch {:}'.format(checkpoint_path, epoch), log)
|
||||
else:
|
||||
epoch, use_asgd = 0, False
|
||||
|
||||
start_time, epoch_time = time.time(), AverageMeter()
|
||||
valid_loss_from_sgd, losses = [], {-1 : 1e9}
|
||||
while epoch < config.epochs:
|
||||
need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
|
||||
print_log("\n==>>{:s} [Epoch={:04d}/{:04d}] {:}".format(time_string(), epoch, config.epochs, need_time), log)
|
||||
if use_asgd : optimizer = Aoptimizer
|
||||
else : optimizer = Soptimizer
|
||||
|
||||
try:
|
||||
Dtime, Btime = train(model, optimizer, corpus, train_data, config, epoch, print_freq, log)
|
||||
except:
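# on a failure inside train (e.g. a CUDA out-of-memory error), free the cache and roll back to the last saved checkpoint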
|
||||
torch.cuda.empty_cache()
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
model.load_state_dict( checkpoint['state_dict'] )
|
||||
Soptimizer.load_state_dict( checkpoint['SGD_optimizer'] )
|
||||
Aoptimizer.load_state_dict( checkpoint['ASGD_optimizer'] )
|
||||
epoch = checkpoint['epoch']
|
||||
use_asgd = checkpoint['use_asgd']
|
||||
valid_loss_from_sgd = checkpoint['valid_loss_from_sgd']
|
||||
continue
|
||||
if use_asgd:
|
||||
tmp = {}
|
||||
for prm in model.parameters():
|
||||
tmp[prm] = prm.data.clone()
|
||||
prm.data = Aoptimizer.state[prm]['ax'].clone()
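# temporarily swap in the ASGD running averages ('ax') for evaluation; the original weights are restored below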
|
||||
|
||||
val_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
|
||||
|
||||
for prm in model.parameters():
|
||||
prm.data = tmp[prm].clone()
|
||||
else:
|
||||
val_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
|
||||
if len(valid_loss_from_sgd) > config.nonmono and val_loss > min(valid_loss_from_sgd):
|
||||
use_asgd = True
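# non-monotone trigger (NT-ASGD style): switch to ASGD once the SGD validation loss has not improved for config.nonmono checks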
|
||||
valid_loss_from_sgd.append( val_loss )
|
||||
|
||||
print_log('{:} end of epoch {:3d} with {:} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(time_string(), epoch, 'ASGD' if use_asgd else 'SGD', val_loss, math.exp(val_loss)), log)
|
||||
|
||||
if val_loss < min(losses.values()):
|
||||
if use_asgd:
|
||||
tmp = {}
|
||||
for prm in model.parameters():
|
||||
tmp[prm] = prm.data.clone()
|
||||
prm.data = Aoptimizer.state[prm]['ax'].clone()
|
||||
torch.save({'epoch' : epoch,
|
||||
'use_asgd' : use_asgd,
|
||||
'valid_loss_from_sgd': valid_loss_from_sgd,
|
||||
'state_dict': model.state_dict(),
|
||||
'SGD_optimizer' : Soptimizer.state_dict(),
|
||||
'ASGD_optimizer': Aoptimizer.state_dict()},
|
||||
checkpoint_path)
|
||||
if use_asgd:
|
||||
for prm in model.parameters():
|
||||
prm.data = tmp[prm].clone()
|
||||
print_log('save into {:}'.format(checkpoint_path), log)
|
||||
if use_asgd:
|
||||
tmp = {}
|
||||
for prm in model.parameters():
|
||||
tmp[prm] = prm.data.clone()
|
||||
prm.data = Aoptimizer.state[prm]['ax'].clone()
|
||||
test_loss = evaluate(model, corpus, test_data, config.test_batch, config.bptt)
|
||||
if use_asgd:
|
||||
for prm in model.parameters():
|
||||
prm.data = tmp[prm].clone()
|
||||
print_log('| epoch={:03d} | test loss {:5.2f} | test ppl {:8.2f}'.format(epoch, test_loss, math.exp(test_loss)), log)
|
||||
losses[epoch] = val_loss
|
||||
epoch = epoch + 1
|
||||
# measure elapsed time
|
||||
epoch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
print_log('--------------------- Finish Training ----------------', log)
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
model.load_state_dict( checkpoint['state_dict'] )
|
||||
test_loss = evaluate(model, corpus, test_data , config.test_batch, config.bptt)
|
||||
print_log('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(test_loss, math.exp(test_loss)), log)
|
||||
vali_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
|
||||
print_log('| End of training | valid loss {:5.2f} | valid ppl {:8.2f}'.format(vali_loss, math.exp(vali_loss)), log)
|
||||
|
||||
|
||||
|
||||
def evaluate(model, corpus, data_source, batch_size, bptt):
|
||||
# Turn on evaluation mode which disables dropout.
|
||||
model.eval()
|
||||
total_loss, total_length = 0.0, 0.0
|
||||
with torch.no_grad():
|
||||
ntokens = len(corpus.dictionary)
|
||||
hidden = model.init_hidden(batch_size)
|
||||
for i in range(0, data_source.size(0) - 1, bptt):
|
||||
data, targets = get_batch(data_source, i, bptt)
|
||||
targets = targets.view(-1)
|
||||
|
||||
log_prob, hidden = model(data, hidden)
|
||||
loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
|
||||
|
||||
total_loss += loss.item() * len(data)
|
||||
total_length += len(data)
|
||||
hidden = repackage_hidden(hidden)
|
||||
return total_loss / total_length
|
||||
|
||||
|
||||
|
||||
def train(model, optimizer, corpus, train_data, config, epoch, print_freq, log):
|
||||
# Turn on training mode which enables dropout.
|
||||
total_loss, data_time, batch_time = 0, AverageMeter(), AverageMeter()
|
||||
start_time = time.time()
|
||||
ntokens = len(corpus.dictionary)
|
||||
|
||||
hidden_train = model.init_hidden(config.train_batch)
|
||||
|
||||
batch, i = 0, 0
|
||||
while i < train_data.size(0) - 1 - 1:
|
||||
bptt = config.bptt if np.random.random() < 0.95 else config.bptt / 2.
|
||||
# Prevent excessively small or negative sequence lengths
|
||||
seq_len = max(5, int(np.random.normal(bptt, 5)))
|
||||
# There's a very small chance that it could select a very long sequence length resulting in OOM
|
||||
seq_len = min(seq_len, config.bptt + config.max_seq_len_delta)
|
||||
|
||||
|
||||
lr2 = optimizer.param_groups[0]['lr']
|
||||
optimizer.param_groups[0]['lr'] = lr2 * seq_len / config.bptt
|
||||
|
||||
model.train()
|
||||
data, targets = get_batch(train_data, i, seq_len)
|
||||
targets = targets.contiguous().view(-1)
|
||||
# count data preparation time
|
||||
data_time.update(time.time() - start_time)
|
||||
|
||||
optimizer.zero_grad()
|
||||
hidden_train = repackage_hidden(hidden_train)
|
||||
log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data, hidden_train, return_h=True)
|
||||
raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
|
||||
|
||||
loss = raw_loss
|
||||
# Activation Regularization
|
||||
if config.alpha > 0:
|
||||
loss = loss + sum(config.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
|
||||
# Temporal Activation Regularization (slowness)
|
||||
loss = loss + sum(config.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
|
||||
optimizer.step()
|
||||
|
||||
gc.collect()
|
||||
|
||||
optimizer.param_groups[0]['lr'] = lr2
|
||||
|
||||
total_loss += raw_loss.item()
|
||||
assert not torch.isnan(loss), '--- Epoch={:04d} :: {:03d}/{:03d} Get Loss = Nan'.format(epoch, batch, len(train_data)//config.bptt)
|
||||
|
||||
batch_time.update(time.time() - start_time)
|
||||
start_time = time.time()
|
||||
batch, i = batch + 1, i + seq_len
|
||||
|
||||
if batch % print_freq == 0:
|
||||
cur_loss = total_loss / print_freq
|
||||
print_log(' >> Epoch: {:04d} :: {:03d}/{:03d} || loss = {:5.2f}, ppl = {:8.2f}'.format(epoch, batch, len(train_data) // config.bptt, cur_loss, math.exp(cur_loss)), log)
|
||||
total_loss = 0
|
||||
return data_time.sum, batch_time.sum
|
122
lib/datasets/LanguageDataset.py
Normal file
@ -0,0 +1,122 @@
|
||||
import os
|
||||
import torch
|
||||
|
||||
from collections import Counter
|
||||
|
||||
|
||||
class Dictionary(object):
|
||||
def __init__(self):
|
||||
self.word2idx = {}
|
||||
self.idx2word = []
|
||||
self.counter = Counter()
|
||||
self.total = 0
|
||||
|
||||
def add_word(self, word):
|
||||
if word not in self.word2idx:
|
||||
self.idx2word.append(word)
|
||||
self.word2idx[word] = len(self.idx2word) - 1
|
||||
token_id = self.word2idx[word]
|
||||
self.counter[token_id] += 1
|
||||
self.total += 1
|
||||
return self.word2idx[word]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.idx2word)
|
||||
|
||||
|
||||
class Corpus(object):
|
||||
def __init__(self, path):
|
||||
self.dictionary = Dictionary()
|
||||
self.train = self.tokenize(os.path.join(path, 'train.txt'))
|
||||
self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
|
||||
self.test = self.tokenize(os.path.join(path, 'test.txt'))
|
||||
|
||||
def tokenize(self, path):
|
||||
"""Tokenizes a text file."""
|
||||
assert os.path.exists(path)
|
||||
# Add words to the dictionary
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
tokens = 0
|
||||
for line in f:
|
||||
words = line.split() + ['<eos>']
|
||||
tokens += len(words)
|
||||
for word in words:
|
||||
self.dictionary.add_word(word)
|
||||
|
||||
# Tokenize file content
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
ids = torch.LongTensor(tokens)
|
||||
token = 0
|
||||
for line in f:
|
||||
words = line.split() + ['<eos>']
|
||||
for word in words:
|
||||
ids[token] = self.dictionary.word2idx[word]
|
||||
token += 1
|
||||
|
||||
return ids
|
||||
|
||||
class SentCorpus(object):
|
||||
def __init__(self, path):
|
||||
self.dictionary = Dictionary()
|
||||
self.train = self.tokenize(os.path.join(path, 'train.txt'))
|
||||
self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
|
||||
self.test = self.tokenize(os.path.join(path, 'test.txt'))
|
||||
|
||||
def tokenize(self, path):
|
||||
"""Tokenizes a text file."""
|
||||
assert os.path.exists(path)
|
||||
# Add words to the dictionary
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
tokens = 0
|
||||
for line in f:
|
||||
words = line.split() + ['<eos>']
|
||||
tokens += len(words)
|
||||
for word in words:
|
||||
self.dictionary.add_word(word)
|
||||
|
||||
# Tokenize file content
|
||||
sents = []
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
if not line:
|
||||
continue
|
||||
words = line.split() + ['<eos>']
|
||||
sent = torch.LongTensor(len(words))
|
||||
for i, word in enumerate(words):
|
||||
sent[i] = self.dictionary.word2idx[word]
|
||||
sents.append(sent)
|
||||
|
||||
return sents
|
||||
|
||||
class BatchSentLoader(object):
|
||||
def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False):
|
||||
self.sents = sents
|
||||
self.batch_size = batch_size
|
||||
self.sort_sents = sorted(sents, key=lambda x: x.size(0))
|
||||
self.cuda = cuda
|
||||
self.volatile = volatile
|
||||
self.pad_id = pad_id
|
||||
|
||||
def __next__(self):
|
||||
if self.idx >= len(self.sort_sents):
|
||||
raise StopIteration
|
||||
|
||||
batch_size = min(self.batch_size, len(self.sort_sents)-self.idx)
|
||||
batch = self.sort_sents[self.idx:self.idx+batch_size]
|
||||
max_len = max([s.size(0) for s in batch])
|
||||
tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id)
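# pad every sentence in this batch with pad_id up to the longest sentence length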
|
||||
for i in range(len(batch)):
|
||||
s = batch[i]
|
||||
tensor[:s.size(0),i].copy_(s)
|
||||
if self.cuda:
|
||||
tensor = tensor.cuda()
|
||||
|
||||
self.idx += batch_size
|
||||
|
||||
return tensor
|
||||
|
||||
next = __next__
|
||||
|
||||
def __iter__(self):
|
||||
self.idx = 0
|
||||
return self
|
65
lib/datasets/MetaBatchSampler.py
Normal file
@ -0,0 +1,65 @@
|
||||
# coding=utf-8
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
class MetaBatchSampler(object):
|
||||
|
||||
def __init__(self, labels, classes_per_it, num_samples, iterations):
|
||||
'''
|
||||
Initialize MetaBatchSampler
|
||||
Args:
|
||||
- labels: an iterable containing all the labels for the current dataset
|
||||
sample indexes will be inferred from this iterable.
|
||||
- classes_per_it: number of random classes for each iteration
|
||||
- num_samples: number of samples for each iteration for each class (support + query)
|
||||
- iterations: number of iterations (episodes) per epoch
|
||||
'''
|
||||
super(MetaBatchSampler, self).__init__()
|
||||
self.labels = labels.copy()
|
||||
self.classes_per_it = classes_per_it
|
||||
self.sample_per_class = num_samples
|
||||
self.iterations = iterations
|
||||
|
||||
self.classes, self.counts = np.unique(self.labels, return_counts=True)
|
||||
assert len(self.classes) == np.max(self.classes) + 1 and np.min(self.classes) == 0
|
||||
assert classes_per_it < len(self.classes), '{:} vs. {:}'.format(classes_per_it, len(self.classes))
|
||||
self.classes = torch.LongTensor(self.classes)
|
||||
|
||||
# for every class c, collect the indices of the samples that belong to c
# and store them as a LongTensor in self.indexes[c]
|
||||
self.indexes = { x.item() : [] for x in self.classes }
|
||||
indexes = { x.item() : [] for x in self.classes }
|
||||
|
||||
for idx, label in enumerate(self.labels):
|
||||
indexes[ label.item() ].append( idx )
|
||||
for key, value in indexes.items():
|
||||
self.indexes[ key ] = torch.LongTensor( value )
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
# yield a batch of indexes
|
||||
spc = self.sample_per_class
|
||||
cpi = self.classes_per_it
|
||||
|
||||
for it in range(self.iterations):
|
||||
batch_size = spc * cpi
|
||||
batch = torch.LongTensor(batch_size)
|
||||
assert cpi < len(self.classes), '{:} vs. {:}'.format(cpi, len(self.classes))
|
||||
c_idxs = torch.randperm(len(self.classes))[:cpi]
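# pick classes_per_it random classes for this episode; sample_per_class indices are then drawn from each class below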
|
||||
|
||||
for i, cls in enumerate(self.classes[c_idxs]):
|
||||
s = slice(i * spc, (i + 1) * spc)
|
||||
num = self.indexes[ cls.item() ].nelement()
|
||||
assert spc < num, '{:} vs. {:}'.format(spc, num)
|
||||
sample_idxs = torch.randperm( num )[:spc]
|
||||
batch[s] = self.indexes[ cls.item() ][sample_idxs]
|
||||
|
||||
batch = batch[torch.randperm(len(batch))]
|
||||
yield batch
|
||||
|
||||
def __len__(self):
|
||||
# returns the number of iterations (episodes) per epoch
|
||||
return self.iterations
|
84
lib/datasets/TieredImageNet.py
Normal file
@ -0,0 +1,84 @@
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import pickle as pkl
|
||||
import os, cv2, csv, glob
|
||||
import torch
|
||||
import torch.utils.data as data
|
||||
|
||||
|
||||
class TieredImageNet(data.Dataset):
|
||||
|
||||
def __init__(self, root_dir, split, transform=None):
|
||||
self.split = split
|
||||
self.root_dir = root_dir
|
||||
self.transform = transform
|
||||
splits = split.split('-')
|
||||
|
||||
images, labels, last = [], [], 0
|
||||
for split in splits:
|
||||
labels_name = '{:}/{:}_labels.pkl'.format(self.root_dir, split)
|
||||
images_name = '{:}/{:}_images.npz'.format(self.root_dir, split)
|
||||
# decompress images if the npz file does not exist
|
||||
if not os.path.exists(images_name):
|
||||
png_pkl = images_name[:-4] + '_png.pkl'
|
||||
if os.path.exists(png_pkl):
|
||||
decompress(images_name, png_pkl)
|
||||
else:
|
||||
raise ValueError('png_pkl {:} does not exist'.format( png_pkl ))
|
||||
assert os.path.exists(images_name) and os.path.exists(labels_name), '{:} & {:}'.format(images_name, labels_name)
|
||||
print ("Prepare {:} done".format(images_name))
|
||||
try:
|
||||
with open(labels_name) as f:
|
||||
data = pkl.load(f)
|
||||
label_specific = data["label_specific"]
|
||||
except:
|
||||
with open(labels_name, 'rb') as f:
|
||||
data = pkl.load(f, encoding='bytes')
|
||||
label_specific = data[b'label_specific']
|
||||
with np.load(images_name, mmap_mode="r", encoding='latin1') as data:
|
||||
image_data = data["images"]
|
||||
images.append( image_data )
|
||||
label_specific = label_specific + last
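# shift the labels of this split by the running offset so class ids stay unique across concatenated splits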
|
||||
labels.append( label_specific )
|
||||
last = np.max(label_specific) + 1
|
||||
print ("Load {:} done, with image shape = {:}, label shape = {:}, [{:} ~ {:}]".format(images_name, image_data.shape, label_specific.shape, np.min(label_specific), np.max(label_specific)))
|
||||
images, labels = np.concatenate(images), np.concatenate(labels)
|
||||
|
||||
self.images = images
|
||||
self.labels = labels
|
||||
self.n_classes = int( np.max(labels) + 1 )
|
||||
self.dict_index_label = {}
|
||||
for cls in range(self.n_classes):
|
||||
idxs = np.where(labels==cls)[0]
|
||||
self.dict_index_label[cls] = idxs
|
||||
self.length = len(labels)
|
||||
print ("There are {:} images, {:} labels [{:} ~ {:}]".format(images.shape, labels.shape, np.min(labels), np.max(labels)))
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return ('{name}(length={length}, classes={n_classes})'.format(name=self.__class__.__name__, **self.__dict__))
|
||||
|
||||
def __len__(self):
|
||||
return self.length
|
||||
|
||||
def __getitem__(self, index):
|
||||
assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
|
||||
image = self.images[index].copy()
|
||||
label = int(self.labels[index])
|
||||
image = Image.fromarray(image[:,:,::-1].astype('uint8'), 'RGB')
|
||||
if self.transform is not None:
|
||||
image = self.transform( image )
|
||||
return image, label
|
||||
|
||||
|
||||
|
||||
|
||||
def decompress(path, output):
|
||||
with open(output, 'rb') as f:
|
||||
array = pkl.load(f, encoding='bytes')
|
||||
images = np.zeros([len(array), 84, 84, 3], dtype=np.uint8)
|
||||
for ii, item in enumerate(array):
|
||||
im = cv2.imdecode(item, 1)
|
||||
images[ii] = im
|
||||
np.savez(path, images=images)
|
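The label bookkeeping above (the `last` offset) merges several splits into one continuous label space. A minimal sketch with made-up arrays, assuming nothing beyond numpy:

import numpy as np

# Illustrative only: two tiny "splits" with 3 and 2 classes respectively.
val_labels  = np.array([0, 1, 2, 2])
test_labels = np.array([0, 0, 1])

labels, last = [], 0
for split_labels in (val_labels, test_labels):
    shifted = split_labels + last      # shift so class ids from different splits do not collide
    labels.append(shifted)
    last = np.max(shifted) + 1         # the next split starts after the largest id so far
merged = np.concatenate(labels)        # -> [0 1 2 2 3 3 4], i.e. 5 classes in total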
3
lib/datasets/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .MetaBatchSampler import MetaBatchSampler
|
||||
from .TieredImageNet import TieredImageNet
|
||||
from .LanguageDataset import Corpus
|
10
lib/datasets/test_NLP.py
Normal file
@ -0,0 +1,10 @@
|
||||
import os, sys, torch
|
||||
|
||||
from LanguageDataset import SentCorpus, BatchSentLoader
|
||||
|
||||
if __name__ == '__main__':
|
||||
path = '../../data/data/penn'
|
||||
corpus = SentCorpus( path )
|
||||
loader = BatchSentLoader(corpus.test, 10)
|
||||
for i, d in enumerate(loader):
|
||||
print('{:} :: {:}'.format(i, d.size()))
|
33
lib/datasets/test_dataset.py
Normal file
@ -0,0 +1,33 @@
|
||||
import os, sys, torch
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
from TieredImageNet import TieredImageNet
|
||||
from MetaBatchSampler import MetaBatchSampler
|
||||
|
||||
root_dir = os.environ['TORCH_HOME'] + '/tiered-imagenet'
|
||||
print ('root : {:}'.format(root_dir))
|
||||
means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
|
||||
|
||||
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(84, padding=8), transforms.ToTensor(), transforms.Normalize(means, stds)]
|
||||
transform = transforms.Compose(lists)
|
||||
|
||||
dataset = TieredImageNet(root_dir, 'val-test', transform)
|
||||
image, label = dataset[111]
|
||||
print ('image shape = {:}, label = {:}'.format(image.size(), label))
|
||||
print ('image : min = {:}, max = {:} ||| label : {:}'.format(image.min(), image.max(), label))
|
||||
|
||||
|
||||
sampler = MetaBatchSampler(dataset.labels, 250, 100, 10)
|
||||
|
||||
dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)
|
||||
|
||||
print ('the length of dataset : {:}'.format( len(dataset) ))
|
||||
print ('the length of loader : {:}'.format( len(dataloader) ))
|
||||
|
||||
for images, labels in dataloader:
|
||||
print ('images : {:}'.format( images.size() ))
|
||||
print ('labels : {:}'.format( labels.size() ))
|
||||
for i in range(3):
|
||||
print ('image-value-[{:}] : {:} ~ {:}, mean={:}, std={:}'.format(i, images[:,i].min(), images[:,i].max(), images[:,i].mean(), images[:,i].std()))
|
||||
|
||||
print('-----')
|
4
lib/move.sh
Normal file
@ -0,0 +1,4 @@
|
||||
rm -rf pytorch
|
||||
git clone https://github.com/pytorch/pytorch.git
|
||||
cp -r ./pytorch/torch/nn xnn
|
||||
rm -rf pytorch
|
89
lib/nas/CifarNet.py
Normal file
@ -0,0 +1,89 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from .construct_utils import Cell, Transition
|
||||
|
||||
class AuxiliaryHeadCIFAR(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes):
|
||||
"""assuming input size 8x8"""
|
||||
super(AuxiliaryHeadCIFAR, self).__init__()
|
||||
self.features = nn.Sequential(
|
||||
nn.ReLU(inplace=True),
|
||||
nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
|
||||
nn.Conv2d(C, 128, 1, bias=False),
|
||||
nn.BatchNorm2d(128),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(128, 768, 2, bias=False),
|
||||
nn.BatchNorm2d(768),
|
||||
nn.ReLU(inplace=True)
|
||||
)
|
||||
self.classifier = nn.Linear(768, num_classes)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.features(x)
|
||||
x = self.classifier(x.view(x.size(0),-1))
|
||||
return x
|
||||
|
||||
|
||||
class NetworkCIFAR(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, auxiliary, genotype):
|
||||
super(NetworkCIFAR, self).__init__()
|
||||
self._layers = layers
|
||||
|
||||
stem_multiplier = 3
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
self.cells = nn.ModuleList()
|
||||
reduction_prev = False
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
if reduction and genotype.reduce is None:
|
||||
cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
|
||||
else:
|
||||
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
self.cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
|
||||
if i == 2*layers//3:
|
||||
C_to_auxiliary = C_prev
|
||||
|
||||
if auxiliary:
|
||||
self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
|
||||
else:
|
||||
self.auxiliary_head = None
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
self.drop_path_prob = -1
|
||||
|
||||
def update_drop_path(self, drop_path_prob):
|
||||
self.drop_path_prob = drop_path_prob
|
||||
|
||||
def auxiliary_param(self):
|
||||
if self.auxiliary_head is None: return []
|
||||
else: return list( self.auxiliary_head.parameters() )
|
||||
|
||||
def forward(self, inputs):
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
|
||||
if i == 2*self._layers//3:
|
||||
if self.auxiliary_head and self.training:
|
||||
logits_aux = self.auxiliary_head(s1)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(out.size(0), -1)
|
||||
logits = self.classifier(out)
|
||||
|
||||
if self.auxiliary_head and self.training:
|
||||
return logits, logits_aux
|
||||
else:
|
||||
return logits
|
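A minimal construction sketch for NetworkCIFAR, assuming the lib/nas package from this commit is on the import path; the channel/layer numbers are illustrative, not the exact settings used in the experiments.

import torch
from nas import NetworkCIFAR, DARTS_V2   # assumed import path (lib/ on sys.path)

net = NetworkCIFAR(C=36, num_classes=10, layers=20, auxiliary=True, genotype=DARTS_V2)
net.update_drop_path(0.2)                 # enable drop-path regularization
net.train()
x = torch.randn(2, 3, 32, 32)
logits, logits_aux = net(x)               # the auxiliary head is only used in training mode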
101
lib/nas/ImageNet.py
Normal file
@ -0,0 +1,101 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from .construct_utils import Cell, Transition
|
||||
|
||||
class AuxiliaryHeadImageNet(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes):
|
||||
"""assuming input size 14x14"""
|
||||
super(AuxiliaryHeadImageNet, self).__init__()
|
||||
self.features = nn.Sequential(
|
||||
nn.ReLU(inplace=True),
|
||||
nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
|
||||
nn.Conv2d(C, 128, 1, bias=False),
|
||||
nn.BatchNorm2d(128),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(128, 768, 2, bias=False),
|
||||
# NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
|
||||
# Commenting it out for consistency with the experiments in the paper.
|
||||
# nn.BatchNorm2d(768),
|
||||
nn.ReLU(inplace=True)
|
||||
)
|
||||
self.classifier = nn.Linear(768, num_classes)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.features(x)
|
||||
x = self.classifier(x.view(x.size(0),-1))
|
||||
return x
|
||||
|
||||
|
||||
|
||||
|
||||
class NetworkImageNet(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, auxiliary, genotype):
|
||||
super(NetworkImageNet, self).__init__()
|
||||
self._layers = layers
|
||||
|
||||
self.stem0 = nn.Sequential(
|
||||
nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C // 2),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C),
|
||||
)
|
||||
|
||||
self.stem1 = nn.Sequential(
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C),
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C, C, C
|
||||
|
||||
self.cells = nn.ModuleList()
|
||||
reduction_prev = True
|
||||
for i in range(layers):
|
||||
if i in [layers // 3, 2 * layers // 3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
if reduction and genotype.reduce is None:
|
||||
cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
|
||||
else:
|
||||
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
self.cells += [cell]
|
||||
C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
|
||||
if i == 2 * layers // 3:
|
||||
C_to_auxiliary = C_prev
|
||||
|
||||
if auxiliary:
|
||||
self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
|
||||
else:
|
||||
self.auxiliary_head = None
|
||||
self.global_pooling = nn.AvgPool2d(7)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
self.drop_path_prob = -1
|
||||
|
||||
def update_drop_path(self, drop_path_prob):
|
||||
self.drop_path_prob = drop_path_prob
|
||||
|
||||
def auxiliary_param(self):
|
||||
if self.auxiliary_head is None: return []
|
||||
else: return list( self.auxiliary_head.parameters() )
|
||||
|
||||
def forward(self, input):
|
||||
s0 = self.stem0(input)
|
||||
s1 = self.stem1(s0)
|
||||
for i, cell in enumerate(self.cells):
|
||||
s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
|
||||
#print ('{:} : {:} - {:}'.format(i, s0.size(), s1.size()))
|
||||
if i == 2 * self._layers // 3:
|
||||
if self.auxiliary_head and self.training:
|
||||
logits_aux = self.auxiliary_head(s1)
|
||||
out = self.global_pooling(s1)
|
||||
logits = self.classifier(out.view(out.size(0), -1))
|
||||
if self.auxiliary_head and self.training:
|
||||
return logits, logits_aux
|
||||
else:
|
||||
return logits
|
27
lib/nas/SE_Module.py
Normal file
@ -0,0 +1,27 @@
|
||||
import torch
|
||||
import torch.nn as nn
import torch.nn.functional as F  # needed for F.avg_pool2d in SqEx.forward
|
||||
# Squeeze and Excitation module
|
||||
|
||||
class SqEx(nn.Module):
|
||||
|
||||
def __init__(self, n_features, reduction=16):
|
||||
super(SqEx, self).__init__()
|
||||
|
||||
if n_features % reduction != 0:
|
||||
raise ValueError('n_features must be divisible by reduction (default = 16)')
|
||||
|
||||
self.linear1 = nn.Linear(n_features, n_features // reduction, bias=True)
|
||||
self.nonlin1 = nn.ReLU(inplace=True)
|
||||
self.linear2 = nn.Linear(n_features // reduction, n_features, bias=True)
|
||||
self.nonlin2 = nn.Sigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
y = F.avg_pool2d(x, kernel_size=x.size()[2:4])
|
||||
y = y.permute(0, 2, 3, 1)
|
||||
y = self.nonlin1(self.linear1(y))
|
||||
y = self.nonlin2(self.linear2(y))
|
||||
y = y.permute(0, 3, 1, 2)
|
||||
y = x * y
|
||||
return y
|
||||
|
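A hedged usage sketch for the SqEx block above; the shapes are illustrative.

import torch

se = SqEx(n_features=64, reduction=16)
x  = torch.randn(2, 64, 32, 32)    # (batch, channels, height, width)
y  = se(x)                         # channel-wise re-weighted features
assert y.shape == x.shape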
18
lib/nas/__init__.py
Normal file
@ -0,0 +1,18 @@
|
||||
from .model_search import Network
|
||||
from .model_search_v1 import NetworkV1
|
||||
from .model_search_f1 import NetworkF1
|
||||
# acceleration model
|
||||
from .model_search_f1_acc2 import NetworkFACC1
|
||||
from .model_search_acc2 import NetworkACC2
|
||||
from .model_search_v3 import NetworkV3
|
||||
from .model_search_v4 import NetworkV4
|
||||
from .model_search_v5 import NetworkV5
|
||||
from .CifarNet import NetworkCIFAR
|
||||
from .ImageNet import NetworkImageNet
|
||||
|
||||
# genotypes
|
||||
from .genotypes import DARTS_V1, DARTS_V2
|
||||
from .genotypes import NASNet, PNASNet, AmoebaNet, ENASNet
|
||||
from .genotypes import DMS_V1, DMS_F1, GDAS_CC
|
||||
|
||||
from .construct_utils import return_alphas_str
|
151
lib/nas/construct_utils.py
Normal file
@ -0,0 +1,151 @@
|
||||
import random
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN, Identity
|
||||
|
||||
|
||||
def random_select(length, ratio):
|
||||
clist = []
|
||||
index = random.randint(0, length-1)
|
||||
for i in range(length):
|
||||
if i == index or random.random() < ratio:
|
||||
clist.append( 1 )
|
||||
else:
|
||||
clist.append( 0 )
|
||||
return clist
|
||||
|
||||
|
||||
def all_select(length):
|
||||
return [1 for i in range(length)]
|
||||
|
||||
|
||||
def drop_path(x, drop_prob):
|
||||
if drop_prob > 0.:
|
||||
keep_prob = 1. - drop_prob
|
||||
mask = x.new_zeros(x.size(0), 1, 1, 1)
|
||||
mask = mask.bernoulli_(keep_prob)
|
||||
x.div_(keep_prob)
|
||||
x.mul_(mask)
|
||||
return x
|
||||
|
||||
|
||||
def return_alphas_str(basemodel):
|
||||
string = 'normal : {:}'.format( F.softmax(basemodel.alphas_normal, dim=-1) )
|
||||
if hasattr(basemodel, 'alphas_reduce'):
|
||||
string = string + '\nreduce : {:}'.format( F.softmax(basemodel.alphas_reduce, dim=-1) )
|
||||
return string
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
print(C_prev_prev, C_prev, C)
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
|
||||
|
||||
if reduction:
|
||||
op_names, indices, values = zip(*genotype.reduce)
|
||||
concat = genotype.reduce_concat
|
||||
else:
|
||||
op_names, indices, values = zip(*genotype.normal)
|
||||
concat = genotype.normal_concat
|
||||
self._compile(C, op_names, indices, values, concat, reduction)
|
||||
|
||||
def _compile(self, C, op_names, indices, values, concat, reduction):
|
||||
assert len(op_names) == len(indices)
|
||||
self._steps = len(op_names) // 2
|
||||
self._concat = concat
|
||||
self.multiplier = len(concat)
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for name, index in zip(op_names, indices):
|
||||
stride = 2 if reduction and index < 2 else 1
|
||||
op = OPS[name](C, stride, True)
|
||||
self._ops.append( op )
|
||||
self._indices = indices
|
||||
self._values = values
|
||||
|
||||
def forward(self, s0, s1, drop_prob):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
states = [s0, s1]
|
||||
for i in range(self._steps):
|
||||
h1 = states[self._indices[2*i]]
|
||||
h2 = states[self._indices[2*i+1]]
|
||||
op1 = self._ops[2*i]
|
||||
op2 = self._ops[2*i+1]
|
||||
h1 = op1(h1)
|
||||
h2 = op2(h2)
|
||||
if self.training and drop_prob > 0.:
|
||||
if not isinstance(op1, Identity):
|
||||
h1 = drop_path(h1, drop_prob)
|
||||
if not isinstance(op2, Identity):
|
||||
h2 = drop_path(h2, drop_prob)
|
||||
|
||||
s = h1 + h2
|
||||
|
||||
states += [s]
|
||||
return torch.cat([states[i] for i in self._concat], dim=1)
|
||||
|
||||
|
||||
|
||||
class Transition(nn.Module):
|
||||
|
||||
def __init__(self, C_prev_prev, C_prev, C, reduction_prev, multiplier=4):
|
||||
super(Transition, self).__init__()
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
|
||||
self.multiplier = multiplier
|
||||
|
||||
self.reduction = True
|
||||
self.ops1 = nn.ModuleList(
|
||||
[nn.Sequential(
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
|
||||
nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
|
||||
nn.BatchNorm2d(C, affine=True),
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(C, affine=True)),
|
||||
nn.Sequential(
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
|
||||
nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
|
||||
nn.BatchNorm2d(C, affine=True),
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(C, affine=True))])
|
||||
|
||||
self.ops2 = nn.ModuleList(
|
||||
[nn.Sequential(
|
||||
nn.MaxPool2d(3, stride=1, padding=1),
|
||||
nn.BatchNorm2d(C, affine=True)),
|
||||
nn.Sequential(
|
||||
nn.MaxPool2d(3, stride=2, padding=1),
|
||||
nn.BatchNorm2d(C, affine=True))])
|
||||
|
||||
|
||||
def forward(self, s0, s1, drop_prob = -1):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
X0 = self.ops1[0] (s0)
|
||||
X1 = self.ops1[1] (s1)
|
||||
if self.training and drop_prob > 0.:
|
||||
X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)
|
||||
|
||||
X2 = self.ops2[0] (X0+X1)
|
||||
X3 = self.ops2[1] (s1)
|
||||
if self.training and drop_prob > 0.:
|
||||
X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
|
||||
return torch.cat([X0, X1, X2, X3], dim=1)
|
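A quick sketch of what drop_path does to a batch (values illustrative): each sample is either zeroed out or rescaled by 1/keep_prob, so the expected value of every element is preserved.

import torch

x   = torch.ones(4, 1, 1, 1)                 # four samples in a batch
out = drop_path(x.clone(), drop_prob=0.5)    # keep_prob = 0.5
print(out.view(-1))                          # each entry is either 0.0 or 2.0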
203
lib/nas/genotypes.py
Normal file
@ -0,0 +1,203 @@
|
||||
from collections import namedtuple
|
||||
|
||||
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
|
||||
|
||||
PRIMITIVES = [
|
||||
'none',
|
||||
'max_pool_3x3',
|
||||
'avg_pool_3x3',
|
||||
'skip_connect',
|
||||
'sep_conv_3x3',
|
||||
'sep_conv_5x5',
|
||||
'dil_conv_3x3',
|
||||
'dil_conv_5x5'
|
||||
]
|
||||
|
||||
NASNet = Genotype(
|
||||
normal = [
|
||||
('sep_conv_5x5', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('sep_conv_5x5', 0, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('avg_pool_3x3', 0, 1.0),
|
||||
('avg_pool_3x3', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('skip_connect', 1, 1.0),
|
||||
],
|
||||
normal_concat = [2, 3, 4, 5, 6],
|
||||
reduce = [
|
||||
('sep_conv_5x5', 1, 1.0),
|
||||
('sep_conv_7x7', 0, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('sep_conv_7x7', 0, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0),
|
||||
('sep_conv_5x5', 0, 1.0),
|
||||
('skip_connect', 3, 1.0),
|
||||
('avg_pool_3x3', 2, 1.0),
|
||||
('sep_conv_3x3', 2, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
],
|
||||
reduce_concat = [4, 5, 6],
|
||||
)
|
||||
|
||||
AmoebaNet = Genotype(
|
||||
normal = [
|
||||
('avg_pool_3x3', 0, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('sep_conv_5x5', 2, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('avg_pool_3x3', 3, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('skip_connect', 1, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0),
|
||||
],
|
||||
normal_concat = [4, 5, 6],
|
||||
reduce = [
|
||||
('avg_pool_3x3', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('sep_conv_7x7', 2, 1.0),
|
||||
('sep_conv_7x7', 0, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('conv_7x1_1x7', 0, 1.0),
|
||||
('sep_conv_3x3', 5, 1.0),
|
||||
],
|
||||
reduce_concat = [3, 4, 6]
|
||||
)
|
||||
|
||||
DARTS_V1 = Genotype(
|
||||
normal=[
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('skip_connect', 2, 1.0)],
|
||||
normal_concat=[2, 3, 4, 5],
|
||||
reduce=[
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('skip_connect', 2, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('skip_connect', 2, 1.0),
|
||||
('skip_connect', 2, 1.0),
|
||||
('avg_pool_3x3', 0, 1.0)],
|
||||
reduce_concat=[2, 3, 4, 5]
|
||||
)
|
||||
|
||||
DARTS_V2 = Genotype(
|
||||
normal=[
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('dil_conv_3x3', 2, 1.0)],
|
||||
normal_concat=[2, 3, 4, 5],
|
||||
reduce=[
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('skip_connect', 2, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('skip_connect', 2, 1.0),
|
||||
('skip_connect', 2, 1.0),
|
||||
('max_pool_3x3', 1, 1.0)],
|
||||
reduce_concat=[2, 3, 4, 5]
|
||||
)
|
||||
|
||||
PNASNet = Genotype(
|
||||
normal = [
|
||||
('sep_conv_5x5', 0, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('sep_conv_7x7', 1, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('sep_conv_5x5', 1, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 4, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('skip_connect', 1, 1.0),
|
||||
],
|
||||
normal_concat = [2, 3, 4, 5, 6],
|
||||
reduce = [
|
||||
('sep_conv_5x5', 0, 1.0),
|
||||
('max_pool_3x3', 0, 1.0),
|
||||
('sep_conv_7x7', 1, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('sep_conv_5x5', 1, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 4, 1.0),
|
||||
('max_pool_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('skip_connect', 1, 1.0),
|
||||
],
|
||||
reduce_concat = [2, 3, 4, 5, 6],
|
||||
)
|
||||
|
||||
# https://arxiv.org/pdf/1802.03268.pdf
|
||||
ENASNet = Genotype(
|
||||
normal = [
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('skip_connect', 1, 1.0),
|
||||
('sep_conv_5x5', 1, 1.0),
|
||||
('skip_connect', 0, 1.0),
|
||||
('avg_pool_3x3', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('sep_conv_3x3', 0, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0),
|
||||
('sep_conv_5x5', 1, 1.0),
|
||||
('avg_pool_3x3', 0, 1.0),
|
||||
],
|
||||
normal_concat = [2, 3, 4, 5, 6],
|
||||
reduce = [
|
||||
('sep_conv_5x5', 0, 1.0),
|
||||
('sep_conv_3x3', 1, 1.0), # 2
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0), # 3
|
||||
('sep_conv_3x3', 1, 1.0),
|
||||
('avg_pool_3x3', 1, 1.0), # 4
|
||||
('avg_pool_3x3', 1, 1.0),
|
||||
('sep_conv_5x5', 4, 1.0), # 5
|
||||
('sep_conv_3x3', 5, 1.0),
|
||||
('sep_conv_5x5', 0, 1.0),
|
||||
],
|
||||
reduce_concat = [2, 3, 4, 5, 6],
|
||||
)
|
||||
|
||||
DARTS = DARTS_V2
|
||||
|
||||
# Search by normal and reduce
|
||||
DMS_V1 = Genotype(
|
||||
normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
|
||||
normal_concat=range(2, 6),
|
||||
reduce=[('sep_conv_5x5', 0, 0.12862831354141235), ('sep_conv_3x3', 1, 0.12783904373645782), ('sep_conv_5x5', 2, 0.12725995481014252), ('sep_conv_5x5', 1, 0.12705285847187042), ('dil_conv_5x5', 2, 0.12797553837299347), ('sep_conv_3x3', 1, 0.12737272679805756), ('sep_conv_5x5', 0, 0.12833961844444275), ('sep_conv_5x5', 1, 0.12758426368236542)],
|
||||
reduce_concat=range(2, 6)
|
||||
)
|
||||
|
||||
# Search by normal and fixing reduction
|
||||
DMS_F1 = Genotype(
|
||||
normal=[('skip_connect', 0, 0.16), ('skip_connect', 1, 0.13), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.16), ('sep_conv_3x3', 2, 0.15)],
|
||||
normal_concat=[2, 3, 4, 5],
|
||||
reduce=None,
|
||||
reduce_concat=[2, 3, 4, 5],
|
||||
)
|
||||
|
||||
# Combine DMS_V1 and DMS_F1
|
||||
GDAS_CC = Genotype(
|
||||
normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
|
||||
normal_concat=range(2, 6),
|
||||
reduce=None,
|
||||
reduce_concat=range(2, 6)
|
||||
)
|
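Each genotype entry is an (op_name, input_index, weight) triple; a short sketch of how they are unpacked, mirroring Cell._compile in construct_utils.py:

op_names, indices, values = zip(*DARTS_V2.normal)
print(op_names[:2])                  # ('sep_conv_3x3', 'sep_conv_3x3')
print(indices[:2])                   # (0, 1): which previous states each op consumes
print(list(DARTS_V2.normal_concat))  # [2, 3, 4, 5]: states concatenated as the cell output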
19
lib/nas/head_utils.py
Normal file
@ -0,0 +1,19 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class ImageNetHEAD(nn.Sequential):
|
||||
def __init__(self, C, stride=2):
|
||||
super(ImageNetHEAD, self).__init__()
|
||||
self.add_module('conv1', nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False))
|
||||
self.add_module('bn1' , nn.BatchNorm2d(C // 2))
|
||||
self.add_module('relu1', nn.ReLU(inplace=True))
|
||||
self.add_module('conv2', nn.Conv2d(C // 2, C, kernel_size=3, stride=stride, padding=1, bias=False))
|
||||
self.add_module('bn2' , nn.BatchNorm2d(C))
|
||||
|
||||
|
||||
class CifarHEAD(nn.Sequential):
|
||||
def __init__(self, C):
|
||||
super(CifarHEAD, self).__init__()
|
||||
self.add_module('conv', nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False))
|
||||
self.add_module('bn', nn.BatchNorm2d(C))
|
166
lib/nas/model_search.py
Normal file
@ -0,0 +1,166 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
from .head_utils import CifarHEAD, ImageNetHEAD
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights):
|
||||
return sum(w * op(x) for w, op in zip(weights, self._ops))
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
for j, h in enumerate(states):
|
||||
x = self._ops[offset+j](h, weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class Network(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3, head='cifar'):
|
||||
super(Network, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
if head == 'cifar':
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
elif head == 'imagenet':
|
||||
self.stem = ImageNetHEAD(C_curr, stride=1)
|
||||
else:
|
||||
raise ValueError('Invalid head : {:}'.format(head))
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_tau(self, tau):
|
||||
return -1
|
||||
|
||||
def get_tau(self):
|
||||
return -1
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal, self.alphas_reduce]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
weights = F.softmax(self.alphas_reduce, dim=-1)
|
||||
else:
|
||||
weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=gene_reduce, reduce_concat=concat
|
||||
)
|
||||
return genotype
|
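A minimal search-network sketch (sizes are illustrative, not the settings used in the experiments):

import torch

model  = Network(C=16, num_classes=10, layers=8)
logits = model(torch.randn(2, 3, 32, 32))   # soft-mixed forward pass over all candidate ops
arch_p = model.arch_parameters()            # [alphas_normal, alphas_reduce]
base_p = model.base_parameters()            # stem / cell / classifier weights
print(model.genotype())                     # discretized architecture from the current alphas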
180
lib/nas/model_search_acc2.py
Normal file
@ -0,0 +1,180 @@
|
||||
# gumbel softmax
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights, cpu_weights):
|
||||
use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
|
||||
if use_sum > 3:
|
||||
return sum(w * op(x) for w, op in zip(weights, self._ops))
|
||||
else:
|
||||
clist = []
|
||||
for j, cpu_weight in enumerate(cpu_weights):
|
||||
if abs(cpu_weight) > 1e-10:
|
||||
clist.append( weights[j] * self._ops[j](x) )
|
||||
assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
|
||||
return sum(clist)
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
cpu_weights = weights.tolist()
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
for j, h in enumerate(states):
|
||||
x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkACC2(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkACC2, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
self.tau = 5
|
||||
self.use_gumbel = True
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_gumbel(self, use_gumbel):
|
||||
self.use_gumbel = use_gumbel
|
||||
|
||||
def set_tau(self, tau):
|
||||
self.tau = tau
|
||||
|
||||
def get_tau(self):
|
||||
return self.tau
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal, self.alphas_reduce]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
|
||||
else : weights = F.softmax(self.alphas_reduce, dim=-1)
|
||||
else:
|
||||
if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
|
||||
else : weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=gene_reduce, reduce_concat=concat
|
||||
)
|
||||
return genotype
|
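For the Gumbel-softmax variant above, the temperature is typically annealed across epochs so the sampled architecture weights become progressively sharper; the schedule below is only illustrative, not necessarily the one used for the reported results.

model = NetworkACC2(C=16, num_classes=10, layers=8)
tau_max, tau_min, epochs = 10.0, 0.1, 50
for epoch in range(epochs):
    model.set_tau(tau_max - (tau_max - tau_min) * epoch / (epochs - 1))
    # ... one epoch of weight / architecture updates would go here ...
print(model.get_tau())   # ends near tau_min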
167
lib/nas/model_search_f1.py
Normal file
@ -0,0 +1,167 @@
|
||||
# share parameters
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .construct_utils import Transition
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights):
|
||||
return sum(w * op(x) for w, op in zip(weights, self._ops))
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
for j, h in enumerate(states):
|
||||
x = self._ops[offset+j](h, weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkF1(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkF1, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
if reduction:
|
||||
cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
|
||||
else:
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
#self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
#nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_tau(self, tau):
|
||||
return -1
|
||||
|
||||
def get_tau(self):
|
||||
return -1
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
s0, s1 = s1, cell(s0, s1)
|
||||
else:
|
||||
weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
#print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
#gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=None , reduce_concat=concat
|
||||
)
|
||||
return genotype
|
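NetworkF1 only searches the normal cell; reduction positions use the fixed Transition block from construct_utils.py. A small sanity-check sketch with illustrative sizes:

import torch

model = NetworkF1(C=16, num_classes=10, layers=8)
assert len(model.arch_parameters()) == 1          # only alphas_normal is learned
logits = model(torch.randn(2, 3, 32, 32))
print(model.genotype())                           # reduce is None in the resulting genotype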
183
lib/nas/model_search_f1_acc2.py
Normal file
@ -0,0 +1,183 @@
|
||||
# share parameters
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .construct_utils import Transition
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights, cpu_weights):
|
||||
use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
|
||||
if use_sum > 3:
|
||||
return sum(w * op(x) for w, op in zip(weights, self._ops))
|
||||
else:
|
||||
clist = []
|
||||
for j, cpu_weight in enumerate(cpu_weights):
|
||||
if abs(cpu_weight) > 1e-10:
|
||||
clist.append( weights[j] * self._ops[j](x) )
|
||||
assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
|
||||
return sum(clist)
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
cpu_weights = weights.tolist()
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
for j, h in enumerate(states):
|
||||
x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkFACC1(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkFACC1, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
self.tau = 5
|
||||
self.use_gumbel = True
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
if reduction:
|
||||
cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
|
||||
else:
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
#self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
#nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_gumbel(self, use_gumbel):
|
||||
self.use_gumbel = use_gumbel
|
||||
|
||||
def set_tau(self, tau):
|
||||
self.tau = tau
|
||||
|
||||
def get_tau(self):
|
||||
return self.tau
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
s0, s1 = s1, cell(s0, s1)
|
||||
else:
|
||||
if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
|
||||
else : weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
#print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
#gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=None , reduce_concat=concat
|
||||
)
|
||||
return genotype
|
161
lib/nas/model_search_v1.py
Normal file
@ -0,0 +1,161 @@
|
||||
# share parameters
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights):
|
||||
return sum(w * op(x) for w, op in zip(weights, self._ops))
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
for j, h in enumerate(states):
|
||||
x = self._ops[offset+j](h, weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkV1(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkV1, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
#self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
#nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_tau(self, tau):
|
||||
return -1
|
||||
|
||||
def get_tau(self):
|
||||
return -1
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
else:
|
||||
weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=gene_reduce, reduce_concat=concat
|
||||
)
|
||||
return genotype
|
171
lib/nas/model_search_v3.py
Normal file
@ -0,0 +1,171 @@
|
||||
# random selection
|
||||
import torch
|
||||
import random
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
from .construct_utils import random_select, all_select
|
||||
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights, cpu_weights):
|
||||
return sum(w * op(x) for w, op in zip(weights, self._ops))
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
cpu_weights = weights.tolist()
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
if i == 0:
|
||||
indicator = all_select( len(states) )
|
||||
else:
|
||||
indicator = random_select( len(states), 0.5 )
|
||||
for j, h in enumerate(states):
|
||||
if indicator[j] == 0: continue
|
||||
x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist) / sum(indicator)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkV3(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkV3, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
self.tau = 5
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_tau(self, tau):
|
||||
self.tau = tau
|
||||
|
||||
def get_tau(self):
|
||||
return self.tau
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal, self.alphas_reduce]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
weights = F.softmax(self.alphas_reduce, dim=-1)
|
||||
else:
|
||||
weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=gene_reduce, reduce_concat=concat
|
||||
)
|
||||
return genotype
|
176
lib/nas/model_search_v4.py
Normal file
176
lib/nas/model_search_v4.py
Normal file
@ -0,0 +1,176 @@
|
||||
# random selection
|
||||
import torch
|
||||
import random
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
from .construct_utils import random_select, all_select
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights, cpu_weights):
|
||||
indicators = random_select( len(cpu_weights), 0.5 )
|
||||
clist, ws = [], []
|
||||
for w, indicator, op in zip(weights, indicators, self._ops):
|
||||
if indicator:
|
||||
clist.append( w * op(x) )
|
||||
ws.append( w )
|
||||
return sum(clist) / sum(ws)
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
cpu_weights = weights.tolist()
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
if i == 0:
|
||||
indicator = all_select( len(states) )
|
||||
else:
|
||||
indicator = random_select( len(states), 0.5 )
|
||||
for j, h in enumerate(states):
|
||||
if indicator[j] == 0: continue
|
||||
x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist) / sum(indicator)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkV4(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkV4, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
self.tau = 5
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_tau(self, tau):
|
||||
self.tau = tau
|
||||
|
||||
def get_tau(self):
|
||||
return self.tau
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal, self.alphas_reduce]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
weights = F.softmax(self.alphas_reduce, dim=-1)
|
||||
else:
|
||||
weights = F.softmax(self.alphas_normal, dim=-1)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=gene_reduce, reduce_concat=concat
|
||||
)
|
||||
return genotype
|
174
lib/nas/model_search_v5.py
Normal file
174
lib/nas/model_search_v5.py
Normal file
@ -0,0 +1,174 @@
|
||||
# gumbel softmax
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
from .operations import OPS, FactorizedReduce, ReLUConvBN
|
||||
from .genotypes import PRIMITIVES, Genotype
|
||||
from .construct_utils import random_select, all_select
|
||||
|
||||
|
||||
class MixedOp(nn.Module):
|
||||
|
||||
def __init__(self, C, stride):
|
||||
super(MixedOp, self).__init__()
|
||||
self._ops = nn.ModuleList()
|
||||
for primitive in PRIMITIVES:
|
||||
op = OPS[primitive](C, stride, False)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, x, weights, cpu_weights):
|
||||
clist = []
|
||||
for j, cpu_weight in enumerate(cpu_weights):
|
||||
if abs(cpu_weight) > 1e-10:
|
||||
clist.append( weights[j] * self._ops[j](x) )
|
||||
assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
|
||||
if len(clist) == 1: return clist[0]
|
||||
else : return sum(clist)
|
||||
|
||||
|
||||
class Cell(nn.Module):
|
||||
|
||||
def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
|
||||
super(Cell, self).__init__()
|
||||
self.reduction = reduction
|
||||
|
||||
if reduction_prev:
|
||||
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
|
||||
else:
|
||||
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
|
||||
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
self._ops = nn.ModuleList()
|
||||
for i in range(self._steps):
|
||||
for j in range(2+i):
|
||||
stride = 2 if reduction and j < 2 else 1
|
||||
op = MixedOp(C, stride)
|
||||
self._ops.append(op)
|
||||
|
||||
def forward(self, s0, s1, weights):
|
||||
s0 = self.preprocess0(s0)
|
||||
s1 = self.preprocess1(s1)
|
||||
|
||||
cpu_weights = weights.tolist()
|
||||
states = [s0, s1]
|
||||
offset = 0
|
||||
for i in range(self._steps):
|
||||
clist = []
|
||||
if i == 0: indicator = all_select( len(states) )
|
||||
else : indicator = random_select( len(states), 0.6 )
|
||||
|
||||
for j, h in enumerate(states):
|
||||
if indicator[j] == 0: continue
|
||||
x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
|
||||
clist.append( x )
|
||||
s = sum(clist)
|
||||
offset += len(states)
|
||||
states.append(s)
|
||||
|
||||
return torch.cat(states[-self._multiplier:], dim=1)
|
||||
|
||||
|
||||
class NetworkV5(nn.Module):
|
||||
|
||||
def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
|
||||
super(NetworkV5, self).__init__()
|
||||
self._C = C
|
||||
self._num_classes = num_classes
|
||||
self._layers = layers
|
||||
self._steps = steps
|
||||
self._multiplier = multiplier
|
||||
|
||||
C_curr = stem_multiplier*C
|
||||
self.stem = nn.Sequential(
|
||||
nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(C_curr)
|
||||
)
|
||||
|
||||
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
|
||||
reduction_prev, cells = False, []
|
||||
for i in range(layers):
|
||||
if i in [layers//3, 2*layers//3]:
|
||||
C_curr *= 2
|
||||
reduction = True
|
||||
else:
|
||||
reduction = False
|
||||
cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
|
||||
reduction_prev = reduction
|
||||
cells.append( cell )
|
||||
C_prev_prev, C_prev = C_prev, multiplier*C_curr
|
||||
self.cells = nn.ModuleList(cells)
|
||||
|
||||
self.global_pooling = nn.AdaptiveAvgPool2d(1)
|
||||
self.classifier = nn.Linear(C_prev, num_classes)
|
||||
self.tau = 5
|
||||
|
||||
# initialize architecture parameters
|
||||
k = sum(1 for i in range(self._steps) for n in range(2+i))
|
||||
num_ops = len(PRIMITIVES)
|
||||
|
||||
self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
|
||||
self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
|
||||
nn.init.normal_(self.alphas_normal, 0, 0.001)
|
||||
nn.init.normal_(self.alphas_reduce, 0, 0.001)
|
||||
|
||||
def set_tau(self, tau):
|
||||
self.tau = tau
|
||||
|
||||
def get_tau(self):
|
||||
return self.tau
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.alphas_normal, self.alphas_reduce]
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.stem.parameters()) + list(self.cells.parameters())
|
||||
lists += list(self.global_pooling.parameters())
|
||||
lists += list(self.classifier.parameters())
|
||||
return lists
|
||||
|
||||
def forward(self, inputs):
|
||||
batch, C, H, W = inputs.size()
|
||||
s0 = s1 = self.stem(inputs)
|
||||
for i, cell in enumerate(self.cells):
|
||||
if cell.reduction:
|
||||
weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
|
||||
else:
|
||||
weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
|
||||
s0, s1 = s1, cell(s0, s1, weights)
|
||||
out = self.global_pooling(s1)
|
||||
out = out.view(batch, -1)
|
||||
logits = self.classifier(out)
|
||||
return logits
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(weights):
|
||||
gene, n, start = [], 2, 0
|
||||
for i in range(self._steps):
|
||||
end = start + n
|
||||
W = weights[start:end].copy()
|
||||
edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
|
||||
for j in edges:
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
if k != PRIMITIVES.index('none'):
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
|
||||
start = end
|
||||
n += 1
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
|
||||
gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())
|
||||
|
||||
concat = range(2+self._steps-self._multiplier, self._steps+2)
|
||||
genotype = Genotype(
|
||||
normal=gene_normal, normal_concat=concat,
|
||||
reduce=gene_reduce, reduce_concat=concat
|
||||
)
|
||||
return genotype
|
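The v5 model samples architecture weights with a hard Gumbel-Softmax: `F.gumbel_softmax(alphas, tau, True)` returns one-hot rows, so the `MixedOp.forward` above evaluates only the single sampled operation per edge. A minimal sketch of that behaviour (illustrative only, values are arbitrary):

```
import torch
import torch.nn.functional as F

logits = torch.zeros(3, 5)                        # 3 edges, 5 candidate operations
probs  = F.gumbel_softmax(logits, tau=5.0, hard=True)
print(probs.sum(dim=-1))                          # each row sums to 1
print((probs != 0).sum(dim=-1))                   # exactly one non-zero entry per row
```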
122
lib/nas/operations.py
Normal file
122
lib/nas/operations.py
Normal file
@ -0,0 +1,122 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
OPS = {
|
||||
'none' : lambda C, stride, affine: Zero(stride),
|
||||
'avg_pool_3x3' : lambda C, stride, affine: nn.Sequential(
|
||||
nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
|
||||
nn.BatchNorm2d(C, affine=False) ),
|
||||
'max_pool_3x3' : lambda C, stride, affine: nn.Sequential(
|
||||
nn.MaxPool2d(3, stride=stride, padding=1),
|
||||
nn.BatchNorm2d(C, affine=False) ),
|
||||
'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
|
||||
'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
|
||||
'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
|
||||
'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
|
||||
'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
|
||||
'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
|
||||
'conv_7x1_1x7' : lambda C, stride, affine: Conv717(C, C, stride, affine),
|
||||
}
|
||||
|
||||
class Conv717(nn.Module):
|
||||
|
||||
def __init__(self, C_in, C_out, stride, affine):
|
||||
super(Conv717, self).__init__()
|
||||
self.op = nn.Sequential(
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C_in , C_out, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
|
||||
nn.Conv2d(C_out, C_out, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
|
||||
nn.BatchNorm2d(C_out, affine=affine)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.op(x)
|
||||
|
||||
|
||||
class ReLUConvBN(nn.Module):
|
||||
|
||||
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
|
||||
super(ReLUConvBN, self).__init__()
|
||||
self.op = nn.Sequential(
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
|
||||
nn.BatchNorm2d(C_out, affine=affine)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.op(x)
|
||||
|
||||
|
||||
class DilConv(nn.Module):
|
||||
|
||||
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
|
||||
super(DilConv, self).__init__()
|
||||
self.op = nn.Sequential(
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False),
|
||||
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(C_out, affine=affine),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.op(x)
|
||||
|
||||
|
||||
class SepConv(nn.Module):
|
||||
|
||||
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
|
||||
super(SepConv, self).__init__()
|
||||
self.op = nn.Sequential(
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
|
||||
nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(C_in, affine=affine),
|
||||
nn.ReLU(inplace=False),
|
||||
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, groups=C_in, bias=False),
|
||||
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(C_out, affine=affine),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.op(x)
|
||||
|
||||
|
||||
class Identity(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(Identity, self).__init__()
|
||||
|
||||
def forward(self, x):
|
||||
return x
|
||||
|
||||
|
||||
class Zero(nn.Module):
|
||||
|
||||
def __init__(self, stride):
|
||||
super(Zero, self).__init__()
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
if self.stride == 1:
|
||||
return x.mul(0.)
|
||||
return x[:,:,::self.stride,::self.stride].mul(0.)
|
||||
|
||||
|
||||
class FactorizedReduce(nn.Module):
|
||||
|
||||
def __init__(self, C_in, C_out, affine=True):
|
||||
super(FactorizedReduce, self).__init__()
|
||||
assert C_out % 2 == 0
|
||||
self.relu = nn.ReLU(inplace=False)
|
||||
self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
|
||||
self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
|
||||
self.bn = nn.BatchNorm2d(C_out, affine=affine)
|
||||
self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
|
||||
|
||||
|
||||
def forward(self, x):
|
||||
x = self.relu(x)
|
||||
y = self.pad(x)
|
||||
out = torch.cat([self.conv_1(x), self.conv_2(y[:,:,1:,1:])], dim=1)
|
||||
out = self.bn(out)
|
||||
return out
|
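Every entry in `OPS` is a factory keyed by a primitive name and called with `(C, stride, affine)`. A brief usage sketch (illustrative only; assumes the repository root is on `PYTHONPATH` and the shapes are just examples):

```
import torch
from lib.nas.operations import OPS

op = OPS['sep_conv_3x3'](16, 1, False)      # C=16, stride=1, affine=False -> a SepConv block
y  = op(torch.randn(2, 16, 32, 32))         # separable conv keeps channels and spatial size
assert y.shape == (2, 16, 32, 32)
```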
9
lib/nas_rnn/__init__.py
Normal file
9
lib/nas_rnn/__init__.py
Normal file
@ -0,0 +1,9 @@
|
||||
# utils
|
||||
from .utils import batchify, get_batch, repackage_hidden
|
||||
# models
|
||||
from .model_search import RNNModelSearch
|
||||
from .model_search import DARTSCellSearch
|
||||
from .basemodel import DARTSCell, RNNModel
|
||||
# architecture
|
||||
from .genotypes import DARTS_V1, DARTS_V2
|
||||
from .genotypes import GDAS
|
181
lib/nas_rnn/basemodel.py
Normal file
181
lib/nas_rnn/basemodel.py
Normal file
@ -0,0 +1,181 @@
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from .genotypes import STEPS
|
||||
from .utils import mask2d, LockedDropout, embedded_dropout
|
||||
|
||||
|
||||
INITRANGE = 0.04
|
||||
|
||||
def none_func(x):
|
||||
return x * 0
|
||||
|
||||
|
||||
class DARTSCell(nn.Module):
|
||||
|
||||
def __init__(self, ninp, nhid, dropouth, dropoutx, genotype):
|
||||
super(DARTSCell, self).__init__()
|
||||
self.nhid = nhid
|
||||
self.dropouth = dropouth
|
||||
self.dropoutx = dropoutx
|
||||
self.genotype = genotype
|
||||
|
||||
# genotype is None when doing arch search
|
||||
steps = len(self.genotype.recurrent) if self.genotype is not None else STEPS
|
||||
self._W0 = nn.Parameter(torch.Tensor(ninp+nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE))
|
||||
self._Ws = nn.ParameterList([
|
||||
nn.Parameter(torch.Tensor(nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE)) for i in range(steps)
|
||||
])
|
||||
|
||||
def forward(self, inputs, hidden, arch_probs):
|
||||
T, B = inputs.size(0), inputs.size(1)
|
||||
|
||||
if self.training:
|
||||
x_mask = mask2d(B, inputs.size(2), keep_prob=1.-self.dropoutx)
|
||||
h_mask = mask2d(B, hidden.size(2), keep_prob=1.-self.dropouth)
|
||||
else:
|
||||
x_mask = h_mask = None
|
||||
|
||||
hidden = hidden[0]
|
||||
hiddens = []
|
||||
for t in range(T):
|
||||
hidden = self.cell(inputs[t], hidden, x_mask, h_mask, arch_probs)
|
||||
hiddens.append(hidden)
|
||||
hiddens = torch.stack(hiddens)
|
||||
return hiddens, hiddens[-1].unsqueeze(0)
|
||||
|
||||
def _compute_init_state(self, x, h_prev, x_mask, h_mask):
|
||||
if self.training:
|
||||
xh_prev = torch.cat([x * x_mask, h_prev * h_mask], dim=-1)
|
||||
else:
|
||||
xh_prev = torch.cat([x, h_prev], dim=-1)
|
||||
c0, h0 = torch.split(xh_prev.mm(self._W0), self.nhid, dim=-1)
|
||||
c0 = c0.sigmoid()
|
||||
h0 = h0.tanh()
|
||||
s0 = h_prev + c0 * (h0-h_prev)
|
||||
return s0
|
||||
|
||||
def _get_activation(self, name):
|
||||
if name == 'tanh':
|
||||
f = torch.tanh
|
||||
elif name == 'relu':
|
||||
f = torch.relu
|
||||
elif name == 'sigmoid':
|
||||
f = torch.sigmoid
|
||||
elif name == 'identity':
|
||||
f = lambda x: x
|
||||
elif name == 'none':
|
||||
f = none_func
|
||||
else:
|
||||
raise NotImplementedError
|
||||
return f
|
||||
|
||||
def cell(self, x, h_prev, x_mask, h_mask, _):
|
||||
s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
|
||||
|
||||
states = [s0]
|
||||
for i, (name, pred) in enumerate(self.genotype.recurrent):
|
||||
s_prev = states[pred]
|
||||
if self.training:
|
||||
ch = (s_prev * h_mask).mm(self._Ws[i])
|
||||
else:
|
||||
ch = s_prev.mm(self._Ws[i])
|
||||
c, h = torch.split(ch, self.nhid, dim=-1)
|
||||
c = c.sigmoid()
|
||||
fn = self._get_activation(name)
|
||||
h = fn(h)
|
||||
s = s_prev + c * (h-s_prev)
|
||||
states += [s]
|
||||
output = torch.mean(torch.stack([states[i] for i in self.genotype.concat], -1), -1)
|
||||
return output
|
||||
|
||||
|
||||
class RNNModel(nn.Module):
|
||||
"""Container module with an encoder, a recurrent module, and a decoder."""
|
||||
def __init__(self, ntoken, ninp, nhid, nhidlast,
|
||||
dropout=0.5, dropouth=0.5, dropoutx=0.5, dropouti=0.5, dropoute=0.1,
|
||||
cell_cls=None, genotype=None):
|
||||
super(RNNModel, self).__init__()
|
||||
self.lockdrop = LockedDropout()
|
||||
self.encoder = nn.Embedding(ntoken, ninp)
|
||||
|
||||
assert ninp == nhid == nhidlast
|
||||
if cell_cls == DARTSCell:
|
||||
assert genotype is not None
|
||||
rnns = [cell_cls(ninp, nhid, dropouth, dropoutx, genotype)]
|
||||
else:
|
||||
assert genotype is None
|
||||
rnns = [cell_cls(ninp, nhid, dropouth, dropoutx)]
|
||||
|
||||
self.rnns = torch.nn.ModuleList(rnns)
|
||||
self.decoder = nn.Linear(ninp, ntoken)
|
||||
self.decoder.weight = self.encoder.weight
|
||||
self.init_weights()
|
||||
self.arch_weights = None
|
||||
|
||||
self.ninp = ninp
|
||||
self.nhid = nhid
|
||||
self.nhidlast = nhidlast
|
||||
self.dropout = dropout
|
||||
self.dropouti = dropouti
|
||||
self.dropoute = dropoute
|
||||
self.ntoken = ntoken
|
||||
self.cell_cls = cell_cls
|
||||
# acceleration
|
||||
self.tau = None
|
||||
self.use_gumbel = False
|
||||
|
||||
def set_gumbel(self, use_gumbel, set_check):
|
||||
self.use_gumbel = use_gumbel
|
||||
for i, rnn in enumerate(self.rnns):
|
||||
rnn.set_check(set_check)
|
||||
|
||||
def set_tau(self, tau):
|
||||
self.tau = tau
|
||||
|
||||
def get_tau(self):
|
||||
return self.tau
|
||||
|
||||
def init_weights(self):
|
||||
self.encoder.weight.data.uniform_(-INITRANGE, INITRANGE)
|
||||
self.decoder.bias.data.fill_(0)
|
||||
self.decoder.weight.data.uniform_(-INITRANGE, INITRANGE)
|
||||
|
||||
def forward(self, input, hidden, return_h=False):
|
||||
batch_size = input.size(1)
|
||||
|
||||
emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
|
||||
emb = self.lockdrop(emb, self.dropouti)
|
||||
|
||||
raw_output = emb
|
||||
new_hidden = []
|
||||
raw_outputs = []
|
||||
outputs = []
|
||||
if self.arch_weights is None:
|
||||
arch_probs = None
|
||||
else:
|
||||
if self.use_gumbel: arch_probs = F.gumbel_softmax(self.arch_weights, self.tau, False)
|
||||
else : arch_probs = F.softmax(self.arch_weights, dim=-1)
|
||||
|
||||
for l, rnn in enumerate(self.rnns):
|
||||
current_input = raw_output
|
||||
raw_output, new_h = rnn(raw_output, hidden[l], arch_probs)
|
||||
new_hidden.append(new_h)
|
||||
raw_outputs.append(raw_output)
|
||||
hidden = new_hidden
|
||||
|
||||
output = self.lockdrop(raw_output, self.dropout)
|
||||
outputs.append(output)
|
||||
|
||||
logit = self.decoder(output.view(-1, self.ninp))
|
||||
log_prob = nn.functional.log_softmax(logit, dim=-1)
|
||||
model_output = log_prob
|
||||
model_output = model_output.view(-1, batch_size, self.ntoken)
|
||||
|
||||
if return_h: return model_output, hidden, raw_outputs, outputs
|
||||
else : return model_output, hidden
|
||||
|
||||
def init_hidden(self, bsz):
|
||||
weight = next(self.parameters()).clone()
|
||||
return [weight.new(1, bsz, self.nhid).zero_()]
|
55
lib/nas_rnn/genotypes.py
Normal file
55
lib/nas_rnn/genotypes.py
Normal file
@ -0,0 +1,55 @@
|
||||
from collections import namedtuple
|
||||
|
||||
Genotype = namedtuple('Genotype', 'recurrent concat')
|
||||
|
||||
PRIMITIVES = [
|
||||
'none',
|
||||
'tanh',
|
||||
'relu',
|
||||
'sigmoid',
|
||||
'identity'
|
||||
]
|
||||
STEPS = 8
|
||||
CONCAT = 8
|
||||
|
||||
ENAS = Genotype(
|
||||
recurrent = [
|
||||
('tanh', 0),
|
||||
('tanh', 1),
|
||||
('relu', 1),
|
||||
('tanh', 3),
|
||||
('tanh', 3),
|
||||
('relu', 3),
|
||||
('relu', 4),
|
||||
('relu', 7),
|
||||
('relu', 8),
|
||||
('relu', 8),
|
||||
('relu', 8),
|
||||
],
|
||||
concat = [2, 5, 6, 9, 10, 11]
|
||||
)
|
||||
|
||||
DARTS_V1 = Genotype(
|
||||
recurrent = [
|
||||
('relu', 0),
|
||||
('relu', 1),
|
||||
('tanh', 2),
|
||||
('relu', 3), ('relu', 4), ('identity', 1), ('relu', 5), ('relu', 1)
|
||||
],
|
||||
concat=range(1, 9)
|
||||
)
|
||||
|
||||
DARTS_V2 = Genotype(
|
||||
recurrent = [
|
||||
('sigmoid', 0), ('relu', 1), ('relu', 1),
|
||||
('identity', 1), ('tanh', 2), ('sigmoid', 5),
|
||||
('tanh', 3), ('relu', 5)
|
||||
],
|
||||
concat=range(1, 9)
|
||||
)
|
||||
|
||||
GDAS = Genotype(
|
||||
recurrent=[('relu', 0), ('relu', 0), ('identity', 1), ('relu', 1), ('tanh', 0), ('relu', 2), ('identity', 4), ('identity', 2)],
|
||||
concat=range(1, 9)
|
||||
)
|
||||
|
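Each tuple in a recurrent genotype is `(activation, index of the predecessor node)`, and `concat` lists the node states averaged to form the cell output. A short reading of the GDAS genotype above (illustrative only; assumes the repository root is on `PYTHONPATH`):

```
from lib.nas_rnn.genotypes import GDAS

# node 0 is the initial state s0; node i is built from the listed predecessor
for step, (name, pred) in enumerate(GDAS.recurrent, start=1):
    print('node {:d} = node {:d} + c * ({:s}(h) - node {:d})'.format(step, pred, name, pred))
# concat = range(1, 9): the output averages the states of nodes 1..8
```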
104
lib/nas_rnn/model_search.py
Normal file
104
lib/nas_rnn/model_search.py
Normal file
@ -0,0 +1,104 @@
|
||||
import copy, torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from collections import namedtuple
|
||||
from .genotypes import PRIMITIVES, STEPS, CONCAT, Genotype
|
||||
from .basemodel import DARTSCell, RNNModel
|
||||
|
||||
|
||||
class DARTSCellSearch(DARTSCell):
|
||||
|
||||
def __init__(self, ninp, nhid, dropouth, dropoutx):
|
||||
super(DARTSCellSearch, self).__init__(ninp, nhid, dropouth, dropoutx, genotype=None)
|
||||
self.bn = nn.BatchNorm1d(nhid, affine=False)
|
||||
self.check_zero = False
|
||||
|
||||
def set_check(self, check_zero):
|
||||
self.check_zero = check_zero
|
||||
|
||||
def cell(self, x, h_prev, x_mask, h_mask, arch_probs):
|
||||
s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
|
||||
s0 = self.bn(s0)
|
||||
if self.check_zero:
|
||||
arch_probs_cpu = arch_probs.cpu().tolist()
|
||||
#arch_probs = F.softmax(self.weights, dim=-1)
|
||||
|
||||
offset = 0
|
||||
states = s0.unsqueeze(0)
|
||||
for i in range(STEPS):
|
||||
if self.training:
|
||||
masked_states = states * h_mask.unsqueeze(0)
|
||||
else:
|
||||
masked_states = states
|
||||
ch = masked_states.view(-1, self.nhid).mm(self._Ws[i]).view(i+1, -1, 2*self.nhid)
|
||||
c, h = torch.split(ch, self.nhid, dim=-1)
|
||||
c = c.sigmoid()
|
||||
|
||||
s = torch.zeros_like(s0)
|
||||
for k, name in enumerate(PRIMITIVES):
|
||||
if name == 'none':
|
||||
continue
|
||||
fn = self._get_activation(name)
|
||||
unweighted = states + c * (fn(h) - states)
|
||||
if self.check_zero:
|
||||
INDEX, INDDX = [], []
|
||||
for jj in range(offset, offset+i+1):
|
||||
if arch_probs_cpu[jj][k] > 0:
|
||||
INDEX.append(jj)
|
||||
INDDX.append(jj-offset)
|
||||
if len(INDEX) == 0: continue
|
||||
s += torch.sum(arch_probs[INDEX, k].unsqueeze(-1).unsqueeze(-1) * unweighted[INDDX, :, :], dim=0)
|
||||
else:
|
||||
s += torch.sum(arch_probs[offset:offset+i+1, k].unsqueeze(-1).unsqueeze(-1) * unweighted, dim=0)
|
||||
s = self.bn(s)
|
||||
states = torch.cat([states, s.unsqueeze(0)], 0)
|
||||
offset += i+1
|
||||
output = torch.mean(states[-CONCAT:], dim=0)
|
||||
return output
|
||||
|
||||
|
||||
class RNNModelSearch(RNNModel):
|
||||
|
||||
def __init__(self, *args):
|
||||
super(RNNModelSearch, self).__init__(*args)
|
||||
self._args = copy.deepcopy( args )
|
||||
|
||||
k = sum(i for i in range(1, STEPS+1))
|
||||
self.arch_weights = nn.Parameter(torch.Tensor(k, len(PRIMITIVES)))
|
||||
nn.init.normal_(self.arch_weights, 0, 0.001)
|
||||
|
||||
def base_parameters(self):
|
||||
lists = list(self.lockdrop.parameters())
|
||||
lists += list(self.encoder.parameters())
|
||||
lists += list(self.rnns.parameters())
|
||||
lists += list(self.decoder.parameters())
|
||||
return lists
|
||||
|
||||
def arch_parameters(self):
|
||||
return [self.arch_weights]
|
||||
|
||||
def genotype(self):
|
||||
|
||||
def _parse(probs):
|
||||
gene = []
|
||||
start = 0
|
||||
for i in range(STEPS):
|
||||
end = start + i + 1
|
||||
W = probs[start:end].copy()
|
||||
#j = sorted(range(i + 1), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[0]
|
||||
j = sorted(range(i + 1), key=lambda x: -max(W[x][k] for k in range(len(W[x])) ))[0]
|
||||
k_best = None
|
||||
for k in range(len(W[j])):
|
||||
#if k != PRIMITIVES.index('none'):
|
||||
# if k_best is None or W[j][k] > W[j][k_best]:
|
||||
# k_best = k
|
||||
if k_best is None or W[j][k] > W[j][k_best]:
|
||||
k_best = k
|
||||
gene.append((PRIMITIVES[k_best], j))
|
||||
start = end
|
||||
return gene
|
||||
|
||||
with torch.no_grad():
|
||||
gene = _parse(F.softmax(self.arch_weights, dim=-1).cpu().numpy())
|
||||
genotype = Genotype(recurrent=gene, concat=list(range(STEPS+1)[-CONCAT:]))
|
||||
return genotype
|
66
lib/nas_rnn/utils.py
Normal file
66
lib/nas_rnn/utils.py
Normal file
@ -0,0 +1,66 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import os, shutil
|
||||
import numpy as np
|
||||
|
||||
|
||||
def repackage_hidden(h):
|
||||
if isinstance(h, torch.Tensor):
|
||||
return h.detach()
|
||||
else:
|
||||
return tuple(repackage_hidden(v) for v in h)
|
||||
|
||||
|
||||
def batchify(data, bsz, use_cuda):
|
||||
nbatch = data.size(0) // bsz
|
||||
data = data.narrow(0, 0, nbatch * bsz)
|
||||
data = data.view(bsz, -1).t().contiguous()
|
||||
if use_cuda: return data.cuda()
|
||||
else : return data
|
||||
|
||||
|
||||
def get_batch(source, i, seq_len):
|
||||
seq_len = min(seq_len, len(source) - 1 - i)
|
||||
data = source[i:i+seq_len].clone()
|
||||
target = source[i+1:i+1+seq_len].clone()
|
||||
return data, target
|
||||
|
||||
|
||||
|
||||
def embedded_dropout(embed, words, dropout=0.1, scale=None):
|
||||
if dropout:
|
||||
mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout)
|
||||
mask.requires_grad_(True)
|
||||
masked_embed_weight = mask * embed.weight
|
||||
else:
|
||||
masked_embed_weight = embed.weight
|
||||
if scale:
|
||||
masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight
|
||||
|
||||
padding_idx = embed.padding_idx
|
||||
if padding_idx is None:
|
||||
padding_idx = -1
|
||||
X = torch.nn.functional.embedding(
|
||||
words, masked_embed_weight,
|
||||
padding_idx, embed.max_norm, embed.norm_type,
|
||||
embed.scale_grad_by_freq, embed.sparse)
|
||||
return X
|
||||
|
||||
|
||||
class LockedDropout(nn.Module):
|
||||
def __init__(self):
|
||||
super(LockedDropout, self).__init__()
|
||||
|
||||
def forward(self, x, dropout=0.5):
|
||||
if not self.training or not dropout:
|
||||
return x
|
||||
m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
|
||||
mask = m.div_(1 - dropout).detach()
|
||||
mask = mask.expand_as(x)
|
||||
return mask * x
|
||||
|
||||
|
||||
def mask2d(B, D, keep_prob, cuda=True):
|
||||
m = torch.floor(torch.rand(B, D) + keep_prob) / keep_prob
|
||||
if cuda: return m.cuda()
|
||||
else : return m
|
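`batchify` reshapes a 1-D token stream into `(num_steps, bsz)` columns (dropping the remainder) and `get_batch` slices input/target windows shifted by one token. A small shape check (illustrative only; assumes the repository root is on `PYTHONPATH`):

```
import torch
from lib.nas_rnn.utils import batchify, get_batch

data = torch.arange(26)                 # a toy 1-D token stream
batched = batchify(data, 4, False)      # bsz=4, use_cuda=False
print(batched.shape)                    # torch.Size([6, 4]): 26 // 4 = 6 rows, remainder dropped
x, y = get_batch(batched, 0, 3)         # i=0, seq_len=3
print(x.shape, y.shape)                 # both torch.Size([3, 4]); y is x shifted by one step
```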
2
lib/scheduler/__init__.py
Normal file
2
lib/scheduler/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
from .utils import load_config
|
||||
from .scheduler import MultiStepLR, obtain_scheduler
|
29
lib/scheduler/scheduler.py
Normal file
29
lib/scheduler/scheduler.py
Normal file
@ -0,0 +1,29 @@
|
||||
import torch
|
||||
from bisect import bisect_right
|
||||
|
||||
|
||||
class MultiStepLR(torch.optim.lr_scheduler._LRScheduler):
|
||||
|
||||
def __init__(self, optimizer, milestones, gammas, last_epoch=-1):
|
||||
if not list(milestones) == sorted(milestones):
|
||||
raise ValueError('Milestones should be a list of'
|
||||
' increasing integers. Got {:}'.format(milestones))
|
||||
assert len(milestones) == len(gammas), '{:} vs {:}'.format(milestones, gammas)
|
||||
self.milestones = milestones
|
||||
self.gammas = gammas
|
||||
super(MultiStepLR, self).__init__(optimizer, last_epoch)
|
||||
|
||||
def get_lr(self):
|
||||
LR = 1
|
||||
for x in self.gammas[:bisect_right(self.milestones, self.last_epoch)]: LR = LR * x
|
||||
return [base_lr * LR for base_lr in self.base_lrs]
|
||||
|
||||
|
||||
def obtain_scheduler(config, optimizer):
|
||||
if config.type == 'multistep':
|
||||
scheduler = MultiStepLR(optimizer, milestones=config.milestones, gammas=config.gammas)
|
||||
elif config.type == 'cosine':
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
|
||||
else:
|
||||
raise ValueError('Unknown learning rate scheduler type : {:}'.format(config.type))
|
||||
return scheduler
|
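`MultiStepLR` multiplies the base learning rate by every gamma whose milestone has already been passed. A quick sketch of the schedule it produces (hypothetical optimizer and values; assumes the repository root is on `PYTHONPATH`):

```
import torch
from lib.scheduler.scheduler import MultiStepLR

net = torch.nn.Linear(10, 10)
opt = torch.optim.SGD(net.parameters(), lr=0.1)
sched = MultiStepLR(opt, milestones=[2, 4], gammas=[0.1, 0.5])
for epoch in range(6):
    print(epoch, sched.get_lr())   # 0.1 for epochs 0-1, then 0.01 for 2-3, then 0.005
    sched.step()
```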
46
lib/scheduler/utils.py
Normal file
46
lib/scheduler/utils.py
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
#
|
||||
import os, sys, json
|
||||
from pathlib import Path
|
||||
from collections import namedtuple
|
||||
|
||||
support_types = ('str', 'int', 'bool', 'float')
|
||||
|
||||
def convert_param(original_lists):
|
||||
assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists)
|
||||
ctype, value = original_lists[0], original_lists[1]
|
||||
assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types)
|
||||
is_list = isinstance(value, list)
|
||||
if not is_list: value = [value]
|
||||
outs = []
|
||||
for x in value:
|
||||
if ctype == 'int':
|
||||
x = int(x)
|
||||
elif ctype == 'str':
|
||||
x = str(x)
|
||||
elif ctype == 'bool':
|
||||
x = bool(int(x))
|
||||
elif ctype == 'float':
|
||||
x = float(x)
|
||||
else:
|
||||
raise TypeError('Does not know this type : {:}'.format(ctype))
|
||||
outs.append(x)
|
||||
if not is_list: outs = outs[0]
|
||||
return outs
|
||||
|
||||
def load_config(path):
|
||||
path = str(path)
|
||||
assert os.path.exists(path), 'Can not find {:}'.format(path)
|
||||
# Reading data back
|
||||
with open(path, 'r') as f:
|
||||
data = json.load(f)
|
||||
f.close()
|
||||
content = { k: convert_param(v) for k,v in data.items()}
|
||||
Arguments = namedtuple('Configure', ' '.join(content.keys()))
|
||||
content = Arguments(**content)
|
||||
return content
|
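`load_config` expects every JSON field to be a `[type, value]` pair, where `value` may itself be a list. A hypothetical config and the resulting namedtuple (the file contents below are made up for illustration; assumes the repository root is on `PYTHONPATH`):

```
from lib.scheduler import load_config

# hypothetical contents of ./configs/nas-cifar-cos.config:
#   {"type": ["str", "cosine"], "epochs": ["int", 600], "milestones": ["int", [100, 200]]}
config = load_config('./configs/nas-cifar-cos.config')
print(config.type, config.epochs, config.milestones)   # cosine 600 [100, 200]
```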
14
lib/utils/__init__.py
Normal file
14
lib/utils/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
from .utils import AverageMeter, RecorderMeter, convert_secs2time
|
||||
from .utils import time_file_str, time_string
|
||||
from .utils import test_imagenet_data
|
||||
from .utils import print_log
|
||||
from .evaluation_utils import obtain_accuracy
|
||||
from .draw_pts import draw_points
|
||||
from .fb_transform import ApplyOffset
|
||||
from .gpu_manager import GPUManager
|
||||
|
||||
from .save_meta import Save_Meta
|
||||
|
||||
from .model_utils import count_parameters_in_MB
|
||||
from .model_utils import Cutout
|
||||
from .flop_benchmark import print_FLOPs
|
41
lib/utils/draw_pts.py
Normal file
41
lib/utils/draw_pts.py
Normal file
@ -0,0 +1,41 @@
|
||||
import os, sys, time
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
import random
|
||||
matplotlib.use('agg')
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.cm as cm
|
||||
|
||||
def draw_points(points, labels, save_path):
|
||||
title = 'the visualized features'
|
||||
dpi = 100
|
||||
width, height = 1000, 1000
|
||||
legend_fontsize = 10
|
||||
figsize = width / float(dpi), height / float(dpi)
|
||||
fig = plt.figure(figsize=figsize)
|
||||
|
||||
classes = np.unique(labels).tolist()
|
||||
colors = cm.rainbow(np.linspace(0, 1, len(classes)))
|
||||
|
||||
legends = []
|
||||
legendnames = []
|
||||
|
||||
for cls, c in zip(classes, colors):
|
||||
|
||||
indexes = labels == cls
|
||||
ptss = points[indexes, :]
|
||||
x = ptss[:,0]
|
||||
y = ptss[:,1]
|
||||
if cls % 2 == 0: marker = 'x'
|
||||
else: marker = 'o'
|
||||
legend = plt.scatter(x, y, color=c, s=1, marker=marker)
|
||||
legendname = '{:02d}'.format(cls+1)
|
||||
legends.append( legend )
|
||||
legendnames.append( legendname )
|
||||
|
||||
plt.legend(legends, legendnames, scatterpoints=1, ncol=5, fontsize=8)
|
||||
|
||||
if save_path is not None:
|
||||
fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
|
||||
print ('---- save figure {} into {}'.format(title, save_path))
|
||||
plt.close(fig)
|
16
lib/utils/evaluation_utils.py
Normal file
16
lib/utils/evaluation_utils.py
Normal file
@ -0,0 +1,16 @@
|
||||
import torch
|
||||
|
||||
def obtain_accuracy(output, target, topk=(1,)):
|
||||
"""Computes the precision@k for the specified values of k"""
|
||||
maxk = max(topk)
|
||||
batch_size = target.size(0)
|
||||
|
||||
_, pred = output.topk(maxk, 1, True, True)
|
||||
pred = pred.t()
|
||||
correct = pred.eq(target.view(1, -1).expand_as(pred))
|
||||
|
||||
res = []
|
||||
for k in topk:
|
||||
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
|
||||
res.append(correct_k.mul_(100.0 / batch_size))
|
||||
return res
|
14
lib/utils/fb_transform.py
Normal file
14
lib/utils/fb_transform.py
Normal file
@ -0,0 +1,14 @@
|
||||
import torch
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
class ApplyOffset(object):
|
||||
def __init__(self, offset):
|
||||
assert isinstance(offset, int), 'The offset is not right : {}'.format(offset)
|
||||
self.offset = offset
|
||||
def __call__(self, x):
|
||||
if isinstance(x, np.ndarray) and x.dtype == 'uint8':
|
||||
x = x.astype(int)
|
||||
if isinstance(x, np.ndarray) and x.size == 1:
|
||||
x = int(x)
|
||||
return x + self.offset
|
113
lib/utils/flop_benchmark.py
Normal file
113
lib/utils/flop_benchmark.py
Normal file
@ -0,0 +1,113 @@
|
||||
# modified from https://github.com/warmspringwinds/pytorch-segmentation-detection/blob/master/pytorch_segmentation_detection/utils/flops_benchmark.py
|
||||
import copy, torch
|
||||
|
||||
def print_FLOPs(model, shape, logs):
|
||||
print_log, log = logs
|
||||
model = copy.deepcopy( model )
|
||||
|
||||
model = add_flops_counting_methods(model)
|
||||
model = model.cuda()
|
||||
model.eval()
|
||||
|
||||
cache_inputs = torch.zeros(*shape).cuda()
|
||||
#print_log('In the calculating function : cache input size : {:}'.format(cache_inputs.size()), log)
|
||||
_ = model(cache_inputs)
|
||||
FLOPs = compute_average_flops_cost( model ) / 1e6
|
||||
print_log('FLOPs : {:} MB'.format(FLOPs), log)
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
# ---- Public functions
|
||||
def add_flops_counting_methods( model ):
|
||||
model.__batch_counter__ = 0
|
||||
add_batch_counter_hook_function( model )
|
||||
model.apply( add_flops_counter_variable_or_reset )
|
||||
model.apply( add_flops_counter_hook_function )
|
||||
return model
|
||||
|
||||
|
||||
|
||||
def compute_average_flops_cost(model):
|
||||
"""
|
||||
A method that will be available after add_flops_counting_methods() is called on a desired net object.
|
||||
Returns current mean flops consumption per image.
|
||||
"""
|
||||
batches_count = model.__batch_counter__
|
||||
flops_sum = 0
|
||||
for module in model.modules():
|
||||
if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear):
|
||||
flops_sum += module.__flops__
|
||||
return flops_sum / batches_count
|
||||
|
||||
|
||||
# ---- Internal functions
|
||||
def pool_flops_counter_hook(pool_module, inputs, output):
|
||||
batch_size = inputs[0].size(0)
|
||||
kernel_size = pool_module.kernel_size
|
||||
out_C, output_height, output_width = output.shape[1:]
|
||||
assert out_C == inputs[0].size(1), '{:} vs. {:}'.format(out_C, inputs[0].size())
|
||||
|
||||
overall_flops = batch_size * out_C * output_height * output_width * kernel_size * kernel_size
|
||||
pool_module.__flops__ += overall_flops
|
||||
|
||||
|
||||
def fc_flops_counter_hook(fc_module, inputs, output):
|
||||
batch_size = inputs[0].size(0)
|
||||
xin, xout = fc_module.in_features, fc_module.out_features
|
||||
assert xin == inputs[0].size(1) and xout == output.size(1), 'IO=({:}, {:})'.format(xin, xout)
|
||||
overall_flops = batch_size * xin * xout
|
||||
if fc_module.bias is not None:
|
||||
overall_flops += batch_size * xout
|
||||
fc_module.__flops__ += overall_flops
|
||||
|
||||
|
||||
def conv_flops_counter_hook(conv_module, inputs, output):
|
||||
batch_size = inputs[0].size(0)
|
||||
output_height, output_width = output.shape[2:]
|
||||
|
||||
kernel_height, kernel_width = conv_module.kernel_size
|
||||
in_channels = conv_module.in_channels
|
||||
out_channels = conv_module.out_channels
|
||||
groups = conv_module.groups
|
||||
conv_per_position_flops = kernel_height * kernel_width * in_channels * out_channels / groups
|
||||
|
||||
active_elements_count = batch_size * output_height * output_width
|
||||
overall_flops = conv_per_position_flops * active_elements_count
|
||||
|
||||
if conv_module.bias is not None:
|
||||
overall_flops += out_channels * active_elements_count
|
||||
conv_module.__flops__ += overall_flops
|
||||
|
||||
|
||||
def batch_counter_hook(module, inputs, output):
|
||||
# Can have multiple inputs, getting the first one
|
||||
inputs = inputs[0]
|
||||
batch_size = inputs.shape[0]
|
||||
module.__batch_counter__ += batch_size
|
||||
|
||||
|
||||
def add_batch_counter_hook_function(module):
|
||||
if not hasattr(module, '__batch_counter_handle__'):
|
||||
handle = module.register_forward_hook(batch_counter_hook)
|
||||
module.__batch_counter_handle__ = handle
|
||||
|
||||
|
||||
def add_flops_counter_variable_or_reset(module):
|
||||
if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear) \
|
||||
or isinstance(module, torch.nn.AvgPool2d) or isinstance(module, torch.nn.MaxPool2d):
|
||||
module.__flops__ = 0
|
||||
|
||||
|
||||
def add_flops_counter_hook_function(module):
|
||||
if isinstance(module, torch.nn.Conv2d):
|
||||
if not hasattr(module, '__flops_handle__'):
|
||||
handle = module.register_forward_hook(conv_flops_counter_hook)
|
||||
module.__flops_handle__ = handle
|
||||
elif isinstance(module, torch.nn.Linear):
|
||||
if not hasattr(module, '__flops_handle__'):
|
||||
handle = module.register_forward_hook(fc_flops_counter_hook)
|
||||
module.__flops_handle__ = handle
|
||||
elif isinstance(module, torch.nn.AvgPool2d) or isinstance(module, torch.nn.MaxPool2d):
|
||||
if not hasattr(module, '__flops_handle__'):
|
||||
handle = module.register_forward_hook(pool_flops_counter_hook)
|
||||
module.__flops_handle__ = handle
|
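The conv hook above charges `kernel_h * kernel_w * C_in * C_out / groups` multiply-accumulates per output position, plus `C_out` per position when a bias is present. A worked example with illustrative numbers:

```
# a 3x3 conv with 16 input and 16 output channels (groups=1) on a 32x32 output map:
flops = 3 * 3 * 16 * 16 * 32 * 32
print(flops)   # 2359296 per image, ~2.36 MFLOPs; a bias would add 16 * 32 * 32 = 16384
```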
70
lib/utils/gpu_manager.py
Normal file
70
lib/utils/gpu_manager.py
Normal file
@ -0,0 +1,70 @@
|
||||
import os
|
||||
|
||||
class GPUManager():
|
||||
queries = ('index', 'gpu_name', 'memory.free', 'memory.used', 'memory.total', 'power.draw', 'power.limit')
|
||||
|
||||
def __init__(self):
|
||||
all_gpus = self.query_gpu(False)
|
||||
|
||||
def get_info(self, ctype):
|
||||
cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(ctype)
|
||||
lines = os.popen(cmd).readlines()
|
||||
lines = [line.strip('\n') for line in lines]
|
||||
return lines
|
||||
|
||||
def query_gpu(self, show=True):
|
||||
num_gpus = len( self.get_info('index') )
|
||||
all_gpus = [ {} for i in range(num_gpus) ]
|
||||
for query in self.queries:
|
||||
infos = self.get_info(query)
|
||||
for idx, info in enumerate(infos):
|
||||
all_gpus[idx][query] = info
|
||||
|
||||
if 'CUDA_VISIBLE_DEVICES' in os.environ:
|
||||
CUDA_VISIBLE_DEVICES = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
|
||||
selected_gpus = []
|
||||
for idx, CUDA_VISIBLE_DEVICE in enumerate(CUDA_VISIBLE_DEVICES):
|
||||
find = False
|
||||
for gpu in all_gpus:
|
||||
if gpu['index'] == CUDA_VISIBLE_DEVICE:
|
||||
assert find==False, 'Duplicate cuda device index : {}'.format(CUDA_VISIBLE_DEVICE)
|
||||
find = True
|
||||
selected_gpus.append( gpu.copy() )
|
||||
selected_gpus[-1]['index'] = '{}'.format(idx)
|
||||
assert find, 'Cannot find the device : {}'.format(CUDA_VISIBLE_DEVICE)
|
||||
all_gpus = selected_gpus
|
||||
|
||||
if show:
|
||||
allstrings = ''
|
||||
for gpu in all_gpus:
|
||||
string = '| '
|
||||
for query in self.queries:
|
||||
if query.find('memory') == 0: xinfo = '{:>9}'.format(gpu[query])
|
||||
else: xinfo = gpu[query]
|
||||
string = string + query + ' : ' + xinfo + ' | '
|
||||
allstrings = allstrings + string + '\n'
|
||||
return allstrings
|
||||
else:
|
||||
return all_gpus
|
||||
|
||||
def select_by_memory(self, numbers=1):
|
||||
all_gpus = self.query_gpu(False)
|
||||
assert numbers <= len(all_gpus), 'Requested {} GPUs but only {} are available'.format(numbers, len(all_gpus))
|
||||
alls = []
|
||||
for idx, gpu in enumerate(all_gpus):
|
||||
free_memory = gpu['memory.free']
|
||||
free_memory = free_memory.split(' ')[0]
|
||||
free_memory = int(free_memory)
|
||||
index = gpu['index']
|
||||
alls.append((free_memory, index))
|
||||
alls.sort(reverse = True)
|
||||
alls = [ int(alls[i][1]) for i in range(numbers) ]
|
||||
return sorted(alls)
|
||||
|
||||
"""
|
||||
if __name__ == '__main__':
|
||||
manager = GPUManager()
|
||||
manager.query_gpu(True)
|
||||
indexes = manager.select_by_memory(3)
|
||||
print (indexes)
|
||||
"""
|
34
lib/utils/model_utils.py
Normal file
34
lib/utils/model_utils.py
Normal file
@ -0,0 +1,34 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
|
||||
def count_parameters_in_MB(model):
|
||||
if isinstance(model, nn.Module):
|
||||
return sum(np.prod(v.size()) for v in model.parameters())/1e6
|
||||
else:
|
||||
return sum(np.prod(v.size()) for v in model)/1e6
|
||||
|
||||
|
||||
class Cutout(object):
|
||||
def __init__(self, length):
|
||||
self.length = length
|
||||
|
||||
def __repr__(self):
|
||||
return ('{name}(length={length})'.format(name=self.__class__.__name__, **self.__dict__))
|
||||
|
||||
def __call__(self, img):
|
||||
h, w = img.size(1), img.size(2)
|
||||
mask = np.ones((h, w), np.float32)
|
||||
y = np.random.randint(h)
|
||||
x = np.random.randint(w)
|
||||
|
||||
y1 = np.clip(y - self.length // 2, 0, h)
|
||||
y2 = np.clip(y + self.length // 2, 0, h)
|
||||
x1 = np.clip(x - self.length // 2, 0, w)
|
||||
x2 = np.clip(x + self.length // 2, 0, w)
|
||||
|
||||
mask[y1: y2, x1: x2] = 0.
|
||||
mask = torch.from_numpy(mask)
|
||||
mask = mask.expand_as(img)
|
||||
img *= mask
|
||||
return img
|
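`Cutout` zeroes a `length x length` square at a random position, clipped at the image border. A quick sketch on a CIFAR-sized tensor (illustrative only; assumes the repository root is on `PYTHONPATH`):

```
import torch
from lib.utils.model_utils import Cutout

cutout = Cutout(length=16)
img = torch.ones(3, 32, 32)
out = cutout(img)
print(out.eq(0).sum().item())   # up to 3*16*16 = 768 zeroed values, fewer if the square is clipped
```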
50
lib/utils/save_meta.py
Normal file
50
lib/utils/save_meta.py
Normal file
@ -0,0 +1,50 @@
|
||||
import torch
|
||||
import os, sys
|
||||
import os.path as osp
|
||||
import numpy as np
|
||||
|
||||
def tensor2np(x):
|
||||
if isinstance(x, np.ndarray): return x
|
||||
if x.is_cuda: x = x.cpu()
|
||||
return x.numpy()
|
||||
|
||||
class Save_Meta():
|
||||
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
|
||||
def __repr__(self):
|
||||
return ('{name}'.format(name=self.__class__.__name__)+'(number of data = {})'.format(len(self)))
|
||||
|
||||
def reset(self):
|
||||
self.predictions = []
|
||||
self.groundtruth = []
|
||||
|
||||
def __len__(self):
|
||||
return len(self.predictions)
|
||||
|
||||
def append(self, _pred, _ground):
|
||||
_pred, _ground = tensor2np(_pred), tensor2np(_ground)
|
||||
assert _ground.shape[0] == _pred.shape[0] and len(_pred.shape) == 2 and len(_ground.shape) == 1, 'The shapes are wrong : {} & {}'.format(_pred.shape, _ground.shape)
|
||||
self.predictions.append(_pred)
|
||||
self.groundtruth.append(_ground)
|
||||
|
||||
def save(self, save_dir, filename, test=True):
|
||||
meta = {'predictions': self.predictions,
|
||||
'groundtruth': self.groundtruth}
|
||||
filename = osp.join(save_dir, filename)
|
||||
torch.save(meta, filename)
|
||||
if test:
|
||||
predictions = np.concatenate(self.predictions)
|
||||
groundtruth = np.concatenate(self.groundtruth)
|
||||
predictions = np.argmax(predictions, axis=1)
|
||||
accuracy = np.sum(groundtruth==predictions) * 100.0 / predictions.size
|
||||
else:
|
||||
accuracy = None
|
||||
print ('save save_meta into {} with accuracy = {}'.format(filename, accuracy))
|
||||
|
||||
def load(self, filename):
|
||||
assert os.path.isfile(filename), '{} is not a file'.format(filename)
|
||||
checkpoint = torch.load(filename)
|
||||
self.predictions = checkpoint['predictions']
|
||||
self.groundtruth = checkpoint['groundtruth']
|
137
lib/utils/utils.py
Normal file
137
lib/utils/utils.py
Normal file
@ -0,0 +1,137 @@
|
||||
import os, sys, time
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
import random
|
||||
matplotlib.use('agg')
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
class AverageMeter(object):
|
||||
"""Computes and stores the average and current value"""
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.val = 0
|
||||
self.avg = 0
|
||||
self.sum = 0
|
||||
self.count = 0
|
||||
|
||||
def update(self, val, n=1):
|
||||
self.val = val
|
||||
self.sum += val * n
|
||||
self.count += n
|
||||
self.avg = self.sum / self.count
|
||||
|
||||
|
||||
class RecorderMeter(object):
|
||||
"""Computes and stores the minimum loss value and its epoch index"""
|
||||
def __init__(self, total_epoch):
|
||||
self.reset(total_epoch)
|
||||
|
||||
def reset(self, total_epoch):
|
||||
assert total_epoch > 0
|
||||
self.total_epoch = total_epoch
|
||||
self.current_epoch = 0
|
||||
self.epoch_losses = np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
|
||||
self.epoch_losses = self.epoch_losses - 1
|
||||
|
||||
self.epoch_accuracy= np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
|
||||
self.epoch_accuracy= self.epoch_accuracy
|
||||
|
||||
def update(self, idx, train_loss, train_acc, val_loss, val_acc):
|
||||
assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx)
|
||||
self.epoch_losses [idx, 0] = train_loss
|
||||
self.epoch_losses [idx, 1] = val_loss
|
||||
self.epoch_accuracy[idx, 0] = train_acc
|
||||
self.epoch_accuracy[idx, 1] = val_acc
|
||||
self.current_epoch = idx + 1
|
||||
return self.max_accuracy(False) == self.epoch_accuracy[idx, 1]
|
||||
|
||||
def max_accuracy(self, istrain):
|
||||
if self.current_epoch <= 0: return 0
|
||||
if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max()
|
||||
else: return self.epoch_accuracy[:self.current_epoch, 1].max()
|
||||
|
||||
def plot_curve(self, save_path):
|
||||
title = 'the accuracy/loss curve of train/val'
|
||||
dpi = 100
|
||||
width, height = 1600, 1000
|
||||
legend_fontsize = 10
|
||||
figsize = width / float(dpi), height / float(dpi)
|
||||
|
||||
fig = plt.figure(figsize=figsize)
|
||||
x_axis = np.array([i for i in range(self.total_epoch)]) # epochs
|
||||
y_axis = np.zeros(self.total_epoch)
|
||||
|
||||
plt.xlim(0, self.total_epoch)
|
||||
plt.ylim(0, 100)
|
||||
interval_y = 5
|
||||
interval_x = 5
|
||||
plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
|
||||
plt.yticks(np.arange(0, 100 + interval_y, interval_y))
|
||||
plt.grid()
|
||||
plt.title(title, fontsize=20)
|
||||
plt.xlabel('the training epoch', fontsize=16)
|
||||
plt.ylabel('accuracy', fontsize=16)
|
||||
|
||||
y_axis[:] = self.epoch_accuracy[:, 0]
|
||||
plt.plot(x_axis, y_axis, color='g', linestyle='-', label='train-accuracy', lw=2)
|
||||
plt.legend(loc=4, fontsize=legend_fontsize)
|
||||
|
||||
y_axis[:] = self.epoch_accuracy[:, 1]
|
||||
plt.plot(x_axis, y_axis, color='y', linestyle='-', label='valid-accuracy', lw=2)
|
||||
plt.legend(loc=4, fontsize=legend_fontsize)
|
||||
|
||||
|
||||
y_axis[:] = self.epoch_losses[:, 0]
|
||||
plt.plot(x_axis, y_axis*50, color='g', linestyle=':', label='train-loss-x50', lw=2)
|
||||
plt.legend(loc=4, fontsize=legend_fontsize)
|
||||
|
||||
y_axis[:] = self.epoch_losses[:, 1]
|
||||
plt.plot(x_axis, y_axis*50, color='y', linestyle=':', label='valid-loss-x50', lw=2)
|
||||
plt.legend(loc=4, fontsize=legend_fontsize)
|
||||
|
||||
if save_path is not None:
|
||||
fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
|
||||
print ('---- save figure {} into {}'.format(title, save_path))
|
||||
plt.close(fig)
|
||||
|
||||
def print_log(print_string, log):
|
||||
print("{}".format(print_string))
|
||||
if log is not None:
|
||||
log.write('{}\n'.format(print_string))
|
||||
log.flush()
|
||||
|
||||
def time_file_str():
|
||||
ISOTIMEFORMAT='%Y-%m-%d'
|
||||
string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
|
||||
return string + '-{}'.format(random.randint(1, 10000))
|
||||
|
||||
def time_string():
|
||||
ISOTIMEFORMAT='%Y-%m-%d-%X'
|
||||
string = '[{}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
|
||||
return string
|
||||
|
||||
def convert_secs2time(epoch_time, return_str=False):
|
||||
need_hour = int(epoch_time / 3600)
|
||||
need_mins = int((epoch_time - 3600*need_hour) / 60)
|
||||
need_secs = int(epoch_time - 3600*need_hour - 60*need_mins)
|
||||
if return_str == False:
|
||||
return need_hour, need_mins, need_secs
|
||||
else:
|
||||
return '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
|
||||
|
||||
def test_imagenet_data(imagenet):
  total_length = len(imagenet)
  assert total_length == 1281166 or total_length == 50000, 'The length of ImageNet is wrong : {}'.format(total_length)
  map_id = {}
  for index in range(total_length):
    path, target = imagenet.imgs[index]
    folder, image_name = os.path.split(path)
    _, folder = os.path.split(folder)
    if folder not in map_id:
      map_id[folder] = target
    else:
      # every image inside the same class folder must map to the same target index
      assert map_id[folder] == target, 'Class : {} is not {}'.format(folder, target)
    # file names are expected to start with their class-folder name
    assert image_name.find(folder) == 0, '{} is wrong.'.format(path)
  print('Check ImageNet Dataset OK')
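`test_imagenet_data` only needs the `.imgs` list of `(path, class_index)` pairs that `torchvision.datasets.ImageFolder` exposes; a hedged sketch of how it might be called (the dataset root below is a placeholder, not a path defined by this commit):

```python
import torchvision.datasets as dset

# the path is a placeholder; the check passes only when the folder holds the full
# 1,281,166 training images or the 50,000 validation images of ILSVRC2012
imagenet_train = dset.ImageFolder('/path/to/ILSVRC2012/train')
test_imagenet_data(imagenet_train)
```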
30
scripts-cnn/DMS-V-Train.sh
Normal file
@@ -0,0 +1,30 @@
#!/usr/bin/env sh
if [ "$#" -ne 3 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 3 parameters: the GPUs, the epochs, and the cutout"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=acc2
cutout=$3
dataset=cifar10
epoch=$2
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.05 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --tau_max 10 --tau_min 4 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
30
scripts-cnn/DMS-V-TrainV3.sh
Normal file
@@ -0,0 +1,30 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the epochs"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=acc2
cutout=0
dataset=cifar10
epoch=$2
SAVED=./snapshots/NAS/ACC-V3-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v3.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.01 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --tau_max 10 --tau_min 1 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
57
scripts-cnn/README.md
Normal file
@@ -0,0 +1,57 @@
# Neural-Architecture-Search

### Baseline
```
bash ./scripts-nas/search.sh 1 base cifar10
bash ./scripts-nas/search.sh 1 share
bash ./scripts-nas/batch-base-search.sh 1
bash ./scripts-nas/batch-base-model.sh 1
```

### Meta
```
bash ./scripts-nas/meta-search.sh 0 meta 20 5
```

### Acceleration
```
bash ./scripts-nas/search-acc-v2.sh 3 acc2
bash ./scripts-nas/DMS-V-Train.sh 0

bash ./scripts-nas/search-acc-simple.sh 3 NetworkV2
```

### Base Model Training
```
bash ./scripts-nas/train-model.sh 3 AmoebaNet
bash ./scripts-nas/train-model.sh 3 NASNet
bash ./scripts-nas/train-model.sh 3 DARTS_V1
bash ./scripts-nas/train-model-simple.sh 3 AmoebaNet
bash ./scripts-nas/train-imagenet.sh 3 DARTS_V2 50 14

bash scripts-nas/TRAIN-BASE.sh 0 PNASNet cifar10 nocut 48 11
bash scripts-nas/TRAIN-BASE.sh 0 AmoebaNet cifar10 nocut 36 20
bash scripts-nas/TRAIN-BASE.sh 0 NASNet cifar10 nocut 33 20

bash scripts-nas/TRAIN-BASE.sh 0 DMS_F1 cifar10 nocut 36 20
bash scripts-nas/TRAIN-BASE.sh 0 DMS_V1 cifar10 nocut 36 20
bash scripts-nas/TRAIN-BASE.sh 0 GDAS_CC cifar10 nocut 36 20
bash scripts-nas/train-imagenet.sh 3 DMS_F1 52 14
bash scripts-nas/train-imagenet.sh 3 DMS_V1 50 14

bash scripts-nas/TRAIN-BASE.sh 0 DMS_V1 cifar10 nocut 36 20
```

### Visualization
```
python ./exps-nas/vis-arch.py --checkpoint --save_dir
python ./exps-nas/cvpr-vis.py --save_dir ./snapshots/NAS-VIS/
```

### Test datasets
```
cd ./lib/datasets/
python test_NLP.py
```
30
scripts-cnn/TRAIN-BASE.sh
Normal file
@@ -0,0 +1,30 @@
#!/usr/bin/env sh
# bash scripts-nas/TRAIN-BASE.sh 0 DMS_V1 cifar10 nocut init-channel layers
if [ "$#" -ne 6 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 6 parameters: the GPUs, the architecture, the dataset, the config, the initial channels, and the number of layers"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=$3
config=$4
C=$5
N=$6
SAVED=./snapshots/NAS/${arch}-${C}-${N}-${dataset}-${config}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/train_base.py \
  --data_path $TORCH_HOME/cifar.python \
  --dataset ${dataset} --arch ${arch} \
  --save_path ${SAVED} \
  --grad_clip 5 \
  --init_channels ${C} --layers ${N} \
  --model_config ./configs/nas-cifar-cos-${config}.config \
  --print_freq 100 --workers 8
23
scripts-cnn/batch-base-model.sh
Normal file
@@ -0,0 +1,23 @@
#!/usr/bin/env sh
set -e
if [ "$#" -ne 1 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 1 parameter: the GPUs"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1

bash ./scripts-nas/train-model.sh ${gpus} AmoebaNet 0

bash ./scripts-nas/train-model.sh ${gpus} NASNet 0

bash ./scripts-nas/train-model.sh ${gpus} DARTS_V1 0

bash ./scripts-nas/train-model.sh ${gpus} DARTS_V2 0
19
scripts-cnn/batch-base-search.sh
Normal file
@@ -0,0 +1,19 @@
#!/usr/bin/env sh
if [ "$#" -ne 1 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 1 parameter: the GPUs"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
Times="1 2 3"

for time in ${Times}; do
  bash ./scripts-nas/search.sh ${gpus}
done
30
scripts-cnn/meta-search.sh
Normal file
@@ -0,0 +1,30 @@
#!/usr/bin/env sh
if [ "$#" -ne 4 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 4 parameters: the GPUs, the network, N-way, and K-shot"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
n_way=$3
k_shot=$4
cutout=16
epoch=60
SAVED=./snapshots/NAS/Meta-Search-${arch}-N${n_way}-K${k_shot}-cut${cutout}-${epoch}

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/meta_search.py \
  --data_path $TORCH_HOME/tiered-imagenet \
  --arch ${arch} --n_way ${n_way} --k_shot ${k_shot} \
  --save_path ${SAVED} \
  --learning_rate_max 0.001 --learning_rate_min 0.0001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos-cut.config \
  --print_freq 200 --workers 16
29
scripts-cnn/search-acc-simple.sh
Normal file
@@ -0,0 +1,29 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the network"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=100
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E100

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos-simple.config \
  --print_freq 100 --workers 8
29
scripts-cnn/search-acc-v2-E150.sh
Normal file
@@ -0,0 +1,29 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the network"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=150
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
29
scripts-cnn/search-acc-v2-E200.sh
Normal file
@@ -0,0 +1,29 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the network"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=200
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
29
scripts-cnn/search-acc-v2-E300.sh
Normal file
@@ -0,0 +1,29 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the network"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=300
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
29
scripts-cnn/search-acc-v2-E50.sh
Normal file
@@ -0,0 +1,29 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the network"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=50
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
29
scripts-cnn/search-acc-v2.sh
Normal file
@@ -0,0 +1,29 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the network"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=cifar10
epoch=100
SAVED=./snapshots/NAS/ACC-V2-Search-${arch}-${dataset}-cut${cutout}-${epoch}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/acc_search_v2.py \
  --data_path $TORCH_HOME/cifar.python \
  --arch ${arch} --dataset ${dataset} --batch_size 128 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
45
scripts-cnn/search.sh
Normal file
@@ -0,0 +1,45 @@
#!/usr/bin/env sh
if [ "$#" -ne 3 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 3 parameters: the GPUs, the network, and the dataset"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
cutout=0
dataset=$3
epoch=50
SAVED=./snapshots/NAS/Search-${arch}-${dataset}-cut${cutout}-${epoch}

if [ "$dataset" = "cifar10" ]; then
  dataset_root=$TORCH_HOME/cifar.python
  print_freq=100
elif [ "$dataset" = "cifar100" ]; then
  dataset_root=$TORCH_HOME/cifar.python
  print_freq=100
elif [ "$dataset" = "tiered" ]; then
  dataset_root=$TORCH_HOME/tiered-imagenet
  print_freq=500
else
  echo "invalid dataset name: ${dataset}"
  exit 1
fi

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/train_search.py \
  --data_path ${dataset_root} \
  --arch ${arch} \
  --dataset ${dataset} --batch_size 64 \
  --save_path ${SAVED} \
  --learning_rate_max 0.025 --learning_rate_min 0.001 --momentum 0.9 --weight_decay 0.0003 \
  --epochs ${epoch} --cutout ${cutout} --validate --grad_clip 5 \
  --init_channels 16 --layers 8 \
  --manualSeed 3858 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq ${print_freq} --workers 8
26
scripts-cnn/train-cifar100.sh
Normal file
@@ -0,0 +1,26 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the architecture"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=cifar100
SAVED=./snapshots/NAS/${arch}-${dataset}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/train_base.py \
  --data_path $TORCH_HOME/cifar.python \
  --dataset ${dataset} --arch ${arch} \
  --save_path ${SAVED} \
  --grad_clip 5 \
  --init_channels 36 --layers 20 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
28
scripts-cnn/train-imagenet.sh
Normal file
@@ -0,0 +1,28 @@
#!/usr/bin/env sh
if [ "$#" -ne 4 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 4 parameters: the GPUs, the architecture, the channels, and the layers"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=imagenet
channels=$3
layers=$4
SAVED=./snapshots/NAS/${arch}-${dataset}-C${channels}-L${layers}-E250

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/train_base.py \
  --data_path $TORCH_HOME/ILSVRC2012 \
  --dataset ${dataset} --arch ${arch} \
  --save_path ${SAVED} \
  --grad_clip 5 \
  --init_channels ${channels} --layers ${layers} \
  --model_config ./configs/nas-imagenet.config \
  --print_freq 200 --workers 20
25
scripts-cnn/train-model-simple.sh
Normal file
@@ -0,0 +1,25 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the architecture"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=cifar10
SAVED=./snapshots/NAS/${arch}-${dataset}-E100

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/train_base.py \
  --data_path $TORCH_HOME/cifar.python \
  --dataset ${dataset} --arch ${arch} \
  --save_path ${SAVED} \
  --grad_clip 5 \
  --model_config ./configs/nas-cifar-cos-simple.config \
  --print_freq 100 --workers 8
26
scripts-cnn/train-model.sh
Normal file
@@ -0,0 +1,26 @@
#!/usr/bin/env sh
if [ "$#" -ne 2 ]; then
  echo "Illegal number of parameters: $#"
  echo "Need 2 parameters: the GPUs and the architecture"
  exit 1
fi
if [ "$TORCH_HOME" = "" ]; then
  echo "Must set the TORCH_HOME environment variable for the data directory"
  exit 1
else
  echo "TORCH_HOME : $TORCH_HOME"
fi

gpus=$1
arch=$2
dataset=cifar10
SAVED=./snapshots/NAS/${arch}-${dataset}-E600

CUDA_VISIBLE_DEVICES=${gpus} python ./exps-nas/train_base.py \
  --data_path $TORCH_HOME/cifar.python \
  --dataset ${dataset} --arch ${arch} \
  --save_path ${SAVED} \
  --grad_clip 5 \
  --init_channels 36 --layers 20 \
  --model_config ./configs/nas-cifar-cos.config \
  --print_freq 100 --workers 8
9
scripts-cnn/vis.sh
Normal file
@@ -0,0 +1,9 @@
#!/usr/bin/env sh

seeds="seed-8167 seed-908 seed-9242"
for seed in ${seeds}; do
  python ./exps-nas/vis-arch.py --checkpoint ./snapshots/NAS/Search-cifar10-cut16-100/${seed}/checkpoint-search.pth \
    --save_dir ./snapshots/NAS-VIS/Search-cut16-100/${seed}
done
12
scripts-rnn/README.md
Normal file
@@ -0,0 +1,12 @@
# Search RNN cell
```
bash scripts-nas-rnn/search-baseline.sh 3
bash scripts-nas-rnn/search-accelerate.sh 0 200 10 1
```

# Train the Searched Model
```
bash scripts-nas-rnn/train-PTB.sh 3 DARTS_V1
bash scripts-nas-rnn/train-WT2.sh 3 DARTS_V1
bash scripts-nas-rnn/train-PTB.sh 3 DARTS_V2
```
Some files were not shown because too many files have changed in this diff.