From c72e66b66ca589bb7bb68c357d9850373d54808b Mon Sep 17 00:00:00 2001
From: D-X-Y <280835372@qq.com>
Date: Fri, 15 Nov 2019 17:15:07 +1100
Subject: [PATCH] update 10 NAS algs
---
.latent-data/BASELINE.md | 138 ----------
AA-NAS-Bench.md | 109 --------
LICENSE.md | 21 ++
README.md | 136 ++++++++++
lib/aa_nas_api/__init__.py | 3 +
lib/aa_nas_api/api.py | 3 +
lib/config_utils/__init__.py | 3 +
lib/config_utils/basic_args.py | 3 +
lib/datasets/DownsampledImageNet.py | 3 +
lib/datasets/SearchDatasetWrap.py | 3 +
lib/datasets/__init__.py | 3 +
lib/datasets/get_dataset_with_transform.py | 3 +
lib/datasets/test_utils.py | 3 +
lib/log_utils/__init__.py | 3 +
lib/models/SharedUtils.py | 3 +
lib/models/__init__.py | 3 +
lib/models/cell_infers/__init__.py | 3 +
lib/models/cell_operations.py | 3 +
lib/models/cell_searchs/__init__.py | 3 +
lib/models/cell_searchs/_test_module.py | 3 +
lib/models/cell_searchs/cells.py | 115 --------
lib/models/cell_searchs/genotypes.py | 3 +
lib/models/cell_searchs/search_cells.py | 3 +
.../cell_searchs/search_model_darts_v1.py | 2 +
.../cell_searchs/search_model_darts_v2.py | 2 +
lib/models/cell_searchs/search_model_enas.py | 2 +
.../cell_searchs/search_model_enas_utils.py | 2 +
.../cell_searchs/search_model_random.py | 2 +
lib/models/cell_searchs/search_model_setn.py | 2 +
lib/models/shape_searchs/SearchCifarResNet.py | 3 +
.../shape_searchs/SearchCifarResNet_depth.py | 3 +
.../shape_searchs/SearchCifarResNet_width.py | 3 +
lib/models/shape_searchs/SoftSelect.py | 3 +
lib/models/shape_searchs/__init__.py | 3 +
lib/models/shape_searchs/test.py | 3 +
lib/nas_infer_model/__init__.py | 3 +
lib/nas_infer_model/operations.py | 3 +
lib/procedures/__init__.py | 3 +
lib/procedures/basic_main.py | 3 +
lib/procedures/optimizers.py | 3 +
lib/procedures/search_main.py | 3 +
lib/procedures/search_main_v2.py | 3 +
lib/procedures/simple_KD_main.py | 3 +
lib/procedures/starts.py | 9 +-
others/GDAS/LICENSE | 21 ++
others/GDAS/README.md | 73 ++++++
others/GDAS/configs/NAS-PTB-BASE.config | 27 ++
others/GDAS/configs/NAS-WT2-BASE.config | 27 ++
others/GDAS/configs/cos1800.config | 8 +
others/GDAS/configs/cos600.config | 8 +
others/GDAS/configs/nas-cifar-cos-cut.config | 14 +
.../GDAS/configs/nas-cifar-cos-cutB128.config | 14 +
.../GDAS/configs/nas-cifar-cos-cutB64.config | 14 +
.../GDAS/configs/nas-cifar-cos-cutB96.config | 14 +
.../GDAS/configs/nas-cifar-cos-cutW1.config | 14 +
.../GDAS/configs/nas-cifar-cos-cutW3.config | 14 +
.../GDAS/configs/nas-cifar-cos-cutW5.config | 14 +
.../GDAS/configs/nas-cifar-cos-nocut.config | 14 +
others/GDAS/configs/nas-imagenet-B128.config | 15 ++
others/GDAS/configs/nas-imagenet-B256.config | 15 ++
others/GDAS/configs/nas-imagenet.config | 15 ++
others/GDAS/configs/pyramidC10.config | 10 +
others/GDAS/configs/pyramidC100.config | 10 +
others/GDAS/configs/resnet165.config | 10 +
others/GDAS/configs/resnet200.config | 10 +
others/GDAS/exps-cnn/cvpr-vis.py | 97 +++++++
others/GDAS/exps-cnn/evaluate.py | 53 ++++
others/GDAS/exps-cnn/train_base.py | 89 +++++++
others/GDAS/exps-cnn/train_utils.py | 169 ++++++++++++
others/GDAS/exps-cnn/train_utils_imagenet.py | 192 ++++++++++++++
others/GDAS/exps-cnn/vis-arch.py | 69 +++++
others/GDAS/exps-rnn/train_rnn_base.py | 76 ++++++
others/GDAS/exps-rnn/train_rnn_utils.py | 221 ++++++++++++++++
others/GDAS/lib/datasets/LanguageDataset.py | 122 +++++++++
others/GDAS/lib/datasets/MetaBatchSampler.py | 65 +++++
others/GDAS/lib/datasets/TieredImageNet.py | 84 ++++++
others/GDAS/lib/datasets/__init__.py | 7 +
.../datasets/get_dataset_with_transform.py | 77 ++++++
others/GDAS/lib/datasets/test_NLP.py | 10 +
others/GDAS/lib/datasets/test_dataset.py | 33 +++
others/GDAS/lib/nas/CifarNet.py | 89 +++++++
others/GDAS/lib/nas/ImageNet.py | 104 ++++++++
others/GDAS/lib/nas/SE_Module.py | 27 ++
others/GDAS/lib/nas/__init__.py | 10 +
others/GDAS/lib/nas/construct_utils.py | 152 +++++++++++
others/GDAS/lib/nas/genotypes.py | 245 ++++++++++++++++++
others/GDAS/lib/nas/head_utils.py | 19 ++
others/GDAS/lib/nas/operations.py | 122 +++++++++
others/GDAS/lib/nas_rnn/__init__.py | 9 +
others/GDAS/lib/nas_rnn/basemodel.py | 181 +++++++++++++
others/GDAS/lib/nas_rnn/genotypes.py | 55 ++++
others/GDAS/lib/nas_rnn/model_search.py | 104 ++++++++
others/GDAS/lib/nas_rnn/utils.py | 66 +++++
others/GDAS/lib/scheduler/__init__.py | 5 +
others/GDAS/lib/scheduler/scheduler.py | 32 +++
others/GDAS/lib/scheduler/utils.py | 42 +++
others/GDAS/lib/utils/__init__.py | 16 ++
others/GDAS/lib/utils/draw_pts.py | 41 +++
others/GDAS/lib/utils/evaluation_utils.py | 16 ++
others/GDAS/lib/utils/flop_benchmark.py | 116 +++++++++
others/GDAS/lib/utils/gpu_manager.py | 70 +++++
others/GDAS/lib/utils/model_utils.py | 35 +++
others/GDAS/lib/utils/save_meta.py | 53 ++++
others/GDAS/lib/utils/utils.py | 140 ++++++++++
others/GDAS/paddlepaddle/.gitignore | 3 +
others/GDAS/paddlepaddle/README.md | 119 +++++++++
.../GDAS/paddlepaddle/lib/models/__init__.py | 3 +
.../GDAS/paddlepaddle/lib/models/genotypes.py | 175 +++++++++++++
.../GDAS/paddlepaddle/lib/models/nas_net.py | 79 ++++++
.../paddlepaddle/lib/models/operations.py | 91 +++++++
others/GDAS/paddlepaddle/lib/models/resnet.py | 65 +++++
.../GDAS/paddlepaddle/lib/utils/__init__.py | 6 +
.../GDAS/paddlepaddle/lib/utils/data_utils.py | 64 +++++
others/GDAS/paddlepaddle/lib/utils/meter.py | 23 ++
.../GDAS/paddlepaddle/lib/utils/time_utils.py | 46 ++++
.../GDAS/paddlepaddle/scripts/base-train.sh | 31 +++
others/GDAS/paddlepaddle/scripts/train-nas.sh | 31 +++
others/GDAS/paddlepaddle/train_cifar.py | 189 ++++++++++++++
others/GDAS/scripts-cluster/README.md | 14 +
others/GDAS/scripts-cluster/job-script.sh | 36 +++
others/GDAS/scripts-cluster/submit.sh | 52 ++++
others/GDAS/scripts-cnn/train-cifar.sh | 38 +++
others/GDAS/scripts-cnn/train-imagenet.sh | 73 ++++++
others/GDAS/scripts-rnn/train-PTB.sh | 25 ++
others/GDAS/scripts-rnn/train-WT2.sh | 25 ++
others/paddlepaddle/.gitignore | 3 +
others/paddlepaddle/README.md | 118 +++++++++
others/paddlepaddle/lib/models/__init__.py | 3 +
others/paddlepaddle/lib/models/genotypes.py | 175 +++++++++++++
others/paddlepaddle/lib/models/nas_net.py | 79 ++++++
others/paddlepaddle/lib/models/operations.py | 91 +++++++
others/paddlepaddle/lib/models/resnet.py | 65 +++++
others/paddlepaddle/lib/utils/__init__.py | 6 +
others/paddlepaddle/lib/utils/data_utils.py | 64 +++++
others/paddlepaddle/lib/utils/meter.py | 26 ++
others/paddlepaddle/lib/utils/time_utils.py | 52 ++++
others/paddlepaddle/scripts/base-train.sh | 31 +++
others/paddlepaddle/scripts/train-nas.sh | 31 +++
others/paddlepaddle/train_cifar.py | 189 ++++++++++++++
139 files changed, 5863 insertions(+), 368 deletions(-)
delete mode 100644 .latent-data/BASELINE.md
delete mode 100644 AA-NAS-Bench.md
create mode 100644 LICENSE.md
create mode 100644 README.md
delete mode 100644 lib/models/cell_searchs/cells.py
create mode 100644 others/GDAS/LICENSE
create mode 100644 others/GDAS/README.md
create mode 100644 others/GDAS/configs/NAS-PTB-BASE.config
create mode 100644 others/GDAS/configs/NAS-WT2-BASE.config
create mode 100644 others/GDAS/configs/cos1800.config
create mode 100644 others/GDAS/configs/cos600.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cut.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cutB128.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cutB64.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cutB96.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cutW1.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cutW3.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-cutW5.config
create mode 100644 others/GDAS/configs/nas-cifar-cos-nocut.config
create mode 100644 others/GDAS/configs/nas-imagenet-B128.config
create mode 100644 others/GDAS/configs/nas-imagenet-B256.config
create mode 100644 others/GDAS/configs/nas-imagenet.config
create mode 100644 others/GDAS/configs/pyramidC10.config
create mode 100644 others/GDAS/configs/pyramidC100.config
create mode 100644 others/GDAS/configs/resnet165.config
create mode 100644 others/GDAS/configs/resnet200.config
create mode 100644 others/GDAS/exps-cnn/cvpr-vis.py
create mode 100644 others/GDAS/exps-cnn/evaluate.py
create mode 100644 others/GDAS/exps-cnn/train_base.py
create mode 100644 others/GDAS/exps-cnn/train_utils.py
create mode 100644 others/GDAS/exps-cnn/train_utils_imagenet.py
create mode 100644 others/GDAS/exps-cnn/vis-arch.py
create mode 100644 others/GDAS/exps-rnn/train_rnn_base.py
create mode 100644 others/GDAS/exps-rnn/train_rnn_utils.py
create mode 100644 others/GDAS/lib/datasets/LanguageDataset.py
create mode 100644 others/GDAS/lib/datasets/MetaBatchSampler.py
create mode 100644 others/GDAS/lib/datasets/TieredImageNet.py
create mode 100644 others/GDAS/lib/datasets/__init__.py
create mode 100644 others/GDAS/lib/datasets/get_dataset_with_transform.py
create mode 100644 others/GDAS/lib/datasets/test_NLP.py
create mode 100644 others/GDAS/lib/datasets/test_dataset.py
create mode 100644 others/GDAS/lib/nas/CifarNet.py
create mode 100644 others/GDAS/lib/nas/ImageNet.py
create mode 100644 others/GDAS/lib/nas/SE_Module.py
create mode 100644 others/GDAS/lib/nas/__init__.py
create mode 100644 others/GDAS/lib/nas/construct_utils.py
create mode 100644 others/GDAS/lib/nas/genotypes.py
create mode 100644 others/GDAS/lib/nas/head_utils.py
create mode 100644 others/GDAS/lib/nas/operations.py
create mode 100644 others/GDAS/lib/nas_rnn/__init__.py
create mode 100644 others/GDAS/lib/nas_rnn/basemodel.py
create mode 100644 others/GDAS/lib/nas_rnn/genotypes.py
create mode 100644 others/GDAS/lib/nas_rnn/model_search.py
create mode 100644 others/GDAS/lib/nas_rnn/utils.py
create mode 100644 others/GDAS/lib/scheduler/__init__.py
create mode 100644 others/GDAS/lib/scheduler/scheduler.py
create mode 100644 others/GDAS/lib/scheduler/utils.py
create mode 100644 others/GDAS/lib/utils/__init__.py
create mode 100644 others/GDAS/lib/utils/draw_pts.py
create mode 100644 others/GDAS/lib/utils/evaluation_utils.py
create mode 100644 others/GDAS/lib/utils/flop_benchmark.py
create mode 100644 others/GDAS/lib/utils/gpu_manager.py
create mode 100644 others/GDAS/lib/utils/model_utils.py
create mode 100644 others/GDAS/lib/utils/save_meta.py
create mode 100644 others/GDAS/lib/utils/utils.py
create mode 100644 others/GDAS/paddlepaddle/.gitignore
create mode 100644 others/GDAS/paddlepaddle/README.md
create mode 100644 others/GDAS/paddlepaddle/lib/models/__init__.py
create mode 100644 others/GDAS/paddlepaddle/lib/models/genotypes.py
create mode 100644 others/GDAS/paddlepaddle/lib/models/nas_net.py
create mode 100644 others/GDAS/paddlepaddle/lib/models/operations.py
create mode 100644 others/GDAS/paddlepaddle/lib/models/resnet.py
create mode 100644 others/GDAS/paddlepaddle/lib/utils/__init__.py
create mode 100644 others/GDAS/paddlepaddle/lib/utils/data_utils.py
create mode 100644 others/GDAS/paddlepaddle/lib/utils/meter.py
create mode 100644 others/GDAS/paddlepaddle/lib/utils/time_utils.py
create mode 100644 others/GDAS/paddlepaddle/scripts/base-train.sh
create mode 100644 others/GDAS/paddlepaddle/scripts/train-nas.sh
create mode 100644 others/GDAS/paddlepaddle/train_cifar.py
create mode 100644 others/GDAS/scripts-cluster/README.md
create mode 100644 others/GDAS/scripts-cluster/job-script.sh
create mode 100644 others/GDAS/scripts-cluster/submit.sh
create mode 100644 others/GDAS/scripts-cnn/train-cifar.sh
create mode 100644 others/GDAS/scripts-cnn/train-imagenet.sh
create mode 100644 others/GDAS/scripts-rnn/train-PTB.sh
create mode 100644 others/GDAS/scripts-rnn/train-WT2.sh
create mode 100644 others/paddlepaddle/.gitignore
create mode 100644 others/paddlepaddle/README.md
create mode 100644 others/paddlepaddle/lib/models/__init__.py
create mode 100644 others/paddlepaddle/lib/models/genotypes.py
create mode 100644 others/paddlepaddle/lib/models/nas_net.py
create mode 100644 others/paddlepaddle/lib/models/operations.py
create mode 100644 others/paddlepaddle/lib/models/resnet.py
create mode 100644 others/paddlepaddle/lib/utils/__init__.py
create mode 100644 others/paddlepaddle/lib/utils/data_utils.py
create mode 100644 others/paddlepaddle/lib/utils/meter.py
create mode 100644 others/paddlepaddle/lib/utils/time_utils.py
create mode 100644 others/paddlepaddle/scripts/base-train.sh
create mode 100644 others/paddlepaddle/scripts/train-nas.sh
create mode 100644 others/paddlepaddle/train_cifar.py
diff --git a/.latent-data/BASELINE.md b/.latent-data/BASELINE.md
deleted file mode 100644
index d496c3a..0000000
--- a/.latent-data/BASELINE.md
+++ /dev/null
@@ -1,138 +0,0 @@
-# Basic Classification Models
-
-## Performance on CIFAR
-
-| Model | FLOPs | Params (M) | Error on CIFAR-10 | Error on CIFAR-100 | Batch Size-#GPUs |
-|:------------------:|:-----------:|:----------:|:-----------------:|:------------------:|:---------:|
-| ResNet-08 | 12.50 M | 0.08 | 12.14 | 40.20 | 256-2 |
-| ResNet-20 | 40.81 M | 0.27 | 7.26 | 31.38 | 256-2 |
-| ResNet-32 | 69.12 M | 0.47 | 6.19 | 29.56 | 256-2 |
-| ResNet-56 | 125.75 M | 0.86 | 5.74 | 26.82 | 256-2 |
-| ResNet-110 | 253.15 M | 1.73 | 5.14 | 25.18 | 256-2 |
-| ResNet-110 | 253.15 M | 1.73 | 5.06 | 25.49 | 256-1 |
-| ResNet-164 | 247.65 M | 1.70 | 4.36 | 21.48 | 256-2 |
-| ResNet-1001 | 1491.00 M | 10.33 | 5.34 | 22.50 | 256-2 |
-| DenseNet-BC100-12 | 287.93 M | 0.77 | 4.68 | 22.76 | 256-2 |
-| DenseNet-BC100-12 | 287.93 M | 0.77 | 4.25 | 21.54 | 128-2 |
-| DenseNet-BC100-12 | 287.93 M | 0.77 | 5.51 | 24.67 | 64-1 |
-| WRN-28-10 | 5243.33 M | 36.48 | 3.61 | 19.65 | 256-2 |
-
-```
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/base-train.sh cifar10 ResNet20 E300 L1 256 -1"
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/base-train.sh cifar10 ResNet56 E300 L1 256 -1"
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/base-train.sh cifar10 ResNet110 E300 L1 256 -1"
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/base-train.sh cifar10 ResNet164 E300 L1 256 -1"
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/base-train.sh cifar10 DenseBC100-12 E300 L1 256 -1"
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/base-train.sh cifar10 WRN28-10 E300 L1 256 -1"
-CUDA_VISIBLE_DEVICES=0,1 python ./exps/basic-eval.py --data_path ${TORCH_HOME}/ILSVRC2012 --checkpoint
-CUDA_VISIBLE_DEVICES=0,1 python ./exps/test-official-CNN.py --data_path ${TORCH_HOME}/ILSVRC2012
-python ./scripts-cluster/submit.py yq01-v100-box-2-8 TEST-CIFAR10-1001 2 "bash ./scripts/base-train.sh cifar10 ResNet1001 E300 L1 256 1021"
-```
-
-Train some NAS models:
-```
-CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 SETN 96 -1
-CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 SETN 96 -1
-CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k SETN 256 -1
-CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k SETN1 256 -1
-CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k DARTS 256 -1
-CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k GDAS_V1 256 -1
-```
-
-## Performance on ImageNet
-
-| Model | FLOPs (G) | Params (M) | Top-1 Error | Top-5 Error | Optimizer |
-|:--------------:|:----------:|:----------:|:-----------:|:-----------:|:----------:|
-| ResNet-18 | 1.814 | 11.69 | 30.24 | 10.92 | Official |
-| ResNet-18 | 1.814 | 11.69 | 29.97 | 10.43 | Step-120 |
-| ResNet-18 | 1.814 | 11.69 | 29.35 | 10.13 | Cosine-120 |
-| ResNet-18 | 1.814 | 11.69 | 29.45 | 10.25 | Cosine-120 B1024 |
-| ResNet-18 | 1.814 | 11.69 | 29.44 | 10.12 |Cosine-S-120|
-| ResNet-18 (DS) | 2.053 | 11.71 | 28.53 | 9.69 |Cosine-S-120|
-| ResNet-34 | 3.663 | 21.80 | 25.65 | 8.06 |Cosine-120 |
-| ResNet-34 (DS) | 3.903 | 21.82 | 25.05 | 7.67 |Cosine-S-120|
-| ResNet-50 | 4.089 | 25.56 | 23.85 | 7.13 | Official |
-| ResNet-50 | 4.089 | 25.56 | 22.54 | 6.45 |Cosine-120 |
-| ResNet-50 | 4.089 | 25.56 | 22.71 | 6.38 |Cosine-120 B1024 |
-| ResNet-50 | 4.089 | 25.56 | 22.34 | 6.22 |Cosine-S-120|
-| ResNet-50 (DS) | 4.328 | 25.58 | 22.67 | 6.39 | Step-120 |
-| ResNet-50 (DS) | 4.328 | 25.58 | 21.94 | 6.23 | Cosine-120 |
-| ResNet-50 (DS) | 4.328 | 25.58 | 21.71 | 5.99 |Cosine-S-120|
-| ResNet-101 | 7.801 | 44.55 | 20.93 | 5.57 |Cosine-120 |
-| ResNet-101 | 7.801 | 44.55 | 20.92 | 5.58 |Cosine-120 B1024 |
-| ResNet-101 (DS)| 8.041 | 44.57 | 20.36 | 5.22 |Cosine-S-120|
-| ResNet-152 | 11.514 | 60.19 | 20.10 | 5.17 |Cosine-120 B1024 |
-| ResNet-152 (DS)| 11.753 | 60.21 | 19.83 | 5.02 |Cosine-S-120|
-| ResNet-200 | 15.007 | 64.67 | 20.06 | 4.98 |Cosine-S-120|
-| Next50-32x4d (DS)| 4.2 | 25.0 | 22.2 | - | Official |
-| Next50-32x4d (DS)| 4.470 | 25.05 | 21.16 | 5.65 |Cosine-S-120|
-| MobileNet-V2 | 0.300 | 3.40 | 28.0 | - | Official |
-| MobileNet-V2 | 0.300 | 3.50 | 27.92 | 9.50 | MobileFast |
-| MobileNet-V2 | 0.300 | 3.50 | 27.56 | 9.26 | MobileFast-Smooth |
-| ShuffleNet-V2 1.0| 0.146 | 2.28 | 30.6 | 11.1 | Official |
-| ShuffleNet-V2 1.0| 0.145 | 2.28 | | |Cosine-S-120|
-| ShuffleNet-V2 1.5| 0.299 | | 27.4 | - | Official |
-| ShuffleNet-V2 1.5| | | | |Cosine-S-120|
-| ShuffleNet-V2 2.0| | | | |Cosine-S-120|
-
-`DS` indicates a deep-stem for the first convolutional layer.
-```
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet18V1 Step-Soft 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet18V1 Cos-Soft 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet18V1 Cos-Soft 1024 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet18V1 Cos-Smooth 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet18V2 Cos-Smooth 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet34V2 Cos-Smooth 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet50V1 Cos-Soft 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet50V2 Step-Soft 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet50V2 Cos-Soft 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNet101V2 Cos-Smooth 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ResNext50-32x4dV2 Cos-Smooth 256 -1"
-```
-
-Training efficient models may require different hyper-parameters.
-```
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh MobileNetV2-X MobileFast 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh MobileNetV2-X MobileFastS 256 -1"
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh MobileNetV2 Mobile 256 -1" (70.96 top-1, 90.05 top-5)
-bash ./scripts-cluster/local.sh 0,1,2,3 "bash ./scripts/base-imagenet.sh ShuffleNetV2-X Shuffle 1024 -1"
-```
-
-# Train with Knowledge Distillation
-
-ResNet110 -> ResNet20
-```
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/KD-train.sh cifar10 ResNet20 ResNet110 0.9 4 -1"
-```
-
-ResNet110 -> ResNet110
-```
-bash ./scripts-cluster/local.sh 0,1 "bash ./scripts/KD-train.sh cifar10 ResNet110 ResNet110 0.9 4 -1"
-```
-
-Set alpha=0.9 and temperature=4 following `Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer, ICLR 2017`.
-
-# Linux
-The following command will redirect the output of the `top` command to `top.txt`.
-```
-top -b -n 1 > top.txt
-```
-
-## Download the ImageNet dataset
-The ImageNet Large Scale Visual Recognition Challenge (ILSVRC) dataset has 1000 categories and 1.2 million images. The images do not need to be preprocessed or packaged in any database, but the validation images need to be moved into appropriate subfolders.
-
-1. Download the images from http://image-net.org/download-images
-
-2. Extract the training data:
- ```bash
- mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
- tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
- find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
- cd ..
- ```
-
-3. Extract the validation data and move images to subfolders:
- ```bash
- mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xvf ILSVRC2012_img_val.tar
- wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
- ```
diff --git a/AA-NAS-Bench.md b/AA-NAS-Bench.md
deleted file mode 100644
index d5811cb..0000000
--- a/AA-NAS-Bench.md
+++ /dev/null
@@ -1,109 +0,0 @@
-# An Algorithm-Agnostic NAS Benchmark (AA-NAS-Bench)
-
-We propose an Algorithm-Agnostic NAS Benchmark (AA-NAS-Bench) with a fixed search space, which provides a unified benchmark for almost any up-to-date NAS algorithm.
-The design of our search space is inspired by the one used in the most popular cell-based search algorithms, where a cell is represented as a directed acyclic graph (DAG). Each edge here is associated with an operation selected from a predefined operation set. To make it applicable to all NAS algorithms, the search space defined in AA-NAS-Bench includes 4 nodes and 5 operation options; with 6 edges in the DAG and 5 choices per edge, this generates 5^6 = 15,625 neural cell candidates in total.
-
-In this Markdown file, we provide:
-- Detailed instructions to reproduce AA-NAS-Bench.
-- 10 NAS algorithms evaluated in our paper.
-
-Note: please use `PyTorch >= 1.1.0` and `Python >= 3.6.0`.
-
-## How to Use AA-NAS-Bench
-
-1. Create the AA-NAS-Bench API from a file:
-```
-from aa_nas_api import AANASBenchAPI
-api = AANASBenchAPI('$path_to_meta_aa_nas_bench_file')
-api = AANASBenchAPI('AA-NAS-Bench-v1_0.pth')
-```
-
-2. Show the number of architectures `len(api)` and each architecture `api[i]`:
-```
-num = len(api)
-for i, arch_str in enumerate(api):
- print ('{:5d}/{:5d} : {:}'.format(i, len(api), arch_str))
-```
-
-3. Show the results of all trials for a single architecture:
-```
-# show all information for a specific architecture
-api.show(1)
-api.show(2)
-
-# show the mean loss and accuracy of an architecture
-info = api.query_meta_info_by_index(1)
-loss, accuracy = info.get_metrics('cifar10', 'train')
-flops, params, latency = info.get_comput_costs('cifar100')
-
-# get the detailed information
-results = api.query_by_index(1, 'cifar100')
-print ('There are {:} trials for this architecture [{:}] on cifar100'.format(len(results), api[1]))
-print ('Latency : {:}'.format(results[0].get_latency()))
-print ('Train Info : {:}'.format(results[0].get_train()))
-print ('Valid Info : {:}'.format(results[0].get_eval('x-valid')))
-print ('Test Info : {:}'.format(results[0].get_eval('x-test')))
-# for the metric after a specific epoch
-print ('Train Info [10-th epoch] : {:}'.format(results[0].get_train(10)))
-```
-
-4. Query the index of an architecture by string:
-```
-index = api.query_index_by_arch('|nor_conv_3x3~0|+|nor_conv_3x3~0|avg_pool_3x3~1|+|skip_connect~0|nor_conv_3x3~1|skip_connect~2|')
-api.show(index)
-```
-
-5. For other usages, please see `lib/aa_nas_api/api.py`
-
-## Instruction to Generate AA-NAS-Bench
-
-1. generate the meta file for AA-NAS-Bench using the following script, where `AA-NAS-BENCH` indicates the name and `4` indicates the maximum number of nodes in a cell.
-```
-bash scripts-search/AA-NAS-meta-gen.sh AA-NAS-BENCH 4
-```
-
-2. train each architecture on a single GPU (see commands in `output/AA-NAS-BENCH-4/meta-node-4.opt-script.txt`, which is automatically generated by step 1).
-```
-CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/AA-NAS-train-archs.sh 0 389 -1 '777 888 999'
-```
-This command will train 390 architectures (IDs 0 to 389) using the following four kinds of splits, each with three random seeds (777, 888, 999).
-
-| Dataset | Train | Eval |
-|:---------------:|:-------------:|:-----:|
-| CIFAR-10 | train | valid |
-| CIFAR-10 | train + valid | test |
-| CIFAR-100 | train | valid+test |
-| ImageNet-16-120 | train | valid+test |
-
-3. calculate the latency, merge the results of all architectures, and simplify the results
-(see commands in `output/AA-NAS-BENCH-4/meta-node-4.cal-script.txt`, which is automatically generated by step 1).
-```
-OMP_NUM_THREADS=6 CUDA_VISIBLE_DEVICES=0 python exps/AA-NAS-statistics.py --mode cal --target_dir 000000-000389-C16-N5
-```
-
-4. merge all results into a single file for AA-NAS-Bench-API.
-```
-OMP_NUM_THREADS=4 python exps/AA-NAS-statistics.py --mode merge
-```
-
-[optional] train a single architecture on a single GPU.
-```
-CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/AA-NAS-train-net.sh resnet 16 5
-CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/AA-NAS-train-net.sh '|nor_conv_3x3~0|+|nor_conv_3x3~0|nor_conv_3x3~1|+|skip_connect~0|skip_connect~1|skip_connect~2|' 16 5
-```
-
-## To Reproduce 10 Baseline NAS Algorithms in AA-NAS-Bench
-
-We have tried our best to implement each method faithfully. Nevertheless, some algorithms might obtain sub-optimal results, since their hyper-parameters might not fit our AA-NAS-Bench.
-If researchers can provide better results with different hyper-parameters, we are happy to update our results according to the new experiments. We also welcome more NAS algorithms to be tested on our dataset and will include them accordingly.
-
--[1] `CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/DARTS-V1.sh cifar10 -1`
--[2] `CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/DARTS-V2.sh cifar10 -1`
--[3] `CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/GDAS.sh cifar10 -1`
--[4] `CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/SETN.sh cifar10 -1`
--[5] `CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/ENAS.sh cifar10 -1`
--[6] `CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/RANDOM-NAS.sh cifar10 -1`
--[7] `bash ./scripts-search/algos/R-EA.sh -1`
--[8] `bash ./scripts-search/algos/Random.sh -1`
--[9] `bash ./scripts-search/algos/REINFORCE.sh -1`
--[10] `bash ./scripts-search/algos/BOHB.sh -1`
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..c813fbb
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Xuanyi Dong [GitHub: https://github.com/D-X-Y]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4fd3785
--- /dev/null
+++ b/README.md
@@ -0,0 +1,136 @@
+# Neural Architecture Search (NAS)
+
+This project contains the following neural architecture search algorithms, implemented in [PyTorch](http://pytorch.org). More NAS resources can be found in [Awesome-NAS](https://github.com/D-X-Y/Awesome-NAS).
+
+- Network Pruning via Transformable Architecture Search, NeurIPS 2019
+- One-Shot Neural Architecture Search via Self-Evaluated Template Network, ICCV 2019
+- Searching for A Robust Neural Architecture in Four GPU Hours, CVPR 2019
+- several typical classification models, e.g., ResNet and DenseNet (see BASELINE.md)
+
+
+## Requirements and Preparation
+
+Please install `PyTorch>=1.1.0`, `Python>=3.6`, and `opencv`.
+
+The CIFAR and ImageNet datasets should be downloaded and extracted into `$TORCH_HOME`.
+Some methods use knowledge distillation (KD), which requires pre-trained models. Please download these models from [Google Drive](https://drive.google.com/open?id=1ANmiYEGX-IQZTfH8w0aSpj-Wypg-0DR-) (or train them yourself) and save them into `.latent-data`.
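+
+For background, the standard KD loss (following Hinton et al.) combines a softened teacher-matching term with the usual cross-entropy. Below is a minimal sketch with hypothetical tensor names (`student_logits`, `teacher_logits`); the repository's actual KD code lives in `lib/procedures/simple_KD_main.py`:
+```
+import torch.nn.functional as F
+
+def kd_loss(student_logits, teacher_logits, labels, alpha=0.9, temperature=4.0):
+  # soft term: match the teacher's softened distribution (scaled by T^2)
+  soft = F.kl_div(
+      F.log_softmax(student_logits / temperature, dim=1),
+      F.softmax(teacher_logits / temperature, dim=1),
+      reduction='batchmean') * (temperature ** 2)
+  # hard term: cross-entropy with the ground-truth labels
+  hard = F.cross_entropy(student_logits, labels)
+  return alpha * soft + (1 - alpha) * hard
+```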
+
+### Useful tools
+1. Compute the number of parameters and FLOPs of a model:
+```
+from utils import get_model_infos
+flop, param = get_model_infos(net, (1,3,32,32))
+```
+
+
+## [Network Pruning via Transformable Architecture Search](https://arxiv.org/abs/1905.09717)
+In this paper, we propose a differentiable search strategy for transformable architectures, i.e., searching for the depth and width of a deep neural network.
+You can see the highlights of our Transformable Architecture Search (TAS) on our [project page](https://xuanyidong.com/assets/projects/NeurIPS-2019-TAS.html).
+
+
+
+
+
+
+
+### Usage
+
+Use `bash ./scripts/prepare.sh` to prepare data splits for `CIFAR-10`, `CIFAR-100`, and `ILSVRC2012`.
+If you do not have the `ILSVRC2012` data, please comment out L12 in `./scripts/prepare.sh`.
+
+Search the depth configuration of ResNet:
+```
+CUDA_VISIBLE_DEVICES=0,1 bash ./scripts-search/search-depth-gumbel.sh cifar10 ResNet110 CIFARX 0.57 -1
+```
+
+Search the width configuration of ResNet:
+```
+CUDA_VISIBLE_DEVICES=0,1 bash ./scripts-search/search-width-gumbel.sh cifar10 ResNet110 CIFARX 0.57 -1
+```
+
+Search for both the depth and width configurations of ResNet:
+```
+CUDA_VISIBLE_DEVICES=0,1 bash ./scripts-search/search-cifar.sh cifar10 ResNet56 CIFARX 0.47 -1
+```
+
+Arguments: `cifar10` is the dataset name, `ResNet56` is the base model, `CIFARX` selects the search hyper-parameters, `0.47`/`0.57` is the expected FLOPs ratio, and `-1` is the random seed.
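+
+For intuition only, below is a minimal sketch of the differentiable width-selection idea (our illustration with made-up names such as `SoftWidthConv`, not the repository's `SoftSelect` code): candidate channel counts are mixed with Gumbel-softmax weights, so the width choice remains differentiable:
+```
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class SoftWidthConv(nn.Module):
+  # one conv layer whose output width is searched among `candidates`
+  def __init__(self, c_in, candidates=(8, 16, 32)):
+    super(SoftWidthConv, self).__init__()
+    self.candidates = candidates
+    self.conv  = nn.Conv2d(c_in, max(candidates), 3, padding=1)
+    self.alpha = nn.Parameter(1e-3 * torch.randn(len(candidates)))
+
+  def forward(self, x, tau=1.0):
+    out   = self.conv(x)  # always compute the widest output
+    probs = F.gumbel_softmax(self.alpha, tau=tau)
+    mixed = 0
+    for p, c in zip(probs, self.candidates):
+      # mask the output down to the candidate width, then soft-mix
+      mask = torch.zeros(out.size(1), device=out.device)
+      mask[:c] = 1
+      mixed = mixed + p * (out * mask.view(1, -1, 1, 1))
+    return mixed
+```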
+
+
+## [One-Shot Neural Architecture Search via Self-Evaluated Template Network](https://arxiv.org/abs/1910.05733)
+
+
+
+Highlight: we equip one-shot NAS with an architecture sampler and train the network weights via uniform sampling.
+
+
+### Usage
+
+Please use the following scripts to train the SETN-searched CNNs on CIFAR-10, CIFAR-100, and ImageNet.
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 SETN 96 -1
+CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 SETN 96 -1
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k SETN 256 -1
+```
+
+To run the SETN search on a small search space:
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/SETN.sh cifar10 -1
+```
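+
+To illustrate the training scheme, here is a minimal sketch (an assumed structure with a made-up `MixedEdge` class, not the repository's implementation) of how the one-shot weights are trained with uniformly sampled candidates:
+```
+import random
+import torch.nn as nn
+
+class MixedEdge(nn.Module):
+  # one supernet edge that holds all candidate operations
+  def __init__(self, ops):
+    super(MixedEdge, self).__init__()
+    self.ops = nn.ModuleList(ops)
+
+  def forward(self, x):
+    # uniform sampling: each forward pass trains one randomly chosen op,
+    # so every candidate receives an unbiased share of weight updates
+    return self.ops[random.randrange(len(self.ops))](x)
+```
+An architecture evaluator is then trained on top of these shared weights to rank candidate architectures.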
+
+
+## [Searching for A Robust Neural Architecture in Four GPU Hours](https://arxiv.org/abs/1910.04465)
+
+
+
+
+We propose a Gradient-based search algorithm using Differentiable Architecture Sampling (GDAS). GDAS is based on DARTS and improves upon it with Gumbel-softmax sampling.
+Experiments on CIFAR-10, CIFAR-100, ImageNet, PTB, and WT2 are reported.
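+
+As a rough illustration (hypothetical names, not the classes in `lib/models/cell_searchs`), the core GDAS step samples one operation per edge with a hard Gumbel-softmax, so only the sampled operation is evaluated in the forward pass while gradients still reach the architecture parameters via the straight-through estimator:
+```
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class GumbelEdge(nn.Module):
+  def __init__(self, ops):
+    super(GumbelEdge, self).__init__()
+    self.ops   = nn.ModuleList(ops)
+    self.alpha = nn.Parameter(1e-3 * torch.randn(len(ops)))
+
+  def forward(self, x, tau):
+    # hard=True gives a one-hot sample whose gradient flows through
+    # the soft probabilities (straight-through trick)
+    onehot = F.gumbel_softmax(self.alpha, tau=tau, hard=True)
+    index  = onehot.argmax().item()
+    return onehot[index] * self.ops[index](x)
+```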
+
+The old version is located at [`others/GDAS`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS) and a PaddlePaddle implementation is located at [`others/paddlepaddle`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/paddlepaddle).
+
+
+### Usage
+
+Please use the following scripts to train the GDAS-searched CNNs on CIFAR-10, CIFAR-100, and ImageNet.
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar10 GDAS_V1 96 -1
+CUDA_VISIBLE_DEVICES=0 bash ./scripts/nas-infer-train.sh cifar100 GDAS_V1 96 -1
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts/nas-infer-train.sh imagenet-1k GDAS_V1 256 -1
+```
+
+To run the GDAS search on a small search space:
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/GDAS.sh cifar10 -1
+```
+
+The baseline search algorithms are DARTS (V1 and V2):
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/DARTS-V1.sh cifar10 -1
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-search/algos/DARTS-V2.sh cifar10 -1
+```
+
+
+# Citation
+
+If you find that this project helps your research, please consider citing some of the following papers:
+```
+@inproceedings{dong2019tas,
+ title = {Network Pruning via Transformable Architecture Search},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Neural Information Processing Systems (NeurIPS)},
+ year = {2019}
+}
+@inproceedings{dong2019one,
+ title = {One-Shot Neural Architecture Search via Self-Evaluated Template Network},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
+ year = {2019}
+}
+@inproceedings{dong2019search,
+ title = {Searching for A Robust Neural Architecture in Four GPU Hours},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ pages = {1761--1770},
+ year = {2019}
+}
+```
diff --git a/lib/aa_nas_api/__init__.py b/lib/aa_nas_api/__init__.py
index 525f097..594669b 100644
--- a/lib/aa_nas_api/__init__.py
+++ b/lib/aa_nas_api/__init__.py
@@ -1,2 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .api import AANASBenchAPI
from .api import ArchResults, ResultsCount
diff --git a/lib/aa_nas_api/api.py b/lib/aa_nas_api/api.py
index 01c4b84..3e4e351 100644
--- a/lib/aa_nas_api/api.py
+++ b/lib/aa_nas_api/api.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, copy, torch, numpy as np
from collections import OrderedDict
diff --git a/lib/config_utils/__init__.py b/lib/config_utils/__init__.py
index 66f6355..dd91409 100644
--- a/lib/config_utils/__init__.py
+++ b/lib/config_utils/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .configure_utils import load_config, dict2config, configure2str
from .basic_args import obtain_basic_args
from .attention_args import obtain_attention_args
diff --git a/lib/config_utils/basic_args.py b/lib/config_utils/basic_args.py
index 3146371..dc6d78c 100644
--- a/lib/config_utils/basic_args.py
+++ b/lib/config_utils/basic_args.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, time, random, argparse
from .share_args import add_shared_args
diff --git a/lib/datasets/DownsampledImageNet.py b/lib/datasets/DownsampledImageNet.py
index 1eed66a..970336e 100644
--- a/lib/datasets/DownsampledImageNet.py
+++ b/lib/datasets/DownsampledImageNet.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, hashlib, torch
import numpy as np
from PIL import Image
diff --git a/lib/datasets/SearchDatasetWrap.py b/lib/datasets/SearchDatasetWrap.py
index f7925b3..d20dd1f 100644
--- a/lib/datasets/SearchDatasetWrap.py
+++ b/lib/datasets/SearchDatasetWrap.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch, copy, random
import torch.utils.data as data
diff --git a/lib/datasets/__init__.py b/lib/datasets/__init__.py
index 20d3a4c..6000628 100644
--- a/lib/datasets/__init__.py
+++ b/lib/datasets/__init__.py
@@ -1,2 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .get_dataset_with_transform import get_datasets
from .SearchDatasetWrap import SearchDataset
diff --git a/lib/datasets/get_dataset_with_transform.py b/lib/datasets/get_dataset_with_transform.py
index f437fe3..416bcde 100644
--- a/lib/datasets/get_dataset_with_transform.py
+++ b/lib/datasets/get_dataset_with_transform.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, torch
import os.path as osp
import numpy as np
diff --git a/lib/datasets/test_utils.py b/lib/datasets/test_utils.py
index 7ab3b41..ca30512 100644
--- a/lib/datasets/test_utils.py
+++ b/lib/datasets/test_utils.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
def test_imagenet_data(imagenet):
total_length = len(imagenet)
assert total_length == 1281166 or total_length == 50000, 'The length of ImageNet is wrong : {}'.format(total_length)
diff --git a/lib/log_utils/__init__.py b/lib/log_utils/__init__.py
index c3f2438..0c8858a 100644
--- a/lib/log_utils/__init__.py
+++ b/lib/log_utils/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .logger import Logger
from .print_logger import PrintLogger
from .meter import AverageMeter
diff --git a/lib/models/SharedUtils.py b/lib/models/SharedUtils.py
index 191031e..0536ebf 100644
--- a/lib/models/SharedUtils.py
+++ b/lib/models/SharedUtils.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
import torch.nn as nn
diff --git a/lib/models/__init__.py b/lib/models/__init__.py
index 1aca1d0..7f14ec1 100644
--- a/lib/models/__init__.py
+++ b/lib/models/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
from os import path as osp
diff --git a/lib/models/cell_infers/__init__.py b/lib/models/cell_infers/__init__.py
index b27b4fc..63bd95b 100644
--- a/lib/models/cell_infers/__init__.py
+++ b/lib/models/cell_infers/__init__.py
@@ -1 +1,4 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .tiny_network import TinyNetwork
diff --git a/lib/models/cell_operations.py b/lib/models/cell_operations.py
index 408289b..4e28f56 100644
--- a/lib/models/cell_operations.py
+++ b/lib/models/cell_operations.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
import torch.nn as nn
diff --git a/lib/models/cell_searchs/__init__.py b/lib/models/cell_searchs/__init__.py
index a4b3e35..2133795 100644
--- a/lib/models/cell_searchs/__init__.py
+++ b/lib/models/cell_searchs/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .search_model_darts_v1 import TinyNetworkDartsV1
from .search_model_darts_v2 import TinyNetworkDartsV2
from .search_model_gdas import TinyNetworkGDAS
diff --git a/lib/models/cell_searchs/_test_module.py b/lib/models/cell_searchs/_test_module.py
index 7261cd4..c603ba6 100644
--- a/lib/models/cell_searchs/_test_module.py
+++ b/lib/models/cell_searchs/_test_module.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
from search_model_enas_utils import Controller
diff --git a/lib/models/cell_searchs/cells.py b/lib/models/cell_searchs/cells.py
deleted file mode 100644
index b1c5a8c..0000000
--- a/lib/models/cell_searchs/cells.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import math, torch
-import torch.nn as nn
-import torch.nn.functional as F
-from copy import deepcopy
-from ..cell_operations import OPS
-
-
-class SearchCell(nn.Module):
-
- def __init__(self, C_in, C_out, stride, max_nodes, op_names):
- super(SearchCell, self).__init__()
-
- self.op_names = deepcopy(op_names)
- self.edges = nn.ModuleDict()
- self.max_nodes = max_nodes
- self.in_dim = C_in
- self.out_dim = C_out
- for i in range(1, max_nodes):
- for j in range(i):
- node_str = '{:}<-{:}'.format(i, j)
- if j == 0:
- xlists = [OPS[op_name](C_in , C_out, stride) for op_name in op_names]
- else:
- xlists = [OPS[op_name](C_in , C_out, 1) for op_name in op_names]
- self.edges[ node_str ] = nn.ModuleList( xlists )
- self.edge_keys = sorted(list(self.edges.keys()))
- self.edge2index = {key:i for i, key in enumerate(self.edge_keys)}
- self.num_edges = len(self.edges)
-
- def extra_repr(self):
- string = 'info :: {max_nodes} nodes, inC={in_dim}, outC={out_dim}'.format(**self.__dict__)
- return string
-
- def forward(self, inputs, weightss):
- nodes = [inputs]
- for i in range(1, self.max_nodes):
- inter_nodes = []
- for j in range(i):
- node_str = '{:}<-{:}'.format(i, j)
- weights = weightss[ self.edge2index[node_str] ]
- inter_nodes.append( sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) ) )
- nodes.append( sum(inter_nodes) )
- return nodes[-1]
-
- # GDAS
- def forward_acc(self, inputs, weightss, indexess):
- nodes = [inputs]
- for i in range(1, self.max_nodes):
- inter_nodes = []
- for j in range(i):
- node_str = '{:}<-{:}'.format(i, j)
- weights = weightss[ self.edge2index[node_str] ]
- indexes = indexess[ self.edge2index[node_str] ].item()
- import pdb; pdb.set_trace() # to-do
- #inter_nodes.append( self.edges[node_str][indexes](nodes[j]) * weights[indexes] )
- nodes.append( sum(inter_nodes) )
- return nodes[-1]
-
- # joint
- def forward_joint(self, inputs, weightss):
- nodes = [inputs]
- for i in range(1, self.max_nodes):
- inter_nodes = []
- for j in range(i):
- node_str = '{:}<-{:}'.format(i, j)
- weights = weightss[ self.edge2index[node_str] ]
- aggregation = sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) ) / weights.numel()
- inter_nodes.append( aggregation )
- nodes.append( sum(inter_nodes) )
- return nodes[-1]
-
- # uniform random sampling per iteration
- def forward_urs(self, inputs):
- nodes = [inputs]
- for i in range(1, self.max_nodes):
- while True: # to avoid select zero for all ops
- sops, has_non_zero = [], False
- for j in range(i):
- node_str = '{:}<-{:}'.format(i, j)
- candidates = self.edges[node_str]
- select_op = random.choice(candidates)
- sops.append( select_op )
- if not hasattr(select_op, 'is_zero') or select_op.is_zero == False: has_non_zero=True
- if has_non_zero: break
- inter_nodes = []
- for j, select_op in enumerate(sops):
- inter_nodes.append( select_op(nodes[j]) )
- nodes.append( sum(inter_nodes) )
- return nodes[-1]
-
- # select the argmax
- def forward_select(self, inputs, weightss):
- nodes = [inputs]
- for i in range(1, self.max_nodes):
- inter_nodes = []
- for j in range(i):
- node_str = '{:}<-{:}'.format(i, j)
- weights = weightss[ self.edge2index[node_str] ]
- inter_nodes.append( self.edges[node_str][ weights.argmax().item() ]( nodes[j] ) )
- #inter_nodes.append( sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) ) )
- nodes.append( sum(inter_nodes) )
- return nodes[-1]
-
- # select the argmax
- def forward_dynamic(self, inputs, structure):
- nodes = [inputs]
- for i in range(1, self.max_nodes):
- cur_op_node = structure.nodes[i-1]
- inter_nodes = []
- for op_name, j in cur_op_node:
- node_str = '{:}<-{:}'.format(i, j)
- op_index = self.op_names.index( op_name )
- inter_nodes.append( self.edges[node_str][op_index]( nodes[j] ) )
- nodes.append( sum(inter_nodes) )
- return nodes[-1]
diff --git a/lib/models/cell_searchs/genotypes.py b/lib/models/cell_searchs/genotypes.py
index 6bd8af8..e0f2e2e 100644
--- a/lib/models/cell_searchs/genotypes.py
+++ b/lib/models/cell_searchs/genotypes.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from copy import deepcopy
diff --git a/lib/models/cell_searchs/search_cells.py b/lib/models/cell_searchs/search_cells.py
index 2b43453..49124be 100644
--- a/lib/models/cell_searchs/search_cells.py
+++ b/lib/models/cell_searchs/search_cells.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import math, random, torch
import warnings
import torch.nn as nn
diff --git a/lib/models/cell_searchs/search_model_darts_v1.py b/lib/models/cell_searchs/search_model_darts_v1.py
index 2103ee6..61ef8ea 100644
--- a/lib/models/cell_searchs/search_model_darts_v1.py
+++ b/lib/models/cell_searchs/search_model_darts_v1.py
@@ -1,3 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
########################################################
# DARTS: Differentiable Architecture Search, ICLR 2019 #
########################################################
diff --git a/lib/models/cell_searchs/search_model_darts_v2.py b/lib/models/cell_searchs/search_model_darts_v2.py
index 9928bb1..cb996ff 100644
--- a/lib/models/cell_searchs/search_model_darts_v2.py
+++ b/lib/models/cell_searchs/search_model_darts_v2.py
@@ -1,3 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
########################################################
# DARTS: Differentiable Architecture Search, ICLR 2019 #
########################################################
diff --git a/lib/models/cell_searchs/search_model_enas.py b/lib/models/cell_searchs/search_model_enas.py
index b08bcb5..2422b52 100644
--- a/lib/models/cell_searchs/search_model_enas.py
+++ b/lib/models/cell_searchs/search_model_enas.py
@@ -1,3 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##########################################################################
# Efficient Neural Architecture Search via Parameters Sharing, ICML 2018 #
##########################################################################
diff --git a/lib/models/cell_searchs/search_model_enas_utils.py b/lib/models/cell_searchs/search_model_enas_utils.py
index c87bcd0..e03f57b 100644
--- a/lib/models/cell_searchs/search_model_enas_utils.py
+++ b/lib/models/cell_searchs/search_model_enas_utils.py
@@ -1,3 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##########################################################################
# Efficient Neural Architecture Search via Parameters Sharing, ICML 2018 #
##########################################################################
diff --git a/lib/models/cell_searchs/search_model_random.py b/lib/models/cell_searchs/search_model_random.py
index 1bd947c..c2f83f9 100644
--- a/lib/models/cell_searchs/search_model_random.py
+++ b/lib/models/cell_searchs/search_model_random.py
@@ -1,3 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##############################################################################
# Random Search and Reproducibility for Neural Architecture Search, UAI 2019 #
##############################################################################
diff --git a/lib/models/cell_searchs/search_model_setn.py b/lib/models/cell_searchs/search_model_setn.py
index 6d60d55..5864f32 100644
--- a/lib/models/cell_searchs/search_model_setn.py
+++ b/lib/models/cell_searchs/search_model_setn.py
@@ -1,3 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
######################################################################################
# One-Shot Neural Architecture Search via Self-Evaluated Template Network, ICCV 2019 #
######################################################################################
diff --git a/lib/models/shape_searchs/SearchCifarResNet.py b/lib/models/shape_searchs/SearchCifarResNet.py
index 9f06f1d..6271616 100644
--- a/lib/models/shape_searchs/SearchCifarResNet.py
+++ b/lib/models/shape_searchs/SearchCifarResNet.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import math, torch
from collections import OrderedDict
from bisect import bisect_right
diff --git a/lib/models/shape_searchs/SearchCifarResNet_depth.py b/lib/models/shape_searchs/SearchCifarResNet_depth.py
index e393bf5..9395e7d 100644
--- a/lib/models/shape_searchs/SearchCifarResNet_depth.py
+++ b/lib/models/shape_searchs/SearchCifarResNet_depth.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import math, torch
from collections import OrderedDict
from bisect import bisect_right
diff --git a/lib/models/shape_searchs/SearchCifarResNet_width.py b/lib/models/shape_searchs/SearchCifarResNet_width.py
index 2c61acd..2e37e98 100644
--- a/lib/models/shape_searchs/SearchCifarResNet_width.py
+++ b/lib/models/shape_searchs/SearchCifarResNet_width.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import math, torch
import torch.nn as nn
from ..initialization import initialize_resnet
diff --git a/lib/models/shape_searchs/SoftSelect.py b/lib/models/shape_searchs/SoftSelect.py
index b120c8b..802dcb6 100644
--- a/lib/models/shape_searchs/SoftSelect.py
+++ b/lib/models/shape_searchs/SoftSelect.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import math, torch
import torch.nn as nn
diff --git a/lib/models/shape_searchs/__init__.py b/lib/models/shape_searchs/__init__.py
index cbb11c5..91a58f4 100644
--- a/lib/models/shape_searchs/__init__.py
+++ b/lib/models/shape_searchs/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .SearchCifarResNet_width import SearchWidthCifarResNet
from .SearchCifarResNet_depth import SearchDepthCifarResNet
from .SearchCifarResNet import SearchShapeCifarResNet
diff --git a/lib/models/shape_searchs/test.py b/lib/models/shape_searchs/test.py
index 502623e..4f77f4c 100644
--- a/lib/models/shape_searchs/test.py
+++ b/lib/models/shape_searchs/test.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
import torch.nn as nn
from SoftSelect import ChannelWiseInter
diff --git a/lib/nas_infer_model/__init__.py b/lib/nas_infer_model/__init__.py
index 5a77cce..aeda5b5 100644
--- a/lib/nas_infer_model/__init__.py
+++ b/lib/nas_infer_model/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
def obtain_nas_infer_model(config):
diff --git a/lib/nas_infer_model/operations.py b/lib/nas_infer_model/operations.py
index a328d0e..1c56905 100644
--- a/lib/nas_infer_model/operations.py
+++ b/lib/nas_infer_model/operations.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import torch
import torch.nn as nn
diff --git a/lib/procedures/__init__.py b/lib/procedures/__init__.py
index b3aee8d..7950c66 100644
--- a/lib/procedures/__init__.py
+++ b/lib/procedures/__init__.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
from .starts import prepare_seed, prepare_logger, get_machine_info, save_checkpoint, copy_checkpoint
from .optimizers import get_optim_scheduler
diff --git a/lib/procedures/basic_main.py b/lib/procedures/basic_main.py
index 33a8ff4..ed8cea4 100644
--- a/lib/procedures/basic_main.py
+++ b/lib/procedures/basic_main.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, time, torch
from log_utils import AverageMeter, time_string
from utils import obtain_accuracy
diff --git a/lib/procedures/optimizers.py b/lib/procedures/optimizers.py
index 4dd63fc..a3cb84a 100644
--- a/lib/procedures/optimizers.py
+++ b/lib/procedures/optimizers.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import math, torch
import torch.nn as nn
from bisect import bisect_right
diff --git a/lib/procedures/search_main.py b/lib/procedures/search_main.py
index d6808ee..48ed44f 100644
--- a/lib/procedures/search_main.py
+++ b/lib/procedures/search_main.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, time, torch
from log_utils import AverageMeter, time_string
from utils import obtain_accuracy
diff --git a/lib/procedures/search_main_v2.py b/lib/procedures/search_main_v2.py
index cd5d107..46707ef 100644
--- a/lib/procedures/search_main_v2.py
+++ b/lib/procedures/search_main_v2.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, time, torch
from log_utils import AverageMeter, time_string
from utils import obtain_accuracy
diff --git a/lib/procedures/simple_KD_main.py b/lib/procedures/simple_KD_main.py
index 20e2598..007b51c 100644
--- a/lib/procedures/simple_KD_main.py
+++ b/lib/procedures/simple_KD_main.py
@@ -1,3 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, time, torch
import torch.nn.functional as F
# our modules
diff --git a/lib/procedures/starts.py b/lib/procedures/starts.py
index 2409059..3202b3f 100644
--- a/lib/procedures/starts.py
+++ b/lib/procedures/starts.py
@@ -1,9 +1,6 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-#
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
import os, sys, time, torch, random, PIL, copy, numpy as np
from os import path as osp
from shutil import copyfile
diff --git a/others/GDAS/LICENSE b/others/GDAS/LICENSE
new file mode 100644
index 0000000..89b8259
--- /dev/null
+++ b/others/GDAS/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Xuanyi Dong
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/others/GDAS/README.md b/others/GDAS/README.md
new file mode 100644
index 0000000..3735db8
--- /dev/null
+++ b/others/GDAS/README.md
@@ -0,0 +1,73 @@
+## [Searching for A Robust Neural Architecture in Four GPU Hours](http://xuanyidong.com/publication/gradient-based-diff-sampler/)
+
+We propose a Gradient-based neural architecture search approach using a Differentiable Architecture Sampler (GDAS).
+
+Figure-1. We use a DAG to represent the search space of a neural cell. Different operations (colored arrows) transform one node (square) into its intermediate features (small circles), and each node is the sum of the intermediate features transformed from the previous nodes. As indicated by the solid connections, the neural cell in the proposed GDAS is a sampled sub-graph of this DAG. Specifically, among the intermediate features between every two nodes, GDAS samples one feature in a differentiable way.
+
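+GDAS makes this per-edge sampling differentiable via Gumbel-softmax. Below is a minimal sketch of the idea, not the exact code in this repository: `arch_logits` stands for the learnable architecture parameters of one edge and `cands` for the candidate features produced by the different operations.
+```
+import torch.nn.functional as F
+
+def sample_edge_output(arch_logits, cands, tau):
+  # hard one-hot sample in the forward pass; relaxed (soft) gradients flow
+  # back to arch_logits in the backward pass
+  weights = F.gumbel_softmax(arch_logits, tau=tau, hard=True)
+  # the weighted sum keeps exactly one candidate feature per edge
+  return sum(w * feat for w, feat in zip(weights, cands))
+```
+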
+### Requirements
+- PyTorch 1.0.1
+- Python 3.6
+- opencv
+```
+conda install pytorch torchvision cuda100 -c pytorch
+```
+
+### Usage
+
+Train the searched CNN on CIFAR
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_FG cifar10 cut
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_F1 cifar10 cut
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_V1 cifar100 cut
+```
+
+Train the searched CNN on ImageNet
+```
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14 B128 -1
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14 B256 -1
+```
+
+Evaluate a trained CNN model
+```
+CUDA_VISIBLE_DEVICES=0 python ./exps-cnn/evaluate.py --data_path $TORCH_HOME/cifar.python --checkpoint ${checkpoint-path}
+CUDA_VISIBLE_DEVICES=0 python ./exps-cnn/evaluate.py --data_path $TORCH_HOME/ILSVRC2012 --checkpoint ${checkpoint-path}
+CUDA_VISIBLE_DEVICES=0 python ./exps-cnn/evaluate.py --data_path $TORCH_HOME/ILSVRC2012 --checkpoint GDAS-V1-C50-N14-ImageNet.pth
+```
+
+Train the searched RNN
+```
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-PTB.sh DARTS_V1
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-PTB.sh DARTS_V2
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-PTB.sh GDAS
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-WT2.sh DARTS_V1
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-WT2.sh DARTS_V2
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-rnn/train-WT2.sh GDAS
+```
+
+### Training Logs
+You can find some training logs in [`./data/logs/`](https://github.com/D-X-Y/NAS-Projects/tree/master/others/GDAS/data/logs).
+You can also find some pre-trained models on [Google Drive](https://drive.google.com/open?id=1Ofhc49xC1PLIX4O708gJZ1ugzz4td_RJ).
+
+
+### Experimental Results
+
+Figure-2. Top-1 and top-5 errors on ImageNet.
+
+### Correction
+
+The Gumbel-softmax temperature should decrease from 10 to 0.1 during the search.
+
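+A simple linear decay over the search epochs would satisfy this; the sketch below is an illustration, not necessarily the exact schedule used:
+```
+def gumbel_tau(epoch, total_epochs, tau_max=10.0, tau_min=0.1):
+  # linearly anneal the Gumbel-softmax temperature from tau_max down to tau_min
+  return tau_max - (tau_max - tau_min) * epoch / (total_epochs - 1)
+```
+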
+### Citation
+If this project (GDAS) helps your research, please cite the paper:
+```
+@inproceedings{dong2019search,
+ title={Searching for A Robust Neural Architecture in Four GPU Hours},
+ author={Dong, Xuanyi and Yang, Yi},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ pages={1761--1770},
+ year={2019}
+}
+```
diff --git a/others/GDAS/configs/NAS-PTB-BASE.config b/others/GDAS/configs/NAS-PTB-BASE.config
new file mode 100644
index 0000000..642fa92
--- /dev/null
+++ b/others/GDAS/configs/NAS-PTB-BASE.config
@@ -0,0 +1,27 @@
+{
+ "data_name" : ["str", "PTB"],
+ "data_path" : ["str", "./data/data/penn"],
+ "emsize" : ["int", 850],
+ "nhid" : ["int", 850],
+ "nhidlast" : ["int", 850],
+ "LR" : ["float", 20],
+ "clip" : ["float", 0.25],
+ "epochs" : ["int", 3000],
+ "train_batch": ["int", 64],
+ "eval_batch": ["int", 10],
+ "test_batch": ["int", 1],
+ "bptt" : ["int", 35],
+
+ "dropout" : ["float", 0.75],
+ "dropouth" : ["float", 0.25],
+ "dropoutx" : ["float", 0.75],
+ "dropouti" : ["float", 0.2],
+ "dropoute" : ["float", 0.1],
+
+ "nonmono" : ["int", 5],
+ "alpha" : ["float", 0],
+ "beta" : ["float", 1e-3],
+ "wdecay" : ["float", 8e-7],
+
+ "max_seq_len_delta" : ["int", 20]
+}
diff --git a/others/GDAS/configs/NAS-WT2-BASE.config b/others/GDAS/configs/NAS-WT2-BASE.config
new file mode 100644
index 0000000..85c8626
--- /dev/null
+++ b/others/GDAS/configs/NAS-WT2-BASE.config
@@ -0,0 +1,27 @@
+{
+ "data_name" : ["str", "WT2"],
+ "data_path" : ["str", "./data/data/wikitext-2"],
+ "emsize" : ["int", 700],
+ "nhid" : ["int", 700],
+ "nhidlast" : ["int", 700],
+ "LR" : ["float", 20],
+ "clip" : ["float", 0.25],
+ "epochs" : ["int", 3000],
+ "train_batch": ["int", 64],
+ "eval_batch": ["int", 10],
+ "test_batch": ["int", 1],
+ "bptt" : ["int", 35],
+
+ "dropout" : ["float", 0.75],
+ "dropouth" : ["float", 0.15],
+ "dropoutx" : ["float", 0.75],
+ "dropouti" : ["float", 0.2],
+ "dropoute" : ["float", 0.1],
+
+ "nonmono" : ["int", 5],
+ "alpha" : ["float", 0],
+ "beta" : ["float", 1e-3],
+ "wdecay" : ["float", 5e-7],
+
+ "max_seq_len_delta" : ["int", 20]
+}
diff --git a/others/GDAS/configs/cos1800.config b/others/GDAS/configs/cos1800.config
new file mode 100644
index 0000000..56e53b1
--- /dev/null
+++ b/others/GDAS/configs/cos1800.config
@@ -0,0 +1,8 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 1800],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0001],
+ "LR" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/cos600.config b/others/GDAS/configs/cos600.config
new file mode 100644
index 0000000..4bd4483
--- /dev/null
+++ b/others/GDAS/configs/cos600.config
@@ -0,0 +1,8 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0005],
+ "LR" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cut.config b/others/GDAS/configs/nas-cifar-cos-cut.config
new file mode 100644
index 0000000..830ffbd
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cut.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 96],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0003],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cutB128.config b/others/GDAS/configs/nas-cifar-cos-cutB128.config
new file mode 100644
index 0000000..c2c6f9b
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cutB128.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0003],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cutB64.config b/others/GDAS/configs/nas-cifar-cos-cutB64.config
new file mode 100644
index 0000000..a566c6d
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cutB64.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 64],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0003],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cutB96.config b/others/GDAS/configs/nas-cifar-cos-cutB96.config
new file mode 100644
index 0000000..830ffbd
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cutB96.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 96],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0003],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cutW1.config b/others/GDAS/configs/nas-cifar-cos-cutW1.config
new file mode 100644
index 0000000..acd2ae0
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cutW1.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 96],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0001],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cutW3.config b/others/GDAS/configs/nas-cifar-cos-cutW3.config
new file mode 100644
index 0000000..830ffbd
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cutW3.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 96],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0003],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-cutW5.config b/others/GDAS/configs/nas-cifar-cos-cutW5.config
new file mode 100644
index 0000000..da61223
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-cutW5.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 96],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0005],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 16],
+ "drop_path_prob" : ["float", 0.2]
+}
diff --git a/others/GDAS/configs/nas-cifar-cos-nocut.config b/others/GDAS/configs/nas-cifar-cos-nocut.config
new file mode 100644
index 0000000..4b89845
--- /dev/null
+++ b/others/GDAS/configs/nas-cifar-cos-nocut.config
@@ -0,0 +1,14 @@
+{
+ "type" : ["str", "cosine"],
+ "batch_size": ["int", 96],
+ "epochs" : ["int", 600],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0003],
+ "LR" : ["float", 0.025],
+ "LR_MIN" : ["float", 0.0001],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "cutout" : ["int", 0],
+ "drop_path_prob" : ["float", 0.3]
+}
diff --git a/others/GDAS/configs/nas-imagenet-B128.config b/others/GDAS/configs/nas-imagenet-B128.config
new file mode 100644
index 0000000..442b497
--- /dev/null
+++ b/others/GDAS/configs/nas-imagenet-B128.config
@@ -0,0 +1,15 @@
+{
+ "type" : ["str", "steplr"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 250],
+ "decay_period": ["int", 1],
+ "gamma" : ["float", 0.97],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.00003],
+ "LR" : ["float", 0.1],
+ "label_smooth": ["float", 0.1],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "drop_path_prob" : ["float", 0]
+}
diff --git a/others/GDAS/configs/nas-imagenet-B256.config b/others/GDAS/configs/nas-imagenet-B256.config
new file mode 100644
index 0000000..a5926fb
--- /dev/null
+++ b/others/GDAS/configs/nas-imagenet-B256.config
@@ -0,0 +1,15 @@
+{
+ "type" : ["str", "steplr"],
+ "batch_size": ["int", 256],
+ "epochs" : ["int", 250],
+ "decay_period": ["int", 1],
+ "gamma" : ["float", 0.97],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.00003],
+ "LR" : ["float", 0.1],
+ "label_smooth": ["float", 0.1],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "drop_path_prob" : ["float", 0]
+}
diff --git a/others/GDAS/configs/nas-imagenet.config b/others/GDAS/configs/nas-imagenet.config
new file mode 100644
index 0000000..442b497
--- /dev/null
+++ b/others/GDAS/configs/nas-imagenet.config
@@ -0,0 +1,15 @@
+{
+ "type" : ["str", "steplr"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 250],
+ "decay_period": ["int", 1],
+ "gamma" : ["float", 0.97],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.00003],
+ "LR" : ["float", 0.1],
+ "label_smooth": ["float", 0.1],
+ "auxiliary" : ["bool", 1],
+ "auxiliary_weight" : ["float", 0.4],
+ "grad_clip" : ["float", 5],
+ "drop_path_prob" : ["float", 0]
+}
diff --git a/others/GDAS/configs/pyramidC10.config b/others/GDAS/configs/pyramidC10.config
new file mode 100644
index 0000000..730663e
--- /dev/null
+++ b/others/GDAS/configs/pyramidC10.config
@@ -0,0 +1,10 @@
+{
+ "type" : ["str", "multistep"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 300],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0001],
+ "LR" : ["float", 0.1],
+ "milestones": ["int", [150, 225]],
+ "gammas" : ["float", [0.1, 0.1]]
+}
diff --git a/others/GDAS/configs/pyramidC100.config b/others/GDAS/configs/pyramidC100.config
new file mode 100644
index 0000000..a6a633d
--- /dev/null
+++ b/others/GDAS/configs/pyramidC100.config
@@ -0,0 +1,10 @@
+{
+ "type" : ["str", "multistep"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 300],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0001],
+ "LR" : ["float", 0.5],
+ "milestones": ["int", [150, 225]],
+ "gammas" : ["float", [0.1, 0.1]]
+}
diff --git a/others/GDAS/configs/resnet165.config b/others/GDAS/configs/resnet165.config
new file mode 100644
index 0000000..9ae21fa
--- /dev/null
+++ b/others/GDAS/configs/resnet165.config
@@ -0,0 +1,10 @@
+{
+ "type" : ["str", "multistep"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 165],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0001],
+ "LR" : ["float", 0.01],
+ "milestones": ["int", [1, 83, 124]],
+ "gammas" : ["float", [10, 0.1, 0.1]]
+}
diff --git a/others/GDAS/configs/resnet200.config b/others/GDAS/configs/resnet200.config
new file mode 100644
index 0000000..6694e19
--- /dev/null
+++ b/others/GDAS/configs/resnet200.config
@@ -0,0 +1,10 @@
+{
+ "type" : ["str", "multistep"],
+ "batch_size": ["int", 128],
+ "epochs" : ["int", 200],
+ "momentum" : ["float", 0.9],
+ "decay" : ["float", 0.0005],
+ "LR" : ["float", 0.01],
+ "milestones": ["int", [1 , 60, 120, 160]],
+ "gammas" : ["float", [10, 0.2, 0.2, 0.2]]
+}
diff --git a/others/GDAS/exps-cnn/cvpr-vis.py b/others/GDAS/exps-cnn/cvpr-vis.py
new file mode 100644
index 0000000..b0f68d9
--- /dev/null
+++ b/others/GDAS/exps-cnn/cvpr-vis.py
@@ -0,0 +1,97 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+# python ./exps-nas/cvpr-vis.py --save_dir ./snapshots/NAS-VIS/
+import os, sys, time, glob, random, argparse
+import numpy as np
+from copy import deepcopy
+import torch
+from pathlib import Path
+lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from nas import DMS_V1, DMS_F1
+from nas_rnn import DARTS_V2, GDAS
+from graphviz import Digraph
+
+parser = argparse.ArgumentParser("Visualize the Networks")
+parser.add_argument('--save_dir', type=str, help='The directory to save the network plot.')
+args = parser.parse_args()
+
+
+def plot_cnn(genotype, filename):
+ g = Digraph(
+ format='pdf',
+ edge_attr=dict(fontsize='20', fontname="times"),
+ node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
+ engine='dot')
+ g.body.extend(['rankdir=LR'])
+
+ g.node("c_{k-2}", fillcolor='darkseagreen2')
+ g.node("c_{k-1}", fillcolor='darkseagreen2')
+ assert len(genotype) % 2 == 0, '{:}'.format(genotype)
+ steps = len(genotype) // 2
+
+ for i in range(steps):
+ g.node(str(i), fillcolor='lightblue')
+
+ for i in range(steps):
+ for k in [2*i, 2*i + 1]:
+ op, j, weight = genotype[k]
+ if j == 0:
+ u = "c_{k-2}"
+ elif j == 1:
+ u = "c_{k-1}"
+ else:
+ u = str(j-2)
+ v = str(i)
+ g.edge(u, v, label=op, fillcolor="gray")
+
+ g.node("c_{k}", fillcolor='palegoldenrod')
+ for i in range(steps):
+ g.edge(str(i), "c_{k}", fillcolor="gray")
+
+ g.render(filename, view=False)
+
+def plot_rnn(genotype, filename):
+ g = Digraph(
+ format='pdf',
+ edge_attr=dict(fontsize='20', fontname="times"),
+ node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
+ engine='dot')
+ g.body.extend(['rankdir=LR'])
+
+ g.node("x_{t}", fillcolor='darkseagreen2')
+ g.node("h_{t-1}", fillcolor='darkseagreen2')
+ g.node("0", fillcolor='lightblue')
+ g.edge("x_{t}", "0", fillcolor="gray")
+ g.edge("h_{t-1}", "0", fillcolor="gray")
+ steps = len(genotype)
+
+ for i in range(1, steps + 1):
+ g.node(str(i), fillcolor='lightblue')
+
+ for i, (op, j) in enumerate(genotype):
+ g.edge(str(j), str(i + 1), label=op, fillcolor="gray")
+
+ g.node("h_{t}", fillcolor='palegoldenrod')
+ for i in range(1, steps + 1):
+ g.edge(str(i), "h_{t}", fillcolor="gray")
+
+ g.render(filename, view=False)
+
+
+if __name__ == '__main__':
+ save_dir = Path(args.save_dir)
+
+ save_path = str(save_dir / 'DMS_V1-normal')
+ plot_cnn(DMS_V1.normal, save_path)
+ save_path = str(save_dir / 'DMS_V1-reduce')
+ plot_cnn(DMS_V1.reduce, save_path)
+ save_path = str(save_dir / 'DMS_F1-normal')
+ plot_cnn(DMS_F1.normal, save_path)
+
+ save_path = str(save_dir / 'DARTS-V2-RNN')
+ plot_rnn(DARTS_V2.recurrent, save_path)
+
+ save_path = str(save_dir / 'GDAS-V1-RNN')
+ plot_rnn(GDAS.recurrent, save_path)
diff --git a/others/GDAS/exps-cnn/evaluate.py b/others/GDAS/exps-cnn/evaluate.py
new file mode 100644
index 0000000..c6a747e
--- /dev/null
+++ b/others/GDAS/exps-cnn/evaluate.py
@@ -0,0 +1,53 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+# For evaluating the learned model
+import os, sys, time, glob, random, argparse
+import numpy as np
+from copy import deepcopy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.datasets as dset
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+from pathlib import Path
+lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from utils import AverageMeter, time_string, convert_secs2time
+from utils import print_log, obtain_accuracy
+from utils import Cutout, count_parameters_in_MB
+from nas import model_types as models
+from train_utils import main_procedure
+from train_utils_imagenet import main_procedure_imagenet
+from scheduler import load_config
+
+
+parser = argparse.ArgumentParser("Evaluate-CNN")
+parser.add_argument('--data_path', type=str, help='Path to dataset.')
+parser.add_argument('--checkpoint', type=str, help='The path of the trained model checkpoint.')
+args = parser.parse_args()
+
+assert torch.cuda.is_available(), 'torch.cuda is not available'
+
+
+def main():
+
+ assert os.path.isdir( args.data_path ), 'invalid data-path : {:}'.format(args.data_path)
+ assert os.path.isfile( args.checkpoint ), 'invalid checkpoint : {:}'.format(args.checkpoint)
+
+ checkpoint = torch.load( args.checkpoint )
+ xargs = checkpoint['args']
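+  # The *.config files under ./configs are JSON dicts whose values are
+  # [type, value] pairs (e.g. "LR" : ["float", 0.025]); load_config appears to
+  # cast each value to the named type and expose it as an attribute, e.g.
+  # config.LR, config.epochs.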
+ config = load_config(xargs.model_config)
+ genotype = models[xargs.arch]
+
+ # clear GPU cache
+ torch.cuda.empty_cache()
+ if xargs.dataset == 'imagenet':
+ main_procedure_imagenet(config, args.data_path, xargs, genotype, xargs.init_channels, xargs.layers, checkpoint['state_dict'], None)
+ else:
+ main_procedure(config, xargs.dataset, args.data_path, xargs, genotype, xargs.init_channels, xargs.layers, checkpoint['state_dict'], None)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/others/GDAS/exps-cnn/train_base.py b/others/GDAS/exps-cnn/train_base.py
new file mode 100644
index 0000000..2514d01
--- /dev/null
+++ b/others/GDAS/exps-cnn/train_base.py
@@ -0,0 +1,89 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, time, glob, random, argparse
+import numpy as np
+from copy import deepcopy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.datasets as dset
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+from pathlib import Path
+lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from utils import AverageMeter, time_string, convert_secs2time
+from utils import print_log, obtain_accuracy
+from utils import Cutout, count_parameters_in_MB
+from nas import model_types as models
+from train_utils import main_procedure
+from train_utils_imagenet import main_procedure_imagenet
+from scheduler import load_config
+
+
+parser = argparse.ArgumentParser("Train-CNN")
+parser.add_argument('--data_path', type=str, help='Path to dataset')
+parser.add_argument('--dataset', type=str, choices=['imagenet', 'cifar10', 'cifar100'], help='Choose between Cifar10/100 and ImageNet.')
+parser.add_argument('--arch', type=str, choices=models.keys(), help='the searched model.')
+#
+parser.add_argument('--grad_clip', type=float, help='gradient clipping')
+parser.add_argument('--model_config', type=str , help='the model configuration')
+parser.add_argument('--init_channels', type=int , help='the initial number of channels')
+parser.add_argument('--layers', type=int , help='the number of layers.')
+
+# log
+parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
+parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
+parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
+parser.add_argument('--manualSeed', type=int, help='manual seed')
+args = parser.parse_args()
+
+if 'CUDA_VISIBLE_DEVICES' not in os.environ: print('Cannot find CUDA_VISIBLE_DEVICES in os.environ')
+else : print('Found CUDA_VISIBLE_DEVICES={:}'.format(os.environ['CUDA_VISIBLE_DEVICES']))
+
+assert torch.cuda.is_available(), 'torch.cuda is not available'
+
+
+if args.manualSeed is None or args.manualSeed < 0:
+ args.manualSeed = random.randint(1, 10000)
+random.seed(args.manualSeed)
+cudnn.benchmark = True
+cudnn.enabled = True
+torch.manual_seed(args.manualSeed)
+torch.cuda.manual_seed_all(args.manualSeed)
+
+
+def main():
+
+ # Init logger
+ #args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
+ if not os.path.isdir(args.save_path):
+ os.makedirs(args.save_path)
+ log = open(os.path.join(args.save_path, 'seed-{:}-log.txt'.format(args.manualSeed)), 'w')
+ print_log('Save Path : {:}'.format(args.save_path), log)
+ state = {k: v for k, v in args._get_kwargs()}
+ print_log(state, log)
+ print_log("Random Seed : {:}".format(args.manualSeed), log)
+ print_log("Python version : {:}".format(sys.version.replace('\n', ' ')), log)
+ print_log("Torch version : {:}".format(torch.__version__), log)
+ print_log("CUDA version : {:}".format(torch.version.cuda), log)
+ print_log("cuDNN version : {:}".format(cudnn.version()), log)
+ print_log("Num of GPUs : {:}".format(torch.cuda.device_count()), log)
+ args.dataset = args.dataset.lower()
+
+ config = load_config(args.model_config)
+ genotype = models[args.arch]
+ print_log('configuration : {:}'.format(config), log)
+ print_log('genotype : {:}'.format(genotype), log)
+ # clear GPU cache
+ torch.cuda.empty_cache()
+ if args.dataset == 'imagenet':
+ main_procedure_imagenet(config, args.data_path, args, genotype, args.init_channels, args.layers, None, log)
+ else:
+ main_procedure(config, args.dataset, args.data_path, args, genotype, args.init_channels, args.layers, None, log)
+ log.close()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/others/GDAS/exps-cnn/train_utils.py b/others/GDAS/exps-cnn/train_utils.py
new file mode 100644
index 0000000..eff39ef
--- /dev/null
+++ b/others/GDAS/exps-cnn/train_utils.py
@@ -0,0 +1,169 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, time
+from copy import deepcopy
+import torch
+import torchvision.transforms as transforms
+from shutil import copyfile
+
+from utils import print_log, obtain_accuracy, AverageMeter
+from utils import time_string, convert_secs2time
+from utils import count_parameters_in_MB
+from utils import Cutout
+from nas import NetworkCIFAR as Network
+from datasets import get_datasets
+
+
+def obtain_best(accuracies):
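+  # accuracies maps epoch -> (top1, top5); sorting the tuples returns the
+  # entry with the highest top-1 accuracy (ties broken by top-5)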
+ if len(accuracies) == 0: return (0, 0)
+ tops = [value for key, value in accuracies.items()]
+ s2b = sorted( tops )
+ return s2b[-1]
+
+
+def main_procedure(config, dataset, data_path, args, genotype, init_channels, layers, pure_evaluate, log):
+
+ train_data, test_data, class_num = get_datasets(dataset, data_path, config.cutout)
+
+ print_log('-------------------------------------- main-procedure', log)
+ print_log('config : {:}'.format(config), log)
+ print_log('genotype : {:}'.format(genotype), log)
+ print_log('init_channels : {:}'.format(init_channels), log)
+ print_log('layers : {:}'.format(layers), log)
+ print_log('class_num : {:}'.format(class_num), log)
+ basemodel = Network(init_channels, class_num, layers, config.auxiliary, genotype)
+ model = torch.nn.DataParallel(basemodel).cuda()
+
+ total_param, aux_param = count_parameters_in_MB(basemodel), count_parameters_in_MB(basemodel.auxiliary_param())
+ print_log('Network =>\n{:}'.format(basemodel), log)
+ print_log('Parameters : {:} - {:} = {:.3f} MB'.format(total_param, aux_param, total_param - aux_param), log)
+ print_log('config : {:}'.format(config), log)
+ print_log('genotype : {:}'.format(genotype), log)
+ print_log('args : {:}'.format(args), log)
+ print_log('Train-Dataset : {:}'.format(train_data), log)
+ print_log('Train-Trans : {:}'.format(train_data.transform), log)
+ print_log('Test--Dataset : {:}'.format(test_data ), log)
+ print_log('Test--Trans : {:}'.format(test_data.transform ), log)
+
+
+ train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, shuffle=True,
+ num_workers=args.workers, pin_memory=True)
+ test_loader = torch.utils.data.DataLoader(test_data , batch_size=config.batch_size, shuffle=False,
+ num_workers=args.workers, pin_memory=True)
+
+ criterion = torch.nn.CrossEntropyLoss().cuda()
+
+ optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay)
+  #optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay, nesterov=True)
+ if config.type == 'cosine':
+ scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(config.epochs), eta_min=float(config.LR_MIN))
+ else:
+    raise ValueError('Cannot find the scheduler type : {:}'.format(config.type))
+
+
+ checkpoint_path = os.path.join(args.save_path, 'seed-{:}-checkpoint-{:}-model.pth'.format(args.manualSeed, dataset))
+ checkpoint_best = os.path.join(args.save_path, 'seed-{:}-checkpoint-{:}-best.pth'.format(args.manualSeed, dataset))
+ if pure_evaluate:
+ print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
+ basemodel.load_state_dict( pure_evaluate )
+ with torch.no_grad():
+ valid_acc1, valid_acc5, valid_los = _train(test_loader, model, criterion, optimizer, 'test', -1, config, args.print_freq, log)
+ return (valid_acc1, valid_acc5)
+ elif os.path.isfile(checkpoint_path):
+ checkpoint = torch.load( checkpoint_path )
+ start_epoch = checkpoint['epoch']
+ basemodel.load_state_dict(checkpoint['state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer'])
+ scheduler.load_state_dict(checkpoint['scheduler'])
+ accuracies = checkpoint['accuracies']
+ print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
+ else:
+ start_epoch, accuracies = 0, {}
+ print_log('Train model from scratch without pre-trained model or snapshot', log)
+
+
+ # Main loop
+ start_time, epoch_time = time.time(), AverageMeter()
+ for epoch in range(start_epoch, config.epochs):
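+    # pre-1.1 PyTorch convention (this sub-project targets PyTorch 1.0.1):
+    # advance the LR schedule at the start of every epoch, before training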
+ scheduler.step()
+
+ need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
+ print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} LR={:6.4f} ~ {:6.4f}, Batch={:d}".format(time_string(), epoch, config.epochs, need_time, min(scheduler.get_lr()), max(scheduler.get_lr()), config.batch_size), log)
+
+ basemodel.update_drop_path(config.drop_path_prob * epoch / config.epochs)
+
+ train_acc1, train_acc5, train_los = _train(train_loader, model, criterion, optimizer, 'train', epoch, config, args.print_freq, log)
+
+ with torch.no_grad():
+ valid_acc1, valid_acc5, valid_los = _train(test_loader, model, criterion, optimizer, 'test', epoch, config, args.print_freq, log)
+ accuracies[epoch] = (valid_acc1, valid_acc5)
+
+ torch.save({'epoch' : epoch + 1,
+ 'args' : deepcopy(args),
+ 'state_dict': basemodel.state_dict(),
+ 'optimizer' : optimizer.state_dict(),
+ 'scheduler' : scheduler.state_dict(),
+ 'accuracies': accuracies},
+ checkpoint_path)
+ best_acc = obtain_best( accuracies )
+ if accuracies[epoch] == best_acc: copyfile(checkpoint_path, checkpoint_best)
+ print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
+ print_log('----> Save into {:}'.format(checkpoint_path), log)
+
+ # measure elapsed time
+ epoch_time.update(time.time() - start_time)
+ start_time = time.time()
+ return obtain_best( accuracies )
+
+
+def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):
+ data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+ if mode == 'train':
+ model.train()
+ elif mode == 'test':
+ model.eval()
+  else: raise ValueError("Invalid mode : {:}".format(mode))
+
+ end = time.time()
+ for i, (inputs, targets) in enumerate(xloader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+ # calculate prediction and loss
+ targets = targets.cuda(non_blocking=True)
+
+ if mode == 'train': optimizer.zero_grad()
+
+ if config.auxiliary and model.training:
+ logits, logits_aux = model(inputs)
+ else:
+ logits = model(inputs)
+
+ loss = criterion(logits, targets)
+ if config.auxiliary and model.training:
+ loss_aux = criterion(logits_aux, targets)
+ loss += config.auxiliary_weight * loss_aux
+
+ if mode == 'train':
+ loss.backward()
+ if config.grad_clip > 0:
+ torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
+ optimizer.step()
+ # record
+ prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
+ losses.update(loss.item(), inputs.size(0))
+ top1.update (prec1.item(), inputs.size(0))
+ top5.update (prec5.item(), inputs.size(0))
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % print_freq == 0 or (i+1) == len(xloader):
+ Sstr = ' {:5s}'.format(mode) + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, i, len(xloader))
+ Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
+ Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
+ print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)
+
+ print_log ('{TIME:} **{mode:}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(TIME=time_string(), mode=mode, top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg), log)
+ return top1.avg, top5.avg, losses.avg
diff --git a/others/GDAS/exps-cnn/train_utils_imagenet.py b/others/GDAS/exps-cnn/train_utils_imagenet.py
new file mode 100644
index 0000000..5fa421b
--- /dev/null
+++ b/others/GDAS/exps-cnn/train_utils_imagenet.py
@@ -0,0 +1,192 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, time
+from copy import deepcopy
+import torch
+import torch.nn as nn
+import torchvision.transforms as transforms
+from shutil import copyfile
+
+from utils import print_log, obtain_accuracy, AverageMeter
+from utils import time_string, convert_secs2time
+from utils import count_parameters_in_MB
+from utils import print_FLOPs
+from utils import Cutout
+from nas import NetworkImageNet as Network
+from datasets import get_datasets
+
+
+def obtain_best(accuracies):
+ if len(accuracies) == 0: return (0, 0)
+ tops = [value for key, value in accuracies.items()]
+ s2b = sorted( tops )
+ return s2b[-1]
+
+
+class CrossEntropyLabelSmooth(nn.Module):
+
+ def __init__(self, num_classes, epsilon):
+ super(CrossEntropyLabelSmooth, self).__init__()
+ self.num_classes = num_classes
+ self.epsilon = epsilon
+ self.logsoftmax = nn.LogSoftmax(dim=1)
+
+ def forward(self, inputs, targets):
+ log_probs = self.logsoftmax(inputs)
+ targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
+ targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
+ loss = (-targets * log_probs).mean(0).sum()
+ return loss
+
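+# A quick sanity check of the smoothed loss (hypothetical shapes, not part of
+# the training pipeline):
+#   criterion = CrossEntropyLabelSmooth(num_classes=10, epsilon=0.1)
+#   loss = criterion(torch.randn(4, 10), torch.randint(0, 10, (4,)))
+# Each one-hot target becomes (1 - epsilon) * one_hot + epsilon / num_classes
+# before the negative log-likelihood is averaged over the batch.
+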
+
+def main_procedure_imagenet(config, data_path, args, genotype, init_channels, layers, pure_evaluate, log):
+
+ # training data and testing data
+ train_data, valid_data, class_num = get_datasets('imagenet-1k', data_path, -1)
+
+ train_queue = torch.utils.data.DataLoader(
+ train_data, batch_size=config.batch_size, shuffle= True, pin_memory=True, num_workers=args.workers)
+
+ valid_queue = torch.utils.data.DataLoader(
+ valid_data, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers)
+
+ print_log('-------------------------------------- main-procedure', log)
+ print_log('config : {:}'.format(config), log)
+ print_log('genotype : {:}'.format(genotype), log)
+ print_log('init_channels : {:}'.format(init_channels), log)
+ print_log('layers : {:}'.format(layers), log)
+ print_log('class_num : {:}'.format(class_num), log)
+ basemodel = Network(init_channels, class_num, layers, config.auxiliary, genotype)
+ model = torch.nn.DataParallel(basemodel).cuda()
+
+ total_param, aux_param = count_parameters_in_MB(basemodel), count_parameters_in_MB(basemodel.auxiliary_param())
+ print_log('Network =>\n{:}'.format(basemodel), log)
+ print_FLOPs(basemodel, (1,3,224,224), [print_log, log])
+ print_log('Parameters : {:} - {:} = {:.3f} MB'.format(total_param, aux_param, total_param - aux_param), log)
+ print_log('config : {:}'.format(config), log)
+ print_log('genotype : {:}'.format(genotype), log)
+ print_log('Train-Dataset : {:}'.format(train_data), log)
+ print_log('Valid--Dataset : {:}'.format(valid_data), log)
+ print_log('Args : {:}'.format(args), log)
+
+
+ criterion = torch.nn.CrossEntropyLoss().cuda()
+ criterion_smooth = CrossEntropyLabelSmooth(class_num, config.label_smooth).cuda()
+
+
+ optimizer = torch.optim.SGD(model.parameters(), config.LR, momentum=config.momentum, weight_decay=config.decay, nesterov=True)
+ if config.type == 'cosine':
+ scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(config.epochs))
+ elif config.type == 'steplr':
+ scheduler = torch.optim.lr_scheduler.StepLR(optimizer, config.decay_period, gamma=config.gamma)
+ else:
+    raise ValueError('Cannot find the scheduler type : {:}'.format(config.type))
+
+
+ checkpoint_path = os.path.join(args.save_path, 'seed-{:}-checkpoint-imagenet-model.pth'.format(args.manualSeed))
+ checkpoint_best = os.path.join(args.save_path, 'seed-{:}-checkpoint-imagenet-best.pth'.format(args.manualSeed))
+
+ if pure_evaluate:
+ print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
+ basemodel.load_state_dict( pure_evaluate )
+ with torch.no_grad():
+ valid_acc1, valid_acc5, valid_los = _train(valid_queue, model, criterion, None, 'test' , -1, config, args.print_freq, log)
+ return (valid_acc1, valid_acc5)
+ elif os.path.isfile(checkpoint_path):
+ checkpoint = torch.load( checkpoint_path )
+ start_epoch = checkpoint['epoch']
+ basemodel.load_state_dict(checkpoint['state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer'])
+ scheduler.load_state_dict(checkpoint['scheduler'])
+ accuracies = checkpoint['accuracies']
+ print_log('Load checkpoint from {:} with start-epoch = {:}'.format(checkpoint_path, start_epoch), log)
+ else:
+ start_epoch, accuracies = 0, {}
+ print_log('Train model from scratch without pre-trained model or snapshot', log)
+
+
+ # Main loop
+ start_time, epoch_time = time.time(), AverageMeter()
+ for epoch in range(start_epoch, config.epochs):
+ scheduler.step()
+
+ basemodel.update_drop_path(config.drop_path_prob * epoch / config.epochs)
+
+ need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
+ print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} LR={:6.4f} ~ {:6.4f}, Batch={:d}, Drop-Path-Prob={:}".format(time_string(), epoch, config.epochs, need_time, min(scheduler.get_lr()), max(scheduler.get_lr()), config.batch_size, basemodel.get_drop_path()), log)
+
+ train_acc1, train_acc5, train_los = _train(train_queue, model, criterion_smooth, optimizer, 'train', epoch, config, args.print_freq, log)
+
+ with torch.no_grad():
+ valid_acc1, valid_acc5, valid_los = _train(valid_queue, model, criterion, None, 'test' , epoch, config, args.print_freq, log)
+ accuracies[epoch] = (valid_acc1, valid_acc5)
+
+ torch.save({'epoch' : epoch + 1,
+ 'args' : deepcopy(args),
+ 'state_dict': basemodel.state_dict(),
+ 'optimizer' : optimizer.state_dict(),
+ 'scheduler' : scheduler.state_dict(),
+ 'accuracies': accuracies},
+ checkpoint_path)
+ best_acc = obtain_best( accuracies )
+ if accuracies[epoch] == best_acc: copyfile(checkpoint_path, checkpoint_best)
+ print_log('----> Best Accuracy : Acc@1={:.2f}, Acc@5={:.2f}, Error@1={:.2f}, Error@5={:.2f}'.format(best_acc[0], best_acc[1], 100-best_acc[0], 100-best_acc[1]), log)
+ print_log('----> Save into {:}'.format(checkpoint_path), log)
+
+ # measure elapsed time
+ epoch_time.update(time.time() - start_time)
+ start_time = time.time()
+ return obtain_best( accuracies )
+
+
+def _train(xloader, model, criterion, optimizer, mode, epoch, config, print_freq, log):
+ data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+ if mode == 'train':
+ model.train()
+ elif mode == 'test':
+ model.eval()
+  else: raise ValueError("Invalid mode : {:}".format(mode))
+
+ end = time.time()
+ for i, (inputs, targets) in enumerate(xloader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+ # calculate prediction and loss
+ targets = targets.cuda(non_blocking=True)
+
+ if mode == 'train': optimizer.zero_grad()
+
+ if config.auxiliary and model.training:
+ logits, logits_aux = model(inputs)
+ else:
+ logits = model(inputs)
+
+ loss = criterion(logits, targets)
+ if config.auxiliary and model.training:
+ loss_aux = criterion(logits_aux, targets)
+ loss += config.auxiliary_weight * loss_aux
+
+ if mode == 'train':
+ loss.backward()
+ if config.grad_clip > 0:
+ torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
+ optimizer.step()
+ # record
+ prec1, prec5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
+ losses.update(loss.item(), inputs.size(0))
+ top1.update (prec1.item(), inputs.size(0))
+ top5.update (prec5.item(), inputs.size(0))
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % print_freq == 0 or (i+1) == len(xloader):
+ Sstr = ' {:5s}'.format(mode) + time_string() + ' Epoch: [{:03d}][{:03d}/{:03d}]'.format(epoch, i, len(xloader))
+ Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
+ Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
+ print_log(Sstr + ' ' + Tstr + ' ' + Lstr, log)
+
+ print_log ('{TIME:} **{mode:}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(TIME=time_string(), mode=mode, top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg), log)
+ return top1.avg, top5.avg, losses.avg
diff --git a/others/GDAS/exps-cnn/vis-arch.py b/others/GDAS/exps-cnn/vis-arch.py
new file mode 100644
index 0000000..3fb2dc1
--- /dev/null
+++ b/others/GDAS/exps-cnn/vis-arch.py
@@ -0,0 +1,69 @@
+import os, sys, time, glob, random, argparse
+import numpy as np
+from copy import deepcopy
+import torch
+from pathlib import Path
+lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from graphviz import Digraph
+
+parser = argparse.ArgumentParser("Visualize the Networks")
+parser.add_argument('--checkpoint', type=str, help='The path to the checkpoint.')
+parser.add_argument('--save_dir', type=str, help='The directory to save the network plot.')
+args = parser.parse_args()
+
+
+def plot(genotype, filename):
+ g = Digraph(
+ format='pdf',
+ edge_attr=dict(fontsize='20', fontname="times"),
+ node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
+ engine='dot')
+ g.body.extend(['rankdir=LR'])
+
+ g.node("c_{k-2}", fillcolor='darkseagreen2')
+ g.node("c_{k-1}", fillcolor='darkseagreen2')
+ assert len(genotype) % 2 == 0
+ steps = len(genotype) // 2
+
+ for i in range(steps):
+ g.node(str(i), fillcolor='lightblue')
+
+ for i in range(steps):
+ for k in [2*i, 2*i + 1]:
+ op, j, weight = genotype[k]
+ if j == 0:
+ u = "c_{k-2}"
+ elif j == 1:
+ u = "c_{k-1}"
+ else:
+ u = str(j-2)
+ v = str(i)
+ g.edge(u, v, label=op, fillcolor="gray")
+
+ g.node("c_{k}", fillcolor='palegoldenrod')
+ for i in range(steps):
+ g.edge(str(i), "c_{k}", fillcolor="gray")
+
+ g.render(filename, view=False)
+
+
+if __name__ == '__main__':
+ checkpoint = args.checkpoint
+ assert os.path.isfile(checkpoint), 'Invalid path for checkpoint : {:}'.format(checkpoint)
+ checkpoint = torch.load( checkpoint, map_location='cpu' )
+ genotypes = checkpoint['genotypes']
+ save_dir = Path(args.save_dir)
+ subs = ['normal', 'reduce']
+ for sub in subs:
+ if not (save_dir / sub).exists():
+ (save_dir / sub).mkdir(parents=True, exist_ok=True)
+
+ for key, network in genotypes.items():
+ save_path = str(save_dir / 'normal' / 'epoch-{:03d}'.format( int(key) ))
+ print('save into {:}'.format(save_path))
+ plot(network.normal, save_path)
+
+ save_path = str(save_dir / 'reduce' / 'epoch-{:03d}'.format( int(key) ))
+ print('save into {:}'.format(save_path))
+ plot(network.reduce, save_path)
diff --git a/others/GDAS/exps-rnn/train_rnn_base.py b/others/GDAS/exps-rnn/train_rnn_base.py
new file mode 100644
index 0000000..b250046
--- /dev/null
+++ b/others/GDAS/exps-rnn/train_rnn_base.py
@@ -0,0 +1,76 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, gc, sys, math, time, glob, random, argparse
+import numpy as np
+from copy import deepcopy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.datasets as dset
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+import multiprocessing
+from pathlib import Path
+lib_dir = (Path(__file__).parent / '..' / 'lib').resolve()
+print ('lib-dir : {:}'.format(lib_dir))
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from utils import AverageMeter, time_string, time_file_str, convert_secs2time
+from utils import print_log, obtain_accuracy
+from utils import count_parameters_in_MB
+from nas_rnn import DARTS_V1, DARTS_V2, GDAS
+from train_rnn_utils import main_procedure
+from scheduler import load_config
+
+Networks = {'DARTS_V1': DARTS_V1,
+ 'DARTS_V2': DARTS_V2,
+ 'GDAS' : GDAS}
+
+parser = argparse.ArgumentParser("RNN")
+parser.add_argument('--arch', type=str, choices=Networks.keys(), help='the network architecture')
+parser.add_argument('--config_path', type=str, help='the training configure for the discovered model')
+# log
+parser.add_argument('--save_path', type=str, help='Folder to save checkpoints and log.')
+parser.add_argument('--print_freq', type=int, help='print frequency (default: 200)')
+parser.add_argument('--manualSeed', type=int, help='manual seed')
+parser.add_argument('--threads', type=int, default=4, help='the number of threads')
+args = parser.parse_args()
+
+assert torch.cuda.is_available(), 'torch.cuda is not available'
+
+if args.manualSeed is None:
+ args.manualSeed = random.randint(1, 10000)
+random.seed(args.manualSeed)
+cudnn.benchmark = True
+cudnn.enabled = True
+torch.manual_seed(args.manualSeed)
+torch.cuda.manual_seed_all(args.manualSeed)
+torch.set_num_threads(args.threads)
+
+def main():
+
+ # Init logger
+ args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
+ if not os.path.isdir(args.save_path):
+ os.makedirs(args.save_path)
+ log = open(os.path.join(args.save_path, 'log-seed-{:}-{:}.txt'.format(args.manualSeed, time_file_str())), 'w')
+ print_log('save path : {:}'.format(args.save_path), log)
+ state = {k: v for k, v in args._get_kwargs()}
+ print_log(state, log)
+ print_log("Random Seed: {}".format(args.manualSeed), log)
+ print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
+ print_log("Torch version : {}".format(torch.__version__), log)
+ print_log("CUDA version : {}".format(torch.version.cuda), log)
+ print_log("cuDNN version : {}".format(cudnn.version()), log)
+ print_log("Num of GPUs : {}".format(torch.cuda.device_count()), log)
+ print_log("Num of CPUs : {}".format(multiprocessing.cpu_count()), log)
+
+ config = load_config( args.config_path )
+ genotype = Networks[ args.arch ]
+
+ main_procedure(config, genotype, args.save_path, args.print_freq, log)
+ log.close()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/others/GDAS/exps-rnn/train_rnn_utils.py b/others/GDAS/exps-rnn/train_rnn_utils.py
new file mode 100644
index 0000000..22edd83
--- /dev/null
+++ b/others/GDAS/exps-rnn/train_rnn_utils.py
@@ -0,0 +1,221 @@
+# Modified from https://github.com/quark0/darts
+import os, gc, sys, time, math
+import numpy as np
+from copy import deepcopy
+import torch
+import torch.nn as nn
+from utils import print_log, obtain_accuracy, AverageMeter
+from utils import time_string, convert_secs2time
+from utils import count_parameters_in_MB
+from datasets import Corpus
+from nas_rnn import batchify, get_batch, repackage_hidden
+from nas_rnn import DARTSCell, RNNModel
+
+
+def obtain_best(accuracies):
+ if len(accuracies) == 0: return (0, 0)
+ tops = [value for key, value in accuracies.items()]
+ s2b = sorted( tops )
+ return s2b[-1]
+
+
+def main_procedure(config, genotype, save_dir, print_freq, log):
+
+ print_log('-'*90, log)
+ print_log('save-dir : {:}'.format(save_dir), log)
+ print_log('genotype : {:}'.format(genotype), log)
+ print_log('config : {:}'.format(config), log)
+
+ corpus = Corpus(config.data_path)
+ train_data = batchify(corpus.train, config.train_batch, True)
+ valid_data = batchify(corpus.valid, config.eval_batch , True)
+ test_data = batchify(corpus.test, config.test_batch , True)
+ ntokens = len(corpus.dictionary)
+ print_log("Train--Data Size : {:}".format(train_data.size()), log)
+ print_log("Valid--Data Size : {:}".format(valid_data.size()), log)
+ print_log("Test---Data Size : {:}".format( test_data.size()), log)
+ print_log("ntokens = {:}".format(ntokens), log)
+
+ model = RNNModel(ntokens, config.emsize, config.nhid, config.nhidlast,
+ config.dropout, config.dropouth, config.dropoutx, config.dropouti, config.dropoute,
+ cell_cls=DARTSCell, genotype=genotype)
+ model = model.cuda()
+ print_log('Network =>\n{:}'.format(model), log)
+ print_log('Genotype : {:}'.format(genotype), log)
+ print_log('Parameters : {:.3f} MB'.format(count_parameters_in_MB(model)), log)
+
+ checkpoint_path = os.path.join(save_dir, 'checkpoint-{:}.pth'.format(config.data_name))
+
+ Soptimizer = torch.optim.SGD (model.parameters(), lr=config.LR, weight_decay=config.wdecay)
+ Aoptimizer = torch.optim.ASGD(model.parameters(), lr=config.LR, t0=0, lambd=0., weight_decay=config.wdecay)
+ if os.path.isfile(checkpoint_path):
+ checkpoint = torch.load(checkpoint_path)
+ model.load_state_dict( checkpoint['state_dict'] )
+ Soptimizer.load_state_dict( checkpoint['SGD_optimizer'] )
+ Aoptimizer.load_state_dict( checkpoint['ASGD_optimizer'] )
+ epoch = checkpoint['epoch']
+ use_asgd = checkpoint['use_asgd']
+ print_log('load checkpoint from {:} and start train from {:}'.format(checkpoint_path, epoch), log)
+ else:
+ epoch, use_asgd = 0, False
+
+ start_time, epoch_time = time.time(), AverageMeter()
+ valid_loss_from_sgd, losses = [], {-1 : 1e9}
+ while epoch < config.epochs:
+ need_time = convert_secs2time(epoch_time.val * (config.epochs-epoch), True)
+ print_log("\n==>>{:s} [Epoch={:04d}/{:04d}] {:}".format(time_string(), epoch, config.epochs, need_time), log)
+ if use_asgd : optimizer = Aoptimizer
+ else : optimizer = Soptimizer
+
+ try:
+ Dtime, Btime = train(model, optimizer, corpus, train_data, config, epoch, print_freq, log)
+    except RuntimeError: # typically a CUDA OOM on a long BPTT window; reload the last checkpoint and retry
+ torch.cuda.empty_cache()
+ checkpoint = torch.load(checkpoint_path)
+ model.load_state_dict( checkpoint['state_dict'] )
+ Soptimizer.load_state_dict( checkpoint['SGD_optimizer'] )
+ Aoptimizer.load_state_dict( checkpoint['ASGD_optimizer'] )
+ epoch = checkpoint['epoch']
+ use_asgd = checkpoint['use_asgd']
+ valid_loss_from_sgd = checkpoint['valid_loss_from_sgd']
+ continue
+ if use_asgd:
+ tmp = {}
+ for prm in model.parameters():
+ tmp[prm] = prm.data.clone()
+ prm.data = Aoptimizer.state[prm]['ax'].clone()
+
+ val_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
+
+ for prm in model.parameters():
+ prm.data = tmp[prm].clone()
+ else:
+ val_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
+ if len(valid_loss_from_sgd) > config.nonmono and val_loss > min(valid_loss_from_sgd):
+ use_asgd = True
+ valid_loss_from_sgd.append( val_loss )
+
+ print_log('{:} end of epoch {:3d} with {:} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(time_string(), epoch, 'ASGD' if use_asgd else 'SGD', val_loss, math.exp(val_loss)), log)
+
+ if val_loss < min(losses.values()):
+ if use_asgd:
+ tmp = {}
+ for prm in model.parameters():
+ tmp[prm] = prm.data.clone()
+ prm.data = Aoptimizer.state[prm]['ax'].clone()
+ torch.save({'epoch' : epoch,
+ 'use_asgd' : use_asgd,
+ 'valid_loss_from_sgd': valid_loss_from_sgd,
+ 'state_dict': model.state_dict(),
+ 'SGD_optimizer' : Soptimizer.state_dict(),
+ 'ASGD_optimizer': Aoptimizer.state_dict()},
+ checkpoint_path)
+ if use_asgd:
+ for prm in model.parameters():
+ prm.data = tmp[prm].clone()
+ print_log('save into {:}'.format(checkpoint_path), log)
+ if use_asgd:
+ tmp = {}
+ for prm in model.parameters():
+ tmp[prm] = prm.data.clone()
+ prm.data = Aoptimizer.state[prm]['ax'].clone()
+ test_loss = evaluate(model, corpus, test_data, config.test_batch, config.bptt)
+ if use_asgd:
+ for prm in model.parameters():
+ prm.data = tmp[prm].clone()
+ print_log('| epoch={:03d} | test loss {:5.2f} | test ppl {:8.2f}'.format(epoch, test_loss, math.exp(test_loss)), log)
+ losses[epoch] = val_loss
+ epoch = epoch + 1
+ # measure elapsed time
+ epoch_time.update(time.time() - start_time)
+ start_time = time.time()
+
+
+ print_log('--------------------- Finish Training ----------------', log)
+ checkpoint = torch.load(checkpoint_path)
+ model.load_state_dict( checkpoint['state_dict'] )
+ test_loss = evaluate(model, corpus, test_data , config.test_batch, config.bptt)
+ print_log('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(test_loss, math.exp(test_loss)), log)
+ vali_loss = evaluate(model, corpus, valid_data, config.eval_batch, config.bptt)
+ print_log('| End of training | valid loss {:5.2f} | valid ppl {:8.2f}'.format(vali_loss, math.exp(vali_loss)), log)
+
+
+
+def evaluate(model, corpus, data_source, batch_size, bptt):
+ # Turn on evaluation mode which disables dropout.
+ model.eval()
+ total_loss, total_length = 0.0, 0.0
+ with torch.no_grad():
+ ntokens = len(corpus.dictionary)
+ hidden = model.init_hidden(batch_size)
+ for i in range(0, data_source.size(0) - 1, bptt):
+ data, targets = get_batch(data_source, i, bptt)
+ targets = targets.view(-1)
+
+ log_prob, hidden = model(data, hidden)
+ loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
+
+ total_loss += loss.item() * len(data)
+ total_length += len(data)
+ hidden = repackage_hidden(hidden)
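+  # total_loss / total_length is the token-weighted average NLL; callers
+  # report perplexity as exp() of this value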
+ return total_loss / total_length
+
+
+
+def train(model, optimizer, corpus, train_data, config, epoch, print_freq, log):
+ # Turn on training mode which enables dropout.
+ total_loss, data_time, batch_time = 0, AverageMeter(), AverageMeter()
+ start_time = time.time()
+ ntokens = len(corpus.dictionary)
+
+ hidden_train = model.init_hidden(config.train_batch)
+
+ batch, i = 0, 0
+ while i < train_data.size(0) - 1 - 1:
+ bptt = config.bptt if np.random.random() < 0.95 else config.bptt / 2.
+ # Prevent excessively small or negative sequence lengths
+ seq_len = max(5, int(np.random.normal(bptt, 5)))
+ # There's a very small chance that it could select a very long sequence length resulting in OOM
+ seq_len = min(seq_len, config.bptt + config.max_seq_len_delta)
+
+
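+    # rescale the learning rate in proportion to seq_len: the NLL is averaged
+    # per token, so different BPTT windows would otherwise contribute gradients
+    # of different scale (lr2 is restored right after optimizer.step())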
+ lr2 = optimizer.param_groups[0]['lr']
+ optimizer.param_groups[0]['lr'] = lr2 * seq_len / config.bptt
+
+ model.train()
+ data, targets = get_batch(train_data, i, seq_len)
+ targets = targets.contiguous().view(-1)
+ # count data preparation time
+ data_time.update(time.time() - start_time)
+
+ optimizer.zero_grad()
+ hidden_train = repackage_hidden(hidden_train)
+ log_prob, hidden_train, rnn_hs, dropped_rnn_hs = model(data, hidden_train, return_h=True)
+ raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets)
+
+ loss = raw_loss
+    # Activation Regularization
+ if config.alpha > 0:
+ loss = loss + sum(config.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
+ # Temporal Activation Regularization (slowness)
+ loss = loss + sum(config.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
+ loss.backward()
+ torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
+ optimizer.step()
+
+ gc.collect()
+
+ optimizer.param_groups[0]['lr'] = lr2
+
+ total_loss += raw_loss.item()
+    assert not torch.isnan(loss), '--- Epoch={:04d} :: {:03d}/{:03d} loss is NaN'.format(epoch, batch, len(train_data)//config.bptt)
+
+ batch_time.update(time.time() - start_time)
+ start_time = time.time()
+ batch, i = batch + 1, i + seq_len
+
+ if batch % print_freq == 0:
+ cur_loss = total_loss / print_freq
+ print_log(' >> Epoch: {:04d} :: {:03d}/{:03d} || loss = {:5.2f}, ppl = {:8.2f}'.format(epoch, batch, len(train_data) // config.bptt, cur_loss, math.exp(cur_loss)), log)
+ total_loss = 0
+ return data_time.sum, batch_time.sum
diff --git a/others/GDAS/lib/datasets/LanguageDataset.py b/others/GDAS/lib/datasets/LanguageDataset.py
new file mode 100644
index 0000000..196f0a7
--- /dev/null
+++ b/others/GDAS/lib/datasets/LanguageDataset.py
@@ -0,0 +1,122 @@
+import os
+import torch
+
+from collections import Counter
+
+
+class Dictionary(object):
+ def __init__(self):
+ self.word2idx = {}
+ self.idx2word = []
+ self.counter = Counter()
+ self.total = 0
+
+ def add_word(self, word):
+ if word not in self.word2idx:
+ self.idx2word.append(word)
+ self.word2idx[word] = len(self.idx2word) - 1
+ token_id = self.word2idx[word]
+ self.counter[token_id] += 1
+ self.total += 1
+ return self.word2idx[word]
+
+ def __len__(self):
+ return len(self.idx2word)
+
+
+class Corpus(object):
+ def __init__(self, path):
+ self.dictionary = Dictionary()
+ self.train = self.tokenize(os.path.join(path, 'train.txt'))
+ self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
+ self.test = self.tokenize(os.path.join(path, 'test.txt'))
+
+ def tokenize(self, path):
+ """Tokenizes a text file."""
+ assert os.path.exists(path)
+ # Add words to the dictionary
+ with open(path, 'r', encoding='utf-8') as f:
+ tokens = 0
+ for line in f:
+        words = line.split() + ['<eos>']
+ tokens += len(words)
+ for word in words:
+ self.dictionary.add_word(word)
+
+ # Tokenize file content
+ with open(path, 'r', encoding='utf-8') as f:
+ ids = torch.LongTensor(tokens)
+ token = 0
+ for line in f:
+        words = line.split() + ['<eos>']
+ for word in words:
+ ids[token] = self.dictionary.word2idx[word]
+ token += 1
+
+ return ids
+
+class SentCorpus(object):
+ def __init__(self, path):
+ self.dictionary = Dictionary()
+ self.train = self.tokenize(os.path.join(path, 'train.txt'))
+ self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
+ self.test = self.tokenize(os.path.join(path, 'test.txt'))
+
+ def tokenize(self, path):
+ """Tokenizes a text file."""
+ assert os.path.exists(path)
+ # Add words to the dictionary
+ with open(path, 'r', encoding='utf-8') as f:
+ tokens = 0
+ for line in f:
+        words = line.split() + ['<eos>']
+ tokens += len(words)
+ for word in words:
+ self.dictionary.add_word(word)
+
+ # Tokenize file content
+ sents = []
+ with open(path, 'r', encoding='utf-8') as f:
+ for line in f:
+ if not line:
+ continue
+        words = line.split() + ['<eos>']
+ sent = torch.LongTensor(len(words))
+ for i, word in enumerate(words):
+ sent[i] = self.dictionary.word2idx[word]
+ sents.append(sent)
+
+ return sents
+
+class BatchSentLoader(object):
+ def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False):
+ self.sents = sents
+ self.batch_size = batch_size
+ self.sort_sents = sorted(sents, key=lambda x: x.size(0))
+ self.cuda = cuda
+ self.volatile = volatile
+    self.pad_id = pad_id
+    self.idx = 0  # cursor into sort_sents; reset at the start of every __iter__
+
+ def __next__(self):
+ if self.idx >= len(self.sort_sents):
+ raise StopIteration
+
+ batch_size = min(self.batch_size, len(self.sort_sents)-self.idx)
+ batch = self.sort_sents[self.idx:self.idx+batch_size]
+ max_len = max([s.size(0) for s in batch])
+ tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id)
+ for i in range(len(batch)):
+ s = batch[i]
+ tensor[:s.size(0),i].copy_(s)
+ if self.cuda:
+ tensor = tensor.cuda()
+
+ self.idx += batch_size
+
+ return tensor
+
+ next = __next__
+
+ def __iter__(self):
+ self.idx = 0
+ return self
diff --git a/others/GDAS/lib/datasets/MetaBatchSampler.py b/others/GDAS/lib/datasets/MetaBatchSampler.py
new file mode 100644
index 0000000..9833065
--- /dev/null
+++ b/others/GDAS/lib/datasets/MetaBatchSampler.py
@@ -0,0 +1,65 @@
+# coding=utf-8
+import numpy as np
+import torch
+
+
+class MetaBatchSampler(object):
+
+ def __init__(self, labels, classes_per_it, num_samples, iterations):
+ '''
+ Initialize MetaBatchSampler
+ Args:
+ - labels: an iterable containing all the labels for the current dataset
+               sample indexes will be inferred from this iterable.
+ - classes_per_it: number of random classes for each iteration
+ - num_samples: number of samples for each iteration for each class (support + query)
+ - iterations: number of iterations (episodes) per epoch
+ '''
+ super(MetaBatchSampler, self).__init__()
+ self.labels = labels.copy()
+ self.classes_per_it = classes_per_it
+ self.sample_per_class = num_samples
+ self.iterations = iterations
+
+ self.classes, self.counts = np.unique(self.labels, return_counts=True)
+ assert len(self.classes) == np.max(self.classes) + 1 and np.min(self.classes) == 0
+ assert classes_per_it < len(self.classes), '{:} vs. {:}'.format(classes_per_it, len(self.classes))
+ self.classes = torch.LongTensor(self.classes)
+
+    # for every class c, collect the indices of the samples that belong to c
+    # and store them as a LongTensor in self.indexes[c]
+ self.indexes = { x.item() : [] for x in self.classes }
+ indexes = { x.item() : [] for x in self.classes }
+
+ for idx, label in enumerate(self.labels):
+ indexes[ label.item() ].append( idx )
+ for key, value in indexes.items():
+ self.indexes[ key ] = torch.LongTensor( value )
+
+
+ def __iter__(self):
+ # yield a batch of indexes
+ spc = self.sample_per_class
+ cpi = self.classes_per_it
+
+ for it in range(self.iterations):
+ batch_size = spc * cpi
+ batch = torch.LongTensor(batch_size)
+ assert cpi < len(self.classes), '{:} vs. {:}'.format(cpi, len(self.classes))
+ c_idxs = torch.randperm(len(self.classes))[:cpi]
+
+ for i, cls in enumerate(self.classes[c_idxs]):
+ s = slice(i * spc, (i + 1) * spc)
+ num = self.indexes[ cls.item() ].nelement()
+ assert spc < num, '{:} vs. {:}'.format(spc, num)
+ sample_idxs = torch.randperm( num )[:spc]
+ batch[s] = self.indexes[ cls.item() ][sample_idxs]
+
+ batch = batch[torch.randperm(len(batch))]
+ yield batch
+
+ def __len__(self):
+ # returns the number of iterations (episodes) per epoch
+ return self.iterations
diff --git a/others/GDAS/lib/datasets/TieredImageNet.py b/others/GDAS/lib/datasets/TieredImageNet.py
new file mode 100644
index 0000000..299a18d
--- /dev/null
+++ b/others/GDAS/lib/datasets/TieredImageNet.py
@@ -0,0 +1,84 @@
+from __future__ import print_function
+import numpy as np
+from PIL import Image
+import pickle as pkl
+import os, cv2, csv, glob
+import torch
+import torch.utils.data as data
+
+
+class TieredImageNet(data.Dataset):
+
+ def __init__(self, root_dir, split, transform=None):
+ self.split = split
+ self.root_dir = root_dir
+ self.transform = transform
+ splits = split.split('-')
+
+ images, labels, last = [], [], 0
+ for split in splits:
+ labels_name = '{:}/{:}_labels.pkl'.format(self.root_dir, split)
+ images_name = '{:}/{:}_images.npz'.format(self.root_dir, split)
+      # decompress images if the npz file does not exist
+ if not os.path.exists(images_name):
+ png_pkl = images_name[:-4] + '_png.pkl'
+ if os.path.exists(png_pkl):
+ decompress(images_name, png_pkl)
+ else:
+          raise ValueError('png_pkl {:} does not exist'.format( png_pkl ))
+ assert os.path.exists(images_name) and os.path.exists(labels_name), '{:} & {:}'.format(images_name, labels_name)
+ print ("Prepare {:} done".format(images_name))
+ try:
+ with open(labels_name) as f:
+ data = pkl.load(f)
+ label_specific = data["label_specific"]
+      except Exception:  # fall back to loading a Python-3 bytes pickle
+ with open(labels_name, 'rb') as f:
+ data = pkl.load(f, encoding='bytes')
+ label_specific = data[b'label_specific']
+ with np.load(images_name, mmap_mode="r", encoding='latin1') as data:
+ image_data = data["images"]
+ images.append( image_data )
+ label_specific = label_specific + last
+ labels.append( label_specific )
+ last = np.max(label_specific) + 1
+ print ("Load {:} done, with image shape = {:}, label shape = {:}, [{:} ~ {:}]".format(images_name, image_data.shape, label_specific.shape, np.min(label_specific), np.max(label_specific)))
+ images, labels = np.concatenate(images), np.concatenate(labels)
+
+ self.images = images
+ self.labels = labels
+ self.n_classes = int( np.max(labels) + 1 )
+ self.dict_index_label = {}
+ for cls in range(self.n_classes):
+ idxs = np.where(labels==cls)[0]
+ self.dict_index_label[cls] = idxs
+ self.length = len(labels)
+ print ("There are {:} images, {:} labels [{:} ~ {:}]".format(images.shape, labels.shape, np.min(labels), np.max(labels)))
+
+
+ def __repr__(self):
+ return ('{name}(length={length}, classes={n_classes})'.format(name=self.__class__.__name__, **self.__dict__))
+
+ def __len__(self):
+ return self.length
+
+ def __getitem__(self, index):
+ assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
+ image = self.images[index].copy()
+ label = int(self.labels[index])
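+    # the stored images are in cv2's BGR order; reverse the channel axis to obtain RGB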
+ image = Image.fromarray(image[:,:,::-1].astype('uint8'), 'RGB')
+ if self.transform is not None:
+ image = self.transform( image )
+ return image, label
+
+
+
+
+def decompress(path, output):
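+  # read the pickle of cv2-encoded image buffers, decode each into an 84x84x3 uint8 array,
+  # and save them all as one npz archive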
+ with open(output, 'rb') as f:
+ array = pkl.load(f, encoding='bytes')
+ images = np.zeros([len(array), 84, 84, 3], dtype=np.uint8)
+ for ii, item in enumerate(array):
+ im = cv2.imdecode(item, 1)
+ images[ii] = im
+ np.savez(path, images=images)
diff --git a/others/GDAS/lib/datasets/__init__.py b/others/GDAS/lib/datasets/__init__.py
new file mode 100644
index 0000000..12d8f0c
--- /dev/null
+++ b/others/GDAS/lib/datasets/__init__.py
@@ -0,0 +1,7 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .MetaBatchSampler import MetaBatchSampler
+from .TieredImageNet import TieredImageNet
+from .LanguageDataset import Corpus
+from .get_dataset_with_transform import get_datasets
diff --git a/others/GDAS/lib/datasets/get_dataset_with_transform.py b/others/GDAS/lib/datasets/get_dataset_with_transform.py
new file mode 100644
index 0000000..6fa419f
--- /dev/null
+++ b/others/GDAS/lib/datasets/get_dataset_with_transform.py
@@ -0,0 +1,77 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, torch
+import os.path as osp
+import torchvision.datasets as dset
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+
+from utils import Cutout
+from .TieredImageNet import TieredImageNet
+
+
+Dataset2Class = {'cifar10' : 10,
+ 'cifar100': 100,
+ 'tiered' : -1,
+ 'imagenet-1k' : 1000,
+ 'imagenet-100': 100}
+
+
+def get_datasets(name, root, cutout):
+
+ # Mean + Std
+ if name == 'cifar10':
+ mean = [x / 255 for x in [125.3, 123.0, 113.9]]
+ std = [x / 255 for x in [63.0, 62.1, 66.7]]
+ elif name == 'cifar100':
+ mean = [x / 255 for x in [129.3, 124.1, 112.4]]
+ std = [x / 255 for x in [68.2, 65.4, 70.4]]
+ elif name == 'tiered':
+ mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+ elif name == 'imagenet-1k' or name == 'imagenet-100':
+ mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+ else: raise TypeError("Unknow dataset : {:}".format(name))
+
+
+  # Data Augmentation
+ if name == 'cifar10' or name == 'cifar100':
+ lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
+ transforms.Normalize(mean, std)]
+ if cutout > 0 : lists += [Cutout(cutout)]
+ train_transform = transforms.Compose(lists)
+ test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
+ elif name == 'tiered':
+ lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
+ if cutout > 0 : lists += [Cutout(cutout)]
+ train_transform = transforms.Compose(lists)
+ test_transform = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(mean, std)])
+ elif name == 'imagenet-1k' or name == 'imagenet-100':
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ train_transform = transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ColorJitter(
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.4,
+ hue=0.2),
+ transforms.ToTensor(),
+ normalize,
+ ])
+ test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
+ else: raise TypeError("Unknow dataset : {:}".format(name))
+
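+  # NOTE: only the CIFAR and ImageNet loaders are constructed below; for 'tiered',
+  # build TieredImageNet directly with an explicit split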
+ if name == 'cifar10':
+ train_data = dset.CIFAR10 (root, train=True , transform=train_transform, download=True)
+ test_data = dset.CIFAR10 (root, train=False, transform=test_transform , download=True)
+ elif name == 'cifar100':
+ train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
+ test_data = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
+ elif name == 'imagenet-1k' or name == 'imagenet-100':
+ train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
+ test_data = dset.ImageFolder(osp.join(root, 'val'), test_transform)
+ else: raise TypeError("Unknow dataset : {:}".format(name))
+
+ class_num = Dataset2Class[name]
+ return train_data, test_data, class_num
diff --git a/others/GDAS/lib/datasets/test_NLP.py b/others/GDAS/lib/datasets/test_NLP.py
new file mode 100644
index 0000000..4f493f5
--- /dev/null
+++ b/others/GDAS/lib/datasets/test_NLP.py
@@ -0,0 +1,10 @@
+import os, sys, torch
+
+from LanguageDataset import SentCorpus, BatchSentLoader
+
+if __name__ == '__main__':
+ path = '../../data/data/penn'
+ corpus = SentCorpus( path )
+ loader = BatchSentLoader(corpus.test, 10)
+ for i, d in enumerate(loader):
+ print('{:} :: {:}'.format(i, d.size()))
diff --git a/others/GDAS/lib/datasets/test_dataset.py b/others/GDAS/lib/datasets/test_dataset.py
new file mode 100644
index 0000000..b827c59
--- /dev/null
+++ b/others/GDAS/lib/datasets/test_dataset.py
@@ -0,0 +1,33 @@
+import os, sys, torch
+import torchvision.transforms as transforms
+
+from TieredImageNet import TieredImageNet
+from MetaBatchSampler import MetaBatchSampler
+
+root_dir = os.environ['TORCH_HOME'] + '/tiered-imagenet'
+print ('root : {:}'.format(root_dir))
+means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+
+lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(84, padding=8), transforms.ToTensor(), transforms.Normalize(means, stds)]
+transform = transforms.Compose(lists)
+
+dataset = TieredImageNet(root_dir, 'val-test', transform)
+image, label = dataset[111]
+print ('image shape = {:}, label = {:}'.format(image.size(), label))
+print ('image : min = {:}, max = {:} ||| label : {:}'.format(image.min(), image.max(), label))
+
+
+sampler = MetaBatchSampler(dataset.labels, 250, 100, 10)
+
+dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)
+
+print ('the length of dataset : {:}'.format( len(dataset) ))
+print ('the length of loader : {:}'.format( len(dataloader) ))
+
+for images, labels in dataloader:
+ print ('images : {:}'.format( images.size() ))
+ print ('labels : {:}'.format( labels.size() ))
+ for i in range(3):
+ print ('image-value-[{:}] : {:} ~ {:}, mean={:}, std={:}'.format(i, images[:,i].min(), images[:,i].max(), images[:,i].mean(), images[:,i].std()))
+
+print('-----')
diff --git a/others/GDAS/lib/nas/CifarNet.py b/others/GDAS/lib/nas/CifarNet.py
new file mode 100644
index 0000000..0e1e456
--- /dev/null
+++ b/others/GDAS/lib/nas/CifarNet.py
@@ -0,0 +1,89 @@
+import torch
+import torch.nn as nn
+from .construct_utils import Cell, Transition
+
+class AuxiliaryHeadCIFAR(nn.Module):
+
+ def __init__(self, C, num_classes):
+ """assuming input size 8x8"""
+ super(AuxiliaryHeadCIFAR, self).__init__()
+ self.features = nn.Sequential(
+ nn.ReLU(inplace=True),
+ nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
+ nn.Conv2d(C, 128, 1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 768, 2, bias=False),
+ nn.BatchNorm2d(768),
+ nn.ReLU(inplace=True)
+ )
+ self.classifier = nn.Linear(768, num_classes)
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.classifier(x.view(x.size(0),-1))
+ return x
+
+
+class NetworkCIFAR(nn.Module):
+
+ def __init__(self, C, num_classes, layers, auxiliary, genotype):
+ super(NetworkCIFAR, self).__init__()
+ self._layers = layers
+
+ stem_multiplier = 3
+ C_curr = stem_multiplier*C
+ self.stem = nn.Sequential(
+ nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
+ nn.BatchNorm2d(C_curr)
+ )
+
+ C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
+ self.cells = nn.ModuleList()
+ reduction_prev = False
+ for i in range(layers):
+ if i in [layers//3, 2*layers//3]:
+ C_curr *= 2
+ reduction = True
+ else:
+ reduction = False
+ if reduction and genotype.reduce is None:
+ cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
+ else:
+ cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ self.cells.append( cell )
+ C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
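+      # record the channel count at the 2/3-depth cell, where the auxiliary head attaches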
+ if i == 2*layers//3:
+ C_to_auxiliary = C_prev
+
+ if auxiliary:
+ self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
+ else:
+ self.auxiliary_head = None
+ self.global_pooling = nn.AdaptiveAvgPool2d(1)
+ self.classifier = nn.Linear(C_prev, num_classes)
+ self.drop_path_prob = -1
+
+ def update_drop_path(self, drop_path_prob):
+ self.drop_path_prob = drop_path_prob
+
+ def auxiliary_param(self):
+ if self.auxiliary_head is None: return []
+ else: return list( self.auxiliary_head.parameters() )
+
+ def forward(self, inputs):
+ s0 = s1 = self.stem(inputs)
+ for i, cell in enumerate(self.cells):
+ s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+ if i == 2*self._layers//3:
+ if self.auxiliary_head and self.training:
+ logits_aux = self.auxiliary_head(s1)
+ out = self.global_pooling(s1)
+ out = out.view(out.size(0), -1)
+ logits = self.classifier(out)
+
+ if self.auxiliary_head and self.training:
+ return logits, logits_aux
+ else:
+ return logits
diff --git a/others/GDAS/lib/nas/ImageNet.py b/others/GDAS/lib/nas/ImageNet.py
new file mode 100644
index 0000000..8e91755
--- /dev/null
+++ b/others/GDAS/lib/nas/ImageNet.py
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+from .construct_utils import Cell, Transition
+
+class AuxiliaryHeadImageNet(nn.Module):
+
+ def __init__(self, C, num_classes):
+ """assuming input size 14x14"""
+ super(AuxiliaryHeadImageNet, self).__init__()
+ self.features = nn.Sequential(
+ nn.ReLU(inplace=True),
+ nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
+ nn.Conv2d(C, 128, 1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 768, 2, bias=False),
+ # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
+ # Commenting it out for consistency with the experiments in the paper.
+ # nn.BatchNorm2d(768),
+ nn.ReLU(inplace=True)
+ )
+ self.classifier = nn.Linear(768, num_classes)
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.classifier(x.view(x.size(0),-1))
+ return x
+
+
+
+
+class NetworkImageNet(nn.Module):
+
+ def __init__(self, C, num_classes, layers, auxiliary, genotype):
+ super(NetworkImageNet, self).__init__()
+ self._layers = layers
+
+ self.stem0 = nn.Sequential(
+ nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C // 2),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C),
+ )
+
+ self.stem1 = nn.Sequential(
+ nn.ReLU(inplace=True),
+ nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(C),
+ )
+
+ C_prev_prev, C_prev, C_curr = C, C, C
+
+ self.cells = nn.ModuleList()
+ reduction_prev = True
+ for i in range(layers):
+ if i in [layers // 3, 2 * layers // 3]:
+ C_curr *= 2
+ reduction = True
+ else:
+ reduction = False
+ if reduction and genotype.reduce is None:
+ cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
+ else:
+ cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ self.cells += [cell]
+ C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
+ if i == 2 * layers // 3:
+ C_to_auxiliary = C_prev
+
+ if auxiliary:
+ self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
+ else:
+ self.auxiliary_head = None
+ self.global_pooling = nn.AvgPool2d(7)
+ self.classifier = nn.Linear(C_prev, num_classes)
+ self.drop_path_prob = -1
+
+ def update_drop_path(self, drop_path_prob):
+ self.drop_path_prob = drop_path_prob
+
+ def get_drop_path(self):
+ return self.drop_path_prob
+
+ def auxiliary_param(self):
+ if self.auxiliary_head is None: return []
+ else: return list( self.auxiliary_head.parameters() )
+
+ def forward(self, input):
+ s0 = self.stem0(input)
+ s1 = self.stem1(s0)
+ for i, cell in enumerate(self.cells):
+ s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+ #print ('{:} : {:} - {:}'.format(i, s0.size(), s1.size()))
+ if i == 2 * self._layers // 3:
+ if self.auxiliary_head and self.training:
+ logits_aux = self.auxiliary_head(s1)
+ out = self.global_pooling(s1)
+ logits = self.classifier(out.view(out.size(0), -1))
+ if self.auxiliary_head and self.training:
+ return logits, logits_aux
+ else:
+ return logits
diff --git a/others/GDAS/lib/nas/SE_Module.py b/others/GDAS/lib/nas/SE_Module.py
new file mode 100644
index 0000000..7a8d361
--- /dev/null
+++ b/others/GDAS/lib/nas/SE_Module.py
@@ -0,0 +1,27 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# Squeeze and Excitation module
+
+class SqEx(nn.Module):
+
+ def __init__(self, n_features, reduction=16):
+ super(SqEx, self).__init__()
+
+ if n_features % reduction != 0:
+ raise ValueError('n_features must be divisible by reduction (default = 16)')
+
+ self.linear1 = nn.Linear(n_features, n_features // reduction, bias=True)
+ self.nonlin1 = nn.ReLU(inplace=True)
+ self.linear2 = nn.Linear(n_features // reduction, n_features, bias=True)
+ self.nonlin2 = nn.Sigmoid()
+
+ def forward(self, x):
+
+ y = F.avg_pool2d(x, kernel_size=x.size()[2:4])
+ y = y.permute(0, 2, 3, 1)
+ y = self.nonlin1(self.linear1(y))
+ y = self.nonlin2(self.linear2(y))
+ y = y.permute(0, 3, 1, 2)
+ y = x * y
+ return y
+
diff --git a/others/GDAS/lib/nas/__init__.py b/others/GDAS/lib/nas/__init__.py
new file mode 100644
index 0000000..1493e17
--- /dev/null
+++ b/others/GDAS/lib/nas/__init__.py
@@ -0,0 +1,10 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .CifarNet import NetworkCIFAR
+from .ImageNet import NetworkImageNet
+
+# genotypes
+from .genotypes import model_types
+
+from .construct_utils import return_alphas_str
diff --git a/others/GDAS/lib/nas/construct_utils.py b/others/GDAS/lib/nas/construct_utils.py
new file mode 100644
index 0000000..0207e1b
--- /dev/null
+++ b/others/GDAS/lib/nas/construct_utils.py
@@ -0,0 +1,152 @@
+import random
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .operations import OPS, FactorizedReduce, ReLUConvBN, Identity
+
+
+def random_select(length, ratio):
+ clist = []
+ index = random.randint(0, length-1)
+ for i in range(length):
+ if i == index or random.random() < ratio:
+ clist.append( 1 )
+ else:
+ clist.append( 0 )
+ return clist
+
+
+def all_select(length):
+ return [1 for i in range(length)]
+
+
+def drop_path(x, drop_prob):
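+  # randomly zero the whole residual path for each sample and rescale the survivors
+  # by 1/keep_prob (per-sample stochastic depth)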
+ if drop_prob > 0.:
+ keep_prob = 1. - drop_prob
+ mask = x.new_zeros(x.size(0), 1, 1, 1)
+ mask = mask.bernoulli_(keep_prob)
+ x.div_(keep_prob)
+ x.mul_(mask)
+ return x
+
+
+def return_alphas_str(basemodel):
+ string = 'normal : {:}'.format( F.softmax(basemodel.alphas_normal, dim=-1) )
+ if hasattr(basemodel, 'alphas_reduce'):
+ string = string + '\nreduce : {:}'.format( F.softmax(basemodel.alphas_reduce, dim=-1) )
+ return string
+
+
+class Cell(nn.Module):
+
+ def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
+ super(Cell, self).__init__()
+ print(C_prev_prev, C_prev, C)
+
+ if reduction_prev:
+ self.preprocess0 = FactorizedReduce(C_prev_prev, C)
+ else:
+ self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
+ self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
+
+ if reduction:
+ op_names, indices, values = zip(*genotype.reduce)
+ concat = genotype.reduce_concat
+ else:
+ op_names, indices, values = zip(*genotype.normal)
+ concat = genotype.normal_concat
+ self._compile(C, op_names, indices, values, concat, reduction)
+
+ def _compile(self, C, op_names, indices, values, concat, reduction):
+ assert len(op_names) == len(indices)
+ self._steps = len(op_names) // 2
+ self._concat = concat
+ self.multiplier = len(concat)
+
+ self._ops = nn.ModuleList()
+ for name, index in zip(op_names, indices):
+ stride = 2 if reduction and index < 2 else 1
+ op = OPS[name](C, stride, True)
+ self._ops.append( op )
+ self._indices = indices
+ self._values = values
+
+ def forward(self, s0, s1, drop_prob):
+ s0 = self.preprocess0(s0)
+ s1 = self.preprocess1(s1)
+
+ states = [s0, s1]
+ for i in range(self._steps):
+ h1 = states[self._indices[2*i]]
+ h2 = states[self._indices[2*i+1]]
+ op1 = self._ops[2*i]
+ op2 = self._ops[2*i+1]
+ h1 = op1(h1)
+ h2 = op2(h2)
+ if self.training and drop_prob > 0.:
+ if not isinstance(op1, Identity):
+ h1 = drop_path(h1, drop_prob)
+ if not isinstance(op2, Identity):
+ h2 = drop_path(h2, drop_prob)
+
+ s = h1 + h2
+
+ states += [s]
+ return torch.cat([states[i] for i in self._concat], dim=1)
+
+
+
+class Transition(nn.Module):
+
+ def __init__(self, C_prev_prev, C_prev, C, reduction_prev, multiplier=4):
+ super(Transition, self).__init__()
+ if reduction_prev:
+ self.preprocess0 = FactorizedReduce(C_prev_prev, C)
+ else:
+ self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
+ self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
+ self.multiplier = multiplier
+
+ self.reduction = True
+ self.ops1 = nn.ModuleList(
+ [nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
+ nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
+ nn.BatchNorm2d(C, affine=True),
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(C, affine=True)),
+ nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
+ nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
+ nn.BatchNorm2d(C, affine=True),
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(C, affine=True))])
+
+ self.ops2 = nn.ModuleList(
+ [nn.Sequential(
+ nn.MaxPool2d(3, stride=2, padding=1),
+ nn.BatchNorm2d(C, affine=True)),
+ nn.Sequential(
+ nn.MaxPool2d(3, stride=2, padding=1),
+ nn.BatchNorm2d(C, affine=True))])
+
+
+ def forward(self, s0, s1, drop_prob = -1):
+ s0 = self.preprocess0(s0)
+ s1 = self.preprocess1(s1)
+
+ X0 = self.ops1[0] (s0)
+ X1 = self.ops1[1] (s1)
+ if self.training and drop_prob > 0.:
+ X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)
+
+ #X2 = self.ops2[0] (X0+X1)
+ X2 = self.ops2[0] (s0)
+ X3 = self.ops2[1] (s1)
+ if self.training and drop_prob > 0.:
+ X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
+ return torch.cat([X0, X1, X2, X3], dim=1)
diff --git a/others/GDAS/lib/nas/genotypes.py b/others/GDAS/lib/nas/genotypes.py
new file mode 100644
index 0000000..d2b93d5
--- /dev/null
+++ b/others/GDAS/lib/nas/genotypes.py
@@ -0,0 +1,245 @@
+from collections import namedtuple
+
+Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
+
+PRIMITIVES = [
+ 'none',
+ 'max_pool_3x3',
+ 'avg_pool_3x3',
+ 'skip_connect',
+ 'sep_conv_3x3',
+ 'sep_conv_5x5',
+ 'dil_conv_3x3',
+ 'dil_conv_5x5'
+]
+
+NASNet = Genotype(
+ normal = [
+ ('sep_conv_5x5', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('sep_conv_5x5', 0, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('avg_pool_3x3', 1, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('avg_pool_3x3', 0, 1.0),
+ ('avg_pool_3x3', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('skip_connect', 1, 1.0),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ ('sep_conv_5x5', 1, 1.0),
+ ('sep_conv_7x7', 0, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('sep_conv_7x7', 0, 1.0),
+ ('avg_pool_3x3', 1, 1.0),
+ ('sep_conv_5x5', 0, 1.0),
+ ('skip_connect', 3, 1.0),
+ ('avg_pool_3x3', 2, 1.0),
+ ('sep_conv_3x3', 2, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ],
+ reduce_concat = [4, 5, 6],
+)
+
+AmoebaNet = Genotype(
+ normal = [
+ ('avg_pool_3x3', 0, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('sep_conv_5x5', 2, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('avg_pool_3x3', 3, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('skip_connect', 1, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('avg_pool_3x3', 1, 1.0),
+ ],
+ normal_concat = [4, 5, 6],
+ reduce = [
+ ('avg_pool_3x3', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('sep_conv_7x7', 2, 1.0),
+ ('sep_conv_7x7', 0, 1.0),
+ ('avg_pool_3x3', 1, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('conv_7x1_1x7', 0, 1.0),
+ ('sep_conv_3x3', 5, 1.0),
+ ],
+ reduce_concat = [3, 4, 6]
+)
+
+DARTS_V1 = Genotype(
+ normal=[
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('skip_connect', 2, 1.0)],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ ('max_pool_3x3', 0, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('skip_connect', 2, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('skip_connect', 2, 1.0),
+ ('skip_connect', 2, 1.0),
+ ('avg_pool_3x3', 0, 1.0)],
+ reduce_concat=[2, 3, 4, 5]
+)
+
+DARTS_V2 = Genotype(
+ normal=[
+ ('sep_conv_3x3', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('dil_conv_3x3', 2, 1.0)],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ ('max_pool_3x3', 0, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('skip_connect', 2, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('skip_connect', 2, 1.0),
+ ('skip_connect', 2, 1.0),
+ ('max_pool_3x3', 1, 1.0)],
+ reduce_concat=[2, 3, 4, 5]
+)
+
+PNASNet = Genotype(
+ normal = [
+ ('sep_conv_5x5', 0, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('sep_conv_7x7', 1, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('sep_conv_5x5', 1, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 4, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('skip_connect', 1, 1.0),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ ('sep_conv_5x5', 0, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('sep_conv_7x7', 1, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('sep_conv_5x5', 1, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 4, 1.0),
+ ('max_pool_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('skip_connect', 1, 1.0),
+ ],
+ reduce_concat = [2, 3, 4, 5, 6],
+)
+
+# https://arxiv.org/pdf/1802.03268.pdf
+ENASNet = Genotype(
+ normal = [
+ ('sep_conv_3x3', 1, 1.0),
+ ('skip_connect', 1, 1.0),
+ ('sep_conv_5x5', 1, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('avg_pool_3x3', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('avg_pool_3x3', 1, 1.0),
+ ('sep_conv_5x5', 1, 1.0),
+ ('avg_pool_3x3', 0, 1.0),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ ('sep_conv_5x5', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0), # 2
+ ('sep_conv_3x3', 1, 1.0),
+ ('avg_pool_3x3', 1, 1.0), # 3
+ ('sep_conv_3x3', 1, 1.0),
+ ('avg_pool_3x3', 1, 1.0), # 4
+ ('avg_pool_3x3', 1, 1.0),
+ ('sep_conv_5x5', 4, 1.0), # 5
+ ('sep_conv_3x3', 5, 1.0),
+ ('sep_conv_5x5', 0, 1.0),
+ ],
+ reduce_concat = [2, 3, 4, 5, 6],
+)
+
+DARTS = DARTS_V2
+
+# Search by normal and reduce
+GDAS_V1 = Genotype(
+ normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
+ normal_concat=range(2, 6),
+ reduce=[('sep_conv_5x5', 0, 0.12862831354141235), ('sep_conv_3x3', 1, 0.12783904373645782), ('sep_conv_5x5', 2, 0.12725995481014252), ('sep_conv_5x5', 1, 0.12705285847187042), ('dil_conv_5x5', 2, 0.12797553837299347), ('sep_conv_3x3', 1, 0.12737272679805756), ('sep_conv_5x5', 0, 0.12833961844444275), ('sep_conv_5x5', 1, 0.12758426368236542)],
+ reduce_concat=range(2, 6)
+)
+
+# Search by normal and fixing reduction
+GDAS_F1 = Genotype(
+ normal=[('skip_connect', 0, 0.16), ('skip_connect', 1, 0.13), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.16), ('sep_conv_3x3', 2, 0.15)],
+ normal_concat=[2, 3, 4, 5],
+ reduce=None,
+ reduce_concat=[2, 3, 4, 5],
+)
+
+# Combine GDAS_V1 and GDAS_F1
+GDAS_GF = Genotype(
+ normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
+ normal_concat=range(2, 6),
+ reduce=None,
+ reduce_concat=range(2, 6)
+)
+GDAS_FG = Genotype(
+ normal=[('skip_connect', 0, 0.16), ('skip_connect', 1, 0.13), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.16), ('sep_conv_3x3', 2, 0.15)],
+ normal_concat=range(2, 6),
+ reduce=[('sep_conv_5x5', 0, 0.12862831354141235), ('sep_conv_3x3', 1, 0.12783904373645782), ('sep_conv_5x5', 2, 0.12725995481014252), ('sep_conv_5x5', 1, 0.12705285847187042), ('dil_conv_5x5', 2, 0.12797553837299347), ('sep_conv_3x3', 1, 0.12737272679805756), ('sep_conv_5x5', 0, 0.12833961844444275), ('sep_conv_5x5', 1, 0.12758426368236542)],
+ reduce_concat=range(2, 6)
+)
+
+PDARTS = Genotype(
+ normal=[
+ ('skip_connect', 0, 1.0),
+ ('dil_conv_3x3', 1, 1.0),
+ ('skip_connect', 0, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 1, 1.0),
+ ('sep_conv_3x3', 3, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('dil_conv_5x5', 4, 1.0)],
+ normal_concat=range(2, 6),
+ reduce=[
+ ('avg_pool_3x3', 0, 1.0),
+ ('sep_conv_5x5', 1, 1.0),
+ ('sep_conv_3x3', 0, 1.0),
+ ('dil_conv_5x5', 2, 1.0),
+ ('max_pool_3x3', 0, 1.0),
+ ('dil_conv_3x3', 1, 1.0),
+ ('dil_conv_3x3', 1, 1.0),
+ ('dil_conv_5x5', 3, 1.0)],
+ reduce_concat=range(2, 6)
+)
+
+
+model_types = {'DARTS_V1': DARTS_V1,
+ 'DARTS_V2': DARTS_V2,
+ 'NASNet' : NASNet,
+ 'PNASNet' : PNASNet,
+ 'AmoebaNet': AmoebaNet,
+ 'ENASNet' : ENASNet,
+ 'PDARTS' : PDARTS,
+ 'GDAS_V1' : GDAS_V1,
+ 'GDAS_F1' : GDAS_F1,
+ 'GDAS_GF' : GDAS_GF,
+ 'GDAS_FG' : GDAS_FG}
diff --git a/others/GDAS/lib/nas/head_utils.py b/others/GDAS/lib/nas/head_utils.py
new file mode 100644
index 0000000..788fdb6
--- /dev/null
+++ b/others/GDAS/lib/nas/head_utils.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+class ImageNetHEAD(nn.Sequential):
+ def __init__(self, C, stride=2):
+ super(ImageNetHEAD, self).__init__()
+ self.add_module('conv1', nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False))
+ self.add_module('bn1' , nn.BatchNorm2d(C // 2))
+ self.add_module('relu1', nn.ReLU(inplace=True))
+ self.add_module('conv2', nn.Conv2d(C // 2, C, kernel_size=3, stride=stride, padding=1, bias=False))
+ self.add_module('bn2' , nn.BatchNorm2d(C))
+
+
+class CifarHEAD(nn.Sequential):
+ def __init__(self, C):
+ super(CifarHEAD, self).__init__()
+ self.add_module('conv', nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False))
+ self.add_module('bn', nn.BatchNorm2d(C))
diff --git a/others/GDAS/lib/nas/operations.py b/others/GDAS/lib/nas/operations.py
new file mode 100644
index 0000000..d105886
--- /dev/null
+++ b/others/GDAS/lib/nas/operations.py
@@ -0,0 +1,122 @@
+import torch
+import torch.nn as nn
+
+OPS = {
+ 'none' : lambda C, stride, affine: Zero(stride),
+ 'avg_pool_3x3' : lambda C, stride, affine: nn.Sequential(
+ nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
+ nn.BatchNorm2d(C, affine=False) ),
+ 'max_pool_3x3' : lambda C, stride, affine: nn.Sequential(
+ nn.MaxPool2d(3, stride=stride, padding=1),
+ nn.BatchNorm2d(C, affine=False) ),
+ 'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
+ 'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
+ 'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
+ 'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
+ 'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
+ 'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
+ 'conv_7x1_1x7' : lambda C, stride, affine: Conv717(C, C, stride, affine),
+}
+
+class Conv717(nn.Module):
+
+ def __init__(self, C_in, C_out, stride, affine):
+ super(Conv717, self).__init__()
+ self.op = nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C_in , C_out, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
+ nn.Conv2d(C_out, C_out, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
+ nn.BatchNorm2d(C_out, affine=affine)
+ )
+
+ def forward(self, x):
+ return self.op(x)
+
+
+class ReLUConvBN(nn.Module):
+
+ def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
+ super(ReLUConvBN, self).__init__()
+ self.op = nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
+ nn.BatchNorm2d(C_out, affine=affine)
+ )
+
+ def forward(self, x):
+ return self.op(x)
+
+
+class DilConv(nn.Module):
+
+ def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
+ super(DilConv, self).__init__()
+ self.op = nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False),
+ nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
+ nn.BatchNorm2d(C_out, affine=affine),
+ )
+
+ def forward(self, x):
+ return self.op(x)
+
+
+class SepConv(nn.Module):
+
+ def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
+ super(SepConv, self).__init__()
+ self.op = nn.Sequential(
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
+ nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
+ nn.BatchNorm2d(C_in, affine=affine),
+ nn.ReLU(inplace=False),
+ nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, groups=C_in, bias=False),
+ nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
+ nn.BatchNorm2d(C_out, affine=affine),
+ )
+
+ def forward(self, x):
+ return self.op(x)
+
+
+class Identity(nn.Module):
+
+ def __init__(self):
+ super(Identity, self).__init__()
+
+ def forward(self, x):
+ return x
+
+
+class Zero(nn.Module):
+
+ def __init__(self, stride):
+ super(Zero, self).__init__()
+ self.stride = stride
+
+ def forward(self, x):
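+    # the 'none' operation: output zeros, spatially subsampled when stride > 1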
+ if self.stride == 1:
+ return x.mul(0.)
+ return x[:,:,::self.stride,::self.stride].mul(0.)
+
+
+class FactorizedReduce(nn.Module):
+
+ def __init__(self, C_in, C_out, affine=True):
+ super(FactorizedReduce, self).__init__()
+ assert C_out % 2 == 0
+ self.relu = nn.ReLU(inplace=False)
+ self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
+ self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
+ self.bn = nn.BatchNorm2d(C_out, affine=affine)
+ self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
+
+
+ def forward(self, x):
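+    # two parallel stride-2 1x1 convs; the second path is shifted by one pixel
+    # so the pair samples complementary spatial positions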
+ x = self.relu(x)
+ y = self.pad(x)
+ out = torch.cat([self.conv_1(x), self.conv_2(y[:,:,1:,1:])], dim=1)
+ out = self.bn(out)
+ return out
diff --git a/others/GDAS/lib/nas_rnn/__init__.py b/others/GDAS/lib/nas_rnn/__init__.py
new file mode 100644
index 0000000..098c7f5
--- /dev/null
+++ b/others/GDAS/lib/nas_rnn/__init__.py
@@ -0,0 +1,9 @@
+# utils
+from .utils import batchify, get_batch, repackage_hidden
+# models
+from .model_search import RNNModelSearch
+from .model_search import DARTSCellSearch
+from .basemodel import DARTSCell, RNNModel
+# architecture
+from .genotypes import DARTS_V1, DARTS_V2
+from .genotypes import GDAS
diff --git a/others/GDAS/lib/nas_rnn/basemodel.py b/others/GDAS/lib/nas_rnn/basemodel.py
new file mode 100644
index 0000000..4a00b43
--- /dev/null
+++ b/others/GDAS/lib/nas_rnn/basemodel.py
@@ -0,0 +1,181 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .genotypes import STEPS
+from .utils import mask2d, LockedDropout, embedded_dropout
+
+
+INITRANGE = 0.04
+
+def none_func(x):
+ return x * 0
+
+
+class DARTSCell(nn.Module):
+
+ def __init__(self, ninp, nhid, dropouth, dropoutx, genotype):
+ super(DARTSCell, self).__init__()
+ self.nhid = nhid
+ self.dropouth = dropouth
+ self.dropoutx = dropoutx
+ self.genotype = genotype
+
+ # genotype is None when doing arch search
+ steps = len(self.genotype.recurrent) if self.genotype is not None else STEPS
+ self._W0 = nn.Parameter(torch.Tensor(ninp+nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE))
+ self._Ws = nn.ParameterList([
+ nn.Parameter(torch.Tensor(nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE)) for i in range(steps)
+ ])
+
+ def forward(self, inputs, hidden, arch_probs):
+ T, B = inputs.size(0), inputs.size(1)
+
+ if self.training:
+ x_mask = mask2d(B, inputs.size(2), keep_prob=1.-self.dropoutx)
+ h_mask = mask2d(B, hidden.size(2), keep_prob=1.-self.dropouth)
+ else:
+ x_mask = h_mask = None
+
+ hidden = hidden[0]
+ hiddens = []
+ for t in range(T):
+ hidden = self.cell(inputs[t], hidden, x_mask, h_mask, arch_probs)
+ hiddens.append(hidden)
+ hiddens = torch.stack(hiddens)
+ return hiddens, hiddens[-1].unsqueeze(0)
+
+ def _compute_init_state(self, x, h_prev, x_mask, h_mask):
+ if self.training:
+ xh_prev = torch.cat([x * x_mask, h_prev * h_mask], dim=-1)
+ else:
+ xh_prev = torch.cat([x, h_prev], dim=-1)
+ c0, h0 = torch.split(xh_prev.mm(self._W0), self.nhid, dim=-1)
+ c0 = c0.sigmoid()
+ h0 = h0.tanh()
+ s0 = h_prev + c0 * (h0-h_prev)
+ return s0
+
+ def _get_activation(self, name):
+ if name == 'tanh':
+ f = torch.tanh
+ elif name == 'relu':
+ f = torch.relu
+ elif name == 'sigmoid':
+ f = torch.sigmoid
+ elif name == 'identity':
+ f = lambda x: x
+ elif name == 'none':
+ f = none_func
+ else:
+ raise NotImplementedError
+ return f
+
+ def cell(self, x, h_prev, x_mask, h_mask, _):
+ s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
+
+ states = [s0]
+ for i, (name, pred) in enumerate(self.genotype.recurrent):
+ s_prev = states[pred]
+ if self.training:
+ ch = (s_prev * h_mask).mm(self._Ws[i])
+ else:
+ ch = s_prev.mm(self._Ws[i])
+ c, h = torch.split(ch, self.nhid, dim=-1)
+ c = c.sigmoid()
+ fn = self._get_activation(name)
+ h = fn(h)
+ s = s_prev + c * (h-s_prev)
+ states += [s]
+ output = torch.mean(torch.stack([states[i] for i in self.genotype.concat], -1), -1)
+ return output
+
+
+class RNNModel(nn.Module):
+ """Container module with an encoder, a recurrent module, and a decoder."""
+ def __init__(self, ntoken, ninp, nhid, nhidlast,
+ dropout=0.5, dropouth=0.5, dropoutx=0.5, dropouti=0.5, dropoute=0.1,
+ cell_cls=None, genotype=None):
+ super(RNNModel, self).__init__()
+ self.lockdrop = LockedDropout()
+ self.encoder = nn.Embedding(ntoken, ninp)
+
+ assert ninp == nhid == nhidlast
+ if cell_cls == DARTSCell:
+ assert genotype is not None
+ rnns = [cell_cls(ninp, nhid, dropouth, dropoutx, genotype)]
+ else:
+ assert genotype is None
+ rnns = [cell_cls(ninp, nhid, dropouth, dropoutx)]
+
+ self.rnns = torch.nn.ModuleList(rnns)
+ self.decoder = nn.Linear(ninp, ntoken)
+ self.decoder.weight = self.encoder.weight
+ self.init_weights()
+ self.arch_weights = None
+
+ self.ninp = ninp
+ self.nhid = nhid
+ self.nhidlast = nhidlast
+ self.dropout = dropout
+ self.dropouti = dropouti
+ self.dropoute = dropoute
+ self.ntoken = ntoken
+ self.cell_cls = cell_cls
+ # acceleration
+ self.tau = None
+ self.use_gumbel = False
+
+ def set_gumbel(self, use_gumbel, set_check):
+ self.use_gumbel = use_gumbel
+ for i, rnn in enumerate(self.rnns):
+ rnn.set_check(set_check)
+
+ def set_tau(self, tau):
+ self.tau = tau
+
+ def get_tau(self):
+ return self.tau
+
+ def init_weights(self):
+ self.encoder.weight.data.uniform_(-INITRANGE, INITRANGE)
+ self.decoder.bias.data.fill_(0)
+ self.decoder.weight.data.uniform_(-INITRANGE, INITRANGE)
+
+ def forward(self, input, hidden, return_h=False):
+ batch_size = input.size(1)
+
+ emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
+ emb = self.lockdrop(emb, self.dropouti)
+
+ raw_output = emb
+ new_hidden = []
+ raw_outputs = []
+ outputs = []
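+    # during search, turn the architecture weights into (Gumbel-)softmax probabilities;
+    # for a fixed genotype, arch_weights stays None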
+ if self.arch_weights is None:
+ arch_probs = None
+ else:
+ if self.use_gumbel: arch_probs = F.gumbel_softmax(self.arch_weights, self.tau, False)
+ else : arch_probs = F.softmax(self.arch_weights, dim=-1)
+
+ for l, rnn in enumerate(self.rnns):
+ current_input = raw_output
+ raw_output, new_h = rnn(raw_output, hidden[l], arch_probs)
+ new_hidden.append(new_h)
+ raw_outputs.append(raw_output)
+ hidden = new_hidden
+
+ output = self.lockdrop(raw_output, self.dropout)
+ outputs.append(output)
+
+ logit = self.decoder(output.view(-1, self.ninp))
+ log_prob = nn.functional.log_softmax(logit, dim=-1)
+ model_output = log_prob
+ model_output = model_output.view(-1, batch_size, self.ntoken)
+
+ if return_h: return model_output, hidden, raw_outputs, outputs
+ else : return model_output, hidden
+
+ def init_hidden(self, bsz):
+ weight = next(self.parameters()).clone()
+ return [weight.new(1, bsz, self.nhid).zero_()]
diff --git a/others/GDAS/lib/nas_rnn/genotypes.py b/others/GDAS/lib/nas_rnn/genotypes.py
new file mode 100644
index 0000000..d16ce3f
--- /dev/null
+++ b/others/GDAS/lib/nas_rnn/genotypes.py
@@ -0,0 +1,55 @@
+from collections import namedtuple
+
+Genotype = namedtuple('Genotype', 'recurrent concat')
+
+PRIMITIVES = [
+ 'none',
+ 'tanh',
+ 'relu',
+ 'sigmoid',
+ 'identity'
+]
+STEPS = 8
+CONCAT = 8
+
+ENAS = Genotype(
+ recurrent = [
+ ('tanh', 0),
+ ('tanh', 1),
+ ('relu', 1),
+ ('tanh', 3),
+ ('tanh', 3),
+ ('relu', 3),
+ ('relu', 4),
+ ('relu', 7),
+ ('relu', 8),
+ ('relu', 8),
+ ('relu', 8),
+ ],
+ concat = [2, 5, 6, 9, 10, 11]
+)
+
+DARTS_V1 = Genotype(
+ recurrent = [
+ ('relu', 0),
+ ('relu', 1),
+ ('tanh', 2),
+ ('relu', 3), ('relu', 4), ('identity', 1), ('relu', 5), ('relu', 1)
+ ],
+ concat=range(1, 9)
+)
+
+DARTS_V2 = Genotype(
+ recurrent = [
+ ('sigmoid', 0), ('relu', 1), ('relu', 1),
+ ('identity', 1), ('tanh', 2), ('sigmoid', 5),
+ ('tanh', 3), ('relu', 5)
+ ],
+ concat=range(1, 9)
+)
+
+GDAS = Genotype(
+ recurrent=[('relu', 0), ('relu', 0), ('identity', 1), ('relu', 1), ('tanh', 0), ('relu', 2), ('identity', 4), ('identity', 2)],
+ concat=range(1, 9)
+)
+
diff --git a/others/GDAS/lib/nas_rnn/model_search.py b/others/GDAS/lib/nas_rnn/model_search.py
new file mode 100644
index 0000000..f7e2ba9
--- /dev/null
+++ b/others/GDAS/lib/nas_rnn/model_search.py
@@ -0,0 +1,104 @@
+import copy, torch
+import torch.nn as nn
+import torch.nn.functional as F
+from collections import namedtuple
+from .genotypes import PRIMITIVES, STEPS, CONCAT, Genotype
+from .basemodel import DARTSCell, RNNModel
+
+
+class DARTSCellSearch(DARTSCell):
+
+ def __init__(self, ninp, nhid, dropouth, dropoutx):
+ super(DARTSCellSearch, self).__init__(ninp, nhid, dropouth, dropoutx, genotype=None)
+ self.bn = nn.BatchNorm1d(nhid, affine=False)
+ self.check_zero = False
+
+ def set_check(self, check_zero):
+ self.check_zero = check_zero
+
+ def cell(self, x, h_prev, x_mask, h_mask, arch_probs):
+ s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
+ s0 = self.bn(s0)
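+    # when check_zero is set, candidate ops whose architecture probability is exactly
+    # zero are skipped below instead of being multiplied by 0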
+ if self.check_zero:
+ arch_probs_cpu = arch_probs.cpu().tolist()
+ #arch_probs = F.softmax(self.weights, dim=-1)
+
+ offset = 0
+ states = s0.unsqueeze(0)
+ for i in range(STEPS):
+ if self.training:
+ masked_states = states * h_mask.unsqueeze(0)
+ else:
+ masked_states = states
+ ch = masked_states.view(-1, self.nhid).mm(self._Ws[i]).view(i+1, -1, 2*self.nhid)
+ c, h = torch.split(ch, self.nhid, dim=-1)
+ c = c.sigmoid()
+
+ s = torch.zeros_like(s0)
+ for k, name in enumerate(PRIMITIVES):
+ if name == 'none':
+ continue
+ fn = self._get_activation(name)
+ unweighted = states + c * (fn(h) - states)
+ if self.check_zero:
+ INDEX, INDDX = [], []
+ for jj in range(offset, offset+i+1):
+ if arch_probs_cpu[jj][k] > 0:
+ INDEX.append(jj)
+ INDDX.append(jj-offset)
+ if len(INDEX) == 0: continue
+ s += torch.sum(arch_probs[INDEX, k].unsqueeze(-1).unsqueeze(-1) * unweighted[INDDX, :, :], dim=0)
+ else:
+ s += torch.sum(arch_probs[offset:offset+i+1, k].unsqueeze(-1).unsqueeze(-1) * unweighted, dim=0)
+ s = self.bn(s)
+ states = torch.cat([states, s.unsqueeze(0)], 0)
+ offset += i+1
+ output = torch.mean(states[-CONCAT:], dim=0)
+ return output
+
+
+class RNNModelSearch(RNNModel):
+
+ def __init__(self, *args):
+ super(RNNModelSearch, self).__init__(*args)
+ self._args = copy.deepcopy( args )
+
+ k = sum(i for i in range(1, STEPS+1))
+ self.arch_weights = nn.Parameter(torch.Tensor(k, len(PRIMITIVES)))
+ nn.init.normal_(self.arch_weights, 0, 0.001)
+
+ def base_parameters(self):
+ lists = list(self.lockdrop.parameters())
+ lists += list(self.encoder.parameters())
+ lists += list(self.rnns.parameters())
+ lists += list(self.decoder.parameters())
+ return lists
+
+ def arch_parameters(self):
+ return [self.arch_weights]
+
+ def genotype(self):
+
+ def _parse(probs):
+ gene = []
+ start = 0
+ for i in range(STEPS):
+ end = start + i + 1
+ W = probs[start:end].copy()
+ #j = sorted(range(i + 1), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[0]
+ j = sorted(range(i + 1), key=lambda x: -max(W[x][k] for k in range(len(W[x])) ))[0]
+ k_best = None
+ for k in range(len(W[j])):
+ #if k != PRIMITIVES.index('none'):
+ # if k_best is None or W[j][k] > W[j][k_best]:
+ # k_best = k
+ if k_best is None or W[j][k] > W[j][k_best]:
+ k_best = k
+ gene.append((PRIMITIVES[k_best], j))
+ start = end
+ return gene
+
+ with torch.no_grad():
+ gene = _parse(F.softmax(self.arch_weights, dim=-1).cpu().numpy())
+ genotype = Genotype(recurrent=gene, concat=list(range(STEPS+1)[-CONCAT:]))
+ return genotype
diff --git a/others/GDAS/lib/nas_rnn/utils.py b/others/GDAS/lib/nas_rnn/utils.py
new file mode 100644
index 0000000..aa23e87
--- /dev/null
+++ b/others/GDAS/lib/nas_rnn/utils.py
@@ -0,0 +1,66 @@
+import torch
+import torch.nn as nn
+import os, shutil
+import numpy as np
+
+
+def repackage_hidden(h):
+ if isinstance(h, torch.Tensor):
+ return h.detach()
+ else:
+ return tuple(repackage_hidden(v) for v in h)
+
+
+def batchify(data, bsz, use_cuda):
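+  # trim the token stream to a multiple of bsz and reshape it into (seq_len, batch) columns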
+ nbatch = data.size(0) // bsz
+ data = data.narrow(0, 0, nbatch * bsz)
+ data = data.view(bsz, -1).t().contiguous()
+ if use_cuda: return data.cuda()
+ else : return data
+
+
+def get_batch(source, i, seq_len):
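+  # inputs are source[i : i+seq_len]; targets are the same tokens shifted one step ahead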
+ seq_len = min(seq_len, len(source) - 1 - i)
+ data = source[i:i+seq_len].clone()
+ target = source[i+1:i+1+seq_len].clone()
+ return data, target
+
+
+
+def embedded_dropout(embed, words, dropout=0.1, scale=None):
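+  # word-level dropout: zero entire rows of the embedding matrix so every occurrence
+  # of a dropped word is masked consistently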
+ if dropout:
+ mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout)
+ mask.requires_grad_(True)
+ masked_embed_weight = mask * embed.weight
+ else:
+ masked_embed_weight = embed.weight
+ if scale:
+ masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight
+
+ padding_idx = embed.padding_idx
+ if padding_idx is None:
+ padding_idx = -1
+ X = torch.nn.functional.embedding(
+ words, masked_embed_weight,
+ padding_idx, embed.max_norm, embed.norm_type,
+ embed.scale_grad_by_freq, embed.sparse)
+ return X
+
+
+class LockedDropout(nn.Module):
+ def __init__(self):
+ super(LockedDropout, self).__init__()
+
+ def forward(self, x, dropout=0.5):
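+    # variational dropout: sample one mask per sequence and reuse it at every time step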
+ if not self.training or not dropout:
+ return x
+ m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
+ mask = m.div_(1 - dropout).detach()
+ mask = mask.expand_as(x)
+ return mask * x
+
+
+def mask2d(B, D, keep_prob, cuda=True):
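+  # Bernoulli(keep_prob) mask via floor(uniform + keep_prob), pre-scaled by 1/keep_prob
+  # (inverted dropout)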
+ m = torch.floor(torch.rand(B, D) + keep_prob) / keep_prob
+ if cuda: return m.cuda()
+ else : return m
diff --git a/others/GDAS/lib/scheduler/__init__.py b/others/GDAS/lib/scheduler/__init__.py
new file mode 100644
index 0000000..e5eff26
--- /dev/null
+++ b/others/GDAS/lib/scheduler/__init__.py
@@ -0,0 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .utils import load_config
+from .scheduler import MultiStepLR, obtain_scheduler
diff --git a/others/GDAS/lib/scheduler/scheduler.py b/others/GDAS/lib/scheduler/scheduler.py
new file mode 100644
index 0000000..b0d7ac1
--- /dev/null
+++ b/others/GDAS/lib/scheduler/scheduler.py
@@ -0,0 +1,32 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import torch
+from bisect import bisect_right
+
+
+class MultiStepLR(torch.optim.lr_scheduler._LRScheduler):
+
+ def __init__(self, optimizer, milestones, gammas, last_epoch=-1):
+    if list(milestones) != sorted(milestones):
+      raise ValueError('Milestones should be a list of increasing integers. Got {:}'.format(milestones))
+ assert len(milestones) == len(gammas), '{:} vs {:}'.format(milestones, gammas)
+ self.milestones = milestones
+ self.gammas = gammas
+ super(MultiStepLR, self).__init__(optimizer, last_epoch)
+
+ def get_lr(self):
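+    # multiply the base learning rate by every gamma whose milestone has already passed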
+ LR = 1
+ for x in self.gammas[:bisect_right(self.milestones, self.last_epoch)]: LR = LR * x
+ return [base_lr * LR for base_lr in self.base_lrs]
+
+
+def obtain_scheduler(config, optimizer):
+ if config.type == 'multistep':
+ scheduler = MultiStepLR(optimizer, milestones=config.milestones, gammas=config.gammas)
+ elif config.type == 'cosine':
+ scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
+ else:
+ raise ValueError('Unknown learning rate scheduler type : {:}'.format(config.type))
+ return scheduler
diff --git a/others/GDAS/lib/scheduler/utils.py b/others/GDAS/lib/scheduler/utils.py
new file mode 100644
index 0000000..2a278fc
--- /dev/null
+++ b/others/GDAS/lib/scheduler/utils.py
@@ -0,0 +1,42 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, json
+from pathlib import Path
+from collections import namedtuple
+
+support_types = ('str', 'int', 'bool', 'float')
+
+def convert_param(original_lists):
+ assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists)
+ ctype, value = original_lists[0], original_lists[1]
+ assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types)
+ is_list = isinstance(value, list)
+ if not is_list: value = [value]
+ outs = []
+ for x in value:
+ if ctype == 'int':
+ x = int(x)
+ elif ctype == 'str':
+ x = str(x)
+ elif ctype == 'bool':
+ x = bool(int(x))
+ elif ctype == 'float':
+ x = float(x)
+ else:
+ raise TypeError('Does not know this type : {:}'.format(ctype))
+ outs.append(x)
+ if not is_list: outs = outs[0]
+ return outs
+
+def load_config(path):
+ path = str(path)
+ assert os.path.exists(path), 'Can not find {:}'.format(path)
+  # read the configuration back from the JSON file
+  with open(path, 'r') as f:
+    data = json.load(f)
+ content = { k: convert_param(v) for k,v in data.items()}
+ Arguments = namedtuple('Configure', ' '.join(content.keys()))
+ content = Arguments(**content)
+ return content
diff --git a/others/GDAS/lib/utils/__init__.py b/others/GDAS/lib/utils/__init__.py
new file mode 100644
index 0000000..ce07a87
--- /dev/null
+++ b/others/GDAS/lib/utils/__init__.py
@@ -0,0 +1,16 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .utils import AverageMeter, RecorderMeter, convert_secs2time
+from .utils import time_file_str, time_string
+from .utils import test_imagenet_data
+from .utils import print_log
+from .evaluation_utils import obtain_accuracy
+#from .draw_pts import draw_points
+from .gpu_manager import GPUManager
+
+from .save_meta import Save_Meta
+
+from .model_utils import count_parameters_in_MB
+from .model_utils import Cutout
+from .flop_benchmark import print_FLOPs
diff --git a/others/GDAS/lib/utils/draw_pts.py b/others/GDAS/lib/utils/draw_pts.py
new file mode 100644
index 0000000..a0f24ad
--- /dev/null
+++ b/others/GDAS/lib/utils/draw_pts.py
@@ -0,0 +1,41 @@
+import os, sys, time
+import numpy as np
+import matplotlib
+import random
+matplotlib.use('agg')
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+
+def draw_points(points, labels, save_path):
+ title = 'the visualized features'
+ dpi = 100
+ width, height = 1000, 1000
+ legend_fontsize = 10
+ figsize = width / float(dpi), height / float(dpi)
+ fig = plt.figure(figsize=figsize)
+
+ classes = np.unique(labels).tolist()
+ colors = cm.rainbow(np.linspace(0, 1, len(classes)))
+
+ legends = []
+ legendnames = []
+
+ for cls, c in zip(classes, colors):
+
+ indexes = labels == cls
+ ptss = points[indexes, :]
+ x = ptss[:,0]
+ y = ptss[:,1]
+ if cls % 2 == 0: marker = 'x'
+ else: marker = 'o'
+ legend = plt.scatter(x, y, color=c, s=1, marker=marker)
+ legendname = '{:02d}'.format(cls+1)
+ legends.append( legend )
+ legendnames.append( legendname )
+
+ plt.legend(legends, legendnames, scatterpoints=1, ncol=5, fontsize=8)
+
+ if save_path is not None:
+ fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
+ print ('---- save figure {} into {}'.format(title, save_path))
+ plt.close(fig)
diff --git a/others/GDAS/lib/utils/evaluation_utils.py b/others/GDAS/lib/utils/evaluation_utils.py
new file mode 100644
index 0000000..cf853c8
--- /dev/null
+++ b/others/GDAS/lib/utils/evaluation_utils.py
@@ -0,0 +1,16 @@
+import torch
+
+def obtain_accuracy(output, target, topk=(1,)):
+ """Computes the precision@k for the specified values of k"""
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
diff --git a/others/GDAS/lib/utils/flop_benchmark.py b/others/GDAS/lib/utils/flop_benchmark.py
new file mode 100644
index 0000000..fab0506
--- /dev/null
+++ b/others/GDAS/lib/utils/flop_benchmark.py
@@ -0,0 +1,116 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+# modified from https://github.com/warmspringwinds/pytorch-segmentation-detection/blob/master/pytorch_segmentation_detection/utils/flops_benchmark.py
+import copy, torch
+
+def print_FLOPs(model, shape, logs):
+ print_log, log = logs
+ model = copy.deepcopy( model )
+
+ model = add_flops_counting_methods(model)
+ model = model.cuda()
+ model.eval()
+
+ cache_inputs = torch.zeros(*shape).cuda()
+ #print_log('In the calculating function : cache input size : {:}'.format(cache_inputs.size()), log)
+ _ = model(cache_inputs)
+ FLOPs = compute_average_flops_cost( model ) / 1e6
+  print_log('FLOPs : {:} M'.format(FLOPs), log)
+ torch.cuda.empty_cache()
+
+
+# ---- Public functions
+def add_flops_counting_methods( model ):
+ model.__batch_counter__ = 0
+ add_batch_counter_hook_function( model )
+ model.apply( add_flops_counter_variable_or_reset )
+ model.apply( add_flops_counter_hook_function )
+ return model
+
+
+
+def compute_average_flops_cost(model):
+ """
+ A method that will be available after add_flops_counting_methods() is called on a desired net object.
+ Returns current mean flops consumption per image.
+ """
+  batches_count = model.__batch_counter__
+  flops_sum = 0
+  # Only conv and linear layers are summed here; the pooling hooks below also
+  # accumulate __flops__, but their cost is not included in this total.
+  for module in model.modules():
+ if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear):
+ flops_sum += module.__flops__
+ return flops_sum / batches_count
+
+
+# ---- Internal functions
+def pool_flops_counter_hook(pool_module, inputs, output):
+ batch_size = inputs[0].size(0)
+ kernel_size = pool_module.kernel_size
+ out_C, output_height, output_width = output.shape[1:]
+ assert out_C == inputs[0].size(1), '{:} vs. {:}'.format(out_C, inputs[0].size())
+
+ overall_flops = batch_size * out_C * output_height * output_width * kernel_size * kernel_size
+ pool_module.__flops__ += overall_flops
+
+
+def fc_flops_counter_hook(fc_module, inputs, output):
+ batch_size = inputs[0].size(0)
+ xin, xout = fc_module.in_features, fc_module.out_features
+ assert xin == inputs[0].size(1) and xout == output.size(1), 'IO=({:}, {:})'.format(xin, xout)
+ overall_flops = batch_size * xin * xout
+ if fc_module.bias is not None:
+ overall_flops += batch_size * xout
+ fc_module.__flops__ += overall_flops
+
+
+def conv_flops_counter_hook(conv_module, inputs, output):
+ batch_size = inputs[0].size(0)
+ output_height, output_width = output.shape[2:]
+
+ kernel_height, kernel_width = conv_module.kernel_size
+ in_channels = conv_module.in_channels
+ out_channels = conv_module.out_channels
+ groups = conv_module.groups
+ conv_per_position_flops = kernel_height * kernel_width * in_channels * out_channels / groups
+
+ active_elements_count = batch_size * output_height * output_width
+ overall_flops = conv_per_position_flops * active_elements_count
+
+ if conv_module.bias is not None:
+ overall_flops += out_channels * active_elements_count
+ conv_module.__flops__ += overall_flops
+
+
+def batch_counter_hook(module, inputs, output):
+ # Can have multiple inputs, getting the first one
+ inputs = inputs[0]
+ batch_size = inputs.shape[0]
+ module.__batch_counter__ += batch_size
+
+
+def add_batch_counter_hook_function(module):
+ if not hasattr(module, '__batch_counter_handle__'):
+ handle = module.register_forward_hook(batch_counter_hook)
+ module.__batch_counter_handle__ = handle
+
+
+def add_flops_counter_variable_or_reset(module):
+ if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear) \
+ or isinstance(module, torch.nn.AvgPool2d) or isinstance(module, torch.nn.MaxPool2d):
+ module.__flops__ = 0
+
+
+def add_flops_counter_hook_function(module):
+ if isinstance(module, torch.nn.Conv2d):
+ if not hasattr(module, '__flops_handle__'):
+ handle = module.register_forward_hook(conv_flops_counter_hook)
+ module.__flops_handle__ = handle
+ elif isinstance(module, torch.nn.Linear):
+ if not hasattr(module, '__flops_handle__'):
+ handle = module.register_forward_hook(fc_flops_counter_hook)
+ module.__flops_handle__ = handle
+ elif isinstance(module, torch.nn.AvgPool2d) or isinstance(module, torch.nn.MaxPool2d):
+ if not hasattr(module, '__flops_handle__'):
+ handle = module.register_forward_hook(pool_flops_counter_hook)
+ module.__flops_handle__ = handle
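+
+# Usage sketch (assuming the (print-function, log-file) pair used elsewhere in
+# this repo for logging):
+#   print_FLOPs(network, (1, 3, 32, 32), (print_log, log))
+# The model is deep-copied, run once on a zero tensor of the given shape, and
+# the per-image conv/linear multiply-add count is reported in millions.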
diff --git a/others/GDAS/lib/utils/gpu_manager.py b/others/GDAS/lib/utils/gpu_manager.py
new file mode 100644
index 0000000..8b039de
--- /dev/null
+++ b/others/GDAS/lib/utils/gpu_manager.py
@@ -0,0 +1,70 @@
+import os
+
+class GPUManager():
+ queries = ('index', 'gpu_name', 'memory.free', 'memory.used', 'memory.total', 'power.draw', 'power.limit')
+
+ def __init__(self):
+ all_gpus = self.query_gpu(False)
+
+ def get_info(self, ctype):
+ cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(ctype)
+ lines = os.popen(cmd).readlines()
+ lines = [line.strip('\n') for line in lines]
+ return lines
+
+ def query_gpu(self, show=True):
+ num_gpus = len( self.get_info('index') )
+ all_gpus = [ {} for i in range(num_gpus) ]
+ for query in self.queries:
+ infos = self.get_info(query)
+ for idx, info in enumerate(infos):
+ all_gpus[idx][query] = info
+
+ if 'CUDA_VISIBLE_DEVICES' in os.environ:
+ CUDA_VISIBLE_DEVICES = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
+ selected_gpus = []
+ for idx, CUDA_VISIBLE_DEVICE in enumerate(CUDA_VISIBLE_DEVICES):
+ find = False
+ for gpu in all_gpus:
+ if gpu['index'] == CUDA_VISIBLE_DEVICE:
+ assert find==False, 'Duplicate cuda device index : {}'.format(CUDA_VISIBLE_DEVICE)
+ find = True
+ selected_gpus.append( gpu.copy() )
+ selected_gpus[-1]['index'] = '{}'.format(idx)
+      assert find, 'Cannot find the device : {}'.format(CUDA_VISIBLE_DEVICE)
+ all_gpus = selected_gpus
+
+ if show:
+ allstrings = ''
+ for gpu in all_gpus:
+ string = '| '
+ for query in self.queries:
+ if query.find('memory') == 0: xinfo = '{:>9}'.format(gpu[query])
+ else: xinfo = gpu[query]
+ string = string + query + ' : ' + xinfo + ' | '
+ allstrings = allstrings + string + '\n'
+ return allstrings
+ else:
+ return all_gpus
+
+ def select_by_memory(self, numbers=1):
+ all_gpus = self.query_gpu(False)
+    assert numbers <= len(all_gpus), 'Require {} gpus but only {} are available'.format(numbers, len(all_gpus))
+ alls = []
+ for idx, gpu in enumerate(all_gpus):
+ free_memory = gpu['memory.free']
+ free_memory = free_memory.split(' ')[0]
+ free_memory = int(free_memory)
+ index = gpu['index']
+ alls.append((free_memory, index))
+ alls.sort(reverse = True)
+ alls = [ int(alls[i][1]) for i in range(numbers) ]
+ return sorted(alls)
+
+"""
+if __name__ == '__main__':
+ manager = GPUManager()
+ manager.query_gpu(True)
+ indexes = manager.select_by_memory(3)
+ print (indexes)
+"""
diff --git a/others/GDAS/lib/utils/model_utils.py b/others/GDAS/lib/utils/model_utils.py
new file mode 100644
index 0000000..5e97bc9
--- /dev/null
+++ b/others/GDAS/lib/utils/model_utils.py
@@ -0,0 +1,35 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+def count_parameters_in_MB(model):
+ if isinstance(model, nn.Module):
+ return np.sum(np.prod(v.size()) for v in model.parameters())/1e6
+ else:
+ return np.sum(np.prod(v.size()) for v in model)/1e6
+
+
+class Cutout(object):
+ def __init__(self, length):
+ self.length = length
+
+ def __repr__(self):
+ return ('{name}(length={length})'.format(name=self.__class__.__name__, **self.__dict__))
+
+ def __call__(self, img):
+ h, w = img.size(1), img.size(2)
+ mask = np.ones((h, w), np.float32)
+ y = np.random.randint(h)
+ x = np.random.randint(w)
+
+ y1 = np.clip(y - self.length // 2, 0, h)
+ y2 = np.clip(y + self.length // 2, 0, h)
+ x1 = np.clip(x - self.length // 2, 0, w)
+ x2 = np.clip(x + self.length // 2, 0, w)
+
+ mask[y1: y2, x1: x2] = 0.
+ mask = torch.from_numpy(mask)
+ mask = mask.expand_as(img)
+ img *= mask
+ return img
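+
+# Usage sketch (assuming torchvision): Cutout expects a CHW tensor, so apply
+# it after ToTensor, e.g.
+#   transform = transforms.Compose([transforms.ToTensor(), Cutout(16)])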
diff --git a/others/GDAS/lib/utils/save_meta.py b/others/GDAS/lib/utils/save_meta.py
new file mode 100644
index 0000000..d0cf3dc
--- /dev/null
+++ b/others/GDAS/lib/utils/save_meta.py
@@ -0,0 +1,53 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import torch
+import os, sys
+import os.path as osp
+import numpy as np
+
+def tensor2np(x):
+ if isinstance(x, np.ndarray): return x
+ if x.is_cuda: x = x.cpu()
+ return x.numpy()
+
+class Save_Meta():
+
+ def __init__(self):
+ self.reset()
+
+ def __repr__(self):
+ return ('{name}'.format(name=self.__class__.__name__)+'(number of data = {})'.format(len(self)))
+
+ def reset(self):
+ self.predictions = []
+ self.groundtruth = []
+
+ def __len__(self):
+ return len(self.predictions)
+
+ def append(self, _pred, _ground):
+ _pred, _ground = tensor2np(_pred), tensor2np(_ground)
+ assert _ground.shape[0] == _pred.shape[0] and len(_pred.shape) == 2 and len(_ground.shape) == 1, 'The shapes are wrong : {} & {}'.format(_pred.shape, _ground.shape)
+ self.predictions.append(_pred)
+ self.groundtruth.append(_ground)
+
+ def save(self, save_dir, filename, test=True):
+ meta = {'predictions': self.predictions,
+ 'groundtruth': self.groundtruth}
+ filename = osp.join(save_dir, filename)
+ torch.save(meta, filename)
+ if test:
+ predictions = np.concatenate(self.predictions)
+ groundtruth = np.concatenate(self.groundtruth)
+ predictions = np.argmax(predictions, axis=1)
+ accuracy = np.sum(groundtruth==predictions) * 100.0 / predictions.size
+ else:
+ accuracy = None
+    print ('save meta into {} with accuracy = {}'.format(filename, accuracy))
+
+ def load(self, filename):
+ assert os.path.isfile(filename), '{} is not a file'.format(filename)
+ checkpoint = torch.load(filename)
+ self.predictions = checkpoint['predictions']
+ self.groundtruth = checkpoint['groundtruth']
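+
+# Minimal usage sketch (assuming batched logits and integer labels):
+#   meta = Save_Meta()
+#   meta.append(logits, labels)       # logits: (N, C), labels: (N,)
+#   meta.save('./logs', 'meta.pth')   # also prints the argmax accuracy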
diff --git a/others/GDAS/lib/utils/utils.py b/others/GDAS/lib/utils/utils.py
new file mode 100644
index 0000000..27abf46
--- /dev/null
+++ b/others/GDAS/lib/utils/utils.py
@@ -0,0 +1,140 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, time
+import numpy as np
+import random
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+
+class RecorderMeter(object):
+ """Computes and stores the minimum loss value and its epoch index"""
+ def __init__(self, total_epoch):
+ self.reset(total_epoch)
+
+ def reset(self, total_epoch):
+ assert total_epoch > 0
+ self.total_epoch = total_epoch
+ self.current_epoch = 0
+ self.epoch_losses = np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
+ self.epoch_losses = self.epoch_losses - 1
+
+    self.epoch_accuracy= np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
+
+ def update(self, idx, train_loss, train_acc, val_loss, val_acc):
+    assert idx >= 0 and idx < self.total_epoch, 'total_epoch is {} but got index {}'.format(self.total_epoch, idx)
+ self.epoch_losses [idx, 0] = train_loss
+ self.epoch_losses [idx, 1] = val_loss
+ self.epoch_accuracy[idx, 0] = train_acc
+ self.epoch_accuracy[idx, 1] = val_acc
+ self.current_epoch = idx + 1
+ return self.max_accuracy(False) == self.epoch_accuracy[idx, 1]
+
+ def max_accuracy(self, istrain):
+ if self.current_epoch <= 0: return 0
+ if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max()
+ else: return self.epoch_accuracy[:self.current_epoch, 1].max()
+
+ def plot_curve(self, save_path):
+ import matplotlib
+ matplotlib.use('agg')
+ import matplotlib.pyplot as plt
+ title = 'the accuracy/loss curve of train/val'
+ dpi = 100
+ width, height = 1600, 1000
+ legend_fontsize = 10
+ figsize = width / float(dpi), height / float(dpi)
+
+ fig = plt.figure(figsize=figsize)
+ x_axis = np.array([i for i in range(self.total_epoch)]) # epochs
+ y_axis = np.zeros(self.total_epoch)
+
+ plt.xlim(0, self.total_epoch)
+ plt.ylim(0, 100)
+ interval_y = 5
+ interval_x = 5
+ plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
+ plt.yticks(np.arange(0, 100 + interval_y, interval_y))
+ plt.grid()
+ plt.title(title, fontsize=20)
+ plt.xlabel('the training epoch', fontsize=16)
+ plt.ylabel('accuracy', fontsize=16)
+
+ y_axis[:] = self.epoch_accuracy[:, 0]
+ plt.plot(x_axis, y_axis, color='g', linestyle='-', label='train-accuracy', lw=2)
+ plt.legend(loc=4, fontsize=legend_fontsize)
+
+ y_axis[:] = self.epoch_accuracy[:, 1]
+ plt.plot(x_axis, y_axis, color='y', linestyle='-', label='valid-accuracy', lw=2)
+ plt.legend(loc=4, fontsize=legend_fontsize)
+
+
+ y_axis[:] = self.epoch_losses[:, 0]
+ plt.plot(x_axis, y_axis*50, color='g', linestyle=':', label='train-loss-x50', lw=2)
+ plt.legend(loc=4, fontsize=legend_fontsize)
+
+ y_axis[:] = self.epoch_losses[:, 1]
+ plt.plot(x_axis, y_axis*50, color='y', linestyle=':', label='valid-loss-x50', lw=2)
+ plt.legend(loc=4, fontsize=legend_fontsize)
+
+ if save_path is not None:
+ fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
+ print ('---- save figure {} into {}'.format(title, save_path))
+ plt.close(fig)
+
+def print_log(print_string, log):
+ print ("{:}".format(print_string))
+ if log is not None:
+ log.write('{}\n'.format(print_string))
+ log.flush()
+
+def time_file_str():
+ ISOTIMEFORMAT='%Y-%m-%d'
+ string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+ return string + '-{}'.format(random.randint(1, 10000))
+
+def time_string():
+ ISOTIMEFORMAT='%Y-%m-%d-%X'
+ string = '[{}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+ return string
+
+def convert_secs2time(epoch_time, return_str=False):
+ need_hour = int(epoch_time / 3600)
+ need_mins = int((epoch_time - 3600*need_hour) / 60)
+ need_secs = int(epoch_time - 3600*need_hour - 60*need_mins)
+  if not return_str:
+ return need_hour, need_mins, need_secs
+ else:
+ return '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
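+
+# For example, convert_secs2time(3725) returns (1, 2, 5), while
+# convert_secs2time(3725, True) returns '[Need: 01:02:05]'.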
+
+def test_imagenet_data(imagenet):
+ total_length = len(imagenet)
+ assert total_length == 1281166 or total_length == 50000, 'The length of ImageNet is wrong : {}'.format(total_length)
+ map_id = {}
+ for index in range(total_length):
+ path, target = imagenet.imgs[index]
+ folder, image_name = os.path.split(path)
+ _, folder = os.path.split(folder)
+ if folder not in map_id:
+ map_id[folder] = target
+ else:
+ assert map_id[folder] == target, 'Class : {} is not {}'.format(folder, target)
+ assert image_name.find(folder) == 0, '{} is wrong.'.format(path)
+ print ('Check ImageNet Dataset OK')
diff --git a/others/GDAS/paddlepaddle/.gitignore b/others/GDAS/paddlepaddle/.gitignore
new file mode 100644
index 0000000..ed615b6
--- /dev/null
+++ b/others/GDAS/paddlepaddle/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.whl
+snapshots
diff --git a/others/GDAS/paddlepaddle/README.md b/others/GDAS/paddlepaddle/README.md
new file mode 100644
index 0000000..cc2f73e
--- /dev/null
+++ b/others/GDAS/paddlepaddle/README.md
@@ -0,0 +1,119 @@
+# Image Classification based on NAS-Searched Models
+
+This directory contains 10 image classification models.
+Nine of them were discovered automatically by different Neural Architecture Search (NAS) algorithms, and the remaining one is a residual network (ResNet).
+We provide codes and scripts to train these models on both CIFAR-10 and CIFAR-100.
+We use the standard data augmentation, i.e., random crop, random flip, and normalization.
+
+---
+## Table of Contents
+- [Installation](#installation)
+- [Data Preparation](#data-preparation)
+- [Training Models](#training-models)
+- [Project Structure](#project-structure)
+- [Citation](#citation)
+
+
+### Installation
+This project has the following requirements:
+- Python = 3.6
+- PaddlePaddle Fluid >= v0.15.0
+- numpy, six, PIL (tarfile and pickle are provided by the standard library)
+
+
+### Data Preparation
+Please download [CIFAR-10](https://dataset.bj.bcebos.com/cifar/cifar-10-python.tar.gz) and [CIFAR-100](https://dataset.bj.bcebos.com/cifar/cifar-100-python.tar.gz) before running the codes.
+Note that the MD5 of CIFAR-10-Python compressed file is `c58f30108f718f92721af3b95e74349a` and the MD5 of CIFAR-100-Python compressed file is `eb9058c3a382ffc7106e4002c42a8d85`.
+Please save these files into `${TORCH_HOME}/cifar.python`.
+After data preparation, there should be two files `${TORCH_HOME}/cifar.python/cifar-10-python.tar.gz` and `${TORCH_HOME}/cifar.python/cifar-100-python.tar.gz`.
+
+
+### Training Models
+
+After setting up the environment and preparing the data, you can train the models. The main entry point is `train_cifar.py`, and we provide scripts for easy usage.
+```
+bash ./scripts/base-train.sh 0 cifar-10 ResNet110
+bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V1
+bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V2
+bash ./scripts/train-nas.sh 0 cifar-10 SETN
+bash ./scripts/train-nas.sh 0 cifar-10 NASNet
+bash ./scripts/train-nas.sh 0 cifar-10 ENASNet
+bash ./scripts/train-nas.sh 0 cifar-10 AmoebaNet
+bash ./scripts/train-nas.sh 0 cifar-10 PNASNet
+bash ./scripts/train-nas.sh 0 cifar-100 SETN
+```
+The first argument is the GPU ID used for training, the second is the dataset name (`cifar-10` or `cifar-100`), and the last is the model name.
+Please use `./scripts/base-train.sh` for ResNet and use `./scripts/train-nas.sh` for NAS-searched models.
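+
+For example, assuming your CIFAR archives live under `~/datasets/cifar.python`, a full invocation could be:
+```
+TORCH_HOME=~/datasets bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V1
+```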
+
+
+### Project Structure
+```
+.
+├──train_cifar.py [Training CNN models]
+├──lib [Library for dataset, models, and others]
+│ └──models
+│ ├──__init__.py [Import useful Classes and Functions in models]
+│ ├──resnet.py [Define the ResNet models]
+│ ├──operations.py [Define the atomic operation in NAS search space]
+│ ├──genotypes.py [Define the topological structure of different NAS-searched models]
+│ └──nas_net.py [Define the macro structure of NAS models]
+│ └──utils
+│ ├──__init__.py [Import useful Classes and Functions in utils]
+│ ├──meter.py [Define the AverageMeter class to count the accuracy and loss]
+│ ├──time_utils.py [Define some functions to print date or convert seconds into hours]
+│ └──data_utils.py [Define data augmentation functions and dataset reader for CIFAR]
+└──scripts [Scripts for running]
+```
+
+
+### Citation
+If you find that this project helps your research, please consider citing these papers:
+```
+@inproceedings{dong2019one,
+ title = {One-Shot Neural Architecture Search via Self-Evaluated Template Network},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
+ year = {2019}
+}
+@inproceedings{dong2019search,
+ title = {Searching for A Robust Neural Architecture in Four GPU Hours},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ pages = {1761--1770},
+ year = {2019}
+}
+@inproceedings{liu2018darts,
+  title = {{DARTS}: Differentiable architecture search},
+  author = {Liu, Hanxiao and Simonyan, Karen and Yang, Yiming},
+  booktitle = {International Conference on Learning Representations (ICLR)},
+  year = {2019}
+}
+@inproceedings{pham2018efficient,
+ title = {Efficient Neural Architecture Search via Parameter Sharing},
+ author = {Pham, Hieu and Guan, Melody and Zoph, Barret and Le, Quoc and Dean, Jeff},
+ booktitle = {International Conference on Machine Learning (ICML)},
+ pages = {4092--4101},
+ year = {2018}
+}
+@inproceedings{liu2018progressive,
+ title = {Progressive neural architecture search},
+ author = {Liu, Chenxi and Zoph, Barret and Neumann, Maxim and Shlens, Jonathon and Hua, Wei and Li, Li-Jia and Fei-Fei, Li and Yuille, Alan and Huang, Jonathan and Murphy, Kevin},
+ booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
+ pages = {19--34},
+ year = {2018}
+}
+@inproceedings{zoph2018learning,
+ title = {Learning transferable architectures for scalable image recognition},
+ author = {Zoph, Barret and Vasudevan, Vijay and Shlens, Jonathon and Le, Quoc V},
+ booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ pages = {8697--8710},
+ year = {2018}
+}
+@inproceedings{real2019regularized,
+ title = {Regularized evolution for image classifier architecture search},
+ author = {Real, Esteban and Aggarwal, Alok and Huang, Yanping and Le, Quoc V},
+ booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
+ pages = {4780--4789},
+ year = {2019}
+}
+```
diff --git a/others/GDAS/paddlepaddle/lib/models/__init__.py b/others/GDAS/paddlepaddle/lib/models/__init__.py
new file mode 100644
index 0000000..0bebe0b
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/models/__init__.py
@@ -0,0 +1,3 @@
+from .genotypes import Networks
+from .nas_net import NASCifarNet
+from .resnet import resnet_cifar
diff --git a/others/GDAS/paddlepaddle/lib/models/genotypes.py b/others/GDAS/paddlepaddle/lib/models/genotypes.py
new file mode 100644
index 0000000..08f145f
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/models/genotypes.py
@@ -0,0 +1,175 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from collections import namedtuple
+
+Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
+
+
+# Learning Transferable Architectures for Scalable Image Recognition, CVPR 2018
+NASNet = Genotype(
+ normal = [
+ (('sep_conv_5x5', 1), ('sep_conv_3x3', 0)),
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 0)),
+ (('avg_pool_3x3', 1), ('skip_connect', 0)),
+ (('avg_pool_3x3', 0), ('avg_pool_3x3', 0)),
+ (('sep_conv_3x3', 1), ('skip_connect', 1)),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ (('sep_conv_5x5', 1), ('sep_conv_7x7', 0)),
+ (('max_pool_3x3', 1), ('sep_conv_7x7', 0)),
+ (('avg_pool_3x3', 1), ('sep_conv_5x5', 0)),
+ (('skip_connect', 3), ('avg_pool_3x3', 2)),
+ (('sep_conv_3x3', 2), ('max_pool_3x3', 1)),
+ ],
+ reduce_concat = [4, 5, 6],
+)
+
+
+# Progressive Neural Architecture Search, ECCV 2018
+PNASNet = Genotype(
+ normal = [
+ (('sep_conv_5x5', 0), ('max_pool_3x3', 0)),
+ (('sep_conv_7x7', 1), ('max_pool_3x3', 1)),
+ (('sep_conv_5x5', 1), ('sep_conv_3x3', 1)),
+ (('sep_conv_3x3', 4), ('max_pool_3x3', 1)),
+ (('sep_conv_3x3', 0), ('skip_connect', 1)),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ (('sep_conv_5x5', 0), ('max_pool_3x3', 0)),
+ (('sep_conv_7x7', 1), ('max_pool_3x3', 1)),
+ (('sep_conv_5x5', 1), ('sep_conv_3x3', 1)),
+ (('sep_conv_3x3', 4), ('max_pool_3x3', 1)),
+ (('sep_conv_3x3', 0), ('skip_connect', 1)),
+ ],
+ reduce_concat = [2, 3, 4, 5, 6],
+)
+
+
+# Regularized Evolution for Image Classifier Architecture Search, AAAI 2019
+AmoebaNet = Genotype(
+ normal = [
+ (('avg_pool_3x3', 0), ('max_pool_3x3', 1)),
+ (('sep_conv_3x3', 0), ('sep_conv_5x5', 2)),
+ (('sep_conv_3x3', 0), ('avg_pool_3x3', 3)),
+ (('sep_conv_3x3', 1), ('skip_connect', 1)),
+ (('skip_connect', 0), ('avg_pool_3x3', 1)),
+ ],
+ normal_concat = [4, 5, 6],
+ reduce = [
+ (('avg_pool_3x3', 0), ('sep_conv_3x3', 1)),
+ (('max_pool_3x3', 0), ('sep_conv_7x7', 2)),
+ (('sep_conv_7x7', 0), ('avg_pool_3x3', 1)),
+ (('max_pool_3x3', 0), ('max_pool_3x3', 1)),
+ (('conv_7x1_1x7', 0), ('sep_conv_3x3', 5)),
+ ],
+ reduce_concat = [3, 4, 6]
+)
+
+
+# Efficient Neural Architecture Search via Parameter Sharing, ICML 2018
+ENASNet = Genotype(
+ normal = [
+ (('sep_conv_3x3', 1), ('skip_connect', 1)),
+ (('sep_conv_5x5', 1), ('skip_connect', 0)),
+ (('avg_pool_3x3', 0), ('sep_conv_3x3', 1)),
+ (('sep_conv_3x3', 0), ('avg_pool_3x3', 1)),
+ (('sep_conv_5x5', 1), ('avg_pool_3x3', 0)),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 1)), # 2
+ (('sep_conv_3x3', 1), ('avg_pool_3x3', 1)), # 3
+ (('sep_conv_3x3', 1), ('avg_pool_3x3', 1)), # 4
+ (('avg_pool_3x3', 1), ('sep_conv_5x5', 4)), # 5
+ (('sep_conv_3x3', 5), ('sep_conv_5x5', 0)),
+ ],
+ reduce_concat = [2, 3, 4, 5, 6],
+)
+
+
+# DARTS: Differentiable Architecture Search, ICLR 2019
+DARTS_V1 = Genotype(
+ normal=[
+ (('sep_conv_3x3', 1), ('sep_conv_3x3', 0)), # step 1
+ (('skip_connect', 0), ('sep_conv_3x3', 1)), # step 2
+ (('skip_connect', 0), ('sep_conv_3x3', 1)), # step 3
+ (('sep_conv_3x3', 0), ('skip_connect', 2)) # step 4
+ ],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('max_pool_3x3', 0), ('max_pool_3x3', 1)), # step 1
+ (('skip_connect', 2), ('max_pool_3x3', 0)), # step 2
+ (('max_pool_3x3', 0), ('skip_connect', 2)), # step 3
+ (('skip_connect', 2), ('avg_pool_3x3', 0)) # step 4
+ ],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+# DARTS: Differentiable Architecture Search, ICLR 2019
+DARTS_V2 = Genotype(
+ normal=[
+ (('sep_conv_3x3', 0), ('sep_conv_3x3', 1)), # step 1
+ (('sep_conv_3x3', 0), ('sep_conv_3x3', 1)), # step 2
+ (('sep_conv_3x3', 1), ('skip_connect', 0)), # step 3
+ (('skip_connect', 0), ('dil_conv_3x3', 2)) # step 4
+ ],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('max_pool_3x3', 0), ('max_pool_3x3', 1)), # step 1
+ (('skip_connect', 2), ('max_pool_3x3', 1)), # step 2
+ (('max_pool_3x3', 0), ('skip_connect', 2)), # step 3
+ (('skip_connect', 2), ('max_pool_3x3', 1)) # step 4
+ ],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+
+# One-Shot Neural Architecture Search via Self-Evaluated Template Network, ICCV 2019
+SETN = Genotype(
+ normal=[
+ (('skip_connect', 0), ('sep_conv_5x5', 1)),
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 1)),
+ (('sep_conv_5x5', 1), ('sep_conv_5x5', 3)),
+ (('max_pool_3x3', 1), ('conv_3x1_1x3', 4))],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('sep_conv_3x3', 0), ('sep_conv_5x5', 1)),
+ (('avg_pool_3x3', 0), ('sep_conv_5x5', 1)),
+ (('avg_pool_3x3', 0), ('sep_conv_5x5', 1)),
+ (('avg_pool_3x3', 0), ('skip_connect', 1))],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+# Searching for A Robust Neural Architecture in Four GPU Hours, CVPR 2019
+GDAS_V1 = Genotype(
+ normal=[
+ (('skip_connect', 0), ('skip_connect', 1)),
+ (('skip_connect', 0), ('sep_conv_5x5', 2)),
+ (('sep_conv_3x3', 3), ('skip_connect', 0)),
+ (('sep_conv_5x5', 4), ('sep_conv_3x3', 3))],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 1)),
+ (('sep_conv_5x5', 2), ('sep_conv_5x5', 1)),
+ (('dil_conv_5x5', 2), ('sep_conv_3x3', 1)),
+ (('sep_conv_5x5', 0), ('sep_conv_5x5', 1))],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+Networks = {'DARTS_V1' : DARTS_V1,
+ 'DARTS_V2' : DARTS_V2,
+ 'DARTS' : DARTS_V2,
+ 'NASNet' : NASNet,
+ 'ENASNet' : ENASNet,
+ 'AmoebaNet': AmoebaNet,
+ 'GDAS_V1' : GDAS_V1,
+ 'PNASNet' : PNASNet,
+ 'SETN' : SETN,
+ }
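+
+# For instance, Networks['GDAS_V1'].normal is the list of (operation-name,
+# input-index) pairs defining the normal cell, and .normal_concat lists the
+# intermediate nodes whose outputs are concatenated to form the cell output.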
diff --git a/others/GDAS/paddlepaddle/lib/models/nas_net.py b/others/GDAS/paddlepaddle/lib/models/nas_net.py
new file mode 100644
index 0000000..10815c7
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/models/nas_net.py
@@ -0,0 +1,79 @@
+import paddle
+import paddle.fluid as fluid
+from .operations import OPS
+
+
+def AuxiliaryHeadCIFAR(inputs, C, class_num):
+ print ('AuxiliaryHeadCIFAR : inputs-shape : {:}'.format(inputs.shape))
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.pool2d(temp, pool_size=5, pool_stride=3, pool_padding=0, pool_type='avg')
+ temp = fluid.layers.conv2d(temp, filter_size=1, num_filters=128, stride=1, padding=0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act='relu', bias_attr=None)
+ temp = fluid.layers.conv2d(temp, filter_size=1, num_filters=768, stride=2, padding=0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act='relu', bias_attr=None)
+ print ('AuxiliaryHeadCIFAR : last---shape : {:}'.format(temp.shape))
+ predict = fluid.layers.fc(input=temp, size=class_num, act='softmax')
+ return predict
+
+
+def InferCell(name, inputs_prev_prev, inputs_prev, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
+ print ('[{:}] C_prev_prev={:} C_prev={:}, C={:}, reduction_prev={:}, reduction={:}'.format(name, C_prev_prev, C_prev, C, reduction_prev, reduction))
+ print ('inputs_prev_prev : {:}'.format(inputs_prev_prev.shape))
+ print ('inputs_prev : {:}'.format(inputs_prev.shape))
+ inputs_prev_prev = OPS['skip_connect'](inputs_prev_prev, C_prev_prev, C, 2 if reduction_prev else 1)
+ inputs_prev = OPS['skip_connect'](inputs_prev, C_prev, C, 1)
+ print ('inputs_prev_prev : {:}'.format(inputs_prev_prev.shape))
+ print ('inputs_prev : {:}'.format(inputs_prev.shape))
+ if reduction: step_ops, concat = genotype.reduce, genotype.reduce_concat
+ else : step_ops, concat = genotype.normal, genotype.normal_concat
+ states = [inputs_prev_prev, inputs_prev]
+ for istep, operations in enumerate(step_ops):
+ op_a, op_b = operations
+ # the first operation
+ #print ('-->>[{:}/{:}] [{:}] + [{:}]'.format(istep, len(step_ops), op_a, op_b))
+ stride = 2 if reduction and op_a[1] < 2 else 1
+ tensor1 = OPS[ op_a[0] ](states[op_a[1]], C, C, stride)
+ stride = 2 if reduction and op_b[1] < 2 else 1
+ tensor2 = OPS[ op_b[0] ](states[op_b[1]], C, C, stride)
+ state = fluid.layers.elementwise_add(x=tensor1, y=tensor2, act=None)
+ assert tensor1.shape == tensor2.shape, 'invalid shape {:} vs. {:}'.format(tensor1.shape, tensor2.shape)
+ print ('-->>[{:}/{:}] tensor={:} from {:} + {:}'.format(istep, len(step_ops), state.shape, tensor1.shape, tensor2.shape))
+ states.append( state )
+ states_to_cat = [states[x] for x in concat]
+ outputs = fluid.layers.concat(states_to_cat, axis=1)
+ print ('-->> output-shape : {:} from concat={:}'.format(outputs.shape, concat))
+ return outputs
+
+
+
+# NASCifarNet(inputs, 36, 6, 3, 10, 'xxx', True)
+def NASCifarNet(ipt, C, N, stem_multiplier, class_num, genotype, auxiliary):
+ # cifar head module
+ C_curr = stem_multiplier * C
+ stem = fluid.layers.conv2d(ipt, filter_size=3, num_filters=C_curr, stride=1, padding=1, act=None, bias_attr=False)
+ stem = fluid.layers.batch_norm(input=stem, act=None, bias_attr=None)
+ print ('stem-shape : {:}'.format(stem.shape))
+ # N + 1 + N + 1 + N cells
+ layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
+ layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
+ C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
+ reduction_prev = False
+ auxiliary_pred = None
+
+ cell_results = [stem, stem]
+ for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
+ xstr = '{:02d}/{:02d}'.format(index, len(layer_channels))
+ cell_result = InferCell(xstr, cell_results[-2], cell_results[-1], genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ C_prev_prev, C_prev = C_prev, cell_result.shape[1]
+ cell_results.append( cell_result )
+ if auxiliary and reduction and C_curr == C*4:
+ auxiliary_pred = AuxiliaryHeadCIFAR(cell_result, C_prev, class_num)
+
+ global_P = fluid.layers.pool2d(input=cell_results[-1], pool_size=8, pool_type='avg', pool_stride=1)
+ predicts = fluid.layers.fc(input=global_P, size=class_num, act='softmax')
+ print ('predict-shape : {:}'.format(predicts.shape))
+ if auxiliary_pred is None:
+ return predicts
+ else:
+ return [predicts, auxiliary_pred]
diff --git a/others/GDAS/paddlepaddle/lib/models/operations.py b/others/GDAS/paddlepaddle/lib/models/operations.py
new file mode 100644
index 0000000..cbfe2b3
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/models/operations.py
@@ -0,0 +1,91 @@
+import paddle
+import paddle.fluid as fluid
+
+
+OPS = {
+ 'none' : lambda inputs, C_in, C_out, stride: ZERO(inputs, stride),
+ 'avg_pool_3x3' : lambda inputs, C_in, C_out, stride: POOL_3x3(inputs, C_in, C_out, stride, 'avg'),
+ 'max_pool_3x3' : lambda inputs, C_in, C_out, stride: POOL_3x3(inputs, C_in, C_out, stride, 'max'),
+ 'skip_connect' : lambda inputs, C_in, C_out, stride: Identity(inputs, C_in, C_out, stride),
+ 'sep_conv_3x3' : lambda inputs, C_in, C_out, stride: SepConv(inputs, C_in, C_out, 3, stride, 1),
+ 'sep_conv_5x5' : lambda inputs, C_in, C_out, stride: SepConv(inputs, C_in, C_out, 5, stride, 2),
+ 'sep_conv_7x7' : lambda inputs, C_in, C_out, stride: SepConv(inputs, C_in, C_out, 7, stride, 3),
+ 'dil_conv_3x3' : lambda inputs, C_in, C_out, stride: DilConv(inputs, C_in, C_out, 3, stride, 2, 2),
+ 'dil_conv_5x5' : lambda inputs, C_in, C_out, stride: DilConv(inputs, C_in, C_out, 5, stride, 4, 2),
+ 'conv_3x1_1x3' : lambda inputs, C_in, C_out, stride: Conv313(inputs, C_in, C_out, stride),
+ 'conv_7x1_1x7' : lambda inputs, C_in, C_out, stride: Conv717(inputs, C_in, C_out, stride),
+}
+
+
+def ReLUConvBN(inputs, C_in, C_out, kernel, stride, padding):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_out, stride=stride, padding=padding, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
+
+
+def ZERO(inputs, stride):
+ if stride == 1:
+ return inputs * 0
+ elif stride == 2:
+    return fluid.layers.pool2d(inputs, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') * 0
+ else:
+ raise ValueError('invalid stride of {:} not [1, 2]'.format(stride))
+
+
+def Identity(inputs, C_in, C_out, stride):
+ if C_in == C_out and stride == 1:
+ return inputs
+ elif stride == 1:
+ return ReLUConvBN(inputs, C_in, C_out, 1, 1, 0)
+ else:
+ temp1 = fluid.layers.relu(inputs)
+ temp2 = fluid.layers.pad2d(input=temp1, paddings=[0, 1, 0, 1], mode='reflect')
+ temp2 = fluid.layers.slice(temp2, axes=[0, 1, 2, 3], starts=[0, 0, 1, 1], ends=[999, 999, 999, 999])
+ temp1 = fluid.layers.conv2d(temp1, filter_size=1, num_filters=C_out//2, stride=stride, padding=0, act=None, bias_attr=False)
+ temp2 = fluid.layers.conv2d(temp2, filter_size=1, num_filters=C_out-C_out//2, stride=stride, padding=0, act=None, bias_attr=False)
+ temp = fluid.layers.concat([temp1,temp2], axis=1)
+ return fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+
+
+def POOL_3x3(inputs, C_in, C_out, stride, mode):
+ if C_in == C_out:
+ xinputs = inputs
+ else:
+ xinputs = ReLUConvBN(inputs, C_in, C_out, 1, 1, 0)
+ return fluid.layers.pool2d(xinputs, pool_size=3, pool_stride=stride, pool_padding=1, pool_type=mode)
+
+
+def SepConv(inputs, C_in, C_out, kernel, stride, padding):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_in , stride=stride, padding=padding, act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size= 1, num_filters=C_in , stride= 1, padding= 0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act='relu', bias_attr=None)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_in , stride= 1, padding=padding, act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size= 1, num_filters=C_out, stride= 1, padding= 0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None , bias_attr=None)
+ return temp
+
+
+def DilConv(inputs, C_in, C_out, kernel, stride, padding, dilation):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_in , stride=stride, padding=padding, dilation=dilation, act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size= 1, num_filters=C_out, stride= 1, padding= 0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
+
+
+def Conv313(inputs, C_in, C_out, stride):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=(1,3), num_filters=C_out, stride=(1,stride), padding=(0,1), act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size=(3,1), num_filters=C_out, stride=(stride,1), padding=(1,0), act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
+
+
+def Conv717(inputs, C_in, C_out, stride):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=(1,7), num_filters=C_out, stride=(1,stride), padding=(0,3), act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size=(7,1), num_filters=C_out, stride=(stride,1), padding=(3,0), act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
diff --git a/others/GDAS/paddlepaddle/lib/models/resnet.py b/others/GDAS/paddlepaddle/lib/models/resnet.py
new file mode 100644
index 0000000..5c15fab
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/models/resnet.py
@@ -0,0 +1,65 @@
+import paddle
+import paddle.fluid as fluid
+
+
+def conv_bn_layer(input,
+ ch_out,
+ filter_size,
+ stride,
+ padding,
+ act='relu',
+ bias_attr=False):
+ tmp = fluid.layers.conv2d(
+ input=input,
+ filter_size=filter_size,
+ num_filters=ch_out,
+ stride=stride,
+ padding=padding,
+ act=None,
+ bias_attr=bias_attr)
+ return fluid.layers.batch_norm(input=tmp, act=act)
+
+
+def shortcut(input, ch_in, ch_out, stride):
+ if stride == 2:
+ temp = fluid.layers.pool2d(input, pool_size=2, pool_type='avg', pool_stride=2)
+ temp = fluid.layers.conv2d(temp , filter_size=1, num_filters=ch_out, stride=1, padding=0, act=None, bias_attr=None)
+ return temp
+ elif ch_in != ch_out:
+ return conv_bn_layer(input, ch_out, 1, stride, 0, None, None)
+ else:
+ return input
+
+
+def basicblock(input, ch_in, ch_out, stride):
+ tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
+ tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
+ short = shortcut(input, ch_in, ch_out, stride)
+ return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
+
+
+def layer_warp(block_func, input, ch_in, ch_out, count, stride):
+ tmp = block_func(input, ch_in, ch_out, stride)
+ for i in range(1, count):
+ tmp = block_func(tmp, ch_out, ch_out, 1)
+ return tmp
+
+
+def resnet_cifar(ipt, depth, class_num):
+ # depth should be one of 20, 32, 44, 56, 110, 1202
+ assert (depth - 2) % 6 == 0
+ n = (depth - 2) // 6
+ print('[resnet] depth : {:}, class_num : {:}'.format(depth, class_num))
+ conv1 = conv_bn_layer(ipt, ch_out=16, filter_size=3, stride=1, padding=1)
+ print('conv-1 : shape = {:}'.format(conv1.shape))
+ res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
+ print('res--1 : shape = {:}'.format(res1.shape))
+ res2 = layer_warp(basicblock, res1 , 16, 32, n, 2)
+ print('res--2 : shape = {:}'.format(res2.shape))
+ res3 = layer_warp(basicblock, res2 , 32, 64, n, 2)
+ print('res--3 : shape = {:}'.format(res3.shape))
+ pool = fluid.layers.pool2d(input=res3, pool_size=8, pool_type='avg', pool_stride=1)
+ print('pool : shape = {:}'.format(pool.shape))
+ predict = fluid.layers.fc(input=pool, size=class_num, act='softmax')
+ print('predict: shape = {:}'.format(predict.shape))
+ return predict
diff --git a/others/GDAS/paddlepaddle/lib/utils/__init__.py b/others/GDAS/paddlepaddle/lib/utils/__init__.py
new file mode 100644
index 0000000..2c02373
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/utils/__init__.py
@@ -0,0 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .meter import AverageMeter
+from .time_utils import time_for_file, time_string, time_string_short, time_print, convert_size2str, convert_secs2time
+from .data_utils import reader_creator
diff --git a/others/GDAS/paddlepaddle/lib/utils/data_utils.py b/others/GDAS/paddlepaddle/lib/utils/data_utils.py
new file mode 100644
index 0000000..305c0e7
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/utils/data_utils.py
@@ -0,0 +1,64 @@
+import random, tarfile
+import numpy, six
+from six.moves import cPickle as pickle
+from PIL import Image, ImageOps
+
+
+def train_cifar_augmentation(image):
+ # flip
+ if random.random() < 0.5: image1 = image.transpose(Image.FLIP_LEFT_RIGHT)
+ else: image1 = image
+ # random crop
+ image2 = ImageOps.expand(image1, border=4, fill=0)
+ i = random.randint(0, 40 - 32)
+ j = random.randint(0, 40 - 32)
+ image3 = image2.crop((j,i,j+32,i+32))
+ # to numpy
+ image3 = numpy.array(image3) / 255.0
+ mean = numpy.array([x / 255 for x in [125.3, 123.0, 113.9]]).reshape(1, 1, 3)
+ std = numpy.array([x / 255 for x in [63.0, 62.1, 66.7]]).reshape(1, 1, 3)
+ return (image3 - mean) / std
+
+
+def valid_cifar_augmentation(image):
+ image3 = numpy.array(image) / 255.0
+ mean = numpy.array([x / 255 for x in [125.3, 123.0, 113.9]]).reshape(1, 1, 3)
+ std = numpy.array([x / 255 for x in [63.0, 62.1, 66.7]]).reshape(1, 1, 3)
+ return (image3 - mean) / std
+
+
+def reader_creator(filename, sub_name, is_train, cycle=False):
+ def read_batch(batch):
+ data = batch[six.b('data')]
+ labels = batch.get(
+ six.b('labels'), batch.get(six.b('fine_labels'), None))
+ assert labels is not None
+ for sample, label in six.moves.zip(data, labels):
+ sample = sample.reshape(3, 32, 32)
+ sample = sample.transpose((1, 2, 0))
+ image = Image.fromarray(sample)
+ if is_train:
+ ximage = train_cifar_augmentation(image)
+ else:
+ ximage = valid_cifar_augmentation(image)
+ ximage = ximage.transpose((2, 0, 1))
+ yield ximage.astype(numpy.float32), int(label)
+
+ def reader():
+ with tarfile.open(filename, mode='r') as f:
+      # Use a list (not a generator) so the names survive repeated passes when cycle=True.
+      names = [each_item.name for each_item in f if sub_name in each_item.name]
+
+ while True:
+ for name in names:
+ if six.PY2:
+ batch = pickle.load(f.extractfile(name))
+ else:
+ batch = pickle.load(
+ f.extractfile(name), encoding='bytes')
+ for item in read_batch(batch):
+ yield item
+ if not cycle:
+ break
+
+ return reader
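+
+# Usage sketch (sub_name matches member names inside the CIFAR archives):
+#   train_reader = reader_creator('cifar-10-python.tar.gz', 'data_batch', True)
+#   for image, label in train_reader():  # image: float32 CHW, label: int
+#     ...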
diff --git a/others/GDAS/paddlepaddle/lib/utils/meter.py b/others/GDAS/paddlepaddle/lib/utils/meter.py
new file mode 100644
index 0000000..603f233
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/utils/meter.py
@@ -0,0 +1,23 @@
+import time, sys
+import numpy as np
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0.0
+ self.avg = 0.0
+ self.sum = 0.0
+ self.count = 0.0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def __repr__(self):
+ return ('{name}(val={val}, avg={avg}, count={count})'.format(name=self.__class__.__name__, **self.__dict__))
diff --git a/others/GDAS/paddlepaddle/lib/utils/time_utils.py b/others/GDAS/paddlepaddle/lib/utils/time_utils.py
new file mode 100644
index 0000000..1e80287
--- /dev/null
+++ b/others/GDAS/paddlepaddle/lib/utils/time_utils.py
@@ -0,0 +1,46 @@
+import time, sys
+import numpy as np
+
+def time_for_file():
+ ISOTIMEFORMAT='%d-%h-at-%H-%M-%S'
+ return '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+
+def time_string():
+ ISOTIMEFORMAT='%Y-%m-%d %X'
+ string = '[{}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+ return string
+
+def time_string_short():
+ ISOTIMEFORMAT='%Y%m%d'
+ string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+ return string
+
+def time_print(string, is_print=True):
+ if (is_print):
+ print('{} : {}'.format(time_string(), string))
+
+def convert_size2str(torch_size):
+ dims = len(torch_size)
+ string = '['
+ for idim in range(dims):
+ string = string + ' {}'.format(torch_size[idim])
+ return string + ']'
+
+def convert_secs2time(epoch_time, return_str=False):
+ need_hour = int(epoch_time / 3600)
+ need_mins = int((epoch_time - 3600*need_hour) / 60)
+ need_secs = int(epoch_time - 3600*need_hour - 60*need_mins)
+  if return_str:
+    return '[{:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
+ else:
+ return need_hour, need_mins, need_secs
+
+def print_log(print_string, log):
+ #if isinstance(log, Logger): log.log('{:}'.format(print_string))
+ if hasattr(log, 'log'): log.log('{:}'.format(print_string))
+ else:
+ print("{:}".format(print_string))
+ if log is not None:
+ log.write('{:}\n'.format(print_string))
+ log.flush()
diff --git a/others/GDAS/paddlepaddle/scripts/base-train.sh b/others/GDAS/paddlepaddle/scripts/base-train.sh
new file mode 100644
index 0000000..f4eed75
--- /dev/null
+++ b/others/GDAS/paddlepaddle/scripts/base-train.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# bash ./scripts/base-train.sh 0 cifar-10 ResNet110
+echo script name: $0
+echo $# arguments
+if [ "$#" -ne 3 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 3 parameters for GPU and dataset and the-model-name"
+ exit 1
+fi
+if [ "$TORCH_HOME" = "" ]; then
+ echo "Must set TORCH_HOME envoriment variable for data dir saving"
+ exit 1
+else
+ echo "TORCH_HOME : $TORCH_HOME"
+fi
+
+GPU=$1
+dataset=$2
+model=$3
+
+save_dir=snapshots/${dataset}-${model}
+
+export FLAGS_fraction_of_gpu_memory_to_use="0.005"
+export FLAGS_free_idle_memory=True
+
+CUDA_VISIBLE_DEVICES=${GPU} python train_cifar.py \
+ --data_path $TORCH_HOME/cifar.python/${dataset}-python.tar.gz \
+ --log_dir ${save_dir} \
+ --dataset ${dataset} \
+ --model_name ${model} \
+ --lr 0.1 --epochs 300 --batch_size 256 --step_each_epoch 196
diff --git a/others/GDAS/paddlepaddle/scripts/train-nas.sh b/others/GDAS/paddlepaddle/scripts/train-nas.sh
new file mode 100644
index 0000000..e2bdde1
--- /dev/null
+++ b/others/GDAS/paddlepaddle/scripts/train-nas.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V1
+echo script name: $0
+echo $# arguments
+if [ "$#" -ne 3 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 3 parameters for GPU and dataset and the-model-name"
+ exit 1
+fi
+if [ "$TORCH_HOME" = "" ]; then
+ echo "Must set TORCH_HOME envoriment variable for data dir saving"
+ exit 1
+else
+ echo "TORCH_HOME : $TORCH_HOME"
+fi
+
+GPU=$1
+dataset=$2
+model=$3
+
+save_dir=snapshots/${dataset}-${model}
+
+export FLAGS_fraction_of_gpu_memory_to_use="0.005"
+export FLAGS_free_idle_memory=True
+
+CUDA_VISIBLE_DEVICES=${GPU} python train_cifar.py \
+ --data_path $TORCH_HOME/cifar.python/${dataset}-python.tar.gz \
+ --log_dir ${save_dir} \
+ --dataset ${dataset} \
+ --model_name ${model} \
+ --lr 0.025 --epochs 600 --batch_size 96 --step_each_epoch 521
diff --git a/others/GDAS/paddlepaddle/train_cifar.py b/others/GDAS/paddlepaddle/train_cifar.py
new file mode 100644
index 0000000..b501380
--- /dev/null
+++ b/others/GDAS/paddlepaddle/train_cifar.py
@@ -0,0 +1,189 @@
+import os, sys, numpy as np, argparse
+from pathlib import Path
+import paddle.fluid as fluid
+import math, time, paddle
+import paddle.fluid.layers.ops as ops
+#from tb_paddle import SummaryWriter
+
+lib_dir = (Path(__file__).parent / 'lib').resolve()
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from models import resnet_cifar, NASCifarNet, Networks
+from utils import AverageMeter, time_for_file, time_string, convert_secs2time
+from utils import reader_creator
+
+
+def inference_program(model_name, num_class):
+ # The image is 32 * 32 with RGB representation.
+ data_shape = [3, 32, 32]
+ images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+
+ if model_name == 'ResNet20':
+ predict = resnet_cifar(images, 20, num_class)
+ elif model_name == 'ResNet32':
+ predict = resnet_cifar(images, 32, num_class)
+ elif model_name == 'ResNet110':
+ predict = resnet_cifar(images, 110, num_class)
+ else:
+ predict = NASCifarNet(images, 36, 6, 3, num_class, Networks[model_name], True)
+ return predict
+
+
+def train_program(predict):
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ if isinstance(predict, (list, tuple)):
+ predict, aux_predict = predict
+ x_losses = fluid.layers.cross_entropy(input=predict, label=label)
+ aux_losses = fluid.layers.cross_entropy(input=aux_predict, label=label)
+ x_loss = fluid.layers.mean(x_losses)
+ aux_loss = fluid.layers.mean(aux_losses)
+ loss = x_loss + aux_loss * 0.4
+ accuracy = fluid.layers.accuracy(input=predict, label=label)
+ else:
+ losses = fluid.layers.cross_entropy(input=predict, label=label)
+ loss = fluid.layers.mean(losses)
+ accuracy = fluid.layers.accuracy(input=predict, label=label)
+ return [loss, accuracy]
+
+
+# Evaluate a program on the given reader (used here for the test set)
+def evaluation(program, reader, fetch_list, place):
+ feed_var_list = [program.global_block().var('pixel'), program.global_block().var('label')]
+ feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
+ test_exe = fluid.Executor(place)
+ losses, accuracies = AverageMeter(), AverageMeter()
+ for tid, test_data in enumerate(reader()):
+ loss, acc = test_exe.run(program=program, feed=feeder_test.feed(test_data), fetch_list=fetch_list)
+ losses.update(float(loss), len(test_data))
+ accuracies.update(float(acc)*100, len(test_data))
+ return losses.avg, accuracies.avg
+
+
+def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
+  """Cosine learning-rate decay with a linear warmup.
+  The rate is updated every mini-batch: it grows linearly from 0 to
+  `learning_rate` over the first 5 epochs, then decays as
+  lr = learning_rate * (cos(t * pi / (epochs * step_each_epoch)) + 1) / 2,
+  where t counts the steps taken after the warmup finished.
+  """
+ from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
+ from paddle.fluid.initializer import init_on_cpu
+ global_step = _decay_step_counter()
+ lr = fluid.layers.tensor.create_global_var(
+ shape=[1],
+ value=0.0,
+ dtype='float32',
+ persistable=True,
+ name="learning_rate")
+
+ warmup_epoch = fluid.layers.fill_constant(
+ shape=[1], dtype='float32', value=float(5), force_cpu=True)
+
+ with init_on_cpu():
+ epoch = ops.floor(global_step / step_each_epoch)
+ with fluid.layers.control_flow.Switch() as switch:
+ with switch.case(epoch < warmup_epoch):
+ decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch))
+ fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+ with switch.default():
+ decayed_lr = learning_rate * \
+ (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+ fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+ return lr
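+
+# A plain-Python sketch of the same schedule (5 warmup epochs, as above),
+# handy for sanity-checking the values the Fluid graph will produce:
+#   def lr_at(step, base_lr, step_each_epoch, epochs, warmup=5):
+#     if step // step_each_epoch < warmup:
+#       return base_lr * step / (step_each_epoch * warmup)
+#     t = step - warmup * step_each_epoch
+#     return base_lr * (math.cos(t * math.pi / (epochs * step_each_epoch)) + 1) / 2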
+
+
+def main(xargs):
+
+ save_dir = Path(xargs.log_dir) / time_for_file()
+ save_dir.mkdir(parents=True, exist_ok=True)
+
+ print ('save dir : {:}'.format(save_dir))
+ print ('xargs : {:}'.format(xargs))
+
+ if xargs.dataset == 'cifar-10':
+ train_data = reader_creator(xargs.data_path, 'data_batch', True , False)
+ test__data = reader_creator(xargs.data_path, 'test_batch', False, False)
+ class_num = 10
+ print ('create cifar-10 dataset')
+ elif xargs.dataset == 'cifar-100':
+ train_data = reader_creator(xargs.data_path, 'train', True , False)
+ test__data = reader_creator(xargs.data_path, 'test' , False, False)
+ class_num = 100
+ print ('create cifar-100 dataset')
+ else:
+ raise ValueError('invalid dataset : {:}'.format(xargs.dataset))
+
+ train_reader = paddle.batch(
+ paddle.reader.shuffle(train_data, buf_size=5000),
+ batch_size=xargs.batch_size)
+
+  # Reader for testing: a separate held-out dataset.
+ test_reader = paddle.batch(test__data, batch_size=xargs.batch_size)
+
+ place = fluid.CUDAPlace(0)
+
+ main_program = fluid.default_main_program()
+ star_program = fluid.default_startup_program()
+
+ # programs
+ predict = inference_program(xargs.model_name, class_num)
+ [loss, accuracy] = train_program(predict)
+ print ('training program setup done')
+ test_program = main_program.clone(for_test=True)
+ print ('testing program setup done')
+
+ #infer_writer = SummaryWriter( str(save_dir / 'infer') )
+ #infer_writer.add_paddle_graph(fluid_program=fluid.default_main_program(), verbose=True)
+ #infer_writer.close()
+ #print(test_program.to_string(True))
+
+ #learning_rate = fluid.layers.cosine_decay(learning_rate=xargs.lr, step_each_epoch=xargs.step_each_epoch, epochs=xargs.epochs)
+ #learning_rate = fluid.layers.cosine_decay(learning_rate=0.1, step_each_epoch=196, epochs=300)
+ learning_rate = cosine_decay_with_warmup(xargs.lr, xargs.step_each_epoch, xargs.epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=0.9,
+ regularization=fluid.regularizer.L2Decay(0.0005),
+ use_nesterov=True)
+ optimizer.minimize( loss )
+
+ exe = fluid.Executor(place)
+
+ feed_var_list_loop = [main_program.global_block().var('pixel'), main_program.global_block().var('label')]
+ feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
+ exe.run(star_program)
+
+ start_time, epoch_time = time.time(), AverageMeter()
+ for iepoch in range(xargs.epochs):
+ losses, accuracies, steps = AverageMeter(), AverageMeter(), 0
+ for step_id, train_data in enumerate(train_reader()):
+ tloss, tacc, xlr = exe.run(main_program, feed=feeder.feed(train_data), fetch_list=[loss, accuracy, learning_rate])
+ tloss, tacc, xlr = float(tloss), float(tacc) * 100, float(xlr)
+ steps += 1
+ losses.update(tloss, len(train_data))
+ accuracies.update(tacc, len(train_data))
+ if step_id % 100 == 0:
+ print('{:} [{:03d}/{:03d}] [{:03d}] lr = {:.7f}, loss = {:.4f} ({:.4f}), accuracy = {:.2f} ({:.2f}), error={:.2f}'.format(time_string(), iepoch, xargs.epochs, step_id, xlr, tloss, losses.avg, tacc, accuracies.avg, 100-accuracies.avg))
+ test_loss, test_acc = evaluation(test_program, test_reader, [loss, accuracy], place)
+ need_time = 'Time Left: {:}'.format( convert_secs2time(epoch_time.avg * (xargs.epochs-iepoch), True) )
+ print('{:}x[{:03d}/{:03d}] {:} train-loss = {:.4f}, train-accuracy = {:.2f}, test-loss = {:.4f}, test-accuracy = {:.2f} test-error = {:.2f} [{:} steps per epoch]\n'.format(time_string(), iepoch, xargs.epochs, need_time, losses.avg, accuracies.avg, test_loss, test_acc, 100-test_acc, steps))
+ if isinstance(predict, list):
+ fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], predict, exe)
+ else:
+ fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], [predict], exe)
+ # measure elapsed time
+ epoch_time.update(time.time() - start_time)
+ start_time = time.time()
+
+ print('finish training and evaluation with {:} epochs in {:}'.format(xargs.epochs, convert_secs2time(epoch_time.sum, True)))
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Train.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--log_dir' , type=str, help='Save dir.')
+ parser.add_argument('--dataset', type=str, help='The dataset name.')
+ parser.add_argument('--data_path', type=str, help='The dataset path.')
+ parser.add_argument('--model_name', type=str, help='The model name.')
+ parser.add_argument('--lr', type=float, help='The learning rate.')
+ parser.add_argument('--batch_size', type=int, help='The batch size.')
+  parser.add_argument('--step_each_epoch',type=int, help='The number of steps (mini-batches) per epoch.')
+ parser.add_argument('--epochs' , type=int, help='The total training epochs.')
+ args = parser.parse_args()
+ main(args)
diff --git a/others/GDAS/scripts-cluster/README.md b/others/GDAS/scripts-cluster/README.md
new file mode 100644
index 0000000..442f5fe
--- /dev/null
+++ b/others/GDAS/scripts-cluster/README.md
@@ -0,0 +1,14 @@
+# Commands on Cluster
+
+## RNN
+```
+bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 WT2-GDAS 1 "bash ./scripts-rnn/train-WT2.sh GDAS"
+bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 PTB-GDAS 1 "bash ./scripts-rnn/train-PTB.sh GDAS"
+```
+
+## CNN
+```
+bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 CIFAR10-CUT-GDAS-F1 1 "bash ./scripts-cnn/train-cifar.sh GDAS_F1 cifar10 cut"
+bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 IMAGENET-GDAS-F1 1 "bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14"
+bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 IMAGENET-GDAS-V1 1 "bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14"
+```
diff --git a/others/GDAS/scripts-cluster/job-script.sh b/others/GDAS/scripts-cluster/job-script.sh
new file mode 100644
index 0000000..fbf4828
--- /dev/null
+++ b/others/GDAS/scripts-cluster/job-script.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+echo "CHECK-DATA-DIR START"
+sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
+ COMM_KM_Data COMM_km_2018 \
+ `pwd`/hadoop-data \
+ afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
+
+export TORCH_HOME="./data/data/"
+tar -xf ./hadoop-data/cifar.python.tar -C ${TORCH_HOME}
+
+cifar_dir="${TORCH_HOME}/cifar.python"
+if [ -d ${cifar_dir} ]; then
+ echo "Find cifar-dir: "${cifar_dir}
+else
+ echo "Can not find cifar-dir: "${cifar_dir}
+ exit 1
+fi
+echo "CHECK-DATA-DIR DONE"
+
+PID=$$
+
+# config python
+PYTHON_ENV=py36_pytorch1.0_env0.1.3.tar.gz
+wget -e "http_proxy=cp01-sys-hic-gpu-02.cp01:8888" http://cp01-sys-hic-gpu-02.cp01/HGCP_DEMO/$PYTHON_ENV > screen.log 2>&1
+tar xzf $PYTHON_ENV
+
+echo "JOB-PID : "${PID}
+echo "JOB-PWD : "$(pwd)
+echo "JOB-files : "$(ls)
+echo "JOB-CUDA_VISIBLE_DEVICES: " ${CUDA_VISIBLE_DEVICES}
+
+./env/bin/python --version
+echo "JOB-TORCH_HOME: "${TORCH_HOME}
+
+# real commands (submit.sh appends the actual training command below this line)
diff --git a/others/GDAS/scripts-cluster/submit.sh b/others/GDAS/scripts-cluster/submit.sh
new file mode 100644
index 0000000..59f2017
--- /dev/null
+++ b/others/GDAS/scripts-cluster/submit.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# bash ./scripts-cluster/submit.sh ${QUEUE} ${JOB-NAME} ${GPUs} ${COMMAND}
+#find -name "._*" | xargs rm -rf
+ODIR=$(pwd)
+FDIR=$(cd $(dirname $0); pwd)
+echo "Bash-Dir : "${ODIR}
+echo "File-Dir : "${FDIR}
+echo "File-Name : "${0}
+
+if [ "$#" -ne 4 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 4 parameters for the queue-name, the job-name, and the number-of-GPUs"
+ exit 1
+fi
+find -name "__pycache__" | xargs rm -rf
+
+QUEUE=$1
+NAME=$2
+GPUs=$3
+CMD=$4
+TIME=$(date +"%Y-%h-%d--%T")
+TIME="${TIME//:/-}"
+
+JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh"
+HDFS_DIR="/user/COMM_KM_Data/${USER}/logs/alljobs/${NAME}-${TIME}"
+echo "JOB-SCRIPT: "${JOB_SCRIPT}
+
+cat ${FDIR}/job-script.sh > ${JOB_SCRIPT}
+echo ${CMD} >> ${JOB_SCRIPT}
+
+${HDP} -mkdir ${HDFS_DIR}
+echo "Create "${HDFS_DIR}" done!"
+sleep 1s
+
+HGCP_CLIENT_BIN="${HOME}/.hgcp/software-install/HGCP_client/bin"
+
+${HGCP_CLIENT_BIN}/submit \
+ --hdfs afs://xingtian.afs.baidu.com:9902 \
+ --hdfs-user COMM_KM_Data \
+ --hdfs-passwd COMM_km_2018 \
+ --hdfs-path ${HDFS_DIR} \
+ --file-dir ./ \
+ --job-name ${NAME} \
+ --queue-name ${QUEUE} \
+ --num-nodes 1 \
+ --num-task-pernode 1 \
+ --gpu-pnode ${GPUs} \
+ --time-limit 0 \
+ --job-script ${JOB_SCRIPT}
+
+#--job-script ${FDIR}/job-script.sh
+#echo "JOB-SCRIPT: " ${JOB_SCRIPT}
diff --git a/others/GDAS/scripts-cnn/train-cifar.sh b/others/GDAS/scripts-cnn/train-cifar.sh
new file mode 100644
index 0000000..255fe96
--- /dev/null
+++ b/others/GDAS/scripts-cnn/train-cifar.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env sh
+# bash scripts-cnn/train-cifar.sh GDAS cifar10 cut
+if [ "$#" -ne 3 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 3 parameters for the architecture, and the dataset-name, and the cutout"
+ exit 1
+fi
+if [ "$TORCH_HOME" = "" ]; then
+ echo "Must set TORCH_HOME envoriment variable for data dir saving"
+ exit 1
+else
+ echo "TORCH_HOME : $TORCH_HOME"
+fi
+
+arch=$1
+dataset=$2
+cutout=$3
+SAVED=./output/NAS-CNN/${arch}-${dataset}-${cutout}-E600
+
+PY_C="./env/bin/python"
+
+if [ ! -f ${PY_C} ]; then
+ echo "Local Run with Python: "`which python`
+ PY_C="python"
+else
+ echo "Cluster Run with Python: "${PY_C}
+fi
+
+${PY_C} --version
+
+${PY_C} ./exps-cnn/train_base.py \
+ --data_path $TORCH_HOME/cifar.python \
+ --dataset ${dataset} --arch ${arch} \
+ --save_path ${SAVED} \
+ --grad_clip 5 \
+ --init_channels 36 --layers 20 \
+ --model_config ./configs/nas-cifar-cos-${cutout}.config \
+ --print_freq 100 --workers 6
diff --git a/others/GDAS/scripts-cnn/train-imagenet.sh b/others/GDAS/scripts-cnn/train-imagenet.sh
new file mode 100644
index 0000000..5569656
--- /dev/null
+++ b/others/GDAS/scripts-cnn/train-imagenet.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env sh
+if [ "$#" -ne 5 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 5 parameters for the architecture, and the channel, and the layers, and the batch-size, and the seed"
+ exit 1
+fi
+if [ "$TORCH_HOME" = "" ]; then
+ echo "Must set TORCH_HOME envoriment variable for data dir saving"
+ exit 1
+else
+ echo "TORCH_HOME : $TORCH_HOME"
+fi
+
+arch=$1
+dataset=imagenet
+channels=$2
+layers=$3
+BATCH=$4
+seed=$5
+SAVED=./output/NAS-CNN/${arch}-${dataset}-C${channels}-L${layers}-${BATCH}-E250
+
+PY_C="./env/bin/python"
+#PY_C="$CONDA_PYTHON_EXE"
+
+if [ ! -f ${PY_C} ]; then
+ echo "Local Run with Python: "`which python`
+ PY_C="python"
+else
+ echo "Cluster Run with Python: "${PY_C}
+ echo "Unzip ILSVRC2012"
+ tar --version
+ tar -xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
+ #commands="./data/data/get_imagenet.sh"
+ #${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
+ #${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
+ #bash ./data/data/get_imagenet.sh
+ #count=0
+ #while read -r line; do
+ # temp_file="./data/data/TEMP-${count}.sh"
+ # echo "${line}" > ${temp_file}
+ # bash ${temp_file}
+ # count=$((count+1))
+ #${PY_C} ./data/ps_mem.py -p $$
+ # free -g
+ #done < "${commands}"
+ #wget http://10.127.2.44:8000/ILSVRC2012.tar --directory-prefix=${TORCH_HOME}
+ #${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
+ #count=0
+ #while read -r line; do
+ # temp_file="./data/data/TEMP-${count}.sh"
+ # echo "${line}" > ${temp_file}
+ # bash ${temp_file}
+ # count=$((count+1))
+ #${PY_C} ./data/ps_mem.py -p $$
+ # free -g
+ #done < "${commands}"
+ #echo "Copy ILSVRC2012 done"
+ #tar -xvf ${TORCH_HOME}/ILSVRC2012.tar -C ${TORCH_HOME}
+ #rm ${TORCH_HOME}/ILSVRC2012.tar
+ echo "Unzip ILSVRC2012 done"
+fi
+
+${PY_C} --version
+
+${PY_C} ./exps-cnn/train_base.py \
+ --data_path $TORCH_HOME/ILSVRC2012 \
+ --dataset ${dataset} --arch ${arch} \
+ --save_path ${SAVED} \
+ --grad_clip 5 \
+ --init_channels ${channels} --layers ${layers} \
+ --model_config ./configs/nas-imagenet-${BATCH}.config \
+ --manualSeed ${seed} \
+ --print_freq 200 --workers 20
diff --git a/others/GDAS/scripts-rnn/train-PTB.sh b/others/GDAS/scripts-rnn/train-PTB.sh
new file mode 100644
index 0000000..ff98115
--- /dev/null
+++ b/others/GDAS/scripts-rnn/train-PTB.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env sh
+if [ "$#" -ne 1 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 1 parameters for the GPU and the architecture"
+ exit 1
+fi
+
+arch=$1
+SAVED=./output/NAS-RNN/Search-${arch}-PTB
+PY_C="./env/bin/python"
+
+if [ ! -f ${PY_C} ]; then
+ echo "Local Run with Python: "`which python`
+ PY_C="python"
+else
+ echo "Cluster Run with Python: "${PY_C}
+fi
+
+${PY_C} --version
+
+${PY_C} ./exps-rnn/train_rnn_base.py \
+ --arch ${arch} \
+ --save_path ${SAVED} \
+ --config_path ./configs/NAS-PTB-BASE.config \
+ --print_freq 200
diff --git a/others/GDAS/scripts-rnn/train-WT2.sh b/others/GDAS/scripts-rnn/train-WT2.sh
new file mode 100644
index 0000000..8c11d7b
--- /dev/null
+++ b/others/GDAS/scripts-rnn/train-WT2.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+if [ "$#" -ne 1 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 1 parameters for the architectures"
+ exit 1
+fi
+
+arch=$1
+SAVED=./output/NAS-RNN/Search-${arch}-WT2
+PY_C="./env/bin/python"
+
+if [ ! -f ${PY_C} ]; then
+ echo "Local Run with Python: "`which python`
+ PY_C="python"
+else
+ echo "Cluster Run with Python: "${PY_C}
+fi
+
+${PY_C} --version
+
+${PY_C} ./exps-rnn/train_rnn_base.py \
+ --arch ${arch} \
+ --save_path ${SAVED} \
+ --config_path ./configs/NAS-WT2-BASE.config \
+ --print_freq 300
diff --git a/others/paddlepaddle/.gitignore b/others/paddlepaddle/.gitignore
new file mode 100644
index 0000000..ed615b6
--- /dev/null
+++ b/others/paddlepaddle/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.whl
+snapshots
diff --git a/others/paddlepaddle/README.md b/others/paddlepaddle/README.md
new file mode 100644
index 0000000..b6a1b01
--- /dev/null
+++ b/others/paddlepaddle/README.md
@@ -0,0 +1,118 @@
+# Image Classification based on NAS-Searched Models
+
+This directory contains 10 image classification models.
+Nine of them are models automatically discovered by different Neural Architecture Search (NAS) algorithms; the tenth is the classic residual network (ResNet).
+We provide codes and scripts to train these models on both CIFAR-10 and CIFAR-100.
+We use the standard data augmentation, i.e., random crop, random flip, and normalization.
+
+---
+## Table of Contents
+- [Installation](#installation)
+- [Data Preparation](#data-preparation)
+- [Training Models](#training-models)
+- [Project Structure](#project-structure)
+- [Citation](#citation)
+
+
+### Installation
+This project has the following requirements:
+- Python = 3.6
+- PaddlePaddle Fluid >= v0.15.0
+
+
+### Data Preparation
+Please download [CIFAR-10](https://dataset.bj.bcebos.com/cifar/cifar-10-python.tar.gz) and [CIFAR-100](https://dataset.bj.bcebos.com/cifar/cifar-100-python.tar.gz) before running the codes.
+Note that the MD5 checksum of the CIFAR-10 archive is `c58f30108f718f92721af3b95e74349a`, and that of the CIFAR-100 archive is `eb9058c3a382ffc7106e4002c42a8d85`.
+Please save both files into `${TORCH_HOME}/cifar.python`.
+After data preparation, there should be two files `${TORCH_HOME}/cifar.python/cifar-10-python.tar.gz` and `${TORCH_HOME}/cifar.python/cifar-100-python.tar.gz`.
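+
+To guard against corrupted downloads, one can verify these checksums before training. Below is a minimal sketch in Python (the paths assume the layout described above and the `TORCH_HOME` environment variable required by the scripts):
+```
+import hashlib, os
+
+def md5_of(path, chunk=1 << 20):
+  # stream the file in chunks to avoid loading the whole archive into memory
+  h = hashlib.md5()
+  with open(path, 'rb') as f:
+    for block in iter(lambda: f.read(chunk), b''):
+      h.update(block)
+  return h.hexdigest()
+
+root = os.path.join(os.environ['TORCH_HOME'], 'cifar.python')
+assert md5_of(os.path.join(root, 'cifar-10-python.tar.gz')) == 'c58f30108f718f92721af3b95e74349a'
+assert md5_of(os.path.join(root, 'cifar-100-python.tar.gz')) == 'eb9058c3a382ffc7106e4002c42a8d85'
+```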
+
+
+### Training Models
+
+After setting up the environment and preparing the data, one can train the models. The main entry point is `train_cifar.py`; we also provide scripts for easier usage.
+```
+bash ./scripts/base-train.sh 0 cifar-10 ResNet110
+bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V1
+bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V2
+bash ./scripts/train-nas.sh 0 cifar-10 SETN
+bash ./scripts/train-nas.sh 0 cifar-10 NASNet
+bash ./scripts/train-nas.sh 0 cifar-10 ENASNet
+bash ./scripts/train-nas.sh 0 cifar-10 AmoebaNet
+bash ./scripts/train-nas.sh 0 cifar-10 PNASNet
+bash ./scripts/train-nas.sh 0 cifar-100 SETN
+```
+The first argument is the GPU ID used for training, the second is the dataset name, and the last is the model name.
+Please use `./scripts/base-train.sh` for ResNet and use `./scripts/train-nas.sh` for NAS-searched models.
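+
+For reference, these scripts ultimately build networks through the genotype lookup table in `lib/models`. A minimal sketch of that path (assuming `lib` has been added to `sys.path`, as `train_cifar.py` arranges):
+```
+import paddle.fluid as fluid
+from models import NASCifarNet, Networks
+
+# a CIFAR-10 input and a NAS-searched model: 36 initial channels, 6 cells per stage,
+# stem multiplier 3, 10 classes; with auxiliary=True it returns [main, auxiliary] predictions
+images = fluid.layers.data(name='pixel', shape=[3, 32, 32], dtype='float32')
+predict = NASCifarNet(images, 36, 6, 3, 10, Networks['GDAS_V1'], True)
+```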
+
+
+### Project Structure
+```
+.
+├──train_cifar.py [Training CNN models]
+├──lib [Library for dataset, models, and others]
+│  ├──models
+│ ├──__init__.py [Import useful Classes and Functions in models]
+│ ├──resnet.py [Define the ResNet models]
+│ ├──operations.py [Define the atomic operation in NAS search space]
+│ ├──genotypes.py [Define the topological structure of different NAS-searched models]
+│ └──nas_net.py [Define the macro structure of NAS models]
+│ └──utils
+│ ├──__init__.py [Import useful Classes and Functions in utils]
+│ ├──meter.py [Define the AverageMeter class to count the accuracy and loss]
+│ ├──time_utils.py [Define some functions to print date or convert seconds into hours]
+│ └──data_utils.py [Define data augmentation functions and dataset reader for CIFAR]
+└──scripts [Scripts for running]
+```
+
+
+### Citation
+If you find that this project helps your research, please consider citing these papers:
+```
+@inproceedings{dong2019one,
+ title = {One-Shot Neural Architecture Search via Self-Evaluated Template Network},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
+ year = {2019}
+}
+@inproceedings{dong2019search,
+ title = {Searching for A Robust Neural Architecture in Four GPU Hours},
+ author = {Dong, Xuanyi and Yang, Yi},
+ booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ pages = {1761--1770},
+ year = {2019}
+}
+@inproceedings{liu2018darts,
+  title = {DARTS: Differentiable Architecture Search},
+  author = {Liu, Hanxiao and Simonyan, Karen and Yang, Yiming},
+  booktitle = {International Conference on Learning Representations (ICLR)},
+  year = {2019}
+}
+@inproceedings{pham2018efficient,
+ title = {Efficient Neural Architecture Search via Parameter Sharing},
+ author = {Pham, Hieu and Guan, Melody and Zoph, Barret and Le, Quoc and Dean, Jeff},
+ booktitle = {International Conference on Machine Learning (ICML)},
+ pages = {4092--4101},
+ year = {2018}
+}
+@inproceedings{liu2018progressive,
+ title = {Progressive neural architecture search},
+ author = {Liu, Chenxi and Zoph, Barret and Neumann, Maxim and Shlens, Jonathon and Hua, Wei and Li, Li-Jia and Fei-Fei, Li and Yuille, Alan and Huang, Jonathan and Murphy, Kevin},
+ booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
+ pages = {19--34},
+ year = {2018}
+}
+@inproceedings{zoph2018learning,
+ title = {Learning transferable architectures for scalable image recognition},
+ author = {Zoph, Barret and Vasudevan, Vijay and Shlens, Jonathon and Le, Quoc V},
+ booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ pages = {8697--8710},
+ year = {2018}
+}
+@inproceedings{real2019regularized,
+ title = {Regularized evolution for image classifier architecture search},
+ author = {Real, Esteban and Aggarwal, Alok and Huang, Yanping and Le, Quoc V},
+ booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
+ pages = {4780--4789},
+ year = {2019}
+}
+```
diff --git a/others/paddlepaddle/lib/models/__init__.py b/others/paddlepaddle/lib/models/__init__.py
new file mode 100644
index 0000000..0bebe0b
--- /dev/null
+++ b/others/paddlepaddle/lib/models/__init__.py
@@ -0,0 +1,3 @@
+from .genotypes import Networks
+from .nas_net import NASCifarNet
+from .resnet import resnet_cifar
diff --git a/others/paddlepaddle/lib/models/genotypes.py b/others/paddlepaddle/lib/models/genotypes.py
new file mode 100644
index 0000000..08f145f
--- /dev/null
+++ b/others/paddlepaddle/lib/models/genotypes.py
@@ -0,0 +1,175 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from collections import namedtuple
+
+Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
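+# A Genotype specifies a searched cell:
+#   normal / reduce : a list of steps; each step is a pair of (operation-name, input-index)
+#     edges whose outputs are summed. Index 0 and 1 refer to the two cell inputs, and
+#     index k+2 refers to the output of step k.
+#   normal_concat / reduce_concat : indices of the states concatenated to form the cell output.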
+
+
+# Learning Transferable Architectures for Scalable Image Recognition, CVPR 2018
+NASNet = Genotype(
+ normal = [
+ (('sep_conv_5x5', 1), ('sep_conv_3x3', 0)),
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 0)),
+ (('avg_pool_3x3', 1), ('skip_connect', 0)),
+ (('avg_pool_3x3', 0), ('avg_pool_3x3', 0)),
+ (('sep_conv_3x3', 1), ('skip_connect', 1)),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ (('sep_conv_5x5', 1), ('sep_conv_7x7', 0)),
+ (('max_pool_3x3', 1), ('sep_conv_7x7', 0)),
+ (('avg_pool_3x3', 1), ('sep_conv_5x5', 0)),
+ (('skip_connect', 3), ('avg_pool_3x3', 2)),
+ (('sep_conv_3x3', 2), ('max_pool_3x3', 1)),
+ ],
+ reduce_concat = [4, 5, 6],
+)
+
+
+# Progressive Neural Architecture Search, ECCV 2018
+PNASNet = Genotype(
+ normal = [
+ (('sep_conv_5x5', 0), ('max_pool_3x3', 0)),
+ (('sep_conv_7x7', 1), ('max_pool_3x3', 1)),
+ (('sep_conv_5x5', 1), ('sep_conv_3x3', 1)),
+ (('sep_conv_3x3', 4), ('max_pool_3x3', 1)),
+ (('sep_conv_3x3', 0), ('skip_connect', 1)),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ (('sep_conv_5x5', 0), ('max_pool_3x3', 0)),
+ (('sep_conv_7x7', 1), ('max_pool_3x3', 1)),
+ (('sep_conv_5x5', 1), ('sep_conv_3x3', 1)),
+ (('sep_conv_3x3', 4), ('max_pool_3x3', 1)),
+ (('sep_conv_3x3', 0), ('skip_connect', 1)),
+ ],
+ reduce_concat = [2, 3, 4, 5, 6],
+)
+
+
+# Regularized Evolution for Image Classifier Architecture Search, AAAI 2019
+AmoebaNet = Genotype(
+ normal = [
+ (('avg_pool_3x3', 0), ('max_pool_3x3', 1)),
+ (('sep_conv_3x3', 0), ('sep_conv_5x5', 2)),
+ (('sep_conv_3x3', 0), ('avg_pool_3x3', 3)),
+ (('sep_conv_3x3', 1), ('skip_connect', 1)),
+ (('skip_connect', 0), ('avg_pool_3x3', 1)),
+ ],
+ normal_concat = [4, 5, 6],
+ reduce = [
+ (('avg_pool_3x3', 0), ('sep_conv_3x3', 1)),
+ (('max_pool_3x3', 0), ('sep_conv_7x7', 2)),
+ (('sep_conv_7x7', 0), ('avg_pool_3x3', 1)),
+ (('max_pool_3x3', 0), ('max_pool_3x3', 1)),
+ (('conv_7x1_1x7', 0), ('sep_conv_3x3', 5)),
+ ],
+ reduce_concat = [3, 4, 6]
+)
+
+
+# Efficient Neural Architecture Search via Parameter Sharing, ICML 2018
+ENASNet = Genotype(
+ normal = [
+ (('sep_conv_3x3', 1), ('skip_connect', 1)),
+ (('sep_conv_5x5', 1), ('skip_connect', 0)),
+ (('avg_pool_3x3', 0), ('sep_conv_3x3', 1)),
+ (('sep_conv_3x3', 0), ('avg_pool_3x3', 1)),
+ (('sep_conv_5x5', 1), ('avg_pool_3x3', 0)),
+ ],
+ normal_concat = [2, 3, 4, 5, 6],
+ reduce = [
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 1)), # 2
+ (('sep_conv_3x3', 1), ('avg_pool_3x3', 1)), # 3
+ (('sep_conv_3x3', 1), ('avg_pool_3x3', 1)), # 4
+ (('avg_pool_3x3', 1), ('sep_conv_5x5', 4)), # 5
+ (('sep_conv_3x3', 5), ('sep_conv_5x5', 0)),
+ ],
+ reduce_concat = [2, 3, 4, 5, 6],
+)
+
+
+# DARTS: Differentiable Architecture Search, ICLR 2019
+DARTS_V1 = Genotype(
+ normal=[
+ (('sep_conv_3x3', 1), ('sep_conv_3x3', 0)), # step 1
+ (('skip_connect', 0), ('sep_conv_3x3', 1)), # step 2
+ (('skip_connect', 0), ('sep_conv_3x3', 1)), # step 3
+ (('sep_conv_3x3', 0), ('skip_connect', 2)) # step 4
+ ],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('max_pool_3x3', 0), ('max_pool_3x3', 1)), # step 1
+ (('skip_connect', 2), ('max_pool_3x3', 0)), # step 2
+ (('max_pool_3x3', 0), ('skip_connect', 2)), # step 3
+ (('skip_connect', 2), ('avg_pool_3x3', 0)) # step 4
+ ],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+# DARTS: Differentiable Architecture Search, ICLR 2019
+DARTS_V2 = Genotype(
+ normal=[
+ (('sep_conv_3x3', 0), ('sep_conv_3x3', 1)), # step 1
+ (('sep_conv_3x3', 0), ('sep_conv_3x3', 1)), # step 2
+ (('sep_conv_3x3', 1), ('skip_connect', 0)), # step 3
+ (('skip_connect', 0), ('dil_conv_3x3', 2)) # step 4
+ ],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('max_pool_3x3', 0), ('max_pool_3x3', 1)), # step 1
+ (('skip_connect', 2), ('max_pool_3x3', 1)), # step 2
+ (('max_pool_3x3', 0), ('skip_connect', 2)), # step 3
+ (('skip_connect', 2), ('max_pool_3x3', 1)) # step 4
+ ],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+
+# One-Shot Neural Architecture Search via Self-Evaluated Template Network, ICCV 2019
+SETN = Genotype(
+ normal=[
+ (('skip_connect', 0), ('sep_conv_5x5', 1)),
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 1)),
+ (('sep_conv_5x5', 1), ('sep_conv_5x5', 3)),
+ (('max_pool_3x3', 1), ('conv_3x1_1x3', 4))],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('sep_conv_3x3', 0), ('sep_conv_5x5', 1)),
+ (('avg_pool_3x3', 0), ('sep_conv_5x5', 1)),
+ (('avg_pool_3x3', 0), ('sep_conv_5x5', 1)),
+ (('avg_pool_3x3', 0), ('skip_connect', 1))],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
+# Searching for A Robust Neural Architecture in Four GPU Hours, CVPR 2019
+GDAS_V1 = Genotype(
+ normal=[
+ (('skip_connect', 0), ('skip_connect', 1)),
+ (('skip_connect', 0), ('sep_conv_5x5', 2)),
+ (('sep_conv_3x3', 3), ('skip_connect', 0)),
+ (('sep_conv_5x5', 4), ('sep_conv_3x3', 3))],
+ normal_concat=[2, 3, 4, 5],
+ reduce=[
+ (('sep_conv_5x5', 0), ('sep_conv_3x3', 1)),
+ (('sep_conv_5x5', 2), ('sep_conv_5x5', 1)),
+ (('dil_conv_5x5', 2), ('sep_conv_3x3', 1)),
+ (('sep_conv_5x5', 0), ('sep_conv_5x5', 1))],
+ reduce_concat=[2, 3, 4, 5],
+)
+
+
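+# name -> Genotype lookup table; train_cifar.py selects an architecture via --model_name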
+Networks = {'DARTS_V1' : DARTS_V1,
+ 'DARTS_V2' : DARTS_V2,
+ 'DARTS' : DARTS_V2,
+ 'NASNet' : NASNet,
+ 'ENASNet' : ENASNet,
+ 'AmoebaNet': AmoebaNet,
+ 'GDAS_V1' : GDAS_V1,
+ 'PNASNet' : PNASNet,
+ 'SETN' : SETN,
+ }
diff --git a/others/paddlepaddle/lib/models/nas_net.py b/others/paddlepaddle/lib/models/nas_net.py
new file mode 100644
index 0000000..10815c7
--- /dev/null
+++ b/others/paddlepaddle/lib/models/nas_net.py
@@ -0,0 +1,79 @@
+import paddle
+import paddle.fluid as fluid
+from .operations import OPS
+
+
+def AuxiliaryHeadCIFAR(inputs, C, class_num):
+ print ('AuxiliaryHeadCIFAR : inputs-shape : {:}'.format(inputs.shape))
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.pool2d(temp, pool_size=5, pool_stride=3, pool_padding=0, pool_type='avg')
+ temp = fluid.layers.conv2d(temp, filter_size=1, num_filters=128, stride=1, padding=0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act='relu', bias_attr=None)
+ temp = fluid.layers.conv2d(temp, filter_size=1, num_filters=768, stride=2, padding=0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act='relu', bias_attr=None)
+ print ('AuxiliaryHeadCIFAR : last---shape : {:}'.format(temp.shape))
+ predict = fluid.layers.fc(input=temp, size=class_num, act='softmax')
+ return predict
+
+
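+# build one cell (normal or reduction) from a genotype: each step sums the outputs of its
+# two chosen operations, and the selected intermediate states are concatenated as the output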
+def InferCell(name, inputs_prev_prev, inputs_prev, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
+ print ('[{:}] C_prev_prev={:} C_prev={:}, C={:}, reduction_prev={:}, reduction={:}'.format(name, C_prev_prev, C_prev, C, reduction_prev, reduction))
+ print ('inputs_prev_prev : {:}'.format(inputs_prev_prev.shape))
+ print ('inputs_prev : {:}'.format(inputs_prev.shape))
+ inputs_prev_prev = OPS['skip_connect'](inputs_prev_prev, C_prev_prev, C, 2 if reduction_prev else 1)
+ inputs_prev = OPS['skip_connect'](inputs_prev, C_prev, C, 1)
+ print ('inputs_prev_prev : {:}'.format(inputs_prev_prev.shape))
+ print ('inputs_prev : {:}'.format(inputs_prev.shape))
+ if reduction: step_ops, concat = genotype.reduce, genotype.reduce_concat
+ else : step_ops, concat = genotype.normal, genotype.normal_concat
+ states = [inputs_prev_prev, inputs_prev]
+ for istep, operations in enumerate(step_ops):
+ op_a, op_b = operations
+ # the first operation
+ #print ('-->>[{:}/{:}] [{:}] + [{:}]'.format(istep, len(step_ops), op_a, op_b))
+ stride = 2 if reduction and op_a[1] < 2 else 1
+ tensor1 = OPS[ op_a[0] ](states[op_a[1]], C, C, stride)
+ stride = 2 if reduction and op_b[1] < 2 else 1
+ tensor2 = OPS[ op_b[0] ](states[op_b[1]], C, C, stride)
+    assert tensor1.shape == tensor2.shape, 'invalid shape {:} vs. {:}'.format(tensor1.shape, tensor2.shape)
+    state = fluid.layers.elementwise_add(x=tensor1, y=tensor2, act=None)
+ print ('-->>[{:}/{:}] tensor={:} from {:} + {:}'.format(istep, len(step_ops), state.shape, tensor1.shape, tensor2.shape))
+ states.append( state )
+ states_to_cat = [states[x] for x in concat]
+ outputs = fluid.layers.concat(states_to_cat, axis=1)
+ print ('-->> output-shape : {:} from concat={:}'.format(outputs.shape, concat))
+ return outputs
+
+
+
+# NASCifarNet(inputs, 36, 6, 3, 10, 'xxx', True)
+def NASCifarNet(ipt, C, N, stem_multiplier, class_num, genotype, auxiliary):
+ # cifar head module
+ C_curr = stem_multiplier * C
+ stem = fluid.layers.conv2d(ipt, filter_size=3, num_filters=C_curr, stride=1, padding=1, act=None, bias_attr=False)
+ stem = fluid.layers.batch_norm(input=stem, act=None, bias_attr=None)
+ print ('stem-shape : {:}'.format(stem.shape))
+ # N + 1 + N + 1 + N cells
+ layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
+ layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
+ C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
+ reduction_prev = False
+ auxiliary_pred = None
+
+ cell_results = [stem, stem]
+ for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
+ xstr = '{:02d}/{:02d}'.format(index, len(layer_channels))
+ cell_result = InferCell(xstr, cell_results[-2], cell_results[-1], genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
+ reduction_prev = reduction
+ C_prev_prev, C_prev = C_prev, cell_result.shape[1]
+ cell_results.append( cell_result )
+ if auxiliary and reduction and C_curr == C*4:
+ auxiliary_pred = AuxiliaryHeadCIFAR(cell_result, C_prev, class_num)
+
+ global_P = fluid.layers.pool2d(input=cell_results[-1], pool_size=8, pool_type='avg', pool_stride=1)
+ predicts = fluid.layers.fc(input=global_P, size=class_num, act='softmax')
+ print ('predict-shape : {:}'.format(predicts.shape))
+ if auxiliary_pred is None:
+ return predicts
+ else:
+ return [predicts, auxiliary_pred]
diff --git a/others/paddlepaddle/lib/models/operations.py b/others/paddlepaddle/lib/models/operations.py
new file mode 100644
index 0000000..cbfe2b3
--- /dev/null
+++ b/others/paddlepaddle/lib/models/operations.py
@@ -0,0 +1,91 @@
+import paddle
+import paddle.fluid as fluid
+
+
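+# every entry maps an operation name to a builder with the uniform signature:
+#   (inputs, C_in, C_out, stride) -> output tensor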
+OPS = {
+ 'none' : lambda inputs, C_in, C_out, stride: ZERO(inputs, stride),
+ 'avg_pool_3x3' : lambda inputs, C_in, C_out, stride: POOL_3x3(inputs, C_in, C_out, stride, 'avg'),
+ 'max_pool_3x3' : lambda inputs, C_in, C_out, stride: POOL_3x3(inputs, C_in, C_out, stride, 'max'),
+ 'skip_connect' : lambda inputs, C_in, C_out, stride: Identity(inputs, C_in, C_out, stride),
+ 'sep_conv_3x3' : lambda inputs, C_in, C_out, stride: SepConv(inputs, C_in, C_out, 3, stride, 1),
+ 'sep_conv_5x5' : lambda inputs, C_in, C_out, stride: SepConv(inputs, C_in, C_out, 5, stride, 2),
+ 'sep_conv_7x7' : lambda inputs, C_in, C_out, stride: SepConv(inputs, C_in, C_out, 7, stride, 3),
+ 'dil_conv_3x3' : lambda inputs, C_in, C_out, stride: DilConv(inputs, C_in, C_out, 3, stride, 2, 2),
+ 'dil_conv_5x5' : lambda inputs, C_in, C_out, stride: DilConv(inputs, C_in, C_out, 5, stride, 4, 2),
+ 'conv_3x1_1x3' : lambda inputs, C_in, C_out, stride: Conv313(inputs, C_in, C_out, stride),
+ 'conv_7x1_1x7' : lambda inputs, C_in, C_out, stride: Conv717(inputs, C_in, C_out, stride),
+}
+
+
+def ReLUConvBN(inputs, C_in, C_out, kernel, stride, padding):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_out, stride=stride, padding=padding, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
+
+
+def ZERO(inputs, stride):
+ if stride == 1:
+ return inputs * 0
+ elif stride == 2:
+    return fluid.layers.pool2d(inputs, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') * 0
+ else:
+ raise ValueError('invalid stride of {:} not [1, 2]'.format(stride))
+
+
+def Identity(inputs, C_in, C_out, stride):
+ if C_in == C_out and stride == 1:
+ return inputs
+ elif stride == 1:
+ return ReLUConvBN(inputs, C_in, C_out, 1, 1, 0)
+ else:
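+    # factorized reduce for stride-2 skip connections: apply two stride-2 1x1 convolutions,
+    # one to the input and one to the input shifted by one pixel, then concatenate channel-wise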
+ temp1 = fluid.layers.relu(inputs)
+ temp2 = fluid.layers.pad2d(input=temp1, paddings=[0, 1, 0, 1], mode='reflect')
+ temp2 = fluid.layers.slice(temp2, axes=[0, 1, 2, 3], starts=[0, 0, 1, 1], ends=[999, 999, 999, 999])
+ temp1 = fluid.layers.conv2d(temp1, filter_size=1, num_filters=C_out//2, stride=stride, padding=0, act=None, bias_attr=False)
+ temp2 = fluid.layers.conv2d(temp2, filter_size=1, num_filters=C_out-C_out//2, stride=stride, padding=0, act=None, bias_attr=False)
+ temp = fluid.layers.concat([temp1,temp2], axis=1)
+ return fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+
+
+def POOL_3x3(inputs, C_in, C_out, stride, mode):
+ if C_in == C_out:
+ xinputs = inputs
+ else:
+ xinputs = ReLUConvBN(inputs, C_in, C_out, 1, 1, 0)
+ return fluid.layers.pool2d(xinputs, pool_size=3, pool_stride=stride, pool_padding=1, pool_type=mode)
+
+
+def SepConv(inputs, C_in, C_out, kernel, stride, padding):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_in , stride=stride, padding=padding, act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size= 1, num_filters=C_in , stride= 1, padding= 0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act='relu', bias_attr=None)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_in , stride= 1, padding=padding, act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size= 1, num_filters=C_out, stride= 1, padding= 0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None , bias_attr=None)
+ return temp
+
+
+def DilConv(inputs, C_in, C_out, kernel, stride, padding, dilation):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=kernel, num_filters=C_in , stride=stride, padding=padding, dilation=dilation, act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size= 1, num_filters=C_out, stride= 1, padding= 0, act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
+
+
+def Conv313(inputs, C_in, C_out, stride):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=(1,3), num_filters=C_out, stride=(1,stride), padding=(0,1), act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size=(3,1), num_filters=C_out, stride=(stride,1), padding=(1,0), act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
+
+
+def Conv717(inputs, C_in, C_out, stride):
+ temp = fluid.layers.relu(inputs)
+ temp = fluid.layers.conv2d(temp, filter_size=(1,7), num_filters=C_out, stride=(1,stride), padding=(0,3), act=None, bias_attr=False)
+ temp = fluid.layers.conv2d(temp, filter_size=(7,1), num_filters=C_out, stride=(stride,1), padding=(3,0), act=None, bias_attr=False)
+ temp = fluid.layers.batch_norm(input=temp, act=None, bias_attr=None)
+ return temp
diff --git a/others/paddlepaddle/lib/models/resnet.py b/others/paddlepaddle/lib/models/resnet.py
new file mode 100644
index 0000000..5c15fab
--- /dev/null
+++ b/others/paddlepaddle/lib/models/resnet.py
@@ -0,0 +1,65 @@
+import paddle
+import paddle.fluid as fluid
+
+
+def conv_bn_layer(input,
+ ch_out,
+ filter_size,
+ stride,
+ padding,
+ act='relu',
+ bias_attr=False):
+ tmp = fluid.layers.conv2d(
+ input=input,
+ filter_size=filter_size,
+ num_filters=ch_out,
+ stride=stride,
+ padding=padding,
+ act=None,
+ bias_attr=bias_attr)
+ return fluid.layers.batch_norm(input=tmp, act=act)
+
+
+def shortcut(input, ch_in, ch_out, stride):
+ if stride == 2:
+ temp = fluid.layers.pool2d(input, pool_size=2, pool_type='avg', pool_stride=2)
+ temp = fluid.layers.conv2d(temp , filter_size=1, num_filters=ch_out, stride=1, padding=0, act=None, bias_attr=None)
+ return temp
+ elif ch_in != ch_out:
+ return conv_bn_layer(input, ch_out, 1, stride, 0, None, None)
+ else:
+ return input
+
+
+def basicblock(input, ch_in, ch_out, stride):
+ tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
+ tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
+ short = shortcut(input, ch_in, ch_out, stride)
+ return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
+
+
+def layer_warp(block_func, input, ch_in, ch_out, count, stride):
+ tmp = block_func(input, ch_in, ch_out, stride)
+ for i in range(1, count):
+ tmp = block_func(tmp, ch_out, ch_out, 1)
+ return tmp
+
+
+def resnet_cifar(ipt, depth, class_num):
+ # depth should be one of 20, 32, 44, 56, 110, 1202
+ assert (depth - 2) % 6 == 0
+ n = (depth - 2) // 6
+ print('[resnet] depth : {:}, class_num : {:}'.format(depth, class_num))
+ conv1 = conv_bn_layer(ipt, ch_out=16, filter_size=3, stride=1, padding=1)
+ print('conv-1 : shape = {:}'.format(conv1.shape))
+ res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
+ print('res--1 : shape = {:}'.format(res1.shape))
+ res2 = layer_warp(basicblock, res1 , 16, 32, n, 2)
+ print('res--2 : shape = {:}'.format(res2.shape))
+ res3 = layer_warp(basicblock, res2 , 32, 64, n, 2)
+ print('res--3 : shape = {:}'.format(res3.shape))
+ pool = fluid.layers.pool2d(input=res3, pool_size=8, pool_type='avg', pool_stride=1)
+ print('pool : shape = {:}'.format(pool.shape))
+ predict = fluid.layers.fc(input=pool, size=class_num, act='softmax')
+ print('predict: shape = {:}'.format(predict.shape))
+ return predict
diff --git a/others/paddlepaddle/lib/utils/__init__.py b/others/paddlepaddle/lib/utils/__init__.py
new file mode 100644
index 0000000..2c02373
--- /dev/null
+++ b/others/paddlepaddle/lib/utils/__init__.py
@@ -0,0 +1,6 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .meter import AverageMeter
+from .time_utils import time_for_file, time_string, time_string_short, time_print, convert_size2str, convert_secs2time
+from .data_utils import reader_creator
diff --git a/others/paddlepaddle/lib/utils/data_utils.py b/others/paddlepaddle/lib/utils/data_utils.py
new file mode 100644
index 0000000..305c0e7
--- /dev/null
+++ b/others/paddlepaddle/lib/utils/data_utils.py
@@ -0,0 +1,64 @@
+import random, tarfile
+import numpy, six
+from six.moves import cPickle as pickle
+from PIL import Image, ImageOps
+
+
+def train_cifar_augmentation(image):
+ # flip
+ if random.random() < 0.5: image1 = image.transpose(Image.FLIP_LEFT_RIGHT)
+ else: image1 = image
+ # random crop
+ image2 = ImageOps.expand(image1, border=4, fill=0)
+ i = random.randint(0, 40 - 32)
+ j = random.randint(0, 40 - 32)
+ image3 = image2.crop((j,i,j+32,i+32))
+ # to numpy
+ image3 = numpy.array(image3) / 255.0
+ mean = numpy.array([x / 255 for x in [125.3, 123.0, 113.9]]).reshape(1, 1, 3)
+ std = numpy.array([x / 255 for x in [63.0, 62.1, 66.7]]).reshape(1, 1, 3)
+ return (image3 - mean) / std
+
+
+def valid_cifar_augmentation(image):
+ image3 = numpy.array(image) / 255.0
+ mean = numpy.array([x / 255 for x in [125.3, 123.0, 113.9]]).reshape(1, 1, 3)
+ std = numpy.array([x / 255 for x in [63.0, 62.1, 66.7]]).reshape(1, 1, 3)
+ return (image3 - mean) / std
+
+
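+# create a paddle-style reader that yields (image, label) samples directly from the
+# CIFAR tar.gz archive; sub_name selects the train/test batches stored inside it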
+def reader_creator(filename, sub_name, is_train, cycle=False):
+ def read_batch(batch):
+ data = batch[six.b('data')]
+ labels = batch.get(
+ six.b('labels'), batch.get(six.b('fine_labels'), None))
+ assert labels is not None
+ for sample, label in six.moves.zip(data, labels):
+ sample = sample.reshape(3, 32, 32)
+ sample = sample.transpose((1, 2, 0))
+ image = Image.fromarray(sample)
+ if is_train:
+ ximage = train_cifar_augmentation(image)
+ else:
+ ximage = valid_cifar_augmentation(image)
+ ximage = ximage.transpose((2, 0, 1))
+ yield ximage.astype(numpy.float32), int(label)
+
+ def reader():
+ with tarfile.open(filename, mode='r') as f:
+      # materialize the member list so it can be iterated more than once when cycle=True
+      names = [each_item.name for each_item in f
+               if sub_name in each_item.name]
+
+ while True:
+ for name in names:
+ if six.PY2:
+ batch = pickle.load(f.extractfile(name))
+ else:
+ batch = pickle.load(
+ f.extractfile(name), encoding='bytes')
+ for item in read_batch(batch):
+ yield item
+ if not cycle:
+ break
+
+ return reader
diff --git a/others/paddlepaddle/lib/utils/meter.py b/others/paddlepaddle/lib/utils/meter.py
new file mode 100644
index 0000000..1e3d02d
--- /dev/null
+++ b/others/paddlepaddle/lib/utils/meter.py
@@ -0,0 +1,26 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import time, sys
+import numpy as np
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0.0
+ self.avg = 0.0
+ self.sum = 0.0
+ self.count = 0.0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def __repr__(self):
+ return ('{name}(val={val}, avg={avg}, count={count})'.format(name=self.__class__.__name__, **self.__dict__))
diff --git a/others/paddlepaddle/lib/utils/time_utils.py b/others/paddlepaddle/lib/utils/time_utils.py
new file mode 100644
index 0000000..7886fcc
--- /dev/null
+++ b/others/paddlepaddle/lib/utils/time_utils.py
@@ -0,0 +1,52 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+#
+import time, sys
+import numpy as np
+
+def time_for_file():
+ ISOTIMEFORMAT='%d-%h-at-%H-%M-%S'
+ return '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+
+def time_string():
+ ISOTIMEFORMAT='%Y-%m-%d %X'
+ string = '[{}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+ return string
+
+def time_string_short():
+ ISOTIMEFORMAT='%Y%m%d'
+ string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
+ return string
+
+def time_print(string, is_print=True):
+ if (is_print):
+ print('{} : {}'.format(time_string(), string))
+
+def convert_size2str(torch_size):
+ dims = len(torch_size)
+ string = '['
+ for idim in range(dims):
+ string = string + ' {}'.format(torch_size[idim])
+ return string + ']'
+
+def convert_secs2time(epoch_time, return_str=False):
+ need_hour = int(epoch_time / 3600)
+ need_mins = int((epoch_time - 3600*need_hour) / 60)
+ need_secs = int(epoch_time - 3600*need_hour - 60*need_mins)
+ if return_str:
+    need_str = '[{:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
+    return need_str
+ else:
+ return need_hour, need_mins, need_secs
+
+def print_log(print_string, log):
+ #if isinstance(log, Logger): log.log('{:}'.format(print_string))
+ if hasattr(log, 'log'): log.log('{:}'.format(print_string))
+ else:
+ print("{:}".format(print_string))
+ if log is not None:
+ log.write('{:}\n'.format(print_string))
+ log.flush()
diff --git a/others/paddlepaddle/scripts/base-train.sh b/others/paddlepaddle/scripts/base-train.sh
new file mode 100644
index 0000000..f4eed75
--- /dev/null
+++ b/others/paddlepaddle/scripts/base-train.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# bash ./scripts/base-train.sh 0 cifar-10 ResNet110
+echo script name: $0
+echo $# arguments
+if [ "$#" -ne 3 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 3 parameters for GPU and dataset and the-model-name"
+ exit 1
+fi
+if [ "$TORCH_HOME" = "" ]; then
+ echo "Must set TORCH_HOME envoriment variable for data dir saving"
+ exit 1
+else
+ echo "TORCH_HOME : $TORCH_HOME"
+fi
+
+GPU=$1
+dataset=$2
+model=$3
+
+save_dir=snapshots/${dataset}-${model}
+
+export FLAGS_fraction_of_gpu_memory_to_use="0.005"
+export FLAGS_free_idle_memory=True
+
+CUDA_VISIBLE_DEVICES=${GPU} python train_cifar.py \
+ --data_path $TORCH_HOME/cifar.python/${dataset}-python.tar.gz \
+ --log_dir ${save_dir} \
+ --dataset ${dataset} \
+ --model_name ${model} \
+ --lr 0.1 --epochs 300 --batch_size 256 --step_each_epoch 196
diff --git a/others/paddlepaddle/scripts/train-nas.sh b/others/paddlepaddle/scripts/train-nas.sh
new file mode 100644
index 0000000..e2bdde1
--- /dev/null
+++ b/others/paddlepaddle/scripts/train-nas.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# bash ./scripts/train-nas.sh 0 cifar-10 GDAS_V1
+echo script name: $0
+echo $# arguments
+if [ "$#" -ne 3 ] ;then
+ echo "Input illegal number of parameters " $#
+ echo "Need 3 parameters for GPU and dataset and the-model-name"
+ exit 1
+fi
+if [ "$TORCH_HOME" = "" ]; then
+ echo "Must set TORCH_HOME envoriment variable for data dir saving"
+ exit 1
+else
+ echo "TORCH_HOME : $TORCH_HOME"
+fi
+
+GPU=$1
+dataset=$2
+model=$3
+
+save_dir=snapshots/${dataset}-${model}
+
+export FLAGS_fraction_of_gpu_memory_to_use="0.005"
+export FLAGS_free_idle_memory=True
+
+CUDA_VISIBLE_DEVICES=${GPU} python train_cifar.py \
+ --data_path $TORCH_HOME/cifar.python/${dataset}-python.tar.gz \
+ --log_dir ${save_dir} \
+ --dataset ${dataset} \
+ --model_name ${model} \
+ --lr 0.025 --epochs 600 --batch_size 96 --step_each_epoch 521
diff --git a/others/paddlepaddle/train_cifar.py b/others/paddlepaddle/train_cifar.py
new file mode 100644
index 0000000..b501380
--- /dev/null
+++ b/others/paddlepaddle/train_cifar.py
@@ -0,0 +1,189 @@
+import os, sys, numpy as np, argparse
+from pathlib import Path
+import paddle.fluid as fluid
+import math, time, paddle
+import paddle.fluid.layers.ops as ops
+#from tb_paddle import SummaryWriter
+
+lib_dir = (Path(__file__).parent / 'lib').resolve()
+if str(lib_dir) not in sys.path: sys.path.insert(0, str(lib_dir))
+from models import resnet_cifar, NASCifarNet, Networks
+from utils import AverageMeter, time_for_file, time_string, convert_secs2time
+from utils import reader_creator
+
+
+def inference_program(model_name, num_class):
+ # The image is 32 * 32 with RGB representation.
+ data_shape = [3, 32, 32]
+ images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+
+ if model_name == 'ResNet20':
+ predict = resnet_cifar(images, 20, num_class)
+ elif model_name == 'ResNet32':
+ predict = resnet_cifar(images, 32, num_class)
+ elif model_name == 'ResNet110':
+ predict = resnet_cifar(images, 110, num_class)
+ else:
+ predict = NASCifarNet(images, 36, 6, 3, num_class, Networks[model_name], True)
+ return predict
+
+
+def train_program(predict):
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ if isinstance(predict, (list, tuple)):
+ predict, aux_predict = predict
+ x_losses = fluid.layers.cross_entropy(input=predict, label=label)
+ aux_losses = fluid.layers.cross_entropy(input=aux_predict, label=label)
+ x_loss = fluid.layers.mean(x_losses)
+ aux_loss = fluid.layers.mean(aux_losses)
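+    # combine the two losses; the 0.4 auxiliary weight follows common practice for NASNet-style models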
+ loss = x_loss + aux_loss * 0.4
+ accuracy = fluid.layers.accuracy(input=predict, label=label)
+ else:
+ losses = fluid.layers.cross_entropy(input=predict, label=label)
+ loss = fluid.layers.mean(losses)
+ accuracy = fluid.layers.accuracy(input=predict, label=label)
+ return [loss, accuracy]
+
+
+# evaluate the average loss and accuracy of a program over a data reader
+def evaluation(program, reader, fetch_list, place):
+ feed_var_list = [program.global_block().var('pixel'), program.global_block().var('label')]
+ feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
+ test_exe = fluid.Executor(place)
+ losses, accuracies = AverageMeter(), AverageMeter()
+ for tid, test_data in enumerate(reader()):
+ loss, acc = test_exe.run(program=program, feed=feeder_test.feed(test_data), fetch_list=fetch_list)
+ losses.update(float(loss), len(test_data))
+ accuracies.update(float(acc)*100, len(test_data))
+ return losses.avg, accuracies.avg
+
+
+def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
+ """Applies cosine decay to the learning rate.
+ lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
+ decrease lr for every mini-batch and start with warmup.
+ """
+ from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
+ from paddle.fluid.initializer import init_on_cpu
+ global_step = _decay_step_counter()
+ lr = fluid.layers.tensor.create_global_var(
+ shape=[1],
+ value=0.0,
+ dtype='float32',
+ persistable=True,
+ name="learning_rate")
+
+ warmup_epoch = fluid.layers.fill_constant(
+ shape=[1], dtype='float32', value=float(5), force_cpu=True)
+
+ with init_on_cpu():
+ epoch = ops.floor(global_step / step_each_epoch)
+ with fluid.layers.control_flow.Switch() as switch:
+ with switch.case(epoch < warmup_epoch):
+ decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch))
+ fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+ with switch.default():
+ decayed_lr = learning_rate * \
+ (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+ fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+ return lr
+
+
+def main(xargs):
+
+ save_dir = Path(xargs.log_dir) / time_for_file()
+ save_dir.mkdir(parents=True, exist_ok=True)
+
+ print ('save dir : {:}'.format(save_dir))
+ print ('xargs : {:}'.format(xargs))
+
+ if xargs.dataset == 'cifar-10':
+ train_data = reader_creator(xargs.data_path, 'data_batch', True , False)
+ test__data = reader_creator(xargs.data_path, 'test_batch', False, False)
+ class_num = 10
+ print ('create cifar-10 dataset')
+ elif xargs.dataset == 'cifar-100':
+ train_data = reader_creator(xargs.data_path, 'train', True , False)
+ test__data = reader_creator(xargs.data_path, 'test' , False, False)
+ class_num = 100
+ print ('create cifar-100 dataset')
+ else:
+ raise ValueError('invalid dataset : {:}'.format(xargs.dataset))
+
+ train_reader = paddle.batch(
+ paddle.reader.shuffle(train_data, buf_size=5000),
+ batch_size=xargs.batch_size)
+
+  # reader for testing, built from a separate test set
+ test_reader = paddle.batch(test__data, batch_size=xargs.batch_size)
+
+ place = fluid.CUDAPlace(0)
+
+ main_program = fluid.default_main_program()
+ star_program = fluid.default_startup_program()
+
+ # programs
+ predict = inference_program(xargs.model_name, class_num)
+ [loss, accuracy] = train_program(predict)
+ print ('training program setup done')
+ test_program = main_program.clone(for_test=True)
+ print ('testing program setup done')
+
+ #infer_writer = SummaryWriter( str(save_dir / 'infer') )
+ #infer_writer.add_paddle_graph(fluid_program=fluid.default_main_program(), verbose=True)
+ #infer_writer.close()
+ #print(test_program.to_string(True))
+
+ #learning_rate = fluid.layers.cosine_decay(learning_rate=xargs.lr, step_each_epoch=xargs.step_each_epoch, epochs=xargs.epochs)
+ #learning_rate = fluid.layers.cosine_decay(learning_rate=0.1, step_each_epoch=196, epochs=300)
+ learning_rate = cosine_decay_with_warmup(xargs.lr, xargs.step_each_epoch, xargs.epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=0.9,
+ regularization=fluid.regularizer.L2Decay(0.0005),
+ use_nesterov=True)
+ optimizer.minimize( loss )
+
+ exe = fluid.Executor(place)
+
+ feed_var_list_loop = [main_program.global_block().var('pixel'), main_program.global_block().var('label')]
+ feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
+ exe.run(star_program)
+
+ start_time, epoch_time = time.time(), AverageMeter()
+ for iepoch in range(xargs.epochs):
+ losses, accuracies, steps = AverageMeter(), AverageMeter(), 0
+ for step_id, train_data in enumerate(train_reader()):
+ tloss, tacc, xlr = exe.run(main_program, feed=feeder.feed(train_data), fetch_list=[loss, accuracy, learning_rate])
+ tloss, tacc, xlr = float(tloss), float(tacc) * 100, float(xlr)
+ steps += 1
+ losses.update(tloss, len(train_data))
+ accuracies.update(tacc, len(train_data))
+ if step_id % 100 == 0:
+ print('{:} [{:03d}/{:03d}] [{:03d}] lr = {:.7f}, loss = {:.4f} ({:.4f}), accuracy = {:.2f} ({:.2f}), error={:.2f}'.format(time_string(), iepoch, xargs.epochs, step_id, xlr, tloss, losses.avg, tacc, accuracies.avg, 100-accuracies.avg))
+ test_loss, test_acc = evaluation(test_program, test_reader, [loss, accuracy], place)
+ need_time = 'Time Left: {:}'.format( convert_secs2time(epoch_time.avg * (xargs.epochs-iepoch), True) )
+ print('{:}x[{:03d}/{:03d}] {:} train-loss = {:.4f}, train-accuracy = {:.2f}, test-loss = {:.4f}, test-accuracy = {:.2f} test-error = {:.2f} [{:} steps per epoch]\n'.format(time_string(), iepoch, xargs.epochs, need_time, losses.avg, accuracies.avg, test_loss, test_acc, 100-test_acc, steps))
+ if isinstance(predict, list):
+ fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], predict, exe)
+ else:
+ fluid.io.save_inference_model(str(save_dir / 'inference_model'), ["pixel"], [predict], exe)
+ # measure elapsed time
+ epoch_time.update(time.time() - start_time)
+ start_time = time.time()
+
+ print('finish training and evaluation with {:} epochs in {:}'.format(xargs.epochs, convert_secs2time(epoch_time.sum, True)))
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Train.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--log_dir' , type=str, help='Save dir.')
+ parser.add_argument('--dataset', type=str, help='The dataset name.')
+ parser.add_argument('--data_path', type=str, help='The dataset path.')
+ parser.add_argument('--model_name', type=str, help='The model name.')
+ parser.add_argument('--lr', type=float, help='The learning rate.')
+ parser.add_argument('--batch_size', type=int, help='The batch size.')
+  parser.add_argument('--step_each_epoch',type=int, help='The number of training steps per epoch.')
+ parser.add_argument('--epochs' , type=int, help='The total training epochs.')
+ args = parser.parse_args()
+ main(args)