update NAS-Bench-102 baselines

D-X-Y 2019-12-25 10:30:50 +11:00
parent 44a0d51449
commit 1d5e8debad
5 changed files with 48 additions and 17 deletions

View File

@@ -62,11 +62,12 @@ class MyWorker(Worker):
def compute(self, config, budget, **kwargs):
structure = self.convert_func( config )
reward = train_and_eval(structure, self.nas_bench, None)
reward, time_cost = train_and_eval(structure, self.nas_bench, None)
import pdb; pdb.set_trace()
self.test_time += 1
return ({
'loss': float(100-reward),
'info': None})
'info': time_cost})
def main(xargs, nas_bench):
@@ -121,7 +122,7 @@ def main(xargs, nas_bench):
bohb = BOHB(configspace=cs,
run_id=hb_run_id,
eta=3, min_budget=3, max_budget=108,
eta=3, min_budget=3, max_budget=xargs.time_budget,
nameserver=ns_host,
nameserver_port=ns_port,
num_samples=xargs.num_samples,
@@ -130,6 +131,7 @@ def main(xargs, nas_bench):
# optimization_strategy=xargs.strategy, num_samples=xargs.num_samples,
results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
import pdb; pdb.set_trace()
bohb.shutdown(shutdown_workers=True)
NS.shutdown()
@@ -160,6 +162,7 @@ if __name__ == '__main__':
parser.add_argument('--max_nodes', type=int, help='The maximum number of nodes.')
parser.add_argument('--channel', type=int, help='The number of channels.')
parser.add_argument('--num_cells', type=int, help='The number of cells in one stage.')
parser.add_argument('--time_budget', type=int, help='The total time cost budget for searching (in seconds).')
# BOHB
parser.add_argument('--strategy', default="sampling", type=str, nargs='?', help='optimization strategy for the acquisition function')
parser.add_argument('--min_bandwidth', default=.3, type=float, nargs='?', help='minimum bandwidth for KDE')
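For orientation, a minimal sketch of what the updated worker amounts to: train_and_eval now returns both the accuracy and a simulated time cost, the loss handed to BOHB is 100 minus the accuracy, and the time cost is surfaced through the 'info' field. Only the hpbandster Worker interface below is real; evaluate() and TimedWorker are stand-ins for the script's train_and_eval and MyWorker.

import random
from hpbandster.core.worker import Worker

def evaluate(structure):
  """Stand-in for train_and_eval(structure, nas_bench, None): returns (accuracy %, time cost in s)."""
  return random.uniform(80.0, 95.0), random.uniform(50.0, 200.0)

class TimedWorker(Worker):
  def __init__(self, *args, convert_func=None, **kwargs):
    super().__init__(*args, **kwargs)
    self.convert_func = convert_func or (lambda config: config)  # config -> architecture
    self.test_time = 0

  def compute(self, config, budget, **kwargs):
    structure = self.convert_func(config)
    reward, time_cost = evaluate(structure)   # accuracy and simulated training time
    self.test_time += 1
    return {'loss': float(100 - reward),      # BOHB minimizes, so flip the accuracy
            'info': time_cost}                # report the time cost instead of None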

View File

@@ -82,6 +82,16 @@ def valid_func(xloader, network, criterion):
return arch_losses.avg, arch_top1.avg, arch_top5.avg
def search_find_best(valid_loader, network, criterion, select_num):
best_arch, best_acc = None, -1
for iarch in range(select_num):
arch = network.module.random_genotype( True )
valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(valid_loader, network, criterion)
if best_arch is None or best_acc < valid_a_top1:
best_arch, best_acc = arch, valid_a_top1
return best_arch
def main(xargs):
assert torch.cuda.is_available(), 'CUDA is not available.'
torch.backends.cudnn.enabled = True
@@ -143,6 +153,7 @@ def main(xargs):
last_info = torch.load(last_info)
start_epoch = last_info['epoch']
checkpoint = torch.load(last_info['last_checkpoint'])
genotypes = checkpoint['genotypes']
valid_accuracies = checkpoint['valid_accuracies']
search_model.load_state_dict( checkpoint['search_model'] )
w_scheduler.load_state_dict ( checkpoint['w_scheduler'] )
@@ -150,7 +161,7 @@ def main(xargs):
logger.log("=> loading checkpoint of the last-info '{:}' start with {:}-th epoch.".format(last_info, start_epoch))
else:
logger.log("=> do not find the last-info file : {:}".format(last_info))
start_epoch, valid_accuracies = 0, {'best': -1}
start_epoch, valid_accuracies, genotypes = 0, {'best': -1}, {}
# start training
start_time, search_time, epoch_time, total_epoch = time.time(), AverageMeter(), AverageMeter(), config.epochs + config.warmup
@@ -160,11 +171,14 @@ def main(xargs):
epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
logger.log('\n[Search the {:}-th epoch] {:}, LR={:}'.format(epoch_str, need_time, min(w_scheduler.get_lr())))
# selected_arch = search_find_best(valid_loader, network, criterion, xargs.select_num)
search_w_loss, search_w_top1, search_w_top5 = search_func(search_loader, network, criterion, w_scheduler, w_optimizer, epoch_str, xargs.print_freq, logger)
search_time.update(time.time() - start_time)
logger.log('[{:}] searching : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'.format(epoch_str, search_w_loss, search_w_top1, search_w_top5, search_time.sum))
valid_a_loss , valid_a_top1 , valid_a_top5 = valid_func(valid_loader, network, criterion)
logger.log('[{:}] evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'.format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
cur_arch = search_find_best(valid_loader, network, criterion, xargs.select_num)
genotypes[epoch] = cur_arch
# check the best accuracy
valid_accuracies[epoch] = valid_a_top1
if valid_a_top1 > valid_accuracies['best']:
@@ -178,6 +192,7 @@ def main(xargs):
'search_model': search_model.state_dict(),
'w_optimizer' : w_optimizer.state_dict(),
'w_scheduler' : w_scheduler.state_dict(),
'genotypes' : genotypes,
'valid_accuracies' : valid_accuracies},
model_base_path, logger)
last_info = save_checkpoint({
@@ -188,6 +203,7 @@ def main(xargs):
if find_best:
logger.log('<<<--->>> The {:}-th epoch : find the highest validation accuracy : {:.2f}%.'.format(epoch_str, valid_a_top1))
copy_checkpoint(model_base_path, model_best_path, logger)
if api is not None: logger.log('{:}'.format(api.query_by_arch( genotypes[epoch] )))
# measure elapsed time
epoch_time.update(time.time() - start_time)
start_time = time.time()
@@ -202,7 +218,6 @@ def main(xargs):
logger.log('final evaluation [{:02d}/{:02d}] : {:} : accuracy={:.2f}%, loss={:.3f}'.format(iarch, xargs.select_num, arch, valid_a_top1, valid_a_loss))
if best_arch is None or best_acc < valid_a_top1:
best_arch, best_acc = arch, valid_a_top1
search_time.update(time.time() - start_time)
logger.log('RANDOM-NAS finds the best one : {:} with accuracy={:.2f}%, with {:.1f} s.'.format(best_arch, best_acc, search_time.sum))
if api is not None: logger.log('{:}'.format( api.query_by_arch(best_arch) ))
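The search_find_best routine added above follows a simple pattern: sample select_num random genotypes from the weight-sharing network, validate each one, and keep the most accurate. A self-contained sketch of that pattern, where random_genotype and validate are stubs standing in for network.module.random_genotype(True) and valid_func:

import random

def random_genotype():
  # stand-in for network.module.random_genotype(True), which also activates the
  # sampled architecture inside the shared network before validation
  return 'arch-{:d}'.format(random.randrange(100))

def validate(arch):
  # stand-in for the top-1 accuracy returned by valid_func(valid_loader, network, criterion)
  return random.uniform(80.0, 95.0)

def search_find_best(select_num):
  best_arch, best_acc = None, -1.0
  for _ in range(select_num):
    arch = random_genotype()
    top1 = validate(arch)
    if best_arch is None or best_acc < top1:
      best_arch, best_acc = arch, top1
  return best_arch, best_acc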

View File

@@ -17,7 +17,7 @@ from datasets import get_datasets, SearchDataset
from procedures import prepare_seed, prepare_logger, save_checkpoint, copy_checkpoint, get_optim_scheduler
from utils import get_model_infos, obtain_accuracy
from log_utils import AverageMeter, time_string, convert_secs2time
from nas_102_api import NASBench102API
from nas_102_api import NASBench102API as API
from models import CellStructure, get_search_spaces
from R_EA import train_and_eval
@@ -132,10 +132,18 @@ def main(xargs, nas_bench):
# REINFORCE
# attempts = 0
for istep in range(xargs.RL_steps):
logger.log('Will start searching with time budget of {:} s.'.format(xargs.time_budget))
total_steps, total_costs = 0, 0
#for istep in range(xargs.RL_steps):
while total_costs < xargs.time_budget:
start_time = time.time()
log_prob, action = select_action( policy )
arch = policy.generate_arch( action )
reward = train_and_eval(arch, nas_bench, extra_info)
reward, cost_time = train_and_eval(arch, nas_bench, extra_info)
# accumulate time
if total_costs + cost_time < xargs.time_budget:
total_costs += cost_time
else: break
baseline.update(reward)
# calculate loss
@@ -143,13 +151,15 @@ def main(xargs, nas_bench):
optimizer.zero_grad()
policy_loss.backward()
optimizer.step()
logger.log('step [{:3d}/{:3d}] : average-reward={:.3f} : policy_loss={:.4f} : {:}'.format(istep, xargs.RL_steps, baseline.value(), policy_loss.item(), policy.genotype()))
# accumulate time
total_costs += time.time() - start_time
total_steps += 1
logger.log('step [{:3d}] : average-reward={:.3f} : policy_loss={:.4f} : {:}'.format(total_steps, baseline.value(), policy_loss.item(), policy.genotype()))
#logger.log('----> {:}'.format(policy.arch_parameters))
logger.log('')
#logger.log('')
best_arch = policy.genotype()
logger.log('REINFORCE finish with {:} steps and {:.1f} s.'.format(total_steps, total_costs))
info = nas_bench.query_by_arch( best_arch )
if info is None: logger.log('Did not find this architecture : {:}.'.format(best_arch))
else : logger.log('{:}'.format(info))
@@ -169,8 +179,9 @@ if __name__ == '__main__':
parser.add_argument('--channel', type=int, help='The number of channels.')
parser.add_argument('--num_cells', type=int, help='The number of cells in one stage.')
parser.add_argument('--learning_rate', type=float, help='The learning rate for REINFORCE.')
parser.add_argument('--RL_steps', type=int, help='The steps for REINFORCE.')
#parser.add_argument('--RL_steps', type=int, help='The steps for REINFORCE.')
parser.add_argument('--EMA_momentum', type=float, help='The momentum value for EMA.')
parser.add_argument('--time_budget', type=int, help='The total time cost budget for searching (in seconds).')
# log
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
parser.add_argument('--save_dir', type=str, help='Folder to save checkpoints and log.')
@@ -183,7 +194,7 @@ if __name__ == '__main__':
nas_bench = None
else:
print ('{:} build NAS-Benchmark-API from {:}'.format(time_string(), args.arch_nas_dataset))
nas_bench = AANASBenchAPI(args.arch_nas_dataset)
nas_bench = API(args.arch_nas_dataset)
if args.rand_seed < 0:
save_dir, all_indexes, num = None, [], 500
for i in range(num):
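The key change in reinforce.py is replacing the fixed --RL_steps loop with one bounded by --time_budget, accumulating the simulated training time that train_and_eval now returns. A minimal, self-contained sketch of that budget-accounting pattern, with a stub evaluator in place of train_and_eval and the policy sampling and REINFORCE update elided:

import random

def fake_train_and_eval():
  # stand-in for train_and_eval(arch, nas_bench, extra_info): (accuracy %, time cost in s)
  return random.uniform(80.0, 95.0), random.uniform(50.0, 200.0)

def budgeted_search(time_budget):
  total_steps, total_costs, best_reward = 0, 0.0, -1.0
  while total_costs < time_budget:
    reward, cost_time = fake_train_and_eval()
    # count this step only if its simulated cost still fits within the budget
    if total_costs + cost_time >= time_budget:
      break
    total_costs += cost_time
    total_steps += 1
    best_reward = max(best_reward, reward)
  return best_reward, total_steps, total_costs

if __name__ == '__main__':
  print(budgeted_search(12000))   # 12000 s matches the --time_budget used in the launch scripts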

View File

@@ -34,5 +34,6 @@ OMP_NUM_THREADS=4 python ./exps/algos/BOHB.py \
--dataset ${dataset} --data_path ${data_path} \
--search_space_name ${space} \
--arch_nas_dataset ${TORCH_HOME}/NAS-Bench-102-v1_0-e61699.pth \
--n_iters 6 --num_samples 3 \
--time_budget 12000 \
--n_iters 100 --num_samples 4 --random_fraction 0 \
--workers 4 --print_freq 200 --rand_seed ${seed}

View File

@@ -34,5 +34,6 @@ OMP_NUM_THREADS=4 python ./exps/algos/reinforce.py \
--dataset ${dataset} --data_path ${data_path} \
--search_space_name ${space} \
--arch_nas_dataset ${TORCH_HOME}/NAS-Bench-102-v1_0-e61699.pth \
--learning_rate 0.001 --RL_steps 100 --EMA_momentum 0.9 \
--time_budget 12000 \
--learning_rate 0.001 --EMA_momentum 0.9 \
--workers 4 --print_freq 200 --rand_seed ${seed}