Update q-config and black for procedures/utils

D-X-Y 2021-03-07 03:09:47 +00:00
parent 349d9fcc9f
commit 55c9734c31
22 changed files with 1938 additions and 1390 deletions

View File

@ -147,5 +147,8 @@ If you find that this project helps your research, please consider citing the re
If you want to contribute to this repo, please see [CONTRIBUTING.md](.github/CONTRIBUTING.md).
In addition, please follow [CODE-OF-CONDUCT.md](.github/CODE-OF-CONDUCT.md).
We use [black](https://github.com/psf/black) as the Python code formatter.
Please run `black . -l 120`.
# License
The entire codebase is under the [MIT license](LICENSE.md).

View File

@ -0,0 +1,82 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market all
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: DNNModelPytorch
module_path: qlib.contrib.model.pytorch_nn
kwargs:
loss: mse
input_dim: 360
output_dim: 1
lr: 0.002
lr_decay: 0.96
lr_decay_steps: 100
optimizer: adam
max_steps: 8000
batch_size: 4096
GPU: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
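
For reference, a minimal sketch (not part of this commit; the file name and paths are illustrative) of how such a workflow YAML is consumed; the actual entry point in this repo is `exps/trading/baselines.py` below:

```python
# Illustrative only: load the YAML above and build the qlib objects it describes.
import yaml
import qlib
from qlib.utils import init_instance_by_config

with open("workflow_config_mlp_Alpha360.yaml") as f:  # hypothetical local path
    config = yaml.safe_load(f)  # YAML anchors (&market, &data_handler_config) resolve here

qlib.init(**config["qlib_init"])  # provider_uri + region
dataset = init_instance_by_config(config["task"]["dataset"])  # DatasetH with the Alpha360 handler
model = init_instance_by_config(config["task"]["model"])      # DNNModelPytorch
model.fit(dataset)
```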

View File

@ -0,0 +1,85 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market all
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors:
- class: RobustZScoreNorm
kwargs:
fields_group: feature
clip_outlier: true
- class: Fillna
kwargs:
fields_group: feature
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy.strategy
kwargs:
topk: 50
n_drop: 5
backtest:
verbose: False
limit_threshold: 0.095
account: 100000000
benchmark: *benchmark
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: SFM
module_path: qlib.contrib.model.pytorch_sfm
kwargs:
d_feat: 6
hidden_size: 64
output_dim: 32
freq_dim: 25
dropout_W: 0.5
dropout_U: 0.5
n_epochs: 20
lr: 1e-3
batch_size: 1600
early_stop: 20
eval_steps: 5
loss: mse
optimizer: adam
GPU: 0
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs: {}
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@ -4,6 +4,8 @@
# python exps/trading/baselines.py --alg GRU
# python exps/trading/baselines.py --alg LSTM
# python exps/trading/baselines.py --alg ALSTM
# python exps/trading/baselines.py --alg MLP
# python exps/trading/baselines.py --alg SFM
# python exps/trading/baselines.py --alg XGBoost
# python exps/trading/baselines.py --alg LightGBM
#####################################################
@ -17,6 +19,10 @@ lib_dir = (Path(__file__).parent / ".." / ".." / "lib").resolve()
if str(lib_dir) not in sys.path:
sys.path.insert(0, str(lib_dir))
from procedures.q_exps import update_gpu
from procedures.q_exps import update_market
from procedures.q_exps import run_exp
import qlib
from qlib.utils import init_instance_by_config
from qlib.workflow import R
@ -31,15 +37,19 @@ def retrieve_configs():
alg2names = OrderedDict()
alg2names["GRU"] = "workflow_config_gru_Alpha360.yaml"
alg2names["LSTM"] = "workflow_config_lstm_Alpha360.yaml"
alg2names["MLP"] = "workflow_config_mlp_Alpha360.yaml"
# A dual-stage attention-based recurrent neural network for time series prediction, IJCAI-2017
alg2names["ALSTM"] = "workflow_config_alstm_Alpha360.yaml"
# XGBoost: A Scalable Tree Boosting System, KDD-2016
alg2names["XGBoost"] = "workflow_config_xgboost_Alpha360.yaml"
# LightGBM: A Highly Efficient Gradient Boosting Decision Tree, NeurIPS-2017
alg2names["LightGBM"] = "workflow_config_lightgbm_Alpha360.yaml"
# State Frequency Memory (SFM): Stock Price Prediction via Discovering Multi-Frequency Trading Patterns, KDD-2017
alg2names["SFM"] = "workflow_config_sfm_Alpha360.yaml"
# find the yaml paths
alg2paths = OrderedDict()
print("Start retrieving the algorithm configurations")
for idx, (alg, name) in enumerate(alg2names.items()):
path = config_dir / name
assert path.exists(), "{:} does not exist.".format(path)
@ -48,56 +58,6 @@ def retrieve_configs():
return alg2paths
def update_gpu(config, gpu):
config = config.copy()
if "GPU" in config["task"]["model"]:
config["task"]["model"]["GPU"] = gpu
return config
def update_market(config, market):
config = config.copy()
config["market"] = market
config["data_handler_config"]["instruments"] = market
return config
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
# model initialization
print("")
print("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name, uri))
print("dataset={:}".format(dataset))
model = init_instance_by_config(task_config["model"])
# start exp
with R.start(experiment_name=experiment_name, recorder_name=recorder_name, uri=uri):
log_file = R.get_recorder().root_uri / "{:}.log".format(experiment_name)
set_log_basic_config(log_file)
# train model
R.log_params(**flatten_dict(task_config))
model.fit(dataset)
recorder = R.get_recorder()
R.save_objects(**{"model.pkl": model})
# generate records: prediction, backtest, and analysis
for record in task_config["record"]:
record = record.copy()
if record["class"] == "SignalRecord":
srconf = {"model": model, "dataset": dataset, "recorder": recorder}
record["kwargs"].update(srconf)
sr = init_instance_by_config(record)
sr.generate()
else:
rconf = {"recorder": recorder}
record["kwargs"].update(rconf)
ar = init_instance_by_config(record)
ar.generate()
def main(xargs, exp_yaml):
assert Path(exp_yaml).exists(), "{:} does not exist.".format(exp_yaml)

View File

@ -1,24 +1,35 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from .starts import prepare_seed, prepare_logger, get_machine_info, save_checkpoint, copy_checkpoint
from .starts import prepare_seed
from .starts import prepare_logger
from .starts import get_machine_info
from .starts import save_checkpoint
from .starts import copy_checkpoint
from .optimizers import get_optim_scheduler
from .funcs_nasbench import evaluate_for_seed as bench_evaluate_for_seed
from .funcs_nasbench import pure_evaluate as bench_pure_evaluate
from .funcs_nasbench import get_nas_bench_loaders
def get_procedures(procedure):
from .basic_main import basic_train, basic_valid
from .search_main import search_train, search_valid
from .search_main_v2 import search_train_v2
from .simple_KD_main import simple_KD_train, simple_KD_valid
train_funcs = {'basic' : basic_train, \
'search': search_train,'Simple-KD': simple_KD_train, \
'search-v2': search_train_v2}
valid_funcs = {'basic' : basic_valid, \
'search': search_valid,'Simple-KD': simple_KD_valid, \
'search-v2': search_valid}
train_funcs = {
"basic": basic_train,
"search": search_train,
"Simple-KD": simple_KD_train,
"search-v2": search_train_v2,
}
valid_funcs = {
"basic": basic_valid,
"search": search_valid,
"Simple-KD": simple_KD_valid,
"search-v2": search_valid,
}
train_func = train_funcs[procedure]
valid_func = valid_funcs[procedure]

View File

@ -7,48 +7,63 @@ from utils import obtain_accuracy
def basic_train(xloader, network, criterion, scheduler, optimizer, optim_config, extra_info, print_freq, logger):
loss, acc1, acc5 = procedure(xloader, network, criterion, scheduler, optimizer, 'train', optim_config, extra_info, print_freq, logger)
loss, acc1, acc5 = procedure(
xloader, network, criterion, scheduler, optimizer, "train", optim_config, extra_info, print_freq, logger
)
return loss, acc1, acc5
def basic_valid(xloader, network, criterion, optim_config, extra_info, print_freq, logger):
with torch.no_grad():
loss, acc1, acc5 = procedure(xloader, network, criterion, None, None, 'valid', None, extra_info, print_freq, logger)
loss, acc1, acc5 = procedure(
xloader, network, criterion, None, None, "valid", None, extra_info, print_freq, logger
)
return loss, acc1, acc5
def procedure(xloader, network, criterion, scheduler, optimizer, mode, config, extra_info, print_freq, logger):
data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
if mode == 'train':
data_time, batch_time, losses, top1, top5 = (
AverageMeter(),
AverageMeter(),
AverageMeter(),
AverageMeter(),
AverageMeter(),
)
if mode == "train":
network.train()
elif mode == 'valid':
elif mode == "valid":
network.eval()
else: raise ValueError("The mode is not right : {:}".format(mode))
else:
raise ValueError("The mode is not right : {:}".format(mode))
# logger.log('[{:5s}] config :: auxiliary={:}, message={:}'.format(mode, config.auxiliary if hasattr(config, 'auxiliary') else -1, network.module.get_message()))
logger.log('[{:5s}] config :: auxiliary={:}'.format(mode, config.auxiliary if hasattr(config, 'auxiliary') else -1))
logger.log(
"[{:5s}] config :: auxiliary={:}".format(mode, config.auxiliary if hasattr(config, "auxiliary") else -1)
)
end = time.time()
for i, (inputs, targets) in enumerate(xloader):
if mode == 'train': scheduler.update(None, 1.0 * i / len(xloader))
if mode == "train":
scheduler.update(None, 1.0 * i / len(xloader))
# measure data loading time
data_time.update(time.time() - end)
# calculate prediction and loss
targets = targets.cuda(non_blocking=True)
if mode == 'train': optimizer.zero_grad()
if mode == "train":
optimizer.zero_grad()
features, logits = network(inputs)
if isinstance(logits, list):
assert len(logits) == 2, 'logits must has {:} items instead of {:}'.format(2, len(logits))
assert len(logits) == 2, "logits must has {:} items instead of {:}".format(2, len(logits))
logits, logits_aux = logits
else:
logits, logits_aux = logits, None
loss = criterion(logits, targets)
if config is not None and hasattr(config, 'auxiliary') and config.auxiliary > 0:
if config is not None and hasattr(config, "auxiliary") and config.auxiliary > 0:
loss_aux = criterion(logits_aux, targets)
loss += config.auxiliary * loss_aux
if mode == 'train':
if mode == "train":
loss.backward()
optimizer.step()
@ -63,13 +78,25 @@ def procedure(xloader, network, criterion, scheduler, optimizer, mode, config, e
end = time.time()
if i % print_freq == 0 or (i + 1) == len(xloader):
Sstr = ' {:5s} '.format(mode.upper()) + time_string() + ' [{:}][{:03d}/{:03d}]'.format(extra_info, i, len(xloader))
Sstr = (
" {:5s} ".format(mode.upper())
+ time_string()
+ " [{:}][{:03d}/{:03d}]".format(extra_info, i, len(xloader))
)
if scheduler is not None:
Sstr += ' {:}'.format(scheduler.get_min_info())
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
Istr = 'Size={:}'.format(list(inputs.size()))
logger.log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Istr)
Sstr += " {:}".format(scheduler.get_min_info())
Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
batch_time=batch_time, data_time=data_time
)
Lstr = "Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
loss=losses, top1=top1, top5=top5
)
Istr = "Size={:}".format(list(inputs.size()))
logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Istr)
logger.log(' **{mode:5s}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(mode=mode.upper(), top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg))
logger.log(
" **{mode:5s}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}".format(
mode=mode.upper(), top1=top1, top5=top5, error1=100 - top1.avg, error5=100 - top5.avg, loss=losses.avg
)
)
return losses.avg, top1.avg, top5.avg

View File

@ -11,7 +11,7 @@ from log_utils import AverageMeter, time_string, convert_secs2time
from models import get_cell_based_tiny_net
__all__ = ['evaluate_for_seed', 'pure_evaluate', 'get_nas_bench_loaders']
__all__ = ["evaluate_for_seed", "pure_evaluate", "get_nas_bench_loaders"]
def pure_evaluate(xloader, network, criterion=torch.nn.CrossEntropyLoss()):
@ -38,28 +38,33 @@ def pure_evaluate(xloader, network, criterion=torch.nn.CrossEntropyLoss()):
top1.update(prec1.item(), inputs.size(0))
top5.update(prec5.item(), inputs.size(0))
end = time.time()
if len(latencies) > 2: latencies = latencies[1:]
if len(latencies) > 2:
latencies = latencies[1:]
return losses.avg, top1.avg, top5.avg, latencies
def procedure(xloader, network, criterion, scheduler, optimizer, mode: str):
losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
if mode == 'train' : network.train()
elif mode == 'valid': network.eval()
else: raise ValueError("The mode is not right : {:}".format(mode))
if mode == "train":
network.train()
elif mode == "valid":
network.eval()
else:
raise ValueError("The mode is not right : {:}".format(mode))
device = torch.cuda.current_device()
data_time, batch_time, end = AverageMeter(), AverageMeter(), time.time()
for i, (inputs, targets) in enumerate(xloader):
if mode == 'train': scheduler.update(None, 1.0 * i / len(xloader))
if mode == "train":
scheduler.update(None, 1.0 * i / len(xloader))
targets = targets.cuda(device=device, non_blocking=True)
if mode == 'train': optimizer.zero_grad()
if mode == "train":
optimizer.zero_grad()
# forward
features, logits = network(inputs)
loss = criterion(logits, targets)
# backward
if mode == 'train':
if mode == "train":
loss.backward()
optimizer.step()
# record loss and accuracy
@ -79,9 +84,9 @@ def evaluate_for_seed(arch_config, opt_config, train_loader, valid_loaders, seed
net = get_cell_based_tiny_net(arch_config)
# net = TinyNetwork(arch_config['channel'], arch_config['num_cells'], arch, config.class_num)
flop, param = get_model_infos(net, opt_config.xshape)
logger.log('Network : {:}'.format(net.get_message()), False)
logger.log('{:} Seed-------------------------- {:} --------------------------'.format(time_string(), seed))
logger.log('FLOP = {:} MB, Param = {:} MB'.format(flop, param))
logger.log("Network : {:}".format(net.get_message()), False)
logger.log("{:} Seed-------------------------- {:} --------------------------".format(time_string(), seed))
logger.log("FLOP = {:} MB, Param = {:} MB".format(flop, param))
# train and valid
optimizer, scheduler, criterion = get_optim_scheduler(net.parameters(), opt_config)
default_device = torch.cuda.current_device()
@ -94,7 +99,9 @@ def evaluate_for_seed(arch_config, opt_config, train_loader, valid_loaders, seed
for epoch in range(total_epoch):
scheduler.update(epoch, 0.0)
lr = min(scheduler.get_lr())
train_loss, train_acc1, train_acc5, train_tm = procedure(train_loader, network, criterion, scheduler, optimizer, 'train')
train_loss, train_acc1, train_acc5, train_tm = procedure(
train_loader, network, criterion, scheduler, optimizer, "train"
)
train_losses[epoch] = train_loss
train_acc1es[epoch] = train_acc1
train_acc5es[epoch] = train_acc5
@ -102,34 +109,51 @@ def evaluate_for_seed(arch_config, opt_config, train_loader, valid_loaders, seed
lrs[epoch] = lr
with torch.no_grad():
for key, xloder in valid_loaders.items():
valid_loss, valid_acc1, valid_acc5, valid_tm = procedure(xloder , network, criterion, None, None, 'valid')
valid_losses['{:}@{:}'.format(key,epoch)] = valid_loss
valid_acc1es['{:}@{:}'.format(key,epoch)] = valid_acc1
valid_acc5es['{:}@{:}'.format(key,epoch)] = valid_acc5
valid_times ['{:}@{:}'.format(key,epoch)] = valid_tm
valid_loss, valid_acc1, valid_acc5, valid_tm = procedure(
xloder, network, criterion, None, None, "valid"
)
valid_losses["{:}@{:}".format(key, epoch)] = valid_loss
valid_acc1es["{:}@{:}".format(key, epoch)] = valid_acc1
valid_acc5es["{:}@{:}".format(key, epoch)] = valid_acc5
valid_times["{:}@{:}".format(key, epoch)] = valid_tm
# measure elapsed time
epoch_time.update(time.time() - start_time)
start_time = time.time()
need_time = 'Time Left: {:}'.format( convert_secs2time(epoch_time.avg * (total_epoch-epoch-1), True) )
logger.log('{:} {:} epoch={:03d}/{:03d} :: Train [loss={:.5f}, acc@1={:.2f}%, acc@5={:.2f}%] Valid [loss={:.5f}, acc@1={:.2f}%, acc@5={:.2f}%], lr={:}'.format(time_string(), need_time, epoch, total_epoch, train_loss, train_acc1, train_acc5, valid_loss, valid_acc1, valid_acc5, lr))
info_seed = {'flop' : flop,
'param': param,
'arch_config' : arch_config._asdict(),
'opt_config' : opt_config._asdict(),
'total_epoch' : total_epoch ,
'train_losses': train_losses,
'train_acc1es': train_acc1es,
'train_acc5es': train_acc5es,
'train_times' : train_times,
'valid_losses': valid_losses,
'valid_acc1es': valid_acc1es,
'valid_acc5es': valid_acc5es,
'valid_times' : valid_times,
'learning_rates': lrs,
'net_state_dict': net.state_dict(),
'net_string' : '{:}'.format(net),
'finish-train': True
need_time = "Time Left: {:}".format(convert_secs2time(epoch_time.avg * (total_epoch - epoch - 1), True))
logger.log(
"{:} {:} epoch={:03d}/{:03d} :: Train [loss={:.5f}, acc@1={:.2f}%, acc@5={:.2f}%] Valid [loss={:.5f}, acc@1={:.2f}%, acc@5={:.2f}%], lr={:}".format(
time_string(),
need_time,
epoch,
total_epoch,
train_loss,
train_acc1,
train_acc5,
valid_loss,
valid_acc1,
valid_acc5,
lr,
)
)
info_seed = {
"flop": flop,
"param": param,
"arch_config": arch_config._asdict(),
"opt_config": opt_config._asdict(),
"total_epoch": total_epoch,
"train_losses": train_losses,
"train_acc1es": train_acc1es,
"train_acc5es": train_acc5es,
"train_times": train_times,
"valid_losses": valid_losses,
"valid_acc1es": valid_acc1es,
"valid_acc5es": valid_acc5es,
"valid_times": valid_times,
"learning_rates": lrs,
"net_state_dict": net.state_dict(),
"net_string": "{:}".format(net),
"finish-train": True,
}
return info_seed
@ -138,66 +162,191 @@ def get_nas_bench_loaders(workers):
torch.set_num_threads(workers)
root_dir = (pathlib.Path(__file__).parent / '..' / '..').resolve()
torch_dir = pathlib.Path(os.environ['TORCH_HOME'])
root_dir = (pathlib.Path(__file__).parent / ".." / "..").resolve()
torch_dir = pathlib.Path(os.environ["TORCH_HOME"])
# cifar
cifar_config_path = root_dir / 'configs' / 'nas-benchmark' / 'CIFAR.config'
cifar_config_path = root_dir / "configs" / "nas-benchmark" / "CIFAR.config"
cifar_config = load_config(cifar_config_path, None, None)
get_datasets = datasets.get_datasets # a function to return the dataset
break_line = '-' * 150
print ('{:} Create data-loader for all datasets'.format(time_string()))
break_line = "-" * 150
print("{:} Create data-loader for all datasets".format(time_string()))
print(break_line)
TRAIN_CIFAR10, VALID_CIFAR10, xshape, class_num = get_datasets('cifar10', str(torch_dir/'cifar.python'), -1)
print ('original CIFAR-10 : {:} training images and {:} test images : {:} input shape : {:} number of classes'.format(len(TRAIN_CIFAR10), len(VALID_CIFAR10), xshape, class_num))
cifar10_splits = load_config(root_dir / 'configs' / 'nas-benchmark' / 'cifar-split.txt', None, None)
assert cifar10_splits.train[:10] == [0, 5, 7, 11, 13, 15, 16, 17, 20, 24] and cifar10_splits.valid[:10] == [1, 2, 3, 4, 6, 8, 9, 10, 12, 14]
TRAIN_CIFAR10, VALID_CIFAR10, xshape, class_num = get_datasets("cifar10", str(torch_dir / "cifar.python"), -1)
print(
"original CIFAR-10 : {:} training images and {:} test images : {:} input shape : {:} number of classes".format(
len(TRAIN_CIFAR10), len(VALID_CIFAR10), xshape, class_num
)
)
cifar10_splits = load_config(root_dir / "configs" / "nas-benchmark" / "cifar-split.txt", None, None)
assert cifar10_splits.train[:10] == [0, 5, 7, 11, 13, 15, 16, 17, 20, 24] and cifar10_splits.valid[:10] == [
1,
2,
3,
4,
6,
8,
9,
10,
12,
14,
]
temp_dataset = copy.deepcopy(TRAIN_CIFAR10)
temp_dataset.transform = VALID_CIFAR10.transform
# data loader
trainval_cifar10_loader = torch.utils.data.DataLoader(TRAIN_CIFAR10, batch_size=cifar_config.batch_size, shuffle=True , num_workers=workers, pin_memory=True)
train_cifar10_loader = torch.utils.data.DataLoader(TRAIN_CIFAR10, batch_size=cifar_config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar10_splits.train), num_workers=workers, pin_memory=True)
valid_cifar10_loader = torch.utils.data.DataLoader(temp_dataset , batch_size=cifar_config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar10_splits.valid), num_workers=workers, pin_memory=True)
test__cifar10_loader = torch.utils.data.DataLoader(VALID_CIFAR10, batch_size=cifar_config.batch_size, shuffle=False, num_workers=workers, pin_memory=True)
print ('CIFAR-10 : trval-loader has {:3d} batch with {:} per batch'.format(len(trainval_cifar10_loader), cifar_config.batch_size))
print ('CIFAR-10 : train-loader has {:3d} batch with {:} per batch'.format(len(train_cifar10_loader), cifar_config.batch_size))
print ('CIFAR-10 : valid-loader has {:3d} batch with {:} per batch'.format(len(valid_cifar10_loader), cifar_config.batch_size))
print ('CIFAR-10 : test--loader has {:3d} batch with {:} per batch'.format(len(test__cifar10_loader), cifar_config.batch_size))
trainval_cifar10_loader = torch.utils.data.DataLoader(
TRAIN_CIFAR10, batch_size=cifar_config.batch_size, shuffle=True, num_workers=workers, pin_memory=True
)
train_cifar10_loader = torch.utils.data.DataLoader(
TRAIN_CIFAR10,
batch_size=cifar_config.batch_size,
sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar10_splits.train),
num_workers=workers,
pin_memory=True,
)
valid_cifar10_loader = torch.utils.data.DataLoader(
temp_dataset,
batch_size=cifar_config.batch_size,
sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar10_splits.valid),
num_workers=workers,
pin_memory=True,
)
test__cifar10_loader = torch.utils.data.DataLoader(
VALID_CIFAR10, batch_size=cifar_config.batch_size, shuffle=False, num_workers=workers, pin_memory=True
)
print(
"CIFAR-10 : trval-loader has {:3d} batch with {:} per batch".format(
len(trainval_cifar10_loader), cifar_config.batch_size
)
)
print(
"CIFAR-10 : train-loader has {:3d} batch with {:} per batch".format(
len(train_cifar10_loader), cifar_config.batch_size
)
)
print(
"CIFAR-10 : valid-loader has {:3d} batch with {:} per batch".format(
len(valid_cifar10_loader), cifar_config.batch_size
)
)
print(
"CIFAR-10 : test--loader has {:3d} batch with {:} per batch".format(
len(test__cifar10_loader), cifar_config.batch_size
)
)
print(break_line)
# CIFAR-100
TRAIN_CIFAR100, VALID_CIFAR100, xshape, class_num = get_datasets('cifar100', str(torch_dir/'cifar.python'), -1)
print ('original CIFAR-100: {:} training images and {:} test images : {:} input shape : {:} number of classes'.format(len(TRAIN_CIFAR100), len(VALID_CIFAR100), xshape, class_num))
cifar100_splits = load_config(root_dir / 'configs' / 'nas-benchmark' / 'cifar100-test-split.txt', None, None)
assert cifar100_splits.xvalid[:10] == [1, 3, 4, 5, 8, 10, 13, 14, 15, 16] and cifar100_splits.xtest[:10] == [0, 2, 6, 7, 9, 11, 12, 17, 20, 24]
train_cifar100_loader = torch.utils.data.DataLoader(TRAIN_CIFAR100, batch_size=cifar_config.batch_size, shuffle=True, num_workers=workers, pin_memory=True)
valid_cifar100_loader = torch.utils.data.DataLoader(VALID_CIFAR100, batch_size=cifar_config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar100_splits.xvalid), num_workers=workers, pin_memory=True)
test__cifar100_loader = torch.utils.data.DataLoader(VALID_CIFAR100, batch_size=cifar_config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar100_splits.xtest) , num_workers=workers, pin_memory=True)
print ('CIFAR-100 : train-loader has {:3d} batch'.format(len(train_cifar100_loader)))
print ('CIFAR-100 : valid-loader has {:3d} batch'.format(len(valid_cifar100_loader)))
print ('CIFAR-100 : test--loader has {:3d} batch'.format(len(test__cifar100_loader)))
TRAIN_CIFAR100, VALID_CIFAR100, xshape, class_num = get_datasets("cifar100", str(torch_dir / "cifar.python"), -1)
print(
"original CIFAR-100: {:} training images and {:} test images : {:} input shape : {:} number of classes".format(
len(TRAIN_CIFAR100), len(VALID_CIFAR100), xshape, class_num
)
)
cifar100_splits = load_config(root_dir / "configs" / "nas-benchmark" / "cifar100-test-split.txt", None, None)
assert cifar100_splits.xvalid[:10] == [1, 3, 4, 5, 8, 10, 13, 14, 15, 16] and cifar100_splits.xtest[:10] == [
0,
2,
6,
7,
9,
11,
12,
17,
20,
24,
]
train_cifar100_loader = torch.utils.data.DataLoader(
TRAIN_CIFAR100, batch_size=cifar_config.batch_size, shuffle=True, num_workers=workers, pin_memory=True
)
valid_cifar100_loader = torch.utils.data.DataLoader(
VALID_CIFAR100,
batch_size=cifar_config.batch_size,
sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar100_splits.xvalid),
num_workers=workers,
pin_memory=True,
)
test__cifar100_loader = torch.utils.data.DataLoader(
VALID_CIFAR100,
batch_size=cifar_config.batch_size,
sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar100_splits.xtest),
num_workers=workers,
pin_memory=True,
)
print("CIFAR-100 : train-loader has {:3d} batch".format(len(train_cifar100_loader)))
print("CIFAR-100 : valid-loader has {:3d} batch".format(len(valid_cifar100_loader)))
print("CIFAR-100 : test--loader has {:3d} batch".format(len(test__cifar100_loader)))
print(break_line)
imagenet16_config_path = 'configs/nas-benchmark/ImageNet-16.config'
imagenet16_config_path = "configs/nas-benchmark/ImageNet-16.config"
imagenet16_config = load_config(imagenet16_config_path, None, None)
TRAIN_ImageNet16_120, VALID_ImageNet16_120, xshape, class_num = get_datasets('ImageNet16-120', str(torch_dir/'cifar.python'/'ImageNet16'), -1)
print ('original TRAIN_ImageNet16_120: {:} training images and {:} test images : {:} input shape : {:} number of classes'.format(len(TRAIN_ImageNet16_120), len(VALID_ImageNet16_120), xshape, class_num))
imagenet_splits = load_config(root_dir / 'configs' / 'nas-benchmark' / 'imagenet-16-120-test-split.txt', None, None)
assert imagenet_splits.xvalid[:10] == [1, 2, 3, 6, 7, 8, 9, 12, 16, 18] and imagenet_splits.xtest[:10] == [0, 4, 5, 10, 11, 13, 14, 15, 17, 20]
train_imagenet_loader = torch.utils.data.DataLoader(TRAIN_ImageNet16_120, batch_size=imagenet16_config.batch_size, shuffle=True, num_workers=workers, pin_memory=True)
valid_imagenet_loader = torch.utils.data.DataLoader(VALID_ImageNet16_120, batch_size=imagenet16_config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(imagenet_splits.xvalid), num_workers=workers, pin_memory=True)
test__imagenet_loader = torch.utils.data.DataLoader(VALID_ImageNet16_120, batch_size=imagenet16_config.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(imagenet_splits.xtest) , num_workers=workers, pin_memory=True)
print ('ImageNet-16-120 : train-loader has {:3d} batch with {:} per batch'.format(len(train_imagenet_loader), imagenet16_config.batch_size))
print ('ImageNet-16-120 : valid-loader has {:3d} batch with {:} per batch'.format(len(valid_imagenet_loader), imagenet16_config.batch_size))
print ('ImageNet-16-120 : test--loader has {:3d} batch with {:} per batch'.format(len(test__imagenet_loader), imagenet16_config.batch_size))
TRAIN_ImageNet16_120, VALID_ImageNet16_120, xshape, class_num = get_datasets(
"ImageNet16-120", str(torch_dir / "cifar.python" / "ImageNet16"), -1
)
print(
"original TRAIN_ImageNet16_120: {:} training images and {:} test images : {:} input shape : {:} number of classes".format(
len(TRAIN_ImageNet16_120), len(VALID_ImageNet16_120), xshape, class_num
)
)
imagenet_splits = load_config(root_dir / "configs" / "nas-benchmark" / "imagenet-16-120-test-split.txt", None, None)
assert imagenet_splits.xvalid[:10] == [1, 2, 3, 6, 7, 8, 9, 12, 16, 18] and imagenet_splits.xtest[:10] == [
0,
4,
5,
10,
11,
13,
14,
15,
17,
20,
]
train_imagenet_loader = torch.utils.data.DataLoader(
TRAIN_ImageNet16_120,
batch_size=imagenet16_config.batch_size,
shuffle=True,
num_workers=workers,
pin_memory=True,
)
valid_imagenet_loader = torch.utils.data.DataLoader(
VALID_ImageNet16_120,
batch_size=imagenet16_config.batch_size,
sampler=torch.utils.data.sampler.SubsetRandomSampler(imagenet_splits.xvalid),
num_workers=workers,
pin_memory=True,
)
test__imagenet_loader = torch.utils.data.DataLoader(
VALID_ImageNet16_120,
batch_size=imagenet16_config.batch_size,
sampler=torch.utils.data.sampler.SubsetRandomSampler(imagenet_splits.xtest),
num_workers=workers,
pin_memory=True,
)
print(
"ImageNet-16-120 : train-loader has {:3d} batch with {:} per batch".format(
len(train_imagenet_loader), imagenet16_config.batch_size
)
)
print(
"ImageNet-16-120 : valid-loader has {:3d} batch with {:} per batch".format(
len(valid_imagenet_loader), imagenet16_config.batch_size
)
)
print(
"ImageNet-16-120 : test--loader has {:3d} batch with {:} per batch".format(
len(test__imagenet_loader), imagenet16_config.batch_size
)
)
# 'cifar10', 'cifar100', 'ImageNet16-120'
loaders = {'cifar10@trainval': trainval_cifar10_loader,
'cifar10@train' : train_cifar10_loader,
'cifar10@valid' : valid_cifar10_loader,
'cifar10@test' : test__cifar10_loader,
'cifar100@train' : train_cifar100_loader,
'cifar100@valid' : valid_cifar100_loader,
'cifar100@test' : test__cifar100_loader,
'ImageNet16-120@train': train_imagenet_loader,
'ImageNet16-120@valid': valid_imagenet_loader,
'ImageNet16-120@test' : test__imagenet_loader}
loaders = {
"cifar10@trainval": trainval_cifar10_loader,
"cifar10@train": train_cifar10_loader,
"cifar10@valid": valid_cifar10_loader,
"cifar10@test": test__cifar10_loader,
"cifar100@train": train_cifar100_loader,
"cifar100@valid": valid_cifar100_loader,
"cifar100@test": test__cifar100_loader,
"ImageNet16-120@train": train_imagenet_loader,
"ImageNet16-120@valid": valid_imagenet_loader,
"ImageNet16-120@test": test__imagenet_loader,
}
return loaders
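
For orientation, a small sketch (assumed usage, not from this commit) of calling the loader factory above; it presumes `TORCH_HOME` and the NAS-Bench config/split files are already in place and that `lib/` is on `sys.path`:

```python
# Assumes the NAS-Bench data and config files are available locally.
from procedures import get_nas_bench_loaders  # re-exported by lib/procedures/__init__.py

loaders = get_nas_bench_loaders(workers=4)
for key, loader in loaders.items():  # keys look like "cifar10@train", "ImageNet16-120@test"
    print(key, len(loader))
```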

View File

@ -8,28 +8,30 @@ from torch.optim import Optimizer
class _LRScheduler(object):
def __init__(self, optimizer, warmup_epochs, epochs):
if not isinstance(optimizer, Optimizer):
raise TypeError('{:} is not an Optimizer'.format(type(optimizer).__name__))
raise TypeError("{:} is not an Optimizer".format(type(optimizer).__name__))
self.optimizer = optimizer
for group in optimizer.param_groups:
group.setdefault('initial_lr', group['lr'])
self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
group.setdefault("initial_lr", group["lr"])
self.base_lrs = list(map(lambda group: group["initial_lr"], optimizer.param_groups))
self.max_epochs = epochs
self.warmup_epochs = warmup_epochs
self.current_epoch = 0
self.current_iter = 0
def extra_repr(self):
return ''
return ""
def __repr__(self):
return ('{name}(warmup={warmup_epochs}, max-epoch={max_epochs}, current::epoch={current_epoch}, iter={current_iter:.2f}'.format(name=self.__class__.__name__, **self.__dict__)
+ ', {:})'.format(self.extra_repr()))
return "{name}(warmup={warmup_epochs}, max-epoch={max_epochs}, current::epoch={current_epoch}, iter={current_iter:.2f}".format(
name=self.__class__.__name__, **self.__dict__
) + ", {:})".format(
self.extra_repr()
)
def state_dict(self):
return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
return {key: value for key, value in self.__dict__.items() if key != "optimizer"}
def load_state_dict(self, state_dict):
self.__dict__.update(state_dict)
@ -39,32 +41,32 @@ class _LRScheduler(object):
def get_min_info(self):
lrs = self.get_lr()
return '#LR=[{:.6f}~{:.6f}] epoch={:03d}, iter={:4.2f}#'.format(min(lrs), max(lrs), self.current_epoch, self.current_iter)
return "#LR=[{:.6f}~{:.6f}] epoch={:03d}, iter={:4.2f}#".format(
min(lrs), max(lrs), self.current_epoch, self.current_iter
)
def get_min_lr(self):
return min(self.get_lr())
def update(self, cur_epoch, cur_iter):
if cur_epoch is not None:
assert isinstance(cur_epoch, int) and cur_epoch>=0, 'invalid cur-epoch : {:}'.format(cur_epoch)
assert isinstance(cur_epoch, int) and cur_epoch >= 0, "invalid cur-epoch : {:}".format(cur_epoch)
self.current_epoch = cur_epoch
if cur_iter is not None:
assert isinstance(cur_iter, float) and cur_iter>=0, 'invalid cur-iter : {:}'.format(cur_iter)
assert isinstance(cur_iter, float) and cur_iter >= 0, "invalid cur-iter : {:}".format(cur_iter)
self.current_iter = cur_iter
for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
param_group['lr'] = lr
param_group["lr"] = lr
class CosineAnnealingLR(_LRScheduler):
def __init__(self, optimizer, warmup_epochs, epochs, T_max, eta_min):
self.T_max = T_max
self.eta_min = eta_min
super(CosineAnnealingLR, self).__init__(optimizer, warmup_epochs, epochs)
def extra_repr(self):
return 'type={:}, T-max={:}, eta-min={:}'.format('cosine', self.T_max, self.eta_min)
return "type={:}, T-max={:}, eta-min={:}".format("cosine", self.T_max, self.eta_min)
def get_lr(self):
lrs = []
@ -84,17 +86,17 @@ class CosineAnnealingLR(_LRScheduler):
return lrs
class MultiStepLR(_LRScheduler):
def __init__(self, optimizer, warmup_epochs, epochs, milestones, gammas):
assert len(milestones) == len(gammas), 'invalid {:} vs {:}'.format(len(milestones), len(gammas))
assert len(milestones) == len(gammas), "invalid {:} vs {:}".format(len(milestones), len(gammas))
self.milestones = milestones
self.gammas = gammas
super(MultiStepLR, self).__init__(optimizer, warmup_epochs, epochs)
def extra_repr(self):
return 'type={:}, milestones={:}, gammas={:}, base-lrs={:}'.format('multistep', self.milestones, self.gammas, self.base_lrs)
return "type={:}, milestones={:}, gammas={:}, base-lrs={:}".format(
"multistep", self.milestones, self.gammas, self.base_lrs
)
def get_lr(self):
lrs = []
@ -103,7 +105,8 @@ class MultiStepLR(_LRScheduler):
last_epoch = self.current_epoch - self.warmup_epochs
idx = bisect_right(self.milestones, last_epoch)
lr = base_lr
for x in self.gammas[:idx]: lr *= x
for x in self.gammas[:idx]:
lr *= x
else:
lr = (self.current_epoch / self.warmup_epochs + self.current_iter / self.warmup_epochs) * base_lr
lrs.append(lr)
@ -111,20 +114,19 @@ class MultiStepLR(_LRScheduler):
class ExponentialLR(_LRScheduler):
def __init__(self, optimizer, warmup_epochs, epochs, gamma):
self.gamma = gamma
super(ExponentialLR, self).__init__(optimizer, warmup_epochs, epochs)
def extra_repr(self):
return 'type={:}, gamma={:}, base-lrs={:}'.format('exponential', self.gamma, self.base_lrs)
return "type={:}, gamma={:}, base-lrs={:}".format("exponential", self.gamma, self.base_lrs)
def get_lr(self):
lrs = []
for base_lr in self.base_lrs:
if self.current_epoch >= self.warmup_epochs:
last_epoch = self.current_epoch - self.warmup_epochs
assert last_epoch >= 0, 'invalid last_epoch : {:}'.format(last_epoch)
assert last_epoch >= 0, "invalid last_epoch : {:}".format(last_epoch)
lr = base_lr * (self.gamma ** last_epoch)
else:
lr = (self.current_epoch / self.warmup_epochs + self.current_iter / self.warmup_epochs) * base_lr
@ -133,21 +135,22 @@ class ExponentialLR(_LRScheduler):
class LinearLR(_LRScheduler):
def __init__(self, optimizer, warmup_epochs, epochs, max_LR, min_LR):
self.max_LR = max_LR
self.min_LR = min_LR
super(LinearLR, self).__init__(optimizer, warmup_epochs, epochs)
def extra_repr(self):
return 'type={:}, max_LR={:}, min_LR={:}, base-lrs={:}'.format('LinearLR', self.max_LR, self.min_LR, self.base_lrs)
return "type={:}, max_LR={:}, min_LR={:}, base-lrs={:}".format(
"LinearLR", self.max_LR, self.min_LR, self.base_lrs
)
def get_lr(self):
lrs = []
for base_lr in self.base_lrs:
if self.current_epoch >= self.warmup_epochs:
last_epoch = self.current_epoch - self.warmup_epochs
assert last_epoch >= 0, 'invalid last_epoch : {:}'.format(last_epoch)
assert last_epoch >= 0, "invalid last_epoch : {:}".format(last_epoch)
ratio = (self.max_LR - self.min_LR) * last_epoch / self.max_epochs / self.max_LR
lr = base_lr * (1 - ratio)
else:
@ -156,9 +159,7 @@ class LinearLR(_LRScheduler):
return lrs
class CrossEntropyLabelSmooth(nn.Module):
def __init__(self, num_classes, epsilon):
super(CrossEntropyLabelSmooth, self).__init__()
self.num_classes = num_classes
@ -173,32 +174,35 @@ class CrossEntropyLabelSmooth(nn.Module):
return loss
def get_optim_scheduler(parameters, config):
assert hasattr(config, 'optim') and hasattr(config, 'scheduler') and hasattr(config, 'criterion'), 'config must have optim / scheduler / criterion keys instead of {:}'.format(config)
if config.optim == 'SGD':
optim = torch.optim.SGD(parameters, config.LR, momentum=config.momentum, weight_decay=config.decay, nesterov=config.nesterov)
elif config.optim == 'RMSprop':
assert (
hasattr(config, "optim") and hasattr(config, "scheduler") and hasattr(config, "criterion")
), "config must have optim / scheduler / criterion keys instead of {:}".format(config)
if config.optim == "SGD":
optim = torch.optim.SGD(
parameters, config.LR, momentum=config.momentum, weight_decay=config.decay, nesterov=config.nesterov
)
elif config.optim == "RMSprop":
optim = torch.optim.RMSprop(parameters, config.LR, momentum=config.momentum, weight_decay=config.decay)
else:
raise ValueError('invalid optim : {:}'.format(config.optim))
raise ValueError("invalid optim : {:}".format(config.optim))
if config.scheduler == 'cos':
T_max = getattr(config, 'T_max', config.epochs)
if config.scheduler == "cos":
T_max = getattr(config, "T_max", config.epochs)
scheduler = CosineAnnealingLR(optim, config.warmup, config.epochs, T_max, config.eta_min)
elif config.scheduler == 'multistep':
elif config.scheduler == "multistep":
scheduler = MultiStepLR(optim, config.warmup, config.epochs, config.milestones, config.gammas)
elif config.scheduler == 'exponential':
elif config.scheduler == "exponential":
scheduler = ExponentialLR(optim, config.warmup, config.epochs, config.gamma)
elif config.scheduler == 'linear':
elif config.scheduler == "linear":
scheduler = LinearLR(optim, config.warmup, config.epochs, config.LR, config.LR_min)
else:
raise ValueError('invalid scheduler : {:}'.format(config.scheduler))
raise ValueError("invalid scheduler : {:}".format(config.scheduler))
if config.criterion == 'Softmax':
if config.criterion == "Softmax":
criterion = torch.nn.CrossEntropyLoss()
elif config.criterion == 'SmoothSoftmax':
elif config.criterion == "SmoothSoftmax":
criterion = CrossEntropyLabelSmooth(config.class_num, config.label_smooth)
else:
raise ValueError('invalid criterion : {:}'.format(config.criterion))
raise ValueError("invalid criterion : {:}".format(config.criterion))
return optim, scheduler, criterion
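
As a quick illustration (assumed usage, not from this commit), `get_optim_scheduler` only needs a config object exposing the attributes asserted above; the repo builds it from `*.config` files via `load_config`, but a namedtuple is enough for a sketch:

```python
# Hypothetical values; the field names mirror the attributes accessed above.
from collections import namedtuple
import torch
from procedures import get_optim_scheduler  # re-exported by lib/procedures/__init__.py

OptConfig = namedtuple(
    "OptConfig",
    ["optim", "LR", "momentum", "decay", "nesterov",
     "scheduler", "warmup", "epochs", "eta_min", "criterion"],
)
config = OptConfig("SGD", 0.1, 0.9, 0.0005, True, "cos", 5, 200, 0.0, "Softmax")
net = torch.nn.Linear(16, 10)
optimizer, scheduler, criterion = get_optim_scheduler(net.parameters(), config)
scheduler.update(0, 0.0)  # set epoch/iteration before querying the learning rate
print(scheduler.get_lr())
```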

View File

@ -7,11 +7,12 @@ from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.utils import flatten_dict
from qlib.log import set_log_basic_config
from qlib.log import get_module_logger
def update_gpu(config, gpu):
config = config.copy()
if "task" in config and "GPU" in config["task"]["model"]:
if "task" in config and "moodel" in config["task"] and "GPU" in config["task"]["model"]:
config["task"]["model"]["GPU"] = gpu
elif "model" in config and "GPU" in config["model"]:
config["model"]["GPU"] = gpu
@ -29,11 +30,6 @@ def update_market(config, market):
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
# model initialization
print("")
print("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name, uri))
print("dataset={:}".format(dataset))
model = init_instance_by_config(task_config["model"])
# start exp
@ -41,6 +37,10 @@ def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
log_file = R.get_recorder().root_uri / "{:}.log".format(experiment_name)
set_log_basic_config(log_file)
logger = get_module_logger("q.run_exp")
logger.info("task_config={:}".format(task_config))
logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name, uri))
logger.info("dataset={:}".format(dataset))
# train model
R.log_params(**flatten_dict(task_config))
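
A minimal sketch (assumed usage, not part of this commit) of what `update_gpu` and `update_market` do to a workflow config dictionary; the values are hypothetical:

```python
# Hypothetical config fragment containing the keys the helpers above look for.
from procedures.q_exps import update_gpu, update_market

config = {
    "market": "all",
    "data_handler_config": {"instruments": "all"},
    "task": {"model": {"class": "DNNModelPytorch", "GPU": 0}},
}
config = update_market(config, "csi300")  # rewrites market and the handler instruments
config = update_gpu(config, 1)            # rewrites task.model.GPU when the key exists
```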

View File

@ -17,20 +17,38 @@ def get_flop_loss(expected_flop, flop_cur, flop_need, flop_tolerant):
loss = torch.log(expected_flop)
else: # Required FLOP
loss = None
if loss is None: return 0, 0
else : return loss, loss.item()
if loss is None:
return 0, 0
else:
return loss, loss.item()
def search_train(search_loader, network, criterion, scheduler, base_optimizer, arch_optimizer, optim_config, extra_info, print_freq, logger):
def search_train(
search_loader,
network,
criterion,
scheduler,
base_optimizer,
arch_optimizer,
optim_config,
extra_info,
print_freq,
logger,
):
data_time, batch_time = AverageMeter(), AverageMeter()
base_losses, arch_losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
arch_cls_losses, arch_flop_losses = AverageMeter(), AverageMeter()
epoch_str, flop_need, flop_weight, flop_tolerant = extra_info['epoch-str'], extra_info['FLOP-exp'], extra_info['FLOP-weight'], extra_info['FLOP-tolerant']
epoch_str, flop_need, flop_weight, flop_tolerant = (
extra_info["epoch-str"],
extra_info["FLOP-exp"],
extra_info["FLOP-weight"],
extra_info["FLOP-tolerant"],
)
network.train()
logger.log('[Search] : {:}, FLOP-Require={:.2f} MB, FLOP-WEIGHT={:.2f}'.format(epoch_str, flop_need, flop_weight))
logger.log("[Search] : {:}, FLOP-Require={:.2f} MB, FLOP-WEIGHT={:.2f}".format(epoch_str, flop_need, flop_weight))
end = time.time()
network.apply( change_key('search_mode', 'search') )
network.apply(change_key("search_mode", "search"))
for step, (base_inputs, base_targets, arch_inputs, arch_targets) in enumerate(search_loader):
scheduler.update(None, 1.0 * step / len(search_loader))
# calculate prediction and loss
@ -56,7 +74,7 @@ def search_train(search_loader, network, criterion, scheduler, base_optimizer, a
# update the architecture
arch_optimizer.zero_grad()
logits, expected_flop = network(arch_inputs)
flop_cur = network.module.get_flop('genotype', None, None)
flop_cur = network.module.get_flop("genotype", None, None)
flop_loss, flop_loss_scale = get_flop_loss(expected_flop, flop_cur, flop_need, flop_tolerant)
acls_loss = criterion(logits, arch_targets)
arch_loss = acls_loss + flop_loss * flop_weight
@ -72,27 +90,47 @@ def search_train(search_loader, network, criterion, scheduler, base_optimizer, a
batch_time.update(time.time() - end)
end = time.time()
if step % print_freq == 0 or (step + 1) == len(search_loader):
Sstr = '**TRAIN** ' + time_string() + ' [{:}][{:03d}/{:03d}]'.format(epoch_str, step, len(search_loader))
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
Lstr = 'Base-Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=base_losses, top1=top1, top5=top5)
Vstr = 'Acls-loss {aloss.val:.3f} ({aloss.avg:.3f}) FLOP-Loss {floss.val:.3f} ({floss.avg:.3f}) Arch-Loss {loss.val:.3f} ({loss.avg:.3f})'.format(aloss=arch_cls_losses, floss=arch_flop_losses, loss=arch_losses)
logger.log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Vstr)
Sstr = "**TRAIN** " + time_string() + " [{:}][{:03d}/{:03d}]".format(epoch_str, step, len(search_loader))
Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
batch_time=batch_time, data_time=data_time
)
Lstr = "Base-Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
loss=base_losses, top1=top1, top5=top5
)
Vstr = "Acls-loss {aloss.val:.3f} ({aloss.avg:.3f}) FLOP-Loss {floss.val:.3f} ({floss.avg:.3f}) Arch-Loss {loss.val:.3f} ({loss.avg:.3f})".format(
aloss=arch_cls_losses, floss=arch_flop_losses, loss=arch_losses
)
logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Vstr)
# Istr = 'Bsz={:} Asz={:}'.format(list(base_inputs.size()), list(arch_inputs.size()))
# logger.log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Vstr + ' ' + Istr)
# print(network.module.get_arch_info())
# print(network.module.width_attentions[0])
# print(network.module.width_attentions[1])
logger.log(' **TRAIN** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Base-Loss:{baseloss:.3f}, Arch-Loss={archloss:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, baseloss=base_losses.avg, archloss=arch_losses.avg))
logger.log(
" **TRAIN** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Base-Loss:{baseloss:.3f}, Arch-Loss={archloss:.3f}".format(
top1=top1,
top5=top5,
error1=100 - top1.avg,
error5=100 - top5.avg,
baseloss=base_losses.avg,
archloss=arch_losses.avg,
)
)
return base_losses.avg, arch_losses.avg, top1.avg, top5.avg
def search_valid(xloader, network, criterion, extra_info, print_freq, logger):
data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
data_time, batch_time, losses, top1, top5 = (
AverageMeter(),
AverageMeter(),
AverageMeter(),
AverageMeter(),
AverageMeter(),
)
network.eval()
network.apply( change_key('search_mode', 'search') )
network.apply(change_key("search_mode", "search"))
end = time.time()
# logger.log('Starting evaluating {:}'.format(epoch_info))
with torch.no_grad():
@ -115,12 +153,20 @@ def search_valid(xloader, network, criterion, extra_info, print_freq, logger):
end = time.time()
if i % print_freq == 0 or (i + 1) == len(xloader):
Sstr = '**VALID** ' + time_string() + ' [{:}][{:03d}/{:03d}]'.format(extra_info, i, len(xloader))
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
Lstr = 'Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=losses, top1=top1, top5=top5)
Istr = 'Size={:}'.format(list(inputs.size()))
logger.log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Istr)
Sstr = "**VALID** " + time_string() + " [{:}][{:03d}/{:03d}]".format(extra_info, i, len(xloader))
Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
batch_time=batch_time, data_time=data_time
)
Lstr = "Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
loss=losses, top1=top1, top5=top5
)
Istr = "Size={:}".format(list(inputs.size()))
logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Istr)
logger.log(' **VALID** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, loss=losses.avg))
logger.log(
" **VALID** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}".format(
top1=top1, top5=top5, error1=100 - top1.avg, error5=100 - top5.avg, loss=losses.avg
)
)
return losses.avg, top1.avg, top5.avg

View File

@ -17,20 +17,38 @@ def get_flop_loss(expected_flop, flop_cur, flop_need, flop_tolerant):
loss = torch.log(expected_flop)
else: # Required FLOP
loss = None
if loss is None: return 0, 0
else : return loss, loss.item()
if loss is None:
return 0, 0
else:
return loss, loss.item()
def search_train_v2(search_loader, network, criterion, scheduler, base_optimizer, arch_optimizer, optim_config, extra_info, print_freq, logger):
def search_train_v2(
search_loader,
network,
criterion,
scheduler,
base_optimizer,
arch_optimizer,
optim_config,
extra_info,
print_freq,
logger,
):
data_time, batch_time = AverageMeter(), AverageMeter()
base_losses, arch_losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
arch_cls_losses, arch_flop_losses = AverageMeter(), AverageMeter()
epoch_str, flop_need, flop_weight, flop_tolerant = extra_info['epoch-str'], extra_info['FLOP-exp'], extra_info['FLOP-weight'], extra_info['FLOP-tolerant']
epoch_str, flop_need, flop_weight, flop_tolerant = (
extra_info["epoch-str"],
extra_info["FLOP-exp"],
extra_info["FLOP-weight"],
extra_info["FLOP-tolerant"],
)
network.train()
logger.log('[Search] : {:}, FLOP-Require={:.2f} MB, FLOP-WEIGHT={:.2f}'.format(epoch_str, flop_need, flop_weight))
logger.log("[Search] : {:}, FLOP-Require={:.2f} MB, FLOP-WEIGHT={:.2f}".format(epoch_str, flop_need, flop_weight))
end = time.time()
network.apply( change_key('search_mode', 'search') )
network.apply(change_key("search_mode", "search"))
for step, (base_inputs, base_targets, arch_inputs, arch_targets) in enumerate(search_loader):
scheduler.update(None, 1.0 * step / len(search_loader))
# calculate prediction and loss
@ -54,7 +72,7 @@ def search_train_v2(search_loader, network, criterion, scheduler, base_optimizer
# update the architecture
arch_optimizer.zero_grad()
logits, expected_flop = network(arch_inputs)
flop_cur = network.module.get_flop('genotype', None, None)
flop_cur = network.module.get_flop("genotype", None, None)
flop_loss, flop_loss_scale = get_flop_loss(expected_flop, flop_cur, flop_need, flop_tolerant)
acls_loss = criterion(logits, arch_targets)
arch_loss = acls_loss + flop_loss * flop_weight
@ -70,11 +88,17 @@ def search_train_v2(search_loader, network, criterion, scheduler, base_optimizer
batch_time.update(time.time() - end)
end = time.time()
if step % print_freq == 0 or (step + 1) == len(search_loader):
Sstr = '**TRAIN** ' + time_string() + ' [{:}][{:03d}/{:03d}]'.format(epoch_str, step, len(search_loader))
Tstr = 'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})'.format(batch_time=batch_time, data_time=data_time)
Lstr = 'Base-Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})'.format(loss=base_losses, top1=top1, top5=top5)
Vstr = 'Acls-loss {aloss.val:.3f} ({aloss.avg:.3f}) FLOP-Loss {floss.val:.3f} ({floss.avg:.3f}) Arch-Loss {loss.val:.3f} ({loss.avg:.3f})'.format(aloss=arch_cls_losses, floss=arch_flop_losses, loss=arch_losses)
logger.log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Vstr)
Sstr = "**TRAIN** " + time_string() + " [{:}][{:03d}/{:03d}]".format(epoch_str, step, len(search_loader))
Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
batch_time=batch_time, data_time=data_time
)
Lstr = "Base-Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
loss=base_losses, top1=top1, top5=top5
)
Vstr = "Acls-loss {aloss.val:.3f} ({aloss.avg:.3f}) FLOP-Loss {floss.val:.3f} ({floss.avg:.3f}) Arch-Loss {loss.val:.3f} ({loss.avg:.3f})".format(
aloss=arch_cls_losses, floss=arch_flop_losses, loss=arch_losses
)
logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Vstr)
# num_bytes = torch.cuda.max_memory_allocated( next(network.parameters()).device ) * 1.0
# logger.log(Sstr + ' ' + Tstr + ' ' + Lstr + ' ' + Vstr + ' GPU={:.2f}MB'.format(num_bytes/1e6))
# Istr = 'Bsz={:} Asz={:}'.format(list(base_inputs.size()), list(arch_inputs.size()))
@ -83,5 +107,14 @@ def search_train_v2(search_loader, network, criterion, scheduler, base_optimizer
# print(network.module.width_attentions[0])
# print(network.module.width_attentions[1])
logger.log(' **TRAIN** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Base-Loss:{baseloss:.3f}, Arch-Loss={archloss:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg, error5=100-top5.avg, baseloss=base_losses.avg, archloss=arch_losses.avg))
logger.log(
" **TRAIN** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Base-Loss:{baseloss:.3f}, Arch-Loss={archloss:.3f}".format(
top1=top1,
top5=top5,
error1=100 - top1.avg,
error5=100 - top5.avg,
baseloss=base_losses.avg,
archloss=arch_losses.avg,
)
)
return base_losses.avg, arch_losses.avg, top1.avg, top5.avg

View File

@ -3,65 +3,100 @@
#####################################################
import os, sys, time, torch
import torch.nn.functional as F
# our modules
from log_utils import AverageMeter, time_string
from utils import obtain_accuracy
def simple_KD_train(xloader, teacher, network, criterion, scheduler, optimizer, optim_config, extra_info, print_freq, logger):
loss, acc1, acc5 = procedure(xloader, teacher, network, criterion, scheduler, optimizer, 'train', optim_config, extra_info, print_freq, logger)
def simple_KD_train(
xloader, teacher, network, criterion, scheduler, optimizer, optim_config, extra_info, print_freq, logger
):
loss, acc1, acc5 = procedure(
xloader,
teacher,
network,
criterion,
scheduler,
optimizer,
"train",
optim_config,
extra_info,
print_freq,
logger,
)
return loss, acc1, acc5
def simple_KD_valid(xloader, teacher, network, criterion, optim_config, extra_info, print_freq, logger):
with torch.no_grad():
loss, acc1, acc5 = procedure(xloader, teacher, network, criterion, None, None, 'valid', optim_config, extra_info, print_freq, logger)
loss, acc1, acc5 = procedure(
xloader, teacher, network, criterion, None, None, "valid", optim_config, extra_info, print_freq, logger
)
return loss, acc1, acc5
def loss_KD_fn(criterion, student_logits, teacher_logits, studentFeatures, teacherFeatures, targets, alpha, temperature):
basic_loss = criterion(student_logits, targets) * (1. - alpha)
def loss_KD_fn(
criterion, student_logits, teacher_logits, studentFeatures, teacherFeatures, targets, alpha, temperature
):
basic_loss = criterion(student_logits, targets) * (1.0 - alpha)
log_student = F.log_softmax(student_logits / temperature, dim=1)
sof_teacher = F.softmax(teacher_logits / temperature, dim=1)
KD_loss = F.kl_div(log_student, sof_teacher, reduction='batchmean') * (alpha * temperature * temperature)
KD_loss = F.kl_div(log_student, sof_teacher, reduction="batchmean") * (alpha * temperature * temperature)
return basic_loss + KD_loss
def procedure(xloader, teacher, network, criterion, scheduler, optimizer, mode, config, extra_info, print_freq, logger):
data_time, batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
data_time, batch_time, losses, top1, top5 = (
AverageMeter(),
AverageMeter(),
AverageMeter(),
AverageMeter(),
AverageMeter(),
)
Ttop1, Ttop5 = AverageMeter(), AverageMeter()
if mode == 'train':
if mode == "train":
network.train()
elif mode == 'valid':
elif mode == "valid":
network.eval()
else: raise ValueError("The mode is not right : {:}".format(mode))
else:
raise ValueError("The mode is not right : {:}".format(mode))
teacher.eval()
logger.log('[{:5s}] config :: auxiliary={:}, KD :: [alpha={:.2f}, temperature={:.2f}]'.format(mode, config.auxiliary if hasattr(config, 'auxiliary') else -1, config.KD_alpha, config.KD_temperature))
logger.log(
"[{:5s}] config :: auxiliary={:}, KD :: [alpha={:.2f}, temperature={:.2f}]".format(
mode, config.auxiliary if hasattr(config, "auxiliary") else -1, config.KD_alpha, config.KD_temperature
)
)
end = time.time()
for i, (inputs, targets) in enumerate(xloader):
if mode == 'train': scheduler.update(None, 1.0 * i / len(xloader))
if mode == "train":
scheduler.update(None, 1.0 * i / len(xloader))
# measure data loading time
data_time.update(time.time() - end)
# calculate prediction and loss
targets = targets.cuda(non_blocking=True)
if mode == 'train': optimizer.zero_grad()
if mode == "train":
optimizer.zero_grad()
student_f, logits = network(inputs)
if isinstance(logits, list):
assert len(logits) == 2, 'logits must has {:} items instead of {:}'.format(2, len(logits))
assert len(logits) == 2, "logits must has {:} items instead of {:}".format(2, len(logits))
logits, logits_aux = logits
else:
logits, logits_aux = logits, None
with torch.no_grad():
teacher_f, teacher_logits = teacher(inputs)
loss = loss_KD_fn(
criterion, logits, teacher_logits, student_f, teacher_f, targets, config.KD_alpha, config.KD_temperature
)
if config is not None and hasattr(config, "auxiliary") and config.auxiliary > 0:
loss_aux = criterion(logits_aux, targets)
loss += config.auxiliary * loss_aux
if mode == "train":
loss.backward()
optimizer.step()
@ -80,15 +115,31 @@ def procedure(xloader, teacher, network, criterion, scheduler, optimizer, mode,
end = time.time()
if i % print_freq == 0 or (i + 1) == len(xloader):
Sstr = (
" {:5s} ".format(mode.upper())
+ time_string()
+ " [{:}][{:03d}/{:03d}]".format(extra_info, i, len(xloader))
)
if scheduler is not None:
Sstr += " {:}".format(scheduler.get_min_info())
Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
batch_time=batch_time, data_time=data_time
)
Lstr = "Loss {loss.val:.3f} ({loss.avg:.3f}) Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})".format(
loss=losses, top1=top1, top5=top5
)
Lstr += " Teacher : acc@1={:.2f}, acc@5={:.2f}".format(Ttop1.avg, Ttop5.avg)
Istr = "Size={:}".format(list(inputs.size()))
logger.log(Sstr + " " + Tstr + " " + Lstr + " " + Istr)
logger.log(
" **{:5s}** accuracy drop :: @1={:.2f}, @5={:.2f}".format(
mode.upper(), Ttop1.avg - top1.avg, Ttop5.avg - top5.avg
)
)
logger.log(
" **{mode:5s}** Prec@1 {top1.avg:.2f} Prec@5 {top5.avg:.2f} Error@1 {error1:.2f} Error@5 {error5:.2f} Loss:{loss:.3f}".format(
mode=mode.upper(), top1=top1, top5=top5, error1=100 - top1.avg, error5=100 - top5.avg, loss=losses.avg
)
)
return losses.avg, top1.avg, top5.avg

View File

@ -17,30 +17,35 @@ def prepare_seed(rand_seed):
def prepare_logger(xargs):
args = copy.deepcopy(xargs)
from log_utils import Logger
logger = Logger(args.save_dir, args.rand_seed)
logger.log("Main Function with logger : {:}".format(logger))
logger.log("Arguments : -------------------------------")
for name, value in args._get_kwargs():
logger.log("{:16} : {:}".format(name, value))
logger.log("Python Version : {:}".format(sys.version.replace("\n", " ")))
logger.log("Pillow Version : {:}".format(PIL.__version__))
logger.log("PyTorch Version : {:}".format(torch.__version__))
logger.log("cuDNN Version : {:}".format(torch.backends.cudnn.version()))
logger.log("CUDA available : {:}".format(torch.cuda.is_available()))
logger.log("CUDA GPU numbers : {:}".format(torch.cuda.device_count()))
logger.log("CUDA_VISIBLE_DEVICES : {:}".format(os.environ['CUDA_VISIBLE_DEVICES'] if 'CUDA_VISIBLE_DEVICES' in os.environ else 'None'))
logger.log(
"CUDA_VISIBLE_DEVICES : {:}".format(
os.environ["CUDA_VISIBLE_DEVICES"] if "CUDA_VISIBLE_DEVICES" in os.environ else "None"
)
)
return logger
def get_machine_info():
info = "Python Version : {:}".format(sys.version.replace('\n', ' '))
info = "Python Version : {:}".format(sys.version.replace("\n", " "))
info += "\nPillow Version : {:}".format(PIL.__version__)
info += "\nPyTorch Version : {:}".format(torch.__version__)
info += "\ncuDNN Version : {:}".format(torch.backends.cudnn.version())
info += "\nCUDA available : {:}".format(torch.cuda.is_available())
info += "\nCUDA GPU numbers : {:}".format(torch.cuda.device_count())
if "CUDA_VISIBLE_DEVICES" in os.environ:
info += "\nCUDA_VISIBLE_DEVICES={:}".format(os.environ["CUDA_VISIBLE_DEVICES"])
else:
info += "\nDoes not set CUDA_VISIBLE_DEVICES"
return info
@ -48,17 +53,21 @@ def get_machine_info():
def save_checkpoint(state, filename, logger):
if osp.isfile(filename):
if hasattr(logger, "log"):
logger.log("Find {:} exist, delete is at first before saving".format(filename))
os.remove(filename)
torch.save(state, filename)
assert osp.isfile(filename), "save filename : {:} failed, which is not found.".format(filename)
if hasattr(logger, "log"):
logger.log("save checkpoint into {:}".format(filename))
return filename
def copy_checkpoint(src, dst, logger):
if osp.isfile(dst):
if hasattr(logger, "log"):
logger.log("Find {:} exist, delete is at first before saving".format(dst))
os.remove(dst)
copyfile(src, dst)
if hasattr(logger, "log"):
logger.log("copy the file from {:} into {:}".format(src, dst))

View File

@ -1,8 +1,10 @@
# functions for affine transformation
import math
import torch
import numpy as np
import torch.nn.functional as F
def identity2affine(full=False):
if not full:
parameters = torch.zeros((2, 3))
@ -12,14 +14,17 @@ def identity2affine(full=False):
parameters[0, 0] = parameters[1, 1] = parameters[2, 2] = 1
return parameters
def normalize_L(x, L):
return -1.0 + 2.0 * x / (L - 1)
def denormalize_L(x, L):
return (x + 1.0) / 2.0 * (L - 1)
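# A quick sanity sketch: for an axis of length L, pixel indices {0, ..., L-1} map into [-1, 1]
# and the round trip through denormalize_L recovers the original values.
def _example_normalize_L():
    xs = torch.tensor([0.0, 2.0, 4.0])
    normed = normalize_L(xs, 5)      # -> tensor([-1., 0., 1.])
    return denormalize_L(normed, 5)  # -> tensor([0., 2., 4.])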
def crop2affine(crop_box, W, H):
assert len(crop_box) == 4, "Invalid crop-box : {:}".format(crop_box)
parameters = torch.zeros(3, 3)
x1, y1 = normalize_L(crop_box[0], W), normalize_L(crop_box[1], H)
x2, y2 = normalize_L(crop_box[2], W), normalize_L(crop_box[3], H)
@ -31,6 +36,7 @@ def crop2affine(crop_box, W, H):
parameters[2, 2] = 1
return parameters
def scale2affine(scalex, scaley):
parameters = torch.zeros(3, 3)
parameters[0, 0] = scalex
@ -38,6 +44,7 @@ def scale2affine(scalex, scaley):
parameters[2, 2] = 1
return parameters
def offset2affine(offx, offy):
parameters = torch.zeros(3, 3)
parameters[0, 0] = parameters[1, 1] = parameters[2, 2] = 1
@ -45,16 +52,18 @@ def offset2affine(offx, offy):
parameters[1, 2] = offy
return parameters
def horizontalmirror2affine():
parameters = torch.zeros(3, 3)
parameters[0, 0] = -1
parameters[1, 1] = parameters[2, 2] = 1
return parameters
# clockwise rotate image = counterclockwise rotate the rectangle
# degree is between [0, 360]
def rotate2affine(degree):
assert degree >= 0 and degree <= 360, "Invalid degree : {:}".format(degree)
degree = degree / 180 * math.pi
parameters = torch.zeros(3, 3)
parameters[0, 0] = math.cos(-degree)
@ -64,48 +73,62 @@ def rotate2affine(degree):
parameters[2, 2] = 1
return parameters
# shape is a tuple [H, W]
def normalize_points(shape, points):
assert (isinstance(shape, tuple) or isinstance(shape, list)) and len(shape) == 2, "invalid shape : {:}".format(
shape
)
assert isinstance(points, torch.Tensor) and (points.shape[0] == 2), "points are wrong : {:}".format(points.shape)
(H, W), points = shape, points.clone()
points[0, :] = normalize_L(points[0, :], W)
points[1, :] = normalize_L(points[1, :], H)
return points
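# A sanity sketch for normalize_points, assuming a 2xN tensor whose first row holds x and second
# row holds y pixel coordinates, with shape given as (H, W): image corners map to the corners of
# the [-1, 1] x [-1, 1] square.
def _example_normalize_points():
    pts = torch.tensor([[0.0, 63.0], [0.0, 47.0]])
    return normalize_points((48, 64), pts)  # -> tensor([[-1., 1.], [-1., 1.]])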
# shape is a tuple [H, W]
def normalize_points_batch(shape, points):
assert (isinstance(shape, tuple) or isinstance(shape, list)) and len(shape) == 2, "invalid shape : {:}".format(
shape
)
assert isinstance(points, torch.Tensor) and (points.size(-1) == 2), "points are wrong : {:}".format(points.shape)
(H, W), points = shape, points.clone()
x = normalize_L(points[..., 0], W)
y = normalize_L(points[..., 1], H)
return torch.stack((x, y), dim=-1)
# shape is a tuple [H, W]
def denormalize_points(shape, points):
assert (isinstance(shape, tuple) or isinstance(shape, list)) and len(shape) == 2, "invalid shape : {:}".format(
shape
)
assert isinstance(points, torch.Tensor) and (points.shape[0] == 2), "points are wrong : {:}".format(points.shape)
(H, W), points = shape, points.clone()
points[0, :] = denormalize_L(points[0, :], W)
points[1, :] = denormalize_L(points[1, :], H)
return points
# shape is a tuple [H, W]
def denormalize_points_batch(shape, points):
assert (isinstance(shape, tuple) or isinstance(shape, list)) and len(shape) == 2, "invalid shape : {:}".format(
shape
)
assert isinstance(points, torch.Tensor) and (points.shape[-1] == 2), "points are wrong : {:}".format(points.shape)
(H, W), points = shape, points.clone()
x = denormalize_L(points[..., 0], W)
y = denormalize_L(points[..., 1], H)
return torch.stack((x, y), dim=-1)
# make target * theta = source
def solve2theta(source, target):
source, target = source.clone(), target.clone()
oks = source[2, :] == 1
assert torch.sum(oks).item() >= 3, "valid points : {:} is short".format(oks)
if target.size(0) == 2:
target = torch.cat((target, oks.unsqueeze(0).float()), dim=0)
source, target = source[:, oks], target[:, oks]
source, target = source.transpose(1, 0), target.transpose(1, 0)
assert source.size(1) == target.size(1) == 3
@ -115,11 +138,12 @@ def solve2theta(source, target):
theta = X_[:3, :2].transpose(1, 0)
return theta
# shape = [H,W]
def affine2image(image, theta, shape):
C, H, W = image.size()
theta = theta[:2, :].unsqueeze(0)
grid_size = torch.Size([1, C, shape[0], shape[1]])
grid = F.affine_grid(theta, grid_size)
affI = F.grid_sample(image.unsqueeze(0), grid, mode="bilinear", padding_mode="border")
return affI.squeeze(0)
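# A usage sketch, assuming a CHW float tensor; since affine_grid maps output coordinates to input
# sampling coordinates, a 0.5 scale samples the central half of the image (an effective 2x zoom-in).
def _example_affine2image():
    image = torch.rand(3, 64, 64)
    theta = scale2affine(0.5, 0.5)
    return affine2image(image, theta, (64, 64))  # CHW tensor with the requested output shape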

View File

@ -1,5 +1,6 @@
import torch
def obtain_accuracy(output, target, topk=(1,)):
"""Computes the precision@k for the specified values of k"""
maxk = max(topk)

View File

@ -33,17 +33,18 @@ def get_model_infos(model, shape):
# cache_inputs = torch.zeros(*shape).cuda()
# cache_inputs = torch.zeros(*shape)
cache_inputs = torch.rand(*shape)
if next(model.parameters()).is_cuda:
cache_inputs = cache_inputs.cuda()
# print_log('In the calculating function : cache input size : {:}'.format(cache_inputs.size()), log)
with torch.no_grad():
_____ = model(cache_inputs)
FLOPs = compute_average_flops_cost(model) / 1e6
Param = count_parameters_in_MB(model)
if hasattr(model, "auxiliary_param"):
aux_params = count_parameters_in_MB(model.auxiliary_param())
print("The auxiliary params of this model is : {:}".format(aux_params))
print("We remove the auxiliary params from the total params ({:}) when counting".format(Param))
Param = Param - aux_params
# print_log('FLOPs : {:} MB'.format(FLOPs), log)
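# A usage sketch with a hypothetical model argument, assuming get_model_infos returns
# (FLOPs, params) in mega-scale units as the computation above suggests:
def _example_get_model_infos(model):
    flops, params = get_model_infos(model, shape=(1, 3, 32, 32))
    print("FLOPs : {:.2f} M, Params : {:.2f} MB".format(flops, params))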
@ -61,7 +62,6 @@ def add_flops_counting_methods( model ):
return model
def compute_average_flops_cost(model):
"""
A method that will be available after add_flops_counting_methods() is called on a desired net object.
@ -71,9 +71,12 @@ def compute_average_flops_cost(model):
flops_sum = 0
# or isinstance(module, torch.nn.AvgPool2d) or isinstance(module, torch.nn.MaxPool2d) \
for module in model.modules():
if (
isinstance(module, torch.nn.Conv2d)
or isinstance(module, torch.nn.Linear)
or isinstance(module, torch.nn.Conv1d)
or hasattr(module, "calculate_flop_self")
):
flops_sum += module.__flops__
return flops_sum / batches_count
@ -83,7 +86,7 @@ def pool_flops_counter_hook(pool_module, inputs, output):
batch_size = inputs[0].size(0)
kernel_size = pool_module.kernel_size
out_C, output_height, output_width = output.shape[1:]
assert out_C == inputs[0].size(1), "{:} vs. {:}".format(out_C, inputs[0].size())
overall_flops = batch_size * out_C * output_height * output_width * kernel_size * kernel_size
pool_module.__flops__ += overall_flops
@ -97,7 +100,7 @@ def self_calculate_flops_counter_hook(self_module, inputs, output):
def fc_flops_counter_hook(fc_module, inputs, output):
batch_size = inputs[0].size(0)
xin, xout = fc_module.in_features, fc_module.out_features
assert xin == inputs[0].size(1) and xout == output.size(1), "IO=({:}, {:})".format(xin, xout)
overall_flops = batch_size * xin * xout
if fc_module.bias is not None:
overall_flops += batch_size * xout
@ -147,48 +150,53 @@ def batch_counter_hook(module, inputs, output):
def add_batch_counter_hook_function(module):
if not hasattr(module, "__batch_counter_handle__"):
handle = module.register_forward_hook(batch_counter_hook)
module.__batch_counter_handle__ = handle
def add_flops_counter_variable_or_reset(module):
if (
isinstance(module, torch.nn.Conv2d)
or isinstance(module, torch.nn.Linear)
or isinstance(module, torch.nn.Conv1d)
or isinstance(module, torch.nn.AvgPool2d)
or isinstance(module, torch.nn.MaxPool2d)
or hasattr(module, "calculate_flop_self")
):
module.__flops__ = 0
def add_flops_counter_hook_function(module):
if isinstance(module, torch.nn.Conv2d):
if not hasattr(module, "__flops_handle__"):
handle = module.register_forward_hook(conv2d_flops_counter_hook)
module.__flops_handle__ = handle
elif isinstance(module, torch.nn.Conv1d):
if not hasattr(module, "__flops_handle__"):
handle = module.register_forward_hook(conv1d_flops_counter_hook)
module.__flops_handle__ = handle
elif isinstance(module, torch.nn.Linear):
if not hasattr(module, "__flops_handle__"):
handle = module.register_forward_hook(fc_flops_counter_hook)
module.__flops_handle__ = handle
elif isinstance(module, torch.nn.AvgPool2d) or isinstance(module, torch.nn.MaxPool2d):
if not hasattr(module, "__flops_handle__"):
handle = module.register_forward_hook(pool_flops_counter_hook)
module.__flops_handle__ = handle
elif hasattr(module, "calculate_flop_self"): # self-defined module
if not hasattr(module, "__flops_handle__"):
handle = module.register_forward_hook(self_calculate_flops_counter_hook)
module.__flops_handle__ = handle
def remove_hook_function(module):
hookers = ["__batch_counter_handle__", "__flops_handle__"]
for hooker in hookers:
if hasattr(module, hooker):
handle = getattr(module, hooker)
handle.remove()
keys = ["__flops__", "__batch_counter__", "__flops__"] + hookers
for ckey in keys:
if hasattr(module, ckey):
delattr(module, ckey)

View File

@ -1,66 +1,70 @@
import os
class GPUManager:
queries = ("index", "gpu_name", "memory.free", "memory.used", "memory.total", "power.draw", "power.limit")
def __init__(self):
all_gpus = self.query_gpu(False)
def get_info(self, ctype):
cmd = "nvidia-smi --query-gpu={} --format=csv,noheader".format(ctype)
lines = os.popen(cmd).readlines()
lines = [line.strip("\n") for line in lines]
return lines
def query_gpu(self, show=True):
num_gpus = len(self.get_info("index"))
all_gpus = [{} for i in range(num_gpus)]
for query in self.queries:
infos = self.get_info(query)
for idx, info in enumerate(infos):
all_gpus[idx][query] = info
if "CUDA_VISIBLE_DEVICES" in os.environ:
CUDA_VISIBLE_DEVICES = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
selected_gpus = []
for idx, CUDA_VISIBLE_DEVICE in enumerate(CUDA_VISIBLE_DEVICES):
find = False
for gpu in all_gpus:
if gpu["index"] == CUDA_VISIBLE_DEVICE:
assert not find, "Duplicate cuda device index : {}".format(CUDA_VISIBLE_DEVICE)
find = True
selected_gpus.append(gpu.copy())
selected_gpus[-1]["index"] = "{}".format(idx)
assert find, "Does not find the device : {}".format(CUDA_VISIBLE_DEVICE)
all_gpus = selected_gpus
if show:
allstrings = ""
for gpu in all_gpus:
string = "| "
for query in self.queries:
if query.find("memory") == 0:
xinfo = "{:>9}".format(gpu[query])
else:
xinfo = gpu[query]
string = string + query + " : " + xinfo + " | "
allstrings = allstrings + string + "\n"
return allstrings
else:
return all_gpus
def select_by_memory(self, numbers=1):
all_gpus = self.query_gpu(False)
assert numbers <= len(all_gpus), "Require {} gpus more than you have".format(numbers)
alls = []
for idx, gpu in enumerate(all_gpus):
free_memory = gpu["memory.free"]
free_memory = free_memory.split(" ")[0]
free_memory = int(free_memory)
index = gpu["index"]
alls.append((free_memory, index))
alls.sort(reverse=True)
alls = [int(alls[i][1]) for i in range(numbers)]
return sorted(alls)
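# A usage sketch, assuming nvidia-smi is available on PATH:
def _example_select_gpu():
    manager = GPUManager()
    chosen = manager.select_by_memory(1)  # e.g. [0], the GPU with the most free memory
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in chosen)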
"""
if __name__ == '__main__':
manager = GPUManager()

View File

@ -1,4 +1,5 @@
import os
import hashlib
def get_md5_file(file_path, post_truncated=5):
@ -9,7 +10,7 @@ def get_md5_file(file_path, post_truncated=5):
md5_hash.update(content)
digest = md5_hash.hexdigest()
else:
raise ValueError("[get_md5_file] {:} does not exist".format(file_path))
if post_truncated is None:
return digest
else:

View File

@ -10,7 +10,9 @@ from log_utils import time_string
def evaluate_one_shot(model, xloader, api, cal_mode, seed=111):
print(
"This is an old version of the code for NAS-Bench-API and should be modified to align with the new version. Please contact me for more details if you use this function."
)
weights = deepcopy(model.state_dict())
model.train(cal_mode)
with torch.no_grad():
@ -22,24 +24,28 @@ def evaluate_one_shot(model, xloader, api, cal_mode, seed=111):
random.shuffle(archs)
for idx, arch in enumerate(archs):
arch_index = api.query_index_by_arch(arch)
metrics = api.get_more_info(arch_index, "cifar10-valid", None, False, False)
gt_accs_10_valid.append(metrics["valid-accuracy"])
metrics = api.get_more_info(arch_index, "cifar10", None, False, False)
gt_accs_10_test.append(metrics["test-accuracy"])
select_logits = []
for i, node_info in enumerate(arch.nodes):
for op, xin in node_info:
node_str = "{:}<-{:}".format(i + 1, xin)
op_index = model.op_names.index(op)
select_logits.append(logits[model.edge2index[node_str], op_index])
cur_prob = sum(select_logits).item()
probs.append(cur_prob)
cor_prob_valid = np.corrcoef(probs, gt_accs_10_valid)[0, 1]
cor_prob_test = np.corrcoef(probs, gt_accs_10_test)[0, 1]
print(
"{:} correlation for probabilities : {:.6f} on CIFAR-10 validation and {:.6f} on CIFAR-10 test".format(
time_string(), cor_prob_valid, cor_prob_test
)
)
for idx, arch in enumerate(archs):
model.set_cal_mode("dynamic", arch)
try:
inputs, targets = next(loader_iter)
except:
@ -52,6 +58,10 @@ def evaluate_one_shot(model, xloader, api, cal_mode, seed=111):
if idx != 0 and (idx % 500 == 0 or idx + 1 == len(archs)):
cor_accs_valid = np.corrcoef(accuracies, gt_accs_10_valid[: idx + 1])[0, 1]
cor_accs_test = np.corrcoef(accuracies, gt_accs_10_test[: idx + 1])[0, 1]
print(
"{:} {:05d}/{:05d} mode={:5s}, correlation : accs={:.5f} for CIFAR-10 valid, {:.5f} for CIFAR-10 test.".format(
time_string(), idx, len(archs), "Train" if cal_mode else "Eval", cor_accs_valid, cor_accs_test
)
)
model.load_state_dict(weights)
return archs, probs, accuracies
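# A usage sketch with hypothetical names, assuming a NAS-Bench-201 style `api`, a trained one-shot
# search model, and a CIFAR-10 validation loader:
def _example_evaluate_one_shot(search_model, valid_loader, api):
    archs, probs, accuracies = evaluate_one_shot(search_model, valid_loader, api, cal_mode=False)
    return archs, probs, accuracies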

View File

@ -1,18 +1,17 @@
def split_str2indexes(string: str, max_check: int, length_limit=5):
if not isinstance(string, str):
raise ValueError("Invalid scheme for {:}".format(string))
srangestr = "".join(string.split())
indexes = set()
for srange in srangestr.split(","):
srange = srange.split("-")
if len(srange) != 2:
raise ValueError("invalid srange : {:}".format(srange))
if length_limit is not None:
assert len(srange[0]) == len(srange[1]) == length_limit, "invalid srange : {:}".format(srange)
srange = (int(srange[0]), int(srange[1]))
if not (0 <= srange[0] <= srange[1] < max_check):
raise ValueError("{:} vs {:} vs {:}".format(srange[0], srange[1], max_check))
for i in range(srange[0], srange[1] + 1):
indexes.add(i)
return indexes
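# Example: with the default length_limit=5, every bound must be a 5-character zero-padded integer.
def _example_split_str2indexes():
    return split_str2indexes("00000-00003, 00010-00011", max_check=100)  # -> {0, 1, 2, 3, 10, 11}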

View File

@ -21,11 +21,12 @@ def get_conv2D_Wmats(tensor: np.ndarray) -> List[np.ndarray]:
"""
mats = []
N, M, imax, jmax = tensor.shape
assert N + M >= imax + jmax, "invalid tensor shape detected: {}x{} (NxM), {}x{} (i,j)".format(N, M, imax, jmax)
for i in range(imax):
for j in range(jmax):
w = tensor[:, :, i, j]
if N < M:
w = w.T
mats.append(w)
return mats
@ -44,8 +45,11 @@ def glorot_norm_check(W, N, M, rf_size, lower=0.5, upper=1.5):
elif (check1 > lower) & (check1 < upper):
return check1, True
else:
if rf_size > 1:
return check2, False
else:
return check1, False
def glorot_norm_fix(w, n, m, rf_size):
"""Apply Glorot Normalization Fix."""
@ -57,15 +61,16 @@ def glorot_norm_fix(w, n, m, rf_size):
def analyze_weights(weights, min_size, max_size, alphas, lognorms, spectralnorms, softranks, normalize, glorot_fix):
results = OrderedDict()
count = len(weights)
if count == 0:
return results
for i, weight in enumerate(weights):
M, N = np.min(weight.shape), np.max(weight.shape)
Q = N / M
results[i] = cur_res = OrderedDict(N=N, M=M, Q=Q)
check, checkTF = glorot_norm_check(weight, N, M, count)
cur_res["check"] = check
cur_res["checkTF"] = checkTF
# assume receptive field size is count
if glorot_fix:
weight = glorot_norm_fix(weight, N, M, count)
@ -93,18 +98,23 @@ def analyze_weights(weights, min_size, max_size, alphas, lognorms, spectralnorms
cur_res["summary"] = summary
continue
elif max_size > 0 and M > max_size:
summary = "Weight matrix {}/{} ({},{}): Skipping: too big (testing) (>{})".format(i + 1, count, M, N, max_size)
summary = "Weight matrix {}/{} ({},{}): Skipping: too big (testing) (>{})".format(
i + 1, count, M, N, max_size
)
cur_res["summary"] = summary
continue
else:
summary = []
if alphas:
import powerlaw
svd = TruncatedSVD(n_components=M - 1, n_iter=7, random_state=10)
svd.fit(weight.astype(float))
sv = svd.singular_values_
if normalize:
evals = sv * sv / N
else:
evals = sv * sv
lambda_max = np.max(evals)
fit = powerlaw.Fit(evals, xmax=lambda_max, verbose=False)
@ -123,9 +133,10 @@ def analyze_weights(weights, min_size, max_size, alphas, lognorms, spectralnorms
cur_res["logpnorm"] = logpnorm
summary.append(
"Weight matrix {}/{} ({},{}): Alpha: {}, Alpha Weighted: {}, D: {}, pNorm {}".format(i + 1, count, M, N, alpha,
alpha_weighted, D,
logpnorm))
"Weight matrix {}/{} ({},{}): Alpha: {}, Alpha Weighted: {}, D: {}, pNorm {}".format(
i + 1, count, M, N, alpha, alpha_weighted, D, logpnorm
)
)
if lognorms:
norm = np.linalg.norm(weight) # Frobenius Norm
@ -134,14 +145,16 @@ def analyze_weights(weights, min_size, max_size, alphas, lognorms, spectralnorms
cur_res["lognorm"] = lognorm
X = np.dot(weight.T, weight)
if normalize:
X = X / N
normX = np.linalg.norm(X) # Frobenius Norm
cur_res["normX"] = normX
lognormX = np.log10(normX)
cur_res["lognormX"] = lognormX
summary.append(
"Weight matrix {}/{} ({},{}): LogNorm: {} ; LogNormX: {}".format(i + 1, count, M, N, lognorm, lognormX))
"Weight matrix {}/{} ({},{}): LogNorm: {} ; LogNormX: {}".format(i + 1, count, M, N, lognorm, lognormX)
)
if softranks:
softrank = norm ** 2 / sv_max ** 2
@ -150,8 +163,9 @@ def analyze_weights(weights, min_size, max_size, alphas, lognorms, spectralnorms
cur_res["softrank"] = softrank
cur_res["softranklog"] = softranklog
cur_res["softranklogratio"] = softranklogratio
summary += "{}. Softrank: {}. Softrank log: {}. Softrank log ratio: {}".format(summary, softrank, softranklog,
softranklogratio)
summary += "{}. Softrank: {}. Softrank log: {}. Softrank log ratio: {}".format(
summary, softrank, softranklog, softranklogratio
)
cur_res["summary"] = "\n".join(summary)
return results
@ -181,7 +195,7 @@ def compute_details(results):
"numofSpikes": "Number of spikes per MP fit",
"ratio_numofSpikes": "aka, percent_mass, Number of spikes / total number of evals",
"softrank_mp": "Softrank for MP fit",
"logpnorm": "alpha pNorm"
"logpnorm": "alpha pNorm",
}
metrics_stats = []
@ -194,8 +208,11 @@ def compute_details(results):
metrics_stats.append("{}_compound_max".format(metric))
metrics_stats.append("{}_compound_avg".format(metric))
columns = ["layer_id", "layer_type", "N", "M", "layer_count", "slice",
"slice_count", "level", "comment"] + [*metrics] + metrics_stats
columns = (
["layer_id", "layer_type", "N", "M", "layer_count", "slice", "slice_count", "level", "comment"]
+ [*metrics]
+ metrics_stats
)
metrics_values = {}
metrics_values_compound = {}
@ -232,8 +249,15 @@ def compute_details(results):
M = summary["M"]
Mtotal += M
data = {"layer_id": layer_id, "layer_type": layer_type, "N": N, "M": M, "slice": slice_id, "level": "SLICE",
"comment": "Slice level"}
data = {
"layer_id": layer_id,
"layer_type": layer_type,
"N": N,
"M": M,
"slice": slice_id,
"level": "SLICE",
"comment": "Slice level",
}
for metric in metrics:
if metric in summary:
value = summary[metric]
@ -242,8 +266,15 @@ def compute_details(results):
compounds[metric].append(value)
data[metric] = value
data = {"layer_id": layer_id, "layer_type": layer_type, "N": Ntotal, "M": Mtotal, "slice_count": slice_count,
"level": "LAYER", "comment": "Layer level"}
data = {
"layer_id": layer_id,
"layer_type": layer_type,
"N": Ntotal,
"M": Mtotal,
"slice_count": slice_count,
"level": "LAYER",
"comment": "Layer level",
}
# Compute the compound value over the slices
for metric, value in compounds.items():
count = len(value)
@ -282,9 +313,17 @@ def compute_details(results):
return final_summary
def analyze(
model: nn.Module,
min_size=50,
max_size=0,
alphas: bool = False,
lognorms: bool = True,
spectralnorms: bool = False,
softranks: bool = False,
normalize: bool = False,
glorot_fix: bool = False,
):
"""
Analyze the weight matrices of a model.
:param model: A PyTorch model
@ -311,9 +350,11 @@ def analyze(model: nn.Module, min_size=50, max_size=0,
weights = [module.weight.cpu().detach().numpy()]
else:
weights = get_conv2D_Wmats(module.weight.cpu().detach().numpy())
results = analyze_weights(
weights, min_size, max_size, alphas, lognorms, spectralnorms, softranks, normalize, glorot_fix
)
results["id"] = index
results["type"] = type(module)
all_results[index] = results
summary = compute_details(all_results)
return all_results, summary
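# A usage sketch with a hypothetical model argument, assuming a standard PyTorch network whose
# Linear and Conv2d layers are the ones analyzed above:
def _example_analyze(model):
    all_results, summary = analyze(model, min_size=50, lognorms=True, softranks=False)
    return summary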