#!/usr/bin/env python """Console script for lm-quant-toolkit.""" import argparse import sys from hqq.core.quantize import BaseQuantizeConfig as HQQQuantConfig from lm_quant_toolkit.eval.bench import ( ALL_MODELS, AUTOAWQ_CONFIGS, BNB_CONFIGS, GPTQ_CONFIGS, MXQ_CONFIGS, do_expermient, ) from lm_quant_toolkit.eval.bench_vit import ALL_MODELS as ALL_VIT_MODELS from lm_quant_toolkit.eval.bench_vit import MXQ_CONFIGS as VIT_MXQ_CONFIGS from lm_quant_toolkit.eval.bench_vit import do_expermient as do_expermient_vit from lm_quant_toolkit.eval.common import HQQ_CONFIGS from lm_quant_toolkit.misc.quant_sim import dump_mxq_configs, dump_mxq_objectives from lm_quant_toolkit.misc.qweight import dump_quant_allocation def get_parser_args(): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers() parser_llm = subparsers.add_parser("llm", help="Evaluate Language Model") parser_llm.set_defaults(which="llm") parser_llm.add_argument( "--model", type=str, nargs="+", default="1", help="Model to evaluate", ) parser_llm.add_argument( "--algo", type=str, choices=[ "fp16", "hqq", "mxq", "gptq", "awq", "bnb", ], nargs="+", default=None, help="Algorithm to evaluate", ) parser_llm.add_argument( "--config", type=str, default=None, nargs="+", help="Algorithm specific configuration to evaluate", ) parser_llm.add_argument( "--task", type=str, default=None, choices=[ "quant", "eval_model_storage", "eval_ppl", "eval_leaderboard", ], help="Task to evaluate on.", ) parser_llm.add_argument( "--track-cuda-memory", action="store_true", default=False, help="Whether to dump CUDA memory snapshot", ) parser_llm.add_argument( "--quant-snapshot-dir", default=None, type=str, help="directory to where quantized snapshots are stored", ) parser_llm.add_argument( "--result-dir", default=None, type=str, help="directory to where evaluation results are stored", ) parser_llm.add_argument( "--experiment-name", default=None, type=str, help="name of the experiment", ) parser_llm.add_argument( "--weight-algo", default=None, type=str, help="Apply weighted F Norm for MiLP objective, None or `kurt-scaled`", ) parser_llm.add_argument( "--boost-layer", nargs="+", default=None, type=int, help="Layers to increase memory budget", ) parser_llm.add_argument( "--decline-layer", nargs="+", default=None, type=int, help="Layers to decrease memory budget", ) parser_llm.add_argument( "--boost-stop", default=None, type=int, help="stops to increase", ) parser_llm.add_argument( "--decline-stop", default=None, type=int, help="stops to decrease", ) parser_llm.add_argument( "--factor", default=2.0, type=float, help="factor to apply", ) parser_llm.add_argument( "--top-m-layer", default=1, type=int, help="The top m most sensitive layers to assign extra memory. 0 means all layers.", ) parser_llm.add_argument( "--ablation", dest="ablation", action="store_true", help="Enable ablation mode", ) parser_llm.add_argument( "--no-ablation", dest="ablation", action="store_false", help="Disable ablation mode", ) parser_llm.set_defaults(ablation=False) parser_vit = subparsers.add_parser("vit", help="Evaluate ViT models") parser_vit.set_defaults(which="vit") parser_vit.add_argument( "--model", type=str, nargs="+", default="1", help="Model to evaluate", ) parser_vit.add_argument( "--algo", type=str, choices=[ "fp16", "hqq", "bnb", "mxq", "gptq", "awq", ], nargs="+", default=None, help="Algorithm to evaluate", ) parser_vit.add_argument( "--config", type=str, default=None, nargs="+", help="Algorithm specific configuration to evaluate", ) parser_vit.add_argument( "--task", type=str, default=None, choices=[ "eval_linear_probe", "eval_zeroshot_cls", ], help="Task to evaluate on.", ) parser_vit.add_argument( "--track-cuda-memory", action="store_true", default=False, help="Whether to dump CUDA memory snapshot", ) parser_vit.add_argument( "--quant-snapshot-dir", default=None, type=str, help="directory to where quantized snapshots are stored", ) parser_vit.add_argument( "--result-dir", default=None, type=str, help="directory to where evaluation results are stored", ) parser_vit.add_argument( "--experiment-name", default=None, type=str, help="name of the experiment", ) parser_vit.add_argument( "--weight-algo", default=None, type=str, help="Apply weighted F Norm for MiLP objective, None or `kurt-scaled`", ) parser_vit.add_argument( "--boost-stop", default=None, type=int, help="stops to increase", ) parser_vit.add_argument( "--decline-stop", default=None, type=int, help="stops to decrease", ) parser_vit.add_argument( "--factor", default=2.0, type=float, help="factor to apply", ) parser_vit.add_argument( "--top-m-layer", default=1, type=int, help="The top m most sensitive layers to assign extra memory. 0 means all layers.", ) parser_vit.add_argument( "--ablation", dest="ablation", action="store_true", help="Enable ablation mode", ) parser_vit.add_argument( "--no-ablation", dest="ablation", action="store_false", help="Disable ablation mode", ) parser_dump = subparsers.add_parser("dump", help="Dump MXQ meta data") parser_dump.set_defaults(which="dump") parser_dump.add_argument( "--type", type=str, default=None, choices=[ "objective", "quant_config", "quant_config_sim", ], help="Type of data to dump.", ) parser_dump.add_argument( "--model", type=str, nargs="+", default="1", help="Model to evaluate", ) parser_dump.add_argument( "--budget", type=str, default=None, nargs="+", help="Bit budgets", ) parser_dump.add_argument( "--output-file", type=str, default="mxq-objectives.csv", help="Output file location", ) parser_dump.add_argument( "--quant-snapshot-dir", default=None, type=str, help="directory to where quantized snapshots are stored", ) parser_dump.add_argument( "--attempt", default=None, type=str, nargs="+", help="Experiment attempts", ) parser_dump.add_argument( "--weight-algo", default=None, type=str, help="Apply weighted F Norm for MiLP objective, None or `kurt-scaled`", ) parser_dump.add_argument( "--factor", default=None, type=float, help="Factor to apply to the prioritized weights", ) parser_dump.add_argument( "--config", default=None, type=str, nargs="+", help="bit-group configurations", ) parser_dump.add_argument( "--calib-dataset", default=None, type=str, nargs="+", help="calibration dataset(s) to use", ) args = parser.parse_args() return parser, args def _get_configs(algos, config_names): algo_configs = {} for algo in algos: match algo: case "fp16": algo_configs[algo] = [("base", {})] case "hqq": if config_names is None: algo_configs[algo] = HQQ_CONFIGS else: algo_configs[algo] = [ cfg for cfg in HQQ_CONFIGS if cfg[0] in config_names ] case "mxq": if config_names is None: algo_configs[algo] = MXQ_CONFIGS else: algo_configs[algo] = [ ( f"{bits:.2f}".replace(".", "_"), HQQQuantConfig(mixed=True, budget=bits, quant_scale=True), ) for bits in [float(cfg) for cfg in config_names] ] case "awq": if config_names is None: algo_configs[algo] = AUTOAWQ_CONFIGS else: algo_configs[algo] = [ cfg for cfg in AUTOAWQ_CONFIGS if cfg[0] in config_names ] case "gptq": if config_names is None: algo_configs[algo] = GPTQ_CONFIGS else: algo_configs[algo] = [ cfg for cfg in GPTQ_CONFIGS if cfg[0] in config_names ] case "bnb": if config_names is None: algo_configs[algo] = BNB_CONFIGS else: algo_configs[algo] = [ cfg for cfg in BNB_CONFIGS if cfg[0] in config_names ] return algo_configs def _get_vit_configs(algos, config_names): algo_configs = {} for algo in algos: match algo: case "fp16": algo_configs[algo] = [("base", {})] case "hqq": if config_names is None: algo_configs[algo] = HQQ_CONFIGS else: algo_configs[algo] = [ cfg for cfg in HQQ_CONFIGS if cfg[0] in config_names ] case "mxq": if config_names is None: algo_configs[algo] = VIT_MXQ_CONFIGS else: algo_configs[algo] = [ ( f"{bits:.2f}".replace(".", "_"), HQQQuantConfig(mixed=True, budget=bits, quant_scale=True), ) for bits in [float(cfg) for cfg in config_names] ] return algo_configs def main(): parser, base = get_parser_args() print(base) if not hasattr(base, "which"): parser.print_help() return 2 try: if base.which == "llm": main_llm(base) elif base.which == "vit": main_vit(base) elif base.which == "dump": main_dump(base) except Exception as e: print(e) return 1 return 0 def main_llm(args): # if len(args.algo) > 1 and args.config is not None: # print("When config is specified, you can only evaluate one algorithm") # return configs = _get_configs(args.algo, args.config) indicies = [int(m) for m in args.model] models = [ALL_MODELS[i] for i in indicies] tasks = {algo: {"type": args.task, "configs": configs[algo]} for algo in args.algo} experiment_name = args.experiment_name if experiment_name is None or len(experiment_name) < 3: algo_str = "-".join(args.algo) cfg_str = "-".join(args.config) experiment_name = f"{args.task}-{algo_str}-{cfg_str}" kwargs = { "weight_algo": args.weight_algo, "boost_layers": args.boost_layer, "decline_layers": args.decline_layer, "boost_stop": args.boost_stop, "decline_stop": args.decline_stop, "top_m_layer": args.top_m_layer, "ablation": args.ablation, "factor": args.factor, } do_expermient( experiment_name, models, tasks, quant_dir=args.quant_snapshot_dir, result_dir=args.result_dir, track_cuda_memory=args.track_cuda_memory, **kwargs, ) def main_vit(args): configs = _get_vit_configs(args.algo, args.config) indicies = [int(m) for m in args.model] models = [ALL_VIT_MODELS[i] for i in indicies] tasks = {algo: {"type": args.task, "configs": configs[algo]} for algo in args.algo} experiment_name = args.experiment_name if experiment_name is None or len(experiment_name) < 3: algo_str = "-".join(args.algo) cfg_str = "-".join(args.config) experiment_name = f"{args.task}-{algo_str}-{cfg_str}" kwargs = { "weight_algo": args.weight_algo, "boost_stop": args.boost_stop, "decline_stop": args.decline_stop, "top_m_layer": args.top_m_layer, "ablation": args.ablation, "factor": args.factor, } do_expermient_vit( experiment_name, models, tasks, quant_dir=args.quant_snapshot_dir, result_dir=args.result_dir, track_cuda_memory=args.track_cuda_memory, **kwargs, ) def main_dump(args): if args.type == "objective": budgets = args.budget csv_fp = args.output_file indicies = [int(m) for m in args.model] models = [ALL_MODELS[i] for i in indicies] dump_mxq_objectives(models, budgets, csv_fp=csv_fp) elif args.type == "quant_config": quant_dir = args.quant_snapshot_dir attempts = args.attempt if "hqq" in attempts: budgets = args.budget algo = "hqq" else: budgets = [ f"{bits:.2f}".replace(".", "_") for bits in [float(cfg) for cfg in args.budget] ] algo = "mxq" csv_fp = args.output_file indicies = [int(m) for m in args.model] models = [ALL_MODELS[i] for i in indicies] dump_quant_allocation( quant_dir, models, budgets, csv_fp=csv_fp, attempts=attempts, algo=algo, ) elif args.type == "quant_config_sim": budgets = [bits for bits in [float(cfg) for cfg in args.budget]] algo = "mxq" csv_fp = args.output_file indicies = [int(m) for m in args.model] models = [ALL_MODELS[i] for i in indicies] dump_mxq_configs( models, budgets, csv_fp=csv_fp, weight_algo=args.weight_algo, factor=args.factor, ) if __name__ == "__main__": sys.exit(main()) # pragma: no cover