File size: 15,433 Bytes
#!/usr/bin/env python
"""Console script for lm-quant-toolkit."""

import argparse
import sys

from hqq.core.quantize import BaseQuantizeConfig as HQQQuantConfig

from lm_quant_toolkit.eval.bench import (
    ALL_MODELS,
    AUTOAWQ_CONFIGS,
    BNB_CONFIGS,
    GPTQ_CONFIGS,
    MXQ_CONFIGS,
    do_expermient,
)
from lm_quant_toolkit.eval.bench_vit import ALL_MODELS as ALL_VIT_MODELS
from lm_quant_toolkit.eval.bench_vit import MXQ_CONFIGS as VIT_MXQ_CONFIGS
from lm_quant_toolkit.eval.bench_vit import do_expermient as do_expermient_vit
from lm_quant_toolkit.eval.common import HQQ_CONFIGS
from lm_quant_toolkit.misc.quant_sim import dump_mxq_configs, dump_mxq_objectives
from lm_quant_toolkit.misc.qweight import dump_quant_allocation


def _add_eval_arguments(parser, algo_choices, task_choices, layer_overrides=False):
    """Register the evaluation options shared by the ``llm`` and ``vit`` sub-commands.

    Options are added in a fixed order so the generated ``--help`` output
    stays stable.

    Args:
        parser: Sub-command parser to extend in place.
        algo_choices: Accepted values for ``--algo``, in help-display order.
        task_choices: Accepted values for ``--task``.
        layer_overrides: When true, also register ``--boost-layer`` and
            ``--decline-layer``.
    """
    parser.add_argument(
        "--model",
        type=str,
        nargs="+",
        # Same shape as a user-supplied value (a list of strings), so that
        # consumers never end up iterating over the characters of a string.
        default=["1"],
        help="Model to evaluate",
    )
    parser.add_argument(
        "--algo",
        type=str,
        choices=algo_choices,
        nargs="+",
        default=None,
        help="Algorithm to evaluate",
    )
    parser.add_argument(
        "--config",
        type=str,
        default=None,
        nargs="+",
        help="Algorithm specific configuration to evaluate",
    )
    parser.add_argument(
        "--task",
        type=str,
        default=None,
        choices=task_choices,
        help="Task to evaluate on.",
    )
    parser.add_argument(
        "--track-cuda-memory",
        action="store_true",
        default=False,
        help="Whether to dump CUDA memory snapshot",
    )
    parser.add_argument(
        "--quant-snapshot-dir",
        default=None,
        type=str,
        help="directory to where quantized snapshots are stored",
    )
    parser.add_argument(
        "--result-dir",
        default=None,
        type=str,
        help="directory to where evaluation results are stored",
    )
    parser.add_argument(
        "--experiment-name",
        default=None,
        type=str,
        help="name of the experiment",
    )
    parser.add_argument(
        "--weight-algo",
        default=None,
        type=str,
        help="Apply weighted F Norm for MiLP objective, None or `kurt-scaled`",
    )
    if layer_overrides:
        parser.add_argument(
            "--boost-layer",
            nargs="+",
            default=None,
            type=int,
            help="Layers to increase memory budget",
        )
        parser.add_argument(
            "--decline-layer",
            nargs="+",
            default=None,
            type=int,
            help="Layers to decrease memory budget",
        )
    parser.add_argument(
        "--boost-stop",
        default=None,
        type=int,
        help="stops to increase",
    )
    parser.add_argument(
        "--decline-stop",
        default=None,
        type=int,
        help="stops to decrease",
    )
    parser.add_argument(
        "--factor",
        default=2.0,
        type=float,
        help="factor to apply",
    )
    parser.add_argument(
        "--top-m-layer",
        default=1,
        type=int,
        help="The top m most sensitive layers to assign extra memory. 0 means all layers.",
    )
    parser.add_argument(
        "--ablation",
        dest="ablation",
        action="store_true",
        help="Enable ablation mode",
    )
    parser.add_argument(
        "--no-ablation",
        dest="ablation",
        action="store_false",
        help="Disable ablation mode",
    )
    # Two actions share ``dest="ablation"``; state the default explicitly
    # instead of relying on which of them happens to be registered first.
    parser.set_defaults(ablation=False)


def get_parser_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Three sub-commands are defined: ``llm``, ``vit`` and ``dump``. Each one
    stores its own name in the ``which`` attribute of the parsed namespace;
    when no sub-command is given, ``which`` is absent.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        A ``(parser, args)`` tuple: the top-level ``ArgumentParser`` and the
        parsed ``Namespace``.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()

    parser_llm = subparsers.add_parser("llm", help="Evaluate Language Model")
    parser_llm.set_defaults(which="llm")
    _add_eval_arguments(
        parser_llm,
        algo_choices=["fp16", "hqq", "mxq", "gptq", "awq", "bnb"],
        task_choices=[
            "quant",
            "eval_model_storage",
            "eval_ppl",
            "eval_leaderboard",
        ],
        layer_overrides=True,
    )

    parser_vit = subparsers.add_parser("vit", help="Evaluate ViT models")
    parser_vit.set_defaults(which="vit")
    _add_eval_arguments(
        parser_vit,
        algo_choices=["fp16", "hqq", "bnb", "mxq", "gptq", "awq"],
        task_choices=[
            "eval_linear_probe",
            "eval_zeroshot_cls",
        ],
    )

    parser_dump = subparsers.add_parser("dump", help="Dump MXQ meta data")
    parser_dump.set_defaults(which="dump")
    parser_dump.add_argument(
        "--type",
        type=str,
        default=None,
        choices=[
            "objective",
            "quant_config",
            "quant_config_sim",
        ],
        help="Type of data to dump.",
    )
    parser_dump.add_argument(
        "--model",
        type=str,
        nargs="+",
        default=["1"],
        help="Model to evaluate",
    )
    parser_dump.add_argument(
        "--budget",
        type=str,
        default=None,
        nargs="+",
        help="Bit budgets",
    )
    parser_dump.add_argument(
        "--output-file",
        type=str,
        default="mxq-objectives.csv",
        help="Output file location",
    )
    parser_dump.add_argument(
        "--quant-snapshot-dir",
        default=None,
        type=str,
        help="directory to where quantized snapshots are stored",
    )
    parser_dump.add_argument(
        "--attempt",
        default=None,
        type=str,
        nargs="+",
        help="Experiment attempts",
    )
    parser_dump.add_argument(
        "--weight-algo",
        default=None,
        type=str,
        help="Apply weighted F Norm for MiLP objective, None or `kurt-scaled`",
    )
    parser_dump.add_argument(
        "--factor",
        default=None,
        type=float,
        help="Factor to apply to the prioritized weights",
    )
    parser_dump.add_argument(
        "--config",
        default=None,
        type=str,
        nargs="+",
        help="bit-group configurations",
    )
    parser_dump.add_argument(
        "--calib-dataset",
        default=None,
        type=str,
        nargs="+",
        help="calibration dataset(s) to use",
    )

    args = parser.parse_args(argv)
    return parser, args


def _get_configs(algos, config_names):
    algo_configs = {}
    for algo in algos:
        match algo:
            case "fp16":
                algo_configs[algo] = [("base", {})]
            case "hqq":
                if config_names is None:
                    algo_configs[algo] = HQQ_CONFIGS
                else:
                    algo_configs[algo] = [
                        cfg for cfg in HQQ_CONFIGS if cfg[0] in config_names
                    ]
            case "mxq":
                if config_names is None:
                    algo_configs[algo] = MXQ_CONFIGS
                else:
                    algo_configs[algo] = [
                        (
                            f"{bits:.2f}".replace(".", "_"),
                            HQQQuantConfig(mixed=True, budget=bits, quant_scale=True),
                        )
                        for bits in [float(cfg) for cfg in config_names]
                    ]
            case "awq":
                if config_names is None:
                    algo_configs[algo] = AUTOAWQ_CONFIGS
                else:
                    algo_configs[algo] = [
                        cfg for cfg in AUTOAWQ_CONFIGS if cfg[0] in config_names
                    ]
            case "gptq":
                if config_names is None:
                    algo_configs[algo] = GPTQ_CONFIGS
                else:
                    algo_configs[algo] = [
                        cfg for cfg in GPTQ_CONFIGS if cfg[0] in config_names
                    ]
            case "bnb":
                if config_names is None:
                    algo_configs[algo] = BNB_CONFIGS
                else:
                    algo_configs[algo] = [
                        cfg for cfg in BNB_CONFIGS if cfg[0] in config_names
                    ]

    return algo_configs


def _get_vit_configs(algos, config_names):
    algo_configs = {}
    for algo in algos:
        match algo:
            case "fp16":
                algo_configs[algo] = [("base", {})]
            case "hqq":
                if config_names is None:
                    algo_configs[algo] = HQQ_CONFIGS
                else:
                    algo_configs[algo] = [
                        cfg for cfg in HQQ_CONFIGS if cfg[0] in config_names
                    ]
            case "mxq":
                if config_names is None:
                    algo_configs[algo] = VIT_MXQ_CONFIGS
                else:
                    algo_configs[algo] = [
                        (
                            f"{bits:.2f}".replace(".", "_"),
                            HQQQuantConfig(mixed=True, budget=bits, quant_scale=True),
                        )
                        for bits in [float(cfg) for cfg in config_names]
                    ]
    return algo_configs


def main():
    """Parse the command line and dispatch to the selected sub-command.

    Returns:
        Process exit status: ``0`` on success, ``1`` when the sub-command
        raised an exception, ``2`` when no sub-command was given (the help
        text is printed in that case).
    """
    # Imported locally so this function does not depend on a new file-level import.
    import traceback

    parser, base = get_parser_args()
    print(base)
    if not hasattr(base, "which"):
        parser.print_help()
        return 2

    handlers = {"llm": main_llm, "vit": main_vit, "dump": main_dump}
    handler = handlers.get(base.which)
    try:
        if handler is not None:
            handler(base)
    except Exception:
        # Top-level boundary: report the full traceback on stderr. The bare
        # message alone (e.g. just "'bnb'" for a KeyError) does not tell the
        # user what went wrong or where.
        traceback.print_exc()
        return 1
    return 0


def main_llm(args):
    """Run the language-model experiment described by the parsed ``llm`` arguments.

    Models are selected by index into ``ALL_MODELS``. When no usable
    experiment name is given (missing or shorter than three characters), one
    is derived from the task, the algorithms and, if present, the
    configuration names.

    Args:
        args: Namespace produced by the ``llm`` sub-command parser.

    Raises:
        ValueError: If no algorithm was selected with ``--algo``.
    """
    if not args.algo:
        raise ValueError("No algorithm selected; pass at least one value to --algo")

    # NOTE: the same --config names are applied to every selected algorithm.
    configs = _get_configs(args.algo, args.config)
    models = [ALL_MODELS[int(m)] for m in args.model]
    tasks = {algo: {"type": args.task, "configs": configs[algo]} for algo in args.algo}

    experiment_name = args.experiment_name
    if experiment_name is None or len(experiment_name) < 3:
        # --config is optional (None when omitted); it then contributes
        # nothing to the generated name instead of crashing str.join().
        name_parts = [str(args.task), *args.algo, *(args.config or [])]
        experiment_name = "-".join(name_parts)

    do_expermient(
        experiment_name,
        models,
        tasks,
        quant_dir=args.quant_snapshot_dir,
        result_dir=args.result_dir,
        track_cuda_memory=args.track_cuda_memory,
        weight_algo=args.weight_algo,
        boost_layers=args.boost_layer,
        decline_layers=args.decline_layer,
        boost_stop=args.boost_stop,
        decline_stop=args.decline_stop,
        top_m_layer=args.top_m_layer,
        ablation=args.ablation,
        factor=args.factor,
    )


def main_vit(args):
    """Run the ViT experiment described by the parsed ``vit`` arguments.

    Models are selected by index into ``ALL_VIT_MODELS``. When no usable
    experiment name is given (missing or shorter than three characters), one
    is derived from the task, the algorithms and, if present, the
    configuration names.

    Args:
        args: Namespace produced by the ``vit`` sub-command parser.

    Raises:
        ValueError: If no algorithm was selected with ``--algo``.
    """
    if not args.algo:
        raise ValueError("No algorithm selected; pass at least one value to --algo")

    configs = _get_vit_configs(args.algo, args.config)
    models = [ALL_VIT_MODELS[int(m)] for m in args.model]
    tasks = {algo: {"type": args.task, "configs": configs[algo]} for algo in args.algo}

    experiment_name = args.experiment_name
    if experiment_name is None or len(experiment_name) < 3:
        # --config is optional (None when omitted); it then contributes
        # nothing to the generated name instead of crashing str.join().
        name_parts = [str(args.task), *args.algo, *(args.config or [])]
        experiment_name = "-".join(name_parts)

    do_expermient_vit(
        experiment_name,
        models,
        tasks,
        quant_dir=args.quant_snapshot_dir,
        result_dir=args.result_dir,
        track_cuda_memory=args.track_cuda_memory,
        weight_algo=args.weight_algo,
        boost_stop=args.boost_stop,
        decline_stop=args.decline_stop,
        top_m_layer=args.top_m_layer,
        ablation=args.ablation,
        factor=args.factor,
    )


def main_dump(args):
    """Dump MXQ metadata to a CSV file according to ``args.type``.

    * ``objective``: calls ``dump_mxq_objectives`` with the raw budgets.
    * ``quant_config``: calls ``dump_quant_allocation``. If ``"hqq"`` is among
      the attempts the budgets are passed through unchanged with algo
      ``"hqq"``; otherwise each budget is parsed as a float and rendered with
      two decimals and ``_`` in place of ``.`` (algo ``"mxq"``).
    * ``quant_config_sim``: calls ``dump_mxq_configs`` with float budgets.

    Models are selected by index into ``ALL_MODELS``.

    Args:
        args: Namespace produced by the ``dump`` sub-command parser.

    Raises:
        ValueError: If ``--type`` is missing or unknown, if ``--attempt`` is
            missing for ``quant_config``, or if ``--budget`` is missing on a
            path that has to parse the budgets as numbers.
    """
    dump_type = args.type
    if dump_type not in ("objective", "quant_config", "quant_config_sim"):
        # Previously a missing --type silently did nothing.
        raise ValueError(
            "Nothing to dump: pass --type objective, quant_config or quant_config_sim"
        )
    if dump_type == "quant_config" and args.attempt is None:
        raise ValueError("--attempt is required when --type is quant_config")

    uses_hqq = dump_type == "quant_config" and "hqq" in args.attempt
    # Every path except `objective` and the hqq flavour of `quant_config`
    # iterates over the budgets to convert them to floats.
    if dump_type != "objective" and not uses_hqq and args.budget is None:
        raise ValueError(f"--budget is required when --type is {dump_type}")

    models = [ALL_MODELS[int(m)] for m in args.model]
    csv_fp = args.output_file

    if dump_type == "objective":
        dump_mxq_objectives(models, args.budget, csv_fp=csv_fp)
    elif dump_type == "quant_config":
        if uses_hqq:
            budgets = args.budget
            algo = "hqq"
        else:
            # e.g. "4.25" -> "4_25"
            budgets = [f"{float(cfg):.2f}".replace(".", "_") for cfg in args.budget]
            algo = "mxq"
        dump_quant_allocation(
            args.quant_snapshot_dir,
            models,
            budgets,
            csv_fp=csv_fp,
            attempts=args.attempt,
            algo=algo,
        )
    else:  # quant_config_sim
        dump_mxq_configs(
            models,
            [float(cfg) for cfg in args.budget],
            csv_fp=csv_fp,
            weight_algo=args.weight_algo,
            factor=args.factor,
        )


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    raise SystemExit(main())  # pragma: no cover