{"id":15553574,"url":"https://github.com/xiaosu-zhu/mcquic","last_synced_at":"2025-10-12T18:30:16.082Z","repository":{"id":41430223,"uuid":"325442344","full_name":"xiaosu-zhu/McQuic","owner":"xiaosu-zhu","description":"Repository of CVPR'22 paper \"Unified Multivariate Gaussian Mixture for Efficient Neural Image Compression\"","archived":false,"fork":false,"pushed_at":"2023-01-03T03:35:11.000Z","size":13556,"stargazers_count":106,"open_issues_count":6,"forks_count":12,"subscribers_count":1,"default_branch":"main","last_synced_at":"2024-04-03T13:20:55.301Z","etag":null,"topics":["computer-vision","cvpr2022","image-compression","image-processing","pytorch"],"latest_commit_sha":null,"homepage":"https://huggingface.co/spaces/xiaosu-zhu/McQuic","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/xiaosu-zhu.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2020-12-30T03:12:49.000Z","updated_at":"2024-03-17T01:36:08.000Z","dependencies_parsed_at":"2023-02-01T05:01:02.696Z","dependency_job_id":null,"html_url":"https://github.com/xiaosu-zhu/McQuic","commit_stats":null,"previous_names":[],"tags_count":67,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/xiaosu-zhu%2FMcQuic","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/xiaosu-zhu%2FMcQuic/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/xiaosu-zhu%2FMcQuic/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/xiaosu-zhu%2FMcQuic/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/x
iaosu-zhu","download_url":"https://codeload.github.com/xiaosu-zhu/McQuic/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":236261752,"owners_count":19120767,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["computer-vision","cvpr2022","image-compression","image-processing","pytorch"],"created_at":"2024-10-02T14:38:47.381Z","updated_at":"2025-10-12T18:30:15.099Z","avatar_url":"https://github.com/xiaosu-zhu.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://github.com/xiaosu-zhu/McQuic#gh-light-mode-only\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/McQuic-light.svg#gh-light-mode-only\" alt=\"McQuic\" title=\"McQuic\" width=\"45%\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://github.com/xiaosu-zhu/McQuic#gh-dark-mode-only\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/McQuic-dark.svg#gh-dark-mode-only\" alt=\"McQuic\" title=\"McQuic\" width=\"45%\"/\u003e\n  \u003c/a\u003e\n  \u003cbr/\u003e\n  \u003cspan\u003e\n    \u003ci\u003ea.k.a.\u003c/i\u003e \u003cb\u003e\u003ci\u003eM\u003c/i\u003e\u003c/b\u003eulti-\u003cb\u003e\u003ci\u003ec\u003c/i\u003e\u003c/b\u003eodebook \u003cb\u003e\u003ci\u003eQu\u003c/i\u003e\u003c/b\u003eantizers for neural \u003cb\u003e\u003ci\u003ei\u003c/i\u003e\u003c/b\u003emage \u003cb\u003e\u003ci\u003ec\u003c/i\u003e\u003c/b\u003eompression\n  \u003c/span\u003e\n\u003c/p\u003e\n\n\n\u003cp 
align=\"center\"\u003e\n  \u003ca href=\"https://www.python.org/\" target=\"_blank\"\u003e\n    \u003cimg src=\"https://img.shields.io/badge/python-3670A0?style=for-the-badge\u0026logo=python\u0026logoColor=ffdd54\" alt=\"Python\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://pytorch.org/\" target=\"_blank\"\u003e\n    \u003cimg src=\"https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=for-the-badge\u0026logo=PyTorch\u0026logoColor=white\" alt=\"PyTorch\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://github.com/xiaosu-zhu/McQuic/stargazers\"\u003e\n    \u003cimg src=\"https://img.shields.io/github/stars/xiaosu-zhu/McQuic?logo=github\u0026style=for-the-badge\" alt=\"Github stars\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://github.com/xiaosu-zhu/McQuic/network/members\"\u003e\n    \u003cimg src=\"https://img.shields.io/github/forks/xiaosu-zhu/McQuic?logo=github\u0026style=for-the-badge\" alt=\"Github forks\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://github.com/xiaosu-zhu/McQuic/blob/main/LICENSE\"\u003e\n    \u003cimg src=\"https://img.shields.io/github/license/xiaosu-zhu/McQuic?logo=github\u0026style=for-the-badge\" alt=\"Github license\"/\u003e\n  \u003c/a\u003e\n\u003c/p\u003e\n\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://github.com/xiaosu-zhu/McQuic/actions/workflows/test-all.yml\"\u003e\n    \u003cimg src=\"https://github.com/xiaosu-zhu/McQuic/actions/workflows/test-all.yml/badge.svg\" alt=\"All tests\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://anaconda.org/xiaosu-zhu/mcquic\" target=\"_blank\"\u003e\n    \u003cimg src=\"https://img.shields.io/conda/v/xiaosu-zhu/mcquic?label=mcquic\" alt=\"Conda package\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://anaconda.org/xiaosu-zhu/mcquic\" target=\"_blank\"\u003e\n    \u003cimg src=\"https://img.shields.io/conda/dn/xiaosu-zhu/mcquic\" alt=\"Downloads\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"https://huggingface.co/spaces/xiaosu-zhu/McQuic\" 
target=\"_blank\"\u003e\n    \u003cimg src=\"https://img.shields.io/badge/dynamic/json?color=red\u0026label=Hugging%20Face%20Space\u0026query=%24.runtime.stage\u0026url=https%3A%2F%2Fhuggingface.co%2Fapi%2Fspaces%2Fxiaosu-zhu%2FMcQuic\" alt=\"Demo\"/\u003e\n  \u003c/a\u003e\n\u003c/p\u003e\n\n\n\u003cbr/\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cb\u003e🥳Our paper will be presented at CVPR 2022!🥳\u003c/b\u003e\n\u003c/p\u003e\n\u003cbr/\u003e\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"localhost#gh-light-mode-only\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/papertitle-light.svg#gh-light-mode-only\" alt=\"Unified Multivariate Gaussian Mixture for Efficient Neural Image Compression\" title=\"Unified Multivariate Gaussian Mixture for Efficient Neural Image Compression\" width=\"100%\"/\u003e\n  \u003c/a\u003e\n  \u003ca href=\"localhost#gh-dark-mode-only\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/papertitle-dark.svg#gh-dark-mode-only\" alt=\"Unified Multivariate Gaussian Mixture for Efficient Neural Image Compression\" title=\"Unified Multivariate Gaussian Mixture for Efficient Neural Image Compression\" width=\"100%\"/\u003e\n  \u003c/a\u003e\n\u003c/p\u003e\n\u003cp align=\"center\"\u003e\u003ca href=\"localhost\" target=\"_blank\"\u003eCVF Open Access\u003c/a\u003e | \u003ca href=\"https://arxiv.org/abs/2203.10897\" target=\"_blank\"\u003earXiv\u003c/a\u003e | \u003ca href=\"#citation\"\u003eBibTex\u003c/a\u003e | \u003ca href=\"https://huggingface.co/spaces/xiaosu-zhu/McQuic\" target=\"_blank\"\u003eDemo\u003c/a\u003e\u003c/p\u003e\n\n\n\n\u003cbr/\u003e\n\u003cbr/\u003e\n\u003cbr/\u003e\n\n**Mc*****Quic*** is a deep image compressor.\n\n**Features**:\n* Solid performance and super-fast coding speed (See [Reference Models](#reference-models)).\n* Cross-platform support (Linux-64, Windows-64 and macOS-64, macOS-arm64).\n* You could try the interactive demo 
in the [HuggingFace Space](https://huggingface.co/spaces/xiaosu-zhu/McQuic)!\n\n**Techs**:\n\nThe **Mc*****Quic*** hold rich multi-codebooks to quantize visual features and restore images by these quantized features. Similar ideas are presented in SHA [[1](#SHA)], VQ-VAE [[2](#VQ-VAE)], VQ-GAN [[3](#VQ-GAN)], *etc*. We summarize these as vectorized priors, and our method extends these ideas to a ***unified multivariate Gaussian mixture***, to perform high-quality, low-latency image compression.\n\n\u003cp align=\"center\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/paper/priors-light.svg#gh-light-mode-only\" alt=\"Vectorized prior\" title=\"Vectorized prior\" width=\"100%\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/paper/priors-dark.svg#gh-dark-mode-only\" alt=\"Vectorized prior\" title=\"Vectorized prior\" width=\"100%\"\u003e\n    \u003cspan\u003e\u003cb\u003eFigure 1. Operational diagrams of different methods.\u003c/b\u003e\u003c/span\u003e\n\u003c/p\u003e\n\n\n\u003cp align=\"center\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/paper/kodim24-light.png#gh-light-mode-only\" alt=\"kodim24.png\" title=\"kodim24.png\" width=\"100%\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/paper/kodim24-dark.png#gh-dark-mode-only\" alt=\"kodim24.png\" title=\"kodim24.png\" width=\"100%\"\u003e\n    \u003cspan\u003e\u003cb\u003eFigure 2. 
Comparisons with traditional codecs on an image from Kodak dataset.\u003c/b\u003e\u003c/span\u003e\n\u003c/p\u003e\n\n\u003c!--ts--\u003e\n* [Quick Start](#quick-start)\n   * [Requirements](#requirements)\n   * [Conda (Recommended)](#conda-recommended)\n   * [Docker](#docker)\n   * [Install Manually (for dev)](#install-manually-for-dev)\n   * [(\u003cem\u003e\u003cstrong\u003eOptional\u003c/strong\u003e\u003c/em\u003e) Install NVIDIA/Apex](#optional-install-nvidiaapex)\n* [Reference Models](#reference-models)\n* [Train a New Model](#train-a-new-model)\n   * [Requirements](#requirements-1)\n   * [Configs](#configs)\n   * [Prepare a Dataset](#prepare-a-dataset)\n   * [Training](#training)\n   * [Test](#test)\n* [Implement MCQ by yourself](#implement-mcq-by-yourself)\n* [Contribute to this Repository](#contribute-to-this-repository)\n* [To-do List](#to-do-list)\n* [Detailed framework](#detailed-framework)\n* [References and License](#references-and-license)\n   * [References](#references)\n   * [Citation](#citation)\n   * [Copyright](#copyright)\n\n\u003c!-- Created by https://github.com/ekalinin/github-markdown-toc --\u003e\n\u003c!-- Added by: runner, at: Tue May 21 11:00:48 UTC 2024 --\u003e\n\n\u003c!--te--\u003e\n\n\n\n# Quick Start\nIt is easy (with a GPU, or CPU if you like) to try our model. I would give a quick guide to help you compress an image and restore it.\n\n## Requirements\nTo run the model, your device needs to meet following requirements.\n\n* Hardware\n  * a CUDA-enabled GPU (`≥ 8GiB VRAM`, Driver version `≥ 450.80.02`)\n  * If you don't have GPU, running models on CPU may be slower.\n  * `≥ 8GiB RAM`\n* OS\n  * I've tested all features on `Ubuntu`, other platforms should also work. If not, please [file bugs](#contribute-to-this-repository).\n\n## Conda (Recommended)\nInstall this package is very easy with a `conda` environment installed, *e.g.* [Miniconda](https://docs.conda.io/en/latest/miniconda.html). 
I recommend you to install it to a new virtual environment directly by:\n```bash\n# Install a clean pytorch with CUDA support\nconda create -n [ENV_NAME] python=3.9 \"pytorch\u003e=1.11,\u003c2\" \"torchvision\u003e=0.12,\u003c1\" cudatoolkit -c pytorch\n# Install mcquic and other dependencies\nconda install -n [ENV_NAME] mcquic -c xiaosu-zhu -c conda-forge\nconda activate [ENV_NAME]\n```\n\n\u003ca href=\"#\"\u003e\n  \u003cimage src=\"https://img.shields.io/badge/NOTE-yellow?style=for-the-badge\" alt=\"NOTE\"/\u003e\n\u003c/a\u003e\n\n\u003e Above command install packages with `CUDA` support. If you just want to run it on CPU, please use `cpuonly` other than `cudatoolkit` in the first command.\n\n\u003ca href=\"#\"\u003e\n  \u003cimage src=\"https://img.shields.io/badge/NOTE-yellow?style=for-the-badge\" alt=\"NOTE\"/\u003e\n\u003c/a\u003e\n\n\u003e Since there is no proper version of torchvision now for Apple M1, you need to change channel from `pytorch` to `conda-forge` in the first command.\n\n\n* Compress images\n```bash\nmcquic\n```\n```console\nUsage: mcquic [OPTIONS] COMMAND [ARGS]...\n\nOptions:\n  -v, --version  Print version info.\n  -h, --help     Show this message and exit.\n\nCommands:\n  -*        Compress/restore a file.\n  dataset   Create training set from `images` dir to `output` dir.\n  train     Train a model.\n  validate  Validate a trained model from `path` by images from `images`...\n\n```\n```bash\nmcquic --help\n```\n```console\nUsage: mcquic - [OPTIONS] INPUT [OUTPUT]\n\n  Compress/restore a file.\n\n  Args:\n\n      input (str): Input file path. If input is an image, compress it. If\n      input is a `.mcq` file, restore it.\n\n      output (optional, str): Output file path or dir. 
If not provided, this\n      program will only print compressor information of input file.\n\nOptions:\n  -D, --debug        Set logging level to DEBUG to print verbose messages.\n  -q, --quiet        Silence all messages, this option has higher priority to\n                     `-D/--debug`.\n  -qp INTEGER RANGE  Quantization parameter. Higher means better image quality\n                     and larger size.  [default: 2; 1\u003c=x\u003c=13]\n  --local FILE       Use a local model path instead of download by `qp`.\n  --disable-gpu      Use pure CPU to perform compression. This will be slow.\n  --mse              Use model optimized for PSNR other than MsSSIM.\n  --crop             Crop the image to align feature patches. Edges of image\n                     are cutted though, compressed binary will be smaller.\n  -h, --help         Show this message and exit.\n\n```\n```bash\nmcquic -qp 2 path/to/an/image path/to/output.mcq\n```\n* Decompress images\n```bash\n# `-qp` is not necessary. Since this arg is written to `output.mcq`.\nmcquic path/to/output.mcq path/to/restored.png\n```\n\n\n## Docker\nI also build [`docker` images](https://github.com/xiaosu-zhu/McQuic/pkgs/container/mcquic) for you to get away from environment issues.\n\nTry with the latest docker image:\n```bash\ndocker pull ghcr.io/xiaosu-zhu/mcquic:latest\n# or nightly build\n# docker pull ghcr.io/xiaosu-zhu/mcquic:nightly\n```\n\nThe entrypoint of this container is set to `mcquic` itself. So, you can directly use it as `mcquic` main program to execute.\n```bash\ndocker run ghcr.io/xiaosu-zhu/mcquic:latest --help\n```\n\nTo compress/restore images, you need to mount native files into the container. Therefore, a working example forms as follows:\n```bash\n# `someimage.png` is located in `path/to/some/folder`. 
And this folder will be mounted at `/workspace/workdir`.\ndocker run -v path/to/some/folder:/workspace/workdir ghcr.io/xiaosu-zhu/mcquic:latest /workspace/workdir/someimage.png /workspace/workdir/output.mcq\ndocker run -v path/to/some/folder:/workspace/workdir ghcr.io/xiaosu-zhu/mcquic:latest /workspace/workdir/output.mcq /workspace/workdir/restored.png\n```\n\n## Install Manually (for dev)\nThis way enables your full access to this repo for modifying. Also, if you want to go on, a `conda` environment is needed, *e.g.* [Miniconda](https://docs.conda.io/en/latest/miniconda.html).\n\n* Clone this repository\n```bash\ngit clone https://github.com/xiaosu-zhu/McQuic.git \u0026\u0026 cd McQuic\n```\n* Create a virtual env `mcquic` and install all packages by\n```powershell\n./install.sh  # for POSIX with bash\n.\\install.ps1 # for Windows with Anaconda PowerShell\n```\n\nNow you should be in the `mcquic` virtual environment. If not, please activate it by `conda activate mcquic`.\n\n* Compress images\n```bash\nmcquic --help\nmcquic -qp 2 assets/sample.png assets/compressed.mcq\n```\n* Decompress images\n```bash\n# `-qp` is not necessary. Since this arg is written to `output.mcq`.\nmcquic assets/compressed.mcq assets/restored.png\n```\nAnd check outputs: [`assets/compressed.mcq`](https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/compressed.mcq) and [`assets/restored.png`](https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/restored.png).\n\n## (***Optional***) Install `NVIDIA/Apex`\n\n[`NVIDIA/Apex`](https://github.com/NVIDIA/apex) is an additional package **required** for training. 
If you want to [**develop, contribute**](#contribute-to-this-repository), or [**train a new model**](#train-a-new-model), please ensure you've installed `NVIDIA/Apex` by following snippets.\n```bash\ngit clone https://github.com/NVIDIA/apex \u0026\u0026 cd apex\npip install -v --disable-pip-version-check --no-cache-dir --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" ./\n```\n\n\u003ca href=\"#\"\u003e\n  \u003cimage src=\"https://img.shields.io/badge/NOTE-yellow?style=for-the-badge\" alt=\"NOTE\"/\u003e\n\u003c/a\u003e\n\n\u003e If you are using [Docker images](#docker), this step is not necessary.\n\n\u003ca href=\"#\"\u003e\n  \u003cimage src=\"https://img.shields.io/badge/NOTE-yellow?style=for-the-badge\" alt=\"NOTE\"/\u003e\n\u003c/a\u003e\n\n\u003e Please make sure you've installed it in the correct virtual environment.\n\n\n\u003ca href=\"#\"\u003e\n  \u003cimage src=\"https://img.shields.io/badge/NOTE-yellow?style=for-the-badge\" alt=\"NOTE\"/\u003e\n\u003c/a\u003e\n\n\u003e For more information such as building toolchains, please refer to [their repository](https://github.com/NVIDIA/apex).\n\n\n# Reference Models\nI've released one pretrained model (Sorry, currently I don't have much free GPUs to train models). You could fetch them by specifying `-qp [Model_NO]`. Following is the pretrained model list (Others ***TBA***):\n\n\n| Model No. 
\t| Channel \t| M \t|        K        \t| Throughput (Encode/Decode) \t| Avg.BPP \t|\n|:---------:\t|:-------:\t|:-:\t|:---------------:\t|:--------------------------:\t|:-------:\t|\n|         - \t|     -   \t| - \t|               - \t|              -             \t|    -    \t|\n|         2 \t|   128   \t| 2 \t| [8192,2048,512] \t|   25.45 Mpps / 22.03 Mpps  \t|  0.1277 \t|\n|         - \t|     -   \t| - \t|               - \t|              -             \t|    -    \t|\n|         12 \t|   192   \t| 12 \t| [8192,2048,512] \t|   11.07 Mpps / 10.21 Mpps  \t|    -    \t|\n\nThe coding throughput is tested on a NVIDIA RTX 3090. Image file I/O, model loading, *etc.* are not included in the test. Throughput will be further increased by `5%~15%` if you convert models to `TorchScript`. However, it is not trivial since conversion involves entropy coder, which is a cpp extension. So, I'm not going to implement it.\n\nThe main slow-down from small models to large models is caused by channel `128 -\u003e 192`.\n- **`Mpps = Mega-pixels per second`**\n- **`BPP = Bits per pixel`**\n\n# Train a New Model\nPlease ensure you've installed [`NVIDIA/Apex`](#optional-install-nvidiaapex). To train models, here are minimal and recommended system requirements.\n\n## Requirements\n* Minimal\n  * `RAM ≥ 64GiB`\n  * `VRAM ≥ 12GiB`\n* Recommended\n  * `VRAM ≥ 24GiB`\n  * Better if you have `≥4-way` NVIDIA RTX 3090s or faster GPUs.\n\n## Configs\nThe folder [configs](configs) provides example config `example.yaml` to train models. Please refer to [configs/README.md](configs/README.md) for more info.\n\n## Prepare a Dataset\nBefore training models, you need to prepare an image dataset. 
It is free to pick any images to form dataset, as long as the image-size is `≥512x512`.\n\n* To build a training dataset, please put all images in a folder (allow for sub-folders), then run\n```bash\nmcquic dataset --help\n```\n```console\nUsage: mcquic dataset [OPTIONS] IMAGES OUTPUT\n\n  Create training set from `images` dir to `output` dir.\n\n  Args:\n\n      images (str): All training images folder, allow sub-folders.\n\n      output (str): Output dir to create training set.\n\nOptions:\n  -D, --debug  Set logging level to DEBUG to print verbose messages.\n  -q, --quiet  Silence all messages, this option has higher priority to\n               `-D/--debug`.\n  -h, --help   Show this message and exit.\n\n```\n```bash\nmcquic dataset train_images mcquic_dataset\n```\nto build a `lmdb` dataset for `mcquic` to read.\n\n* Then, you could prepare a training config *e.g.* `configs/train.yaml`, and don't forget to specify the dataset path.\n```yaml\n# `configs/train.yaml`\n...\ntrainSet: mcquic_dataset # path to the training dataset.\nvalSet: val_images # path to a folder of validation images.\nsavePath: saved # path to a folder to save checkpoints.\n...\n```\nwhere `trainSet` and `valSet` can be any relative or absolute paths, and `savePath` is a folder for saving checkpoints and logs.\n\nIn this example, the final folder structure is shown below:\n\n```yaml\n. 
# A nice folder\n├─ 📂configs\n│   ...\n│   └── 📄train.yaml\n├── 📄README.md # this readme\n├── 📂saved # saved models appear here\n├── 📂train_images # a lot of training images\n│   ├── 📂ImageNet\n│   |   ├── 📂folder1 # a lot of images\n│   |   ├── 🖼️image1.png\n│   |   ...\n│   ├── 📂COCO\n│   |   ├── 🖼️image1.png\n│   |   ├── 🖼️image2.png\n│   |   ...\n|   ...\n├── 📂mcquic_dataset # generated training dataset\n|   ├── 📀data.mdb\n|   ├── 📀lock.mdb\n|   └── 📄metadata.json\n└── 📂val_images # a lot of validation images\n    ├── 🖼️image1.png\n    ├── 🖼️image2.png\n    ...\n```\n\n## Training\n* To train a new model, run\n```bash\nmcquic train --help\n```\n```console\nUsage: mcquic train [OPTIONS] [CONFIG]\n\n  Train a model.\n\n  Args:\n\n      config (str): Config file (yaml) path. If `-r/--resume` is present but\n      config is still given, then this config will be used to update the\n      resumed training.\n\nOptions:\n  -D, --debug        Set logging level to DEBUG to print verbose messages.\n  -q, --quiet        Silence all messages, this option has higher priority to\n                     `-D/--debug`.\n  -r, --resume FILE  `.ckpt` file path to resume training.\n  -h, --help         Show this message and exit.\n\n```\n```bash\nmcquic train configs/train.yaml\n```\nand saved model is located in `saved/mcquic_dataset/latest`.\n* To resume an interrupted training, run\n```bash\nmcquic train -r\n```\n, or\n```bash\nmcquic train -r configs/train.yaml\n```\nif you want to use an updated config (e.g. tuned learning rate, modified hyper-parameters) to resume training.\n\n\n## Test\nYou could use any saved checkpoints (usually located in above `savePath`) to validate the performance. 
For example\n```bash\nmcquic validate --help\n```\n```console\nUsage: python -m mcquic.validate [OPTIONS] PATH IMAGES [OUTPUT]\n\n  Validate a trained model from `path` by images from `images` dir, and\n  publish a final state_dict to `output` path.\n\n  Args:\n\n      path (str): Saved checkpoint path.\n\n      images (str): Validation images folder.\n\n      output (str): Dir to save all restored images.\n\nOptions:\n  -D, --debug        Set logging level to DEBUG to print verbose messages.\n  -q, --quiet        Silence all messages, this option has higher priority to\n                     `-D/--debug`.\n  -e, --export PATH  Path to export the final model that is compatible with\n                     main program.\n  -h, --help         Show this message and exit.\n\n```\n```bash\nmcquic validate -e path/to/final/model path/to/a/checkpoint path/to/images/folder path/to/output/folder\n```\n\nAnd the output \"final/model\" is compatible with the main program `mcquic`, you could directly use this local model to perform compression. Try:\n```bash\nmcquic --local path/to/final/model assets/sample.png assets/compressed.mcq\n# `--local` is not necessary. 
Since this arg is written to `output.mcq`.\nmcquic assets/compressed.mcq assets/restored.png\n```\nIf you think your model is awesome, please don't hasitate to [Contribute to this Repository](#contribute-to-this-repository)!\n\n\n\n# Implement MCQ by yourself\nA minimal implementation of the multi-codebook quantizer comes up with (please refer to [quantizer.py](./mcquic/modules/quantizer.py#L61) for notes):\n\n```python\nclass Quantizer(nn.Module):\n    \"\"\"\n    Quantizer with `m` sub-codebooks,\n        `k` codewords for each, and\n        `n` total channels.\n    Args:\n        m (int): Number of sub-codebooks.\n        k (int): Number of codewords for each sub-codebook.\n        n (int): Number of channels of latent variables.\n    \"\"\"\n    def __init__(self, m: int, k: int, n: int):\n        super().__init__()\n        # A codebook, feature dim `d = n // m`.\n        self._codebook = nn.Parameter(torch.empty(m, k, n // m))\n        self._initParameters()\n\n    def _initParameters(self):\n        nn.init.normal_(self._codebook, std=math.sqrt(2 / (5 * n / m)))\n\n    def forward(self, x: Tensor, t: float = 1.0) -\u003e (Tensor, Tensor):\n        \"\"\"\n        Module forward.\n        Args:\n            x (Tensor): Latent variable with shape [b, n, h, w].\n            t (float, 1.0): Temperature for Gumbel softmax.\n        Return:\n            Tensor: Quantized latent with shape [b, n, h, w].\n            Tensor: Binary codes with shape [b, m, h, w].\n        \"\"\"\n        b, _, h, w = x.shape\n        # [b, m, d, h, w]\n        x = x.reshape(b, len(self._codebook), -1, h, w)\n        # [b, m, 1, h, w], square of x\n        x2 = (x ** 2).sum(2, keepdim=True)\n        # [m, k, 1, 1], square of codebook\n        c2 = (self._codebook ** 2).sum(-1, keepdim=True)[..., None]\n        # [b, m, d, h, w] * [m, k, d] -sum-\u003e [b, m, k, h, w], dot product between x and codebook\n        inter = torch.einsum(\"bmdhw,mkd-\u003ebmkhw\", x, self._codebook)\n       
 # [b, m, k, h, w], pairwise L2-distance\n        distance = x2 + c2 - 2 * inter\n        # [b, m, k, h, w], distance as logits to sample\n        sample = F.gumbel_softmax(-distance, t, hard=True, dim=2)\n        # [b, m, d, h, w], use sample to find codewords\n        quantized = torch.einsum(\"bmkhw,mkd-\u003ebmdhw\", sample, self._codebook)\n        # back to [b, n, h, w]\n        quantized = quantized.reshape(b, -1, h, w)\n        # [b, n, h, w], [b, m, h, w], quantizeds and binaries\n        return quantized, sample.argmax(2)\n```\n\n\n# Contribute to this Repository\nIt will be very nice if you want to check your new ideas or add new functions 😊. You will need to install `mcquic` by [**Docker**](#docker) or [**manually (with optional step)**](#install-manually-for-dev). Just like other git repos, before raising issues or pull requests, please take a thorough look at [issue templates](https://github.com/xiaosu-zhu/McQuic/issues/new/choose).\n\n\n# To-do List\n* `mcquic service`\n* More pretrained model\n\n# Detailed framework\nThanks for your attention!❤️ Here are details in the paper.\n\nFollowing previous works, we build the compression model as an AutoEncoder. Bottleneck of encoder (analysis transform) outputs a small feature map and is quantized by *multi-codebook vector-quantization* other than scalar-quantization. Quantizers are cascaded to effectively estimate latent distribution.\n\n\u003cp align=\"center\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/paper/framework-light.svg#gh-light-mode-only\" alt=\"Framework\" title=\"Framework\" width=\"100%\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/paper/framework-dark.svg#gh-dark-mode-only\" alt=\"Framework\" title=\"Framework\" width=\"100%\"\u003e\n    \u003cspan\u003e\u003cb\u003eFigure 3. Left: Overall framework. 
Right: Structure of a quantizer.\u003c/b\u003e\u003c/span\u003e\n\u003c/p\u003e\n\nRight part of above figure shows detailed structure of our proposed quantizer.\n\n# References and License\n## References\n[\u003ca id=\"SHA\"\u003e1\u003c/a\u003e] Agustsson, Eirikur, et al. \"Soft-to-hard vector quantization for end-to-end learning compressible representations.\" NeurIPS 2017.\n\n[\u003ca id=\"VQ-VAE\"\u003e2\u003c/a\u003e] Van Den Oord, Aaron, and Oriol Vinyals. \"Neural discrete representation learning.\" NeurIPS 2017.\n\n[\u003ca id=\"VQ-GAN\"\u003e3\u003c/a\u003e] Esser, Patrick, Robin Rombach, and Bjorn Ommer. \"Taming transformers for high-resolution image synthesis.\" CVPR 2021.\n\n## Citation\nTo cite our paper, please use following BibTex:\n```plain\n@inproceedings{McQuic,\n  author    = {Xiaosu Zhu and\n               Jingkuan Song and\n               Lianli Gao and\n               Feng Zheng and\n               Heng Tao Shen},\n  title     = {Unified Multivariate Gaussian Mixture for Efficient Neural Image Compression},\n  booktitle = {CVPR},\n  pages     = {17612--17621},\n  year      = {2022}\n}\n```\n\n## Copyright\n\n**Fonts**:\n* [**Source Sans Pro**](https://fonts.adobe.com/fonts/source-sans). © 2010, 2012 Adobe Systems Incorporated, SIL Open Font License.\n* [**Flash Rogers 3D**](https://www.iconian.com/index.html). © 2007 Iconian Fonts, donationware.\n* [**Cambria Math**](https://docs.microsoft.com/en-us/typography/font-list/cambria-math). © 2017 Microsoft Corporation. All rights reserved.\n* [**Times New Roman**](https://docs.microsoft.com/en-us/typography/font-list/times-new-roman). © 2017 The Monotype Corporation. All Rights Reserved.\n* [**Caramel and Vanilla**](http://www.foundmyfont.com/). © 2017 FOUND MY FONT LTD. 
All Rights Reserved.\n\n**Pictures**:\n* [**kodim24.png**](http://r0k.us/graphics/kodak/kodim24.html) by Alfons Rudolph, Kodak Image Dataset.\n* [**assets/sample.png**](https://unsplash.com/photos/hLxqYJspAkE) by Ales Krivec, CLIC Professional valid set.\n\n\n**Third-party repos**:\n\n| Repos                                                                          | License |\n|-------------------------------------------------------------------------------:|---------|\n| [PyTorch](https://pytorch.org/)                                                | [BSD-style](https://github.com/pytorch/pytorch/blob/master/LICENSE) |\n| [Torchvision](https://pytorch.org/vision/stable/index.html)                    | [BSD-3-Clause](https://github.com/pytorch/vision/blob/main/LICENSE) |\n| [Apex](https://nvidia.github.io/apex/)                                         | [BSD-3-Clause](https://github.com/NVIDIA/apex/blob/master/LICENSE) |\n| [Tensorboard](https://www.tensorflow.org/tensorboard)                          | [Apache-2.0](https://github.com/tensorflow/tensorboard/blob/master/LICENSE) |\n| [Kornia](https://kornia.github.io/)                                            | [Apache-2.0](https://github.com/kornia/kornia/blob/master/LICENSE) |\n| [rich](https://rich.readthedocs.io/en/latest/)                                 | [MIT](https://github.com/Textualize/rich/blob/master/LICENSE) |\n| [python-lmdb](https://lmdb.readthedocs.io/en/release/)                         | [OpenLDAP Version 2.8](https://github.com/jnwatson/py-lmdb/blob/master/LICENSE) |\n| [PyYAML](https://pyyaml.org/)                                                  | [MIT](https://github.com/yaml/pyyaml/blob/master/LICENSE) |\n| [marshmallow](https://marshmallow.readthedocs.io/en/stable/)                   | [MIT](https://github.com/marshmallow-code/marshmallow/blob/dev/LICENSE) |\n| [click](https://click.palletsprojects.com/)                                    | 
[BSD-3-Clause](https://github.com/pallets/click/blob/main/LICENSE.rst) |\n| [vlutils](https://github.com/VL-Group/vlutils)                                 | [Apache-2.0](https://github.com/VL-Group/vlutils/blob/main/LICENSE) |\n| [MessagePack](https://msgpack.org/)                                            | [Apache-2.0](https://github.com/msgpack/msgpack-python/blob/main/COPYING) |\n| [pybind11](https://pybind11.readthedocs.io/en/stable/)                         | [BSD-style](https://github.com/pybind/pybind11/blob/master/LICENSE) |\n| [CompressAI](https://interdigitalinc.github.io/CompressAI/)                    | [BSD 3-Clause Clear](https://github.com/InterDigitalInc/CompressAI/blob/master/LICENSE) |\n| [Taming-transformer](https://compvis.github.io/taming-transformers/)           | [MIT](https://github.com/CompVis/taming-transformers/blob/master/License.txt) |\n| [marshmallow-jsonschema](https://github.com/fuhrysteve/marshmallow-jsonschema) | [MIT](https://github.com/fuhrysteve/marshmallow-jsonschema/blob/master/LICENSE) |\n| [json-schema-for-humans](https://coveooss.github.io/json-schema-for-humans/#/) | [Apache-2.0](https://github.com/coveooss/json-schema-for-humans/blob/main/LICENSE.md) |\n| [CyclicLR](https://github.com/bckenstler/CLR)                                  | [MIT](https://github.com/bckenstler/CLR/blob/master/LICENSE) |\n| [batch-transforms](https://github.com/pratogab/batch-transforms)               | [MIT](https://github.com/pratogab/batch-transforms/blob/master/LICENSE) |\n| [pytorch-msssim](https://github.com/VainF/pytorch-msssim) | [MIT](https://github.com/VainF/pytorch-msssim/blob/master/LICENSE) |\n| [Streamlit](https://streamlit.io/) | [Apache-2.0](https://github.com/streamlit/streamlit/blob/develop/LICENSE) |\n| [conda](https://docs.conda.io/projects/conda/en/latest/) | [BSD 3-Clause](https://docs.conda.io/en/latest/license.html) |\n\n\n\u003cbr/\u003e\n\u003cbr/\u003e\n\u003cp align=\"center\"\u003e\n\u003cb\u003e\nThis repo is 
licensed under\n\u003c/b\u003e\n\u003c/p\u003e\n\u003cp align=\"center\"\u003e\n\u003ca href=\"https://www.apache.org/licenses/LICENSE-2.0#gh-light-mode-only\" target=\"_blank\"\u003e\n  \u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/ASF_Logo-light.svg#gh-light-mode-only\" alt=\"The Apache Software Foundation\" title=\"The Apache Software Foundation\" width=\"200px\"/\u003e\n\u003c/a\u003e\n\u003ca href=\"https://www.apache.org/licenses/LICENSE-2.0#gh-dark-mode-only\" target=\"_blank\"\u003e\n\u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/ASF_Logo-light.svg#gh-dark-mode-only\" alt=\"The Apache Software Foundation\" title=\"The Apache Software Foundation\" width=\"200px\"/\u003e\n\u003c/a\u003e\n\u003c/p\u003e\n\u003cp align=\"center\"\u003e\n\u003ca href=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/LICENSE\"\u003e\n  \u003cb\u003eApache License\u003cbr/\u003eVersion 2.0\u003c/b\u003e\n\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cbr/\u003e\n\u003cbr/\u003e\n\u003cbr/\u003e\n\n\u003cp align=\"center\"\u003e\n\u003ca href=\"https://github.com/yaya-cheng#gh-dark-mode-only\"\u003e\n\u003cimg src=\"https://raw.githubusercontent.com/xiaosu-zhu/McQuic/main/assets/thanks.svg#gh-dark-mode-only\" width=\"250px\"/\u003e\n\u003c/a\u003e\n\u003c/p\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fxiaosu-zhu%2Fmcquic","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fxiaosu-zhu%2Fmcquic","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fxiaosu-zhu%2Fmcquic/lists"}