# AndroidJava/PicQuery/script/model-CLIP/quant_vit_onnx_int8.py

import os
from pathlib import Path

import onnxruntime as ort
from PIL import Image
from onnxruntime.quantization import quantize_dynamic, QuantType, quant_pre_process

from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize, InterpolationMode

# File names used by this script; quant() resolves them relative to the
# current working directory.
# Input model handed to quant_pre_process().
model = "clip-image-encoder.onnx"
# Output of quant_pre_process() and input of quantize_dynamic().
model_prep = "clip-image-encoder-quant-pre.onnx"
# Output of quantize_dynamic(); this is the model loaded by test().
model_quant = "clip-image-encoder-quant-int8.onnx"


def quant():
    """Produce the dynamically quantized model from the exported ONNX file.

    Runs the ONNX Runtime quantization pre-processing step on ``model``,
    writing ``model_prep``, and then dynamically quantizes that file into
    ``model_quant`` using the ``QuantType.QInt8`` weight type. The node named
    ``/conv1/Conv`` is excluded from quantization. Paths are taken relative
    to the current working directory.
    """
    # Stage 1: pre-process the model for quantization.
    quant_pre_process(model, model_prep)

    # Stage 2: dynamic quantization of the pre-processed model.
    base_dir = Path(os.curdir)
    prepared_path = base_dir / model_prep
    quantized_path = base_dir / model_quant
    skipped_nodes = ['/conv1/Conv']
    quantize_dynamic(
        prepared_path,
        quantized_path,
        weight_type=QuantType.QInt8,
        nodes_to_exclude=skipped_nodes,
    )


def test():
    """Run the quantized model on a sample image and return its first output.

    Loads ``model_quant`` into an ONNX Runtime inference session, preprocesses
    ``../../image.jpg`` (resize to 224 with nearest-neighbour interpolation,
    center crop to 224, RGB conversion, tensor conversion, per-channel
    normalization), adds a leading batch dimension and feeds the result to
    the session's first input.

    Returns:
        The first element of the list returned by ``ort_session.run``; it is
        also printed to stdout.
    """
    ort_session = ort.InferenceSession(model_quant)

    # The model is fed through its first declared input.
    input_name = ort_session.get_inputs()[0].name

    def _convert_image_to_rgb(image: Image.Image) -> Image.Image:
        """Return *image* converted to RGB mode."""
        return image.convert("RGB")

    def _transform(n_px):
        """Build the preprocessing pipeline for an ``n_px`` sized input."""
        return Compose([
            Resize(n_px, interpolation=InterpolationMode.NEAREST),
            CenterCrop(n_px),
            _convert_image_to_rgb,
            ToTensor(),
            # Normalize(mean, std) applied per channel.
            Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
        ])

    preprocess = _transform(224)

    # Open the image in a context manager so the underlying file handle is
    # released deterministically, even if preprocessing raises.
    with Image.open("../../image.jpg") as image:
        # unsqueeze(0) adds the batch dimension in front of the image tensor.
        image_input = preprocess(image).unsqueeze(0).to("cpu")

    outputs = ort_session.run(None, {input_name: image_input.numpy()})

    print(outputs[0])
    return outputs[0]


if __name__ == '__main__':
    # Running this file as a script only performs the quantization step.
    quant()
    # Uncomment to also run the quantized model on the sample image:
    # res = test()
# Optional follow-up commands, run manually from a shell.
# NOTE(review): the first command names clip-text-encoder-quant-int8.onnx, while
# this script writes clip-image-encoder-quant-int8.onnx -- confirm which file
# is intended.
# python -m onnxruntime.tools.check_onnx_model_mobile_usability clip-text-encoder-quant-int8.onnx
# python -m onnxruntime.tools.convert_onnx_models_to_ort clip-image-encoder-quant-int8.onnx