40 lines
1.1 KiB
Python
40 lines
1.1 KiB
Python
import numpy as np
|
|
import onnx
|
|
import onnxruntime as ort
|
|
from PIL import Image
|
|
from clip import clip
|
|
from numpy import ndarray
|
|
from onnxruntime import SessionOptions
|
|
from torch import Tensor
|
|
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize, InterpolationMode
|
|
|
|
|
|
# quantized_model = onnx.load('../encoder/quant-int8.onnx')
|
|
|
|
def to_numpy(tensor: Tensor, dtype=None):
|
|
r: ndarray = tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
|
|
r.astype(dtype=dtype)
|
|
return r
|
|
|
|
|
|
options = SessionOptions()
|
|
options.add_session_config_entry("session.load_model_format", "ORT")
|
|
ort_session = ort.InferenceSession("clip-text-encoder-quant-int8.with_runtime_opt.ort",
|
|
sess_options=options, )
|
|
|
|
# 获取模型的输入名称
|
|
input_name = ort_session.get_inputs()[0].name
|
|
|
|
# image_orig = preprocess(i)
|
|
# print(type(image_orig))
|
|
# image_orig.save("last.jpg")
|
|
input_text = "by the sea"
|
|
text_input = clip.tokenize(input_text)
|
|
print(text_input)
|
|
# 运行推理
|
|
outputs = ort_session.run(None, {input_name: to_numpy(text_input)})
|
|
|
|
print(outputs)
|
|
|
|
print(ort_session.get_outputs()[0].name)
|