RK3588-TVM-GPU推理模型
1.前言
之前的博客已经在RK3588上安装了tvm的mali-gpu的版本,我们整理一下思路,本文将从模型的转换和调用两个方面进行讲解,tvm使用的是0.10版本,模型和代码也都是tvm官方的案例。
2.onnx模型转换
将ONNX格式的ResNet50-v2模型转换为TVM Runtime支持的形式,并将其编译为一个共享库文件。以下是对代码的解释:
1. 导入库和模块
import onnx
import tvm
import tvm.relay as relay
2. 指定ONNX模型路径和加载模型
model_path = "resnet50-v2-7.onnx"
onnx_model = onnx.load(model_path)
指定了ONNX模型的路径,并使用onnx.load加载模型。
3. 设置TVM的目标和目标主机
target = tvm.target.mali(model='rk3588')
target_host = tvm.target.arm_cpu(model='rk3588')
设置TVM的目标设备和目标主机:
(1)目标设备为Mali GPU(mali(model='rk3588'))
(2)目标主机为ARM CPU(arm_cpu(model='rk3588'))
4. 获取输入名称和形状
input_name = "data"
input_shape = (1, 3, 224, 224)
shape_dict = {input_name: input_shape}
定义了输入张量的名称(input_name)和形状(input_shape)。
可以使用netron查看onnx模型的输入名和shape。
5. TVM Relay从ONNX创建模块和参数
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
使用relay.frontend.from_onnx函数将ONNX模型转换为TVM Relay模块和参数。
6. 构建TVM Relay模块
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(mod,
        target=tvm.target.Target(target, host=target_host), params=params)
使用TVM Relay的relay.build函数将模块编译为图(graph)和库文件(lib),并使用指定的目标设备和目标主机。
7. 导出编译后的共享库文件
libpath = "./resnet.so"
lib.export_library(libpath)
将编译后的库文件保存为resnet.so。
8. 导出神经网络结构到JSON文件
graph_json_path = "./resnet.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)
将神经网络结构保存为JSON文件(resnet.json)。
9. 导出神经网络模型的权重参数到二进制文件
param_path = "./resnet.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
将神经网络模型的权重参数保存为二进制文件(resnet.params)。
10.完整代码
"""Compile the ONNX ResNet50-v2 model into a TVM deployable for the RK3588.

Produces three artifacts consumed by the inference script:
  resnet.so      - compiled operator library (shared object)
  resnet.json    - execution graph structure
  resnet.params  - serialized weight parameters
"""
import onnx
import tvm
import tvm.relay as relay

# Path to the pretrained ONNX model.
model_path = "resnet50-v2-7.onnx"
onnx_model = onnx.load(model_path)

# Target device is the RK3588's Mali GPU; the host code that drives it
# runs on the board's ARM CPU.
target = tvm.target.mali(model='rk3588')
target_host = tvm.target.arm_cpu(model='rk3588')

# Input tensor name and NCHW shape of this model
# (inspect the .onnx file with netron to confirm).
input_name = "data"
input_shape = (1, 3, 224, 224)
shape_dict = {input_name: input_shape}

# Convert the ONNX graph into a Relay module plus its weight parameters.
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# Compile at the highest optimization level for the Mali GPU target.
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(
        mod,
        target=tvm.target.Target(target, host=target_host),
        params=params,
    )

# Export the compiled operators as a shared library.
libpath = "./resnet.so"
lib.export_library(libpath)

# Save the execution graph structure as JSON.
graph_json_path = "./resnet.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

# Save the model weights as a binary parameter blob.
param_path = "./resnet.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
3. 模型推理
1.导入所需的库和模块
import onnx
import numpy as np
from scipy.special import softmax
from PIL import Image
import tvm
import tvm.relay as relay
from tvm.contrib import graph_executor
import timeit
导入了ONNX、NumPy、SciPy、PIL、TVM等相关库和模块。
2.准备输入图像:
img_path = "kitten.jpg"
resized_image = Image.open(img_path).resize((224, 224))
img_data = np.asarray(resized_image).astype("float32")
从文件加载图像,并将其调整为224x224像素大小。
3. 图像预处理
img_data = np.transpose(img_data, (2, 0, 1))
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
imagenet_stddev = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
norm_img_data = (img_data / 255 - imagenet_mean) / imagenet_stddev
img_data = np.expand_dims(norm_img_data, axis=0)
调整图像的格式和进行归一化,以符合ResNet50模型的输入要求。
4. 设置TVM的目标和目标主机
target = tvm.target.mali(model='rk3588')
target_host = tvm.target.arm_cpu(model='rk3588')
这里设置了TVM的目标设备和目标主机,与上一个代码片段中相同。
5. 加载模型结构、权重和参数
libpath = "./resnet.so"
graph_json_path = "./resnet.json"
param_path = "./resnet.params"
loaded_json = open(graph_json_path).read()
loaded_lib = tvm.runtime.load_module(libpath)
loaded_params = bytearray(open(param_path, "rb").read())
从之前保存的文件中加载神经网络的结构(graph_json_path)、库文件(libpath)、权重参数(param_path)。
6.创建TVM图执行器
dev = tvm.device(str(target), 0)
ctx = dev
module = graph_executor.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.set_input(input_name, img_data)
创建一个TVM图执行器,加载模型结构和权重,并设置输入数据。
7. 运行模型
ftimer = module.module.time_evaluator("run", dev, number=1, repeat=30)
prof_res = np.array(ftimer().results) * 1000  # 结果单位为秒,乘以1000转换为毫秒
使用TVM的时间评估器运行模型,并记录执行时间(转换为毫秒)。
8. 输出模型推理结果
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
获取模型输出。
9. 加载标签文件
labels_path = "synset.txt"
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]
从文件加载标签。
10. 后处理和输出预测结果
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
使用Softmax进行后处理,并输出前五个类别的预测结果及其概率。
11.完整代码
"""Run the TVM-compiled ResNet50 on the RK3588 Mali GPU and print the top-5 classes.

Expects the artifacts produced by the compilation script
(resnet.so, resnet.json, resnet.params), an input image (kitten.jpg)
and the ImageNet label file (synset.txt) in the working directory.
"""
import numpy as np
from scipy.special import softmax
from PIL import Image
import tvm
from tvm.contrib import graph_executor

# Load the test image and resize it to the network's 224x224 input.
img_path = "kitten.jpg"
resized_image = Image.open(img_path).resize((224, 224))
img_data = np.asarray(resized_image).astype("float32")

# Our input image is in HWC layout while the model expects CHW, so transpose.
img_data = np.transpose(img_data, (2, 0, 1))

# Normalize according to the ImageNet input specification.
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
imagenet_stddev = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
norm_img_data = (img_data / 255 - imagenet_mean) / imagenet_stddev

# Add the batch dimension, as we are expecting 4-dimensional input: NCHW.
img_data = np.expand_dims(norm_img_data, axis=0)

# Same target/host pair that the model was compiled for.
target = tvm.target.mali(model='rk3588')
target_host = tvm.target.arm_cpu(model='rk3588')
input_name = "data"

# Artifacts produced by the compilation script.
libpath = "./resnet.so"
graph_json_path = "./resnet.json"
param_path = "./resnet.params"
loaded_json = open(graph_json_path).read()
loaded_lib = tvm.runtime.load_module(libpath)
loaded_params = bytearray(open(param_path, "rb").read())

# Create the graph executor on the Mali GPU device, then load weights
# and bind the preprocessed image as input.
dev = tvm.device(str(target), 0)
module = graph_executor.create(loaded_json, loaded_lib, dev)
module.load_params(loaded_params)
module.set_input(input_name, img_data)

# Benchmark: 30 timed runs; time_evaluator reports seconds, so multiply
# by 1000 to match the "ms" units printed below (the original `* 1`
# contradicted its own comment).
ftimer = module.module.time_evaluator("run", dev, number=1, repeat=30)
prof_res = np.array(ftimer().results) * 1000  # convert seconds to milliseconds
print(
    "%-20s %-19s (%s)" % ("resnet50", "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
)

# One inference pass, then fetch the (1, 1000) logits tensor.
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

# ImageNet class labels, one per line.
labels_path = "synset.txt"
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]

# Softmax over the logits, then report the five most probable classes.
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
12.推理结果
4.完整的代码和模型下载
完整代码https://download.csdn.net/download/weixin_43999691/88652823?spm=1001.2014.3001.5503
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。 如若内容造成侵权/违法违规/事实不符,请联系我的编程经验分享网邮箱:veading@qq.com进行投诉反馈,一经查实,立即删除!