RK3588-TVM-GPU推理模型

2023-12-22 17:09:43

1.前言

之前的博客已经在RK3588上安装了TVM的Mali GPU版本。本文整理一下思路,从模型的转换和调用两个方面进行讲解。TVM使用的是0.10版本,模型和代码均来自TVM官方案例。

2.onnx模型转换

本节将ONNX格式的ResNet50-v2模型转换为TVM Runtime支持的形式,并将其编译为一个共享库文件。以下是对代码的解释:

1. 导入库和模块

import onnx
import tvm
import tvm.relay as relay

2. 指定ONNX模型路径和加载模型

model_path = "resnet50-v2-7.onnx"

onnx_model = onnx.load(model_path)

指定了ONNX模型的路径,并使用onnx.load加载模型。

3. 设置TVM的目标和目标主机

target = tvm.target.mali(model='rk3588')

target_host = tvm.target.arm_cpu(model='rk3588')

设置TVM的目标设备和目标主机:
(1)目标设备为Mali GPU(mali(model='rk3588'))
(2)目标主机为ARM CPU(arm_cpu(model='rk3588'))

4. 获取输入名称和形状

input_name = "data"

input_shape = (1, 3, 224, 224)

shape_dict = {input_name: input_shape}

定义了输入张量的名称(input_name)和形状(input_shape)。

可以使用netron查看onnx模型的输入名和shape。

5. TVM Relay从ONNX创建模块和参数

mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

使用relay.frontend.from_onnx函数将ONNX模型转换为TVM Relay模块和参数。

6. 构建TVM Relay模块

# Build at optimization level 3 for the Mali GPU target with the ARM CPU as host;
# the result is unpacked as (graph JSON, compiled library, parameters).
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(
        mod,
        target=tvm.target.Target(target, host=target_host),
        params=params,
    )

使用TVM Relay的relay.build函数,按指定的目标设备和目标主机将模块编译为图(graph)、库文件(lib)和参数(params)。

7. 导出编译后的共享库文件

libpath = "./resnet.so"

lib.export_library(libpath)

将编译后的库文件保存为resnet.so。

8. 导出神经网络结构到JSON文件

# Write the network structure returned by relay.build to a JSON file.
graph_json_path = "./resnet.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

将神经网络结构保存为JSON文件(resnet.json)。

9. 导出神经网络模型的权重参数到二进制文件

# Serialize the weight dictionary with relay.save_param_dict and write it as binary.
param_path = "./resnet.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))

将神经网络模型的权重参数保存为二进制文件(resnet.params)。

10.完整代码

import onnx
import tvm
import tvm.relay as relay

# Load the ONNX model from disk.
model_path = "resnet50-v2-7.onnx"
onnx_model = onnx.load(model_path)

# Device code is generated for the Mali GPU, host-side code for the ARM CPU.
target = tvm.target.mali(model='rk3588')
target_host = tvm.target.arm_cpu(model='rk3588')

# Input tensor name and shape; inspect the ONNX model with netron to find them.
input_name = "data"
input_shape = (1, 3, 224, 224)
shape_dict = {input_name: input_shape}

# Import the ONNX graph as a Relay module plus its parameter dictionary.
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# Build at optimization level 3; the result is unpacked as
# (graph JSON, compiled library, parameters).
build_target = tvm.target.Target(target, host=target_host)
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(mod, target=build_target, params=params)

# Export the compiled library as a shared object.
libpath = "./resnet.so"
lib.export_library(libpath)

# Save the network structure as a JSON file.
graph_json_path = "./resnet.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

# Save the model weights in serialized binary form.
param_path = "./resnet.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
   

3. 模型推理

1.导入所需的库和模块

import onnx

import numpy as np

from scipy.special import softmax

from PIL import Image

import tvm

import tvm.relay as relay

from tvm.contrib import graph_executor

import timeit

导入了ONNX、NumPy、SciPy、PIL、TVM等相关库和模块。

2.准备输入图像

img_path = "kitten.jpg"

resized_image = Image.open(img_path).resize((224, 224))

img_data = np.asarray(resized_image).astype("float32")

从文件加载图像,将其调整为224x224像素大小,并转换为float32数组。

3. 图像预处理

# Convert HWC -> CHW: move the channel axis to the front.
img_data = np.transpose(img_data, (2, 0, 1))

# Per-channel ImageNet mean / std, shaped (3, 1, 1) so they broadcast over H and W.
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
imagenet_stddev = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

# Divide by 255, normalize per channel, then add the batch dimension (NCHW).
norm_img_data = (img_data / 255 - imagenet_mean) / imagenet_stddev
img_data = np.expand_dims(norm_img_data, axis=0)

将图像由HWC布局转换为CHW布局并进行归一化,再增加batch维度,以符合ResNet50模型的输入要求。

4. 设置TVM的目标和目标主机

target = tvm.target.mali(model='rk3588')

target_host = tvm.target.arm_cpu(model='rk3588')

这里设置了TVM的目标设备和目标主机,与模型转换部分的设置相同。

5. 加载模型结构、权重和参数

# Artifacts written by the conversion step: compiled library, graph JSON, weights.
libpath = "./resnet.so"
graph_json_path = "./resnet.json"
param_path = "./resnet.params"

# Read the files inside `with` blocks so the handles are closed promptly.
with open(graph_json_path) as f:
    loaded_json = f.read()

loaded_lib = tvm.runtime.load_module(libpath)

with open(param_path, "rb") as f:
    loaded_params = bytearray(f.read())

从之前保存的文件中加载神经网络的结构(graph_json_path)、库文件(libpath)、权重参数(param_path)。

6.创建TVM图执行器

# Same input tensor name that was used when the model was converted.
input_name = "data"

# Device 0 of the device type named by the target string.
dev = tvm.device(str(target), 0)
ctx = dev

# Create the graph executor, load the weights, and bind the preprocessed image.
module = graph_executor.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.set_input(input_name, img_data)

创建一个TVM图执行器,加载模型结构和权重,并设置输入数据。

7. 运行模型

# Time the "run" function: 30 repeats of 1 invocation each.
ftimer = module.module.time_evaluator("run", dev, number=1, repeat=30)

# time_evaluator reports seconds; multiply by 1000 to get milliseconds.
prof_res = np.array(ftimer().results) * 1000

使用TVM的时间评估器运行模型,并记录执行时间。

8. 输出模型推理结果

module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

获取模型输出。

9. 加载标签文件

# Read one class label per line, stripping trailing whitespace/newlines.
labels_path = "synset.txt"
with open(labels_path, "r") as f:
    labels = [line.rstrip() for line in f]

从文件加载标签。

10. 后处理和输出预测结果

# Turn the raw outputs into probabilities and drop the batch dimension.
scores = softmax(tvm_output)
scores = np.squeeze(scores)

# Class indices sorted by descending probability; report the top five.
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))

使用Softmax进行后处理,并输出前五个类别的预测结果及其概率。

11.完整代码

import onnx
import numpy as np
from scipy.special import softmax
from PIL import Image
import tvm
import tvm.relay as relay
from tvm.contrib import graph_executor
import timeit

img_path = "kitten.jpg"
# Load the image, force 3-channel RGB (so grayscale/RGBA files also work with
# the 3-channel normalization below), and resize it to 224x224.
with Image.open(img_path) as img:
    resized_image = img.convert("RGB").resize((224, 224))
img_data = np.asarray(resized_image).astype("float32")

# The image is in HWC layout while the ONNX model expects CHW, so move channels first.
img_data = np.transpose(img_data, (2, 0, 1))

# Normalize according to the ImageNet input specification.
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
imagenet_stddev = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
norm_img_data = (img_data / 255 - imagenet_mean) / imagenet_stddev

# Add the batch dimension, as the model takes 4-dimensional NCHW input.
img_data = np.expand_dims(norm_img_data, axis=0)

target = tvm.target.mali(model='rk3588')
target_host = tvm.target.arm_cpu(model='rk3588')

input_name = "data"

# Artifacts produced by the conversion script: compiled library,
# network structure (JSON) and weights (binary).
libpath = "./resnet.so"
graph_json_path = "./resnet.json"
param_path = "./resnet.params"

# Read the files inside `with` blocks so the handles are closed promptly.
with open(graph_json_path) as f:
    loaded_json = f.read()
loaded_lib = tvm.runtime.load_module(libpath)
with open(param_path, "rb") as f:
    loaded_params = bytearray(f.read())

# Create the graph executor on device 0, load the weights, and bind the input.
dev = tvm.device(str(target), 0)
module = graph_executor.create(loaded_json, loaded_lib, dev)
module.load_params(loaded_params)
module.set_input(input_name, img_data)

# Benchmark "run": 30 repeats of 1 invocation each. time_evaluator reports
# seconds, so scale by 1000 to match the "ms" labels printed below.
ftimer = module.module.time_evaluator("run", dev, number=1, repeat=30)
prof_res = np.array(ftimer().results) * 1000
print(
    "%-20s %-19s (%s)" % ("resnet50", "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
)

# Run once more and fetch the output tensor.
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

# Read one class label per line.
labels_path = "synset.txt"
with open(labels_path, "r") as f:
    labels = [line.rstrip() for line in f]

# Convert the outputs to probabilities and print the five most likely classes.
scores = np.squeeze(softmax(tvm_output))
ranks = np.argsort(scores)[::-1]
for rank in ranks[:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))

12.推理结果

4.完整的代码和模型下载

完整代码:https://download.csdn.net/download/weixin_43999691/88652823?spm=1001.2014.3001.5503

文章来源:https://blog.csdn.net/weixin_43999691/article/details/135154252
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。