CV

Model performance benchmark (FPS)

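| demo | model_name | inputs_shape | dtype | RK3566<br>RK3568 | RK3562 | RK3588<br>@single_core | RK3576<br>@single_core | RV1109 | RV1126 | RK1808 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| mobilenet | mobilenetv2-12 | [1, 3, 224, 224] | INT8 | 180.7 | 281.3 | 450.7 | 467.0 | 212.9 | 322.3 | 170.3 |
| resnet | resnet50-v2-7 | [1, 3, 224, 224] | INT8 | 37.9 | 54.9 | 110.1 | 99.0 | 24.4 | 36.2 | 37.1 |
| yolov5 | yolov5s_relu | [1, 3, 640, 640] | INT8 | 25.5 | 33.2 | 66.1 | 65.0 | 20.2 | 29.2 | 37.2 |
| | yolov5n | [1, 3, 640, 640] | INT8 | 39.7 | 47.4 | 82.5 | 112.7 | 36.3 | 53.2 | 61.2 |
| | yolov5s | [1, 3, 640, 640] | INT8 | 19.3 | 23.6 | 48.4 | 57.5 | 13.6 | 20.0 | 28.2 |
| | yolov5m | [1, 3, 640, 640] | INT8 | 8.6 | 10.8 | 20.9 | 23.7 | 5.8 | 8.5 | 13.3 |
| yolov6 | yolov6n | [1, 3, 640, 640] | INT8 | 48.8 | 56.4 | 106.4 | 109.1 | 37.8 | 56.8 | 66.8 |
| | yolov6s | [1, 3, 640, 640] | INT8 | 15.2 | 17.3 | 36.4 | 35.0 | 10.8 | 16.3 | 24.1 |
| | yolov6m | [1, 3, 640, 640] | INT8 | 7.2 | 8.6 | 17.8 | 17.4 | 5.6 | 8.3 | 11.5 |
| yolov7 | yolov7-tiny | [1, 3, 640, 640] | INT8 | 27.9 | 36.5 | 72.7 | 74.8 | 15.4 | 22.4 | 37.2 |
| | yolov7 | [1, 3, 640, 640] | INT8 | 4.6 | 5.9 | 11.4 | 13.0 | 3.3 | 4.8 | 7.4 |
| yolov8 | yolov8n | [1, 3, 640, 640] | INT8 | 34.0 | 40.9 | 73.5 | 90.2 | 24.0 | 35.4 | 42.3 |
| | yolov8s | [1, 3, 640, 640] | INT8 | 15.1 | 18.4 | 38.0 | 40.8 | 8.9 | 13.1 | 19.1 |
| | yolov8m | [1, 3, 640, 640] | INT8 | 6.5 | 8.2 | 16.2 | 16.7 | 3.9 | 5.8 | 9.1 |
| yolov8_obb | yolov8n-obb | [1, 3, 640, 640] | INT8 | 33.9 | 41.3 | 74.0 | 90.2 | 25.1 | 37.3 | 42.8 |
| yolov10 | yolov10n | [1, 3, 640, 640] | INT8 | 20.7 | 34.1 | 61.2 | 80.2 | / | / | / |
| | yolov10s | [1, 3, 640, 640] | INT8 | 10.3 | 16.9 | 33.8 | 39.9 | / | / | / |
| yolo11 | yolo11n | [1, 3, 640, 640] | INT8 | 20.6 | 34.0 | 60.0 | 77.9 | 11.7 | 17.0 | 17.6 |
| | yolo11s | [1, 3, 640, 640] | INT8 | 10.2 | 16.7 | 33.0 | 38.2 | 5.0 | 7.3 | 8.4 |
| | yolo11m | [1, 3, 640, 640] | INT8 | 4.6 | 6.5 | 12.7 | 14.6 | 2.8 | 4.0 | 5.1 |
| yolox | yolox_s | [1, 3, 640, 640] | INT8 | 15.2 | 18.3 | 37.1 | 41.5 | 10.6 | 15.7 | 23.0 |
| | yolox_m | [1, 3, 640, 640] | INT8 | 6.6 | 8.2 | 16.0 | 17.6 | 4.6 | 6.8 | 10.7 |
| ppyoloe | ppyoloe_s | [1, 3, 640, 640] | INT8 | 17.1 | 20.0 | 32.5 | 41.3 | 11.2 | 16.4 | 21.1 |
| | ppyoloe_m | [1, 3, 640, 640] | INT8 | 7.8 | 9.2 | 15.8 | 17.8 | 5.2 | 7.7 | 9.4 |
| yolo_world | yolo_world_v2s | [1, 3, 640, 640] | INT8 | 7.4 | 9.6 | 22.1 | 22.3 | / | / | / |
| | clip_text | [1, 20] | FP16 | 29.8 | 67.4 | 95.8 | 63.5 | / | / | / |
| yolov8_pose | yolov8n-pose | [1, 3, 640, 640] | INT8 | 22.6 | 31.0 | 55.9 | 66.8 | / | / | / |
| deeplabv3 | deeplab-v3-plus-mobilenet-v2 | [1, 513, 513, 1] | INT8 | 10.9 | 21.4 | 34.0 | 39.4 | 10.1 | 13.0 | 4.4 |
| yolov5_seg | yolov5n-seg | [1, 3, 640, 640] | INT8 | 32.2 | 38.5 | 69.3 | 88.3 | 28.6 | 42.2 | 49.6 |
| | yolov5s-seg | [1, 3, 640, 640] | INT8 | 15.0 | 18.1 | 36.8 | 41.6 | 9.6 | 14.0 | 22.5 |
| | yolov5m-seg | [1, 3, 640, 640] | INT8 | 6.8 | 8.4 | 16.4 | 18.0 | 4.7 | 6.8 | 10.8 |
| yolov8_seg | yolov8n-seg | [1, 3, 640, 640] | INT8 | 27.8 | 33.0 | 60.8 | 71.1 | 18.6 | 27.6 | 32.9 |
| | yolov8s-seg | [1, 3, 640, 640] | INT8 | 11.7 | 14.1 | 28.9 | 30.8 | 6.6 | 9.8 | 14.6 |
| | yolov8m-seg | [1, 3, 640, 640] | INT8 | 5.2 | 6.4 | 12.6 | 12.7 | 3.1 | 4.6 | 6.9 |
| ppseg | ppseg_lite_1024x512 | [1, 3, 512, 512] | INT8 | 5.9 | 13.9 | 35.7 | 33.6 | 18.4 | 27.1 | 20.9 |
| mobilesam | mobilesam_encoder_tiny | [1, 3, 448, 448] | FP16 | 1.0 | 6.6 | 10.0 | 11.9 | / | / | / |
| | mobilesam_decoder | [1, 1, 112, 112] | FP16 | 24.3 | 69.6 | 116.4 | 108.6 | / | / | / |
| RetinaFace | RetinaFace_mobile320 | [1, 3, 320, 320] | INT8 | 156.4 | 300.8 | 227.2 | 470.5 | 144.8 | 212.5 | 198.5 |
| | RetinaFace_resnet50_320 | [1, 3, 320, 320] | INT8 | 18.7 | 26.9 | 49.2 | 56.6 | 14.6 | 20.8 | 24.6 |
| LPRNet | lprnet | [1, 3, 24, 94] | FP16 | 143.2 | 420.6 | 586.4 | 647.8 | 30.6(INT8) | 47.6(INT8) | 30.1(INT8) |
| PPOCR-Det | ppocrv4_det | [1, 3, 480, 480] | INT8 | 22.1 | 28.0 | 50.7 | 64.3 | 11.0 | 16.1 | 14.2 |
| PPOCR-Rec | ppocrv4_rec | [1, 3, 48, 320] | FP16 | 19.5 | 54.3 | 73.9 | 96.8 | 1.0 | 1.6 | 6.7 |
| lite_transformer | lite-transformer-encoder-16 | embedding-256, token-16 | FP16 | 337.5 | 725.8 | 867.6 | 784.1 | 22.7 | 35.4 | 98.3 |
| | lite-transformer-decoder-16 | embedding-256, token-16 | FP16 | 142.5 | 252.0 | 343.8 | 272.3 | 48.0 | 65.8 | 109.9 |
| clip | clip_images | [1, 3, 224, 224] | FP16 | 2.3 | 3.4 | 6.5 | 6.7 | / | / | / |
| | clip_text | [1, 20] | FP16 | 29.7 | 66.6 | 96.0 | 63.7 | / | / | / |
| wav2vec2 | wav2vec2_base_960h_20s | 20s audio | FP16 | RTF<br>0.817 | RTF<br>0.323 | RTF<br>0.133 | RTF<br>0.073 | / | / | / |
| whisper | whisper_base_20s | 20s audio | FP16 | RTF<br>1.178 | RTF<br>0.420 | RTF<br>0.215 | RTF<br>0.218 | / | / | / |
| zipformer | zipformer-bilingual-zh-en-t | streaming audio | FP16 | RTF<br>0.196 | RTF<br>0.116 | RTF<br>0.065 | RTF<br>0.082 | / | / | / |
| yamnet | yamnet_3s | 3s audio | FP16 | RTF<br>0.013 | RTF<br>0.008 | RTF<br>0.004 | RTF<br>0.005 | / | / | / |
| mms_tts | mms_tts_eng_200 | token-200 | FP16 | RTF<br>0.311 | RTF<br>0.138 | RTF<br>0.069 | RTF<br>0.069 | / | / | / |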
  • These performance data were collected with each platform's NPU running at its maximum frequency.
  • These performance data measure model inference time only; pre-processing and post-processing time is not included unless otherwise specified.
  • / means the model is currently not supported on that platform.