PP-DocLayoutV3版面区域检测模型部署-尧图企业网站定制

部署方法

使用模型的 onnx 版本进行推理，模型在此处下载。

依赖安装

```bash
pip install numpy opencv-python onnxruntime
```

python 推理

此类封装了推理和可视化：

```python
# DocLayoutV3模型推理和可视化
import numpy as np
import cv2
import onnxruntime as ort
import json


class DocLayoutV3Detector:
    LABEL_MAP = [
        "abstract", "algorithm", "aside_text", "chart", "content",
        "display_formula", "doc_title", "figure_title", "footer", "footer_image",
        "footnote", "formula_number", "header", "header_image", "image",
        "inline_formula", "number", "paragraph_title", "reference", "reference_content",
        "seal", "table", "text", "vertical_text", "vision_footnote",
    ]

    def __init__(self, model_path, input_size=(800, 800), conf_threshold=0.5):
        """
        Args:
            model_path: ONNX模型路径
            input_size: 模型输入尺寸
            conf_threshold: 置信度阈值
        """
        self.model_path = model_path
        self.input_size = input_size
        self.conf_threshold = conf_threshold
        self.session = ort.InferenceSession(model_path)
        self.input_names = [i.name for i in self.session.get_inputs()]
        self.output_names = [o.name for o in self.session.get_outputs()]
        self.colors = self._generate_colors(len(self.LABEL_MAP))

    def _generate_colors(self, num_classes):
        """生成类别颜色"""
        np.random.seed(42)
        colors = []
        for i in range(num_classes):
            hue = int(255 * i / num_classes)
            color = cv2.cvtColor(np.uint8([[[hue, 255, 255]]]), cv2.COLOR_HSV2BGR)[0][0]
            colors.append(tuple(int(c) for c in color))
        return colors

    def preprocess(self, image):
        orig_h, orig_w = image.shape[:2]
        target_h, target_w = self.input_size
        scale_h = target_h / orig_h
        scale_w = target_w / orig_w
        resized = cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        img = (img - mean) / std
        img = img.transpose(2, 0, 1)[np.newaxis, ...]
        return img, scale_h, scale_w

    def infer(self, image):
        input_blob, scale_h, scale_w = self.preprocess(image)
        preprocess_shape = [np.array(self.input_size, dtype=np.float32)]
        input_feed = {
            self.input_names[0]: preprocess_shape,
            self.input_names[1]: input_blob,
            self.input_names[2]: [[scale_h, scale_w]],
        }
        outputs = self.session.run(self.output_names, input_feed)[0]
        return outputs

    def postprocess(self, outputs):
        boxes = outputs[outputs[:, 1] > self.conf_threshold]
        # 按阅读顺序排序
        boxes = boxes[np.argsort(boxes[:, 6])]
        return boxes

    def to_json(self, boxes):
        layout_results = []
        for box in boxes:
            label_idx = int(box[0])
            score = float(box[1])
            xmin, ymin, xmax, ymax = box[2:6]
            label_name = (
                self.LABEL_MAP[label_idx]
                if label_idx < len(self.LABEL_MAP)
                else "unknown"
            )
            points = [
                [float(xmin), float(ymin)],
                [float(xmax), float(ymin)],
                [float(xmax), float(ymax)],
                [float(xmin), float(ymax)],
            ]
            layout_results.append({
                "type": label_name,
                "points": points,
                "confidence": round(score, 2),
            })
        return {"layout_results": layout_results}

    def visualize(self, image, boxes, output_path=None, alpha=0.35):
        vis_image = image.copy()
        overlay = vis_image.copy()
        h, w = vis_image.shape[:2]
        font_scale = max(0.5, min(h, w) / 1000)
        # 先绘制半透明框
        for box in boxes:
            label_idx = int(box[0])
            xmin, ymin, xmax, ymax = map(int, box[2:6])
            color = self.colors[label_idx % len(self.colors)]
            # 实心矩形画在 overlay
            cv2.rectangle(overlay, (xmin, ymin), (xmax, ymax), color, -1)
        # 半透明融合
        vis_image = cv2.addWeighted(overlay, alpha, vis_image, 1 - alpha, 0)
        # 绘制标签
        for box in boxes:
            label_idx = int(box[0])
            score = float(box[1])
            xmin, ymin, xmax, ymax = map(int, box[2:6])
            read_order = int(box[6])
            label_name = self.LABEL_MAP[label_idx]
            text = f"{label_name}|{score:.2f}#{read_order}"
            cv2.putText(vis_image, text, (xmin, max(ymin - 5, 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 0, 0), 1)
        if output_path:
            cv2.imwrite(output_path, vis_image)
        return vis_image

    # ---------------------------------------------------------
    # 对外接口
    def predict(self, image):
        if isinstance(image, str):
            image = cv2.imread(image)
        outputs = self.infer(image)
        boxes = self.postprocess(outputs)
        return boxes

    def predict_json(self, image):
        boxes = self.predict(image)
        return self.to_json(boxes)

    def predict_and_visualize(self, image, save_path=None):
        if isinstance(image, str):
            image = cv2.imread(image)
        boxes = self.predict(image)
        vis = self.visualize(image, boxes, save_path)
        return boxes, vis


if __name__ == "__main__":
    detector = DocLayoutV3Detector("./PP-DocLayoutV3.onnx")
    result = detector.predict_json("./test_cases/complex_latex.png")
    print(json.dumps(result, indent=2, ensure_ascii=False))
    boxes, vis = detector.predict_and_visualize(
        image="./test_cases/complex_latex.png", save_path="./vis.jpg"
    )
```

win7 兼容

win7 支持的 onnxruntime 版本最高到 1.5，其 ONNX IR version 只支持到 8，不支持上述模型。故将上面的原始 onnx 模型进行转换，使其兼容老的 IR 版本：

```python
# pip install onnx
import onnx

# 加载模型
model = onnx.load("./PP-DocLayoutV3.onnx")
# 查看当前IR版本
print("Original IR version:", model.ir_version)
# 设置为IR 8
model.ir_version = 8
# 保存新模型
onnx.save(model, "PP-DocLayoutV3.onnx")
```

转换后的模型在 win7 SP1、python 3.7、onnxruntime 1.14.1 环境下测试成功。

相关新闻

软件测试模型梳理总结

双证加持，能力翻倍｜PeopleCert SRE + DevOps双认证，开启数字化运维新征程！

测试文章测测测测试测试的测试从申城

构建会“思考”的API智能体：从任务分解到透明执行的工程实践

如何快速掌握PrusaSlicer：3D打印切片软件的终极完整指南

applera1n终极指南：iOS 15-16激活锁绕过的免费完整方案

3步解锁VideoCrafter：如何让AI帮你创作专业级视频？

Cursor Free VIP：轻松解决Cursor AI试用限制的专业工具

告别命令行恐惧：3分钟学会用图形界面将PowerShell脚本编译成EXE

大模型是“大脑“ Agent是“四肢“：AI智能体如何让AI从“空想家“变“实干家“？

AzurLaneAutoScript：碧蓝航线智能自动化脚本，彻底解放你的游戏时间

这次终于选对了！降AIGC工具测评：2026 最新好用推荐与对比分析

为什么你的AI Agent总在跨境清关环节“失语”？揭秘NLP+规则引擎混合推理的5个关键断点

【AI Agent行业落地黄金法则】：20年架构师亲授7大避坑指南与3个已验证千万级ROI场景

镜像视界浙江科技有限公司｜数字孪生・视频孪生・无感定位・跨镜追踪 技术地位与核心优势

从stress到stress-ng：一文搞懂Linux压力测试工具怎么选？实战对比CPU/内存/磁盘压测效果

从TTL到eDP：嵌入式工程师选屏接口的实战避坑指南（附信号实测对比）

实测 Taotoken 多模型路由的响应延迟与稳定性体感

镜像视界浙江科技有限公司｜数字孪生・视频孪生・无感定位・跨镜追踪技术地位与核心优势