盒子ocr检测

2025-10-16 17:18:10 +08:00
commit 1f880799a4
71 changed files with 10373 additions and 0 deletions
--- a/pp_onnx/predict_cls.py
+++ b/pp_onnx/predict_cls.py
@@ -0,0 +1,86 @@
+import cv2
+import copy
+import numpy as np
+import math
+
+from pp_onnx.cls_postprocess import ClsPostProcess
+from pp_onnx.predict_base import PredictBase
+
+class TextClassifier(PredictBase):
+    """Detect text crops that are upside down and rotate them upright.
+
+    A crop is turned 180 degrees when the ONNX classifier's label contains
+    '180' and its score exceeds ``cls_thresh``.
+    """
+
+    def __init__(self, args):
+        """Read classifier settings from ``args`` and create the ONNX session.
+
+        ``args`` must provide ``cls_image_shape`` (a "C,H,W" string),
+        ``cls_batch_num``, ``cls_thresh``, ``label_list``, ``cls_model_dir``
+        and ``use_gpu``.
+        """
+        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
+        self.cls_batch_num = args.cls_batch_num
+        self.cls_thresh = args.cls_thresh
+        self.postprocess_op = ClsPostProcess(label_list=args.label_list)
+
+        # Initialize the model
+        self.cls_onnx_session = self.get_onnx_session(args.cls_model_dir, args.use_gpu)
+        self.cls_input_name = self.get_input_name(self.cls_onnx_session)
+        self.cls_output_name = self.get_output_name(self.cls_onnx_session)
+
+    def resize_norm_img(self, img):
+        """Resize ``img`` to the model height and return a padded CHW array.
+
+        The aspect ratio is kept unless the resized width would exceed the
+        model width, in which case the width is clamped. Values are divided
+        by 255, then mapped through (x - 0.5) / 0.5; padding columns stay 0.
+        """
+        imgC, imgH, imgW = self.cls_image_shape
+        h, w = img.shape[:2]
+        resized_w = min(imgW, int(math.ceil(imgH * (w / float(h)))))
+        resized_image = cv2.resize(img, (resized_w, imgH)).astype('float32')
+        if imgC == 1:
+            # Single-channel model: add the leading channel axis.
+            resized_image = resized_image[np.newaxis, :] / 255
+        else:
+            # HWC -> CHW
+            resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        padding_im[:, :, 0:resized_w] = resized_image
+        return padding_im
+
+    def __call__(self, img_list):
+        """Classify every crop and rotate the ones predicted upside down.
+
+        Args:
+            img_list: Images as arrays whose first two axes are (height, width).
+
+        Returns:
+            ``(images, cls_res)``: a deep copy of ``img_list`` with rotated
+            entries replaced, and one ``[label, score]`` per input image.
+        """
+        img_list = copy.deepcopy(img_list)
+        img_num = len(img_list)
+        # Sorting by aspect ratio can speed up the cls process
+        ratios = [img.shape[1] / float(img.shape[0]) for img in img_list]
+        indices = np.argsort(np.array(ratios))
+
+        # One independent list per image; ``[[...]] * n`` would alias one list.
+        cls_res = [['', 0.0] for _ in range(img_num)]
+
+        for beg_img_no in range(0, img_num, self.cls_batch_num):
+            batch_indices = indices[beg_img_no:beg_img_no + self.cls_batch_num]
+            norm_img_batch = np.stack([self.resize_norm_img(img_list[i]) for i in batch_indices])
+            input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
+            outputs = self.cls_onnx_session.run(self.cls_output_name, input_feed=input_feed)
+            cls_result = self.postprocess_op(outputs[0])
+            for i, (label, score) in zip(batch_indices, cls_result):
+                cls_res[i] = [label, score]
+                if '180' in label and score > self.cls_thresh:
+                    img_list[i] = cv2.rotate(img_list[i], cv2.ROTATE_180)
+        return img_list, cls_res
+