Explorar el Código

ocr识别代码扩展

Guos hace 1 día
padre
commit
62b71b840f
Se ha modificado 1 fichero con 264 adiciones y 67 borrados
  1. 264 67
      fs-common/src/main/java/com/fs/common/utils/txocr/TxOcrClient.java

+ 264 - 67
fs-common/src/main/java/com/fs/common/utils/txocr/TxOcrClient.java

@@ -3,6 +3,7 @@ package com.fs.common.utils.txocr;
 import cn.hutool.json.JSONObject;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fs.common.enums.LicenseTypeEnum;
 import com.google.gson.Gson;
 import com.google.gson.JsonObject;
 import com.tencentcloudapi.common.AbstractModel;
@@ -95,31 +96,6 @@ public class TxOcrClient  {
         return null;
     }
 
-
-    /**
-     * 文档抽取(基础版)
-     * @param imageUrl
-     * @return
-     */
-    public static String ExtractDocBasic(String imageUrl){
-        try {
-            Credential cred = new Credential("AKIDviPyMZbRp24udCcpqjQxHOK4cx88ze6N", "97tVwEJE81sY0StDPPGukQ2ZvkU3QceY");
-            HttpProfile httpProfile = new HttpProfile();
-            httpProfile.setEndpoint("ocr.tencentcloudapi.com");
-            ClientProfile clientProfile = new ClientProfile();
-            clientProfile.setHttpProfile(httpProfile);
-            CommonClient client = new CommonClient("ocr", "2018-11-19", cred, "", clientProfile);
-            String params = "{\"ImageUrl\":\"" + imageUrl + "\"}";
-            String resp = client.call("ExtractDocBasic", params);
-            log.info("文档抽取成功!结果:{}", resp);
-            String ocrStr = extractBusinessScope(resp);
-            return ocrStr;
-        } catch (TencentCloudSDKException e) {
-            log.info("文档抽取失败:{}", e.getMessage());
-        }
-        return null;
-    }
-
     /**
      * BizLicenseOCR
      * 本接口支持快速精准识别营业执照上的字段,包括统一社会信用代码、公司名称、主体类型、法定代表人、注册资本、组成形式、成立日期、营业期限和经营范围等字段。
@@ -258,48 +234,6 @@ public class TxOcrClient  {
         }
     }
 
-    /**
-     * 从OCR结果中提取经营范围字符串
-     * @param jsonResponse OCR返回的JSON字符串
-     * @return 经营范围内容
-     */
-    public static String extractBusinessScope(String jsonResponse) {
-        try {
-            ObjectMapper mapper = new ObjectMapper();
-            JsonNode rootNode = mapper.readTree(jsonResponse);
-            JsonNode structuralList = rootNode.path("Response").path("StructuralList");
-
-            // 遍历所有结构化文本块
-            for (JsonNode block : structuralList) {
-                JsonNode groups = block.path("Groups");
-                for (JsonNode group : groups) {
-                    JsonNode lines = group.path("Lines");
-                    for (JsonNode line : lines) {
-                        JsonNode key = line.path("Key");
-                        String keyName = key.path("AutoName").asText();
-
-                        // 查找"经营范围"相关的键
-                        if ("经营范围".equals(keyName)) {
-                            JsonNode value = line.path("Value");
-                            return value.path("AutoContent").asText();
-                        }
-                    }
-                }
-            }
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return null;
-    }
-
-    public static void main(String[] args) {
-//        ExtractDocBasic("https://bjyjb-1362704775.cos.ap-chongqing.myqcloud.com/fs/20251118/d175ed9771324c56a126779ef89bb1e3.jpg");
-        IdCardVerification("郭顺", "111");
-    }
-
-
-
-
     /**
      * ClassifyDetectOCR
      * 支持身份证、护照、名片、银行卡、行驶证、驾驶证、港澳台通行证、户口本、港澳台来往内地通行证、港澳台居住证、不动产证、营业执照的智能分类。
@@ -332,4 +266,267 @@ public class TxOcrClient  {
 //    }
 
 
+    /**
+     * Runs OCR on an image, detects the certificate type and copies the recognised fields into a
+     * {@code ContainsResult}.
+     *
+     * <p>The result always carries the given {@code imageUrl} and {@code keywords}. {@code flag}
+     * starts as {@code false} and only changes when a business scope was extracted: with keywords
+     * it becomes the outcome of the substring check, without keywords it becomes {@code true}.
+     * Exceptions do not propagate; they are logged and reported through the result message.
+     *
+     * @param imageUrl URL of the image to recognise
+     * @param keywords text that must occur in the business scope; may be {@code null}
+     * @return the populated result, never {@code null}
+     */
+    public static ContainsResult extractAndWrapResult(String imageUrl, String keywords) {
+        ContainsResult result = new ContainsResult();
+        // Seed the fields that are known before any OCR work happens.
+        result.setImageUrl(imageUrl);
+        result.setKeywords(keywords);
+        result.setFlag(false); // no keyword match until proven otherwise
+
+        try {
+            // 1. Fetch the raw OCR JSON.
+            String ocrJson = callTencentOcrApi(imageUrl);
+            if (ocrJson == null) {
+                result.setMessage("OCR接口调用失败,返回空结果");
+                log.warn(result.getMessage());
+                return result;
+            }
+
+            // 2. Detect the certificate type. A null answer is treated as UNKNOWN so the switch
+            //    below cannot fail with a NullPointerException.
+            LicenseTypeEnum certType = getCertificateType(ocrJson);
+            if (certType == null) {
+                certType = LicenseTypeEnum.UNKNOWN;
+            }
+            // First placeholder is the enum constant; the code used to be logged twice.
+            log.info("识别到证件类型:{}(编码:{})", certType, certType.getCode());
+            result.setMessage("识别到证件类型:" + certType.getCode());
+
+            // 3. Dispatch to the extractor that knows the field names of this certificate type.
+            switch (certType) {
+                case BUSINESS_LICENSE:
+                    wrapBusinessLicenseResult(result, ocrJson);
+                    break;
+                case DRUG_LICENSE:
+                    wrapDrugLicenseResult(result, ocrJson);
+                    break;
+                case FOOD_LICENSE:
+                    wrapFoodLicenseResult(result, ocrJson);
+                    break;
+                case MEDICAL_DEVICE_2:
+                case MEDICAL_DEVICE_3:
+                    wrapMedicalDeviceResult(result, ocrJson, certType);
+                    break;
+                case UNKNOWN:
+                    result.setMessage("无法识别证件类型");
+                    log.warn(result.getMessage());
+                    break;
+                default:
+                    // A type without an extractor keeps the "type detected" message set above.
+                    log.warn("No field extractor registered for certificate type {}", certType);
+                    break;
+            }
+
+            // 4. Keyword check; only meaningful when a business scope was extracted.
+            String businessScope = result.getBusinessStr();
+            if (businessScope != null) {
+                if (keywords != null) {
+                    boolean contains = businessScope.contains(keywords);
+                    result.setFlag(contains);
+                    result.setMessage(contains ? "经营范围包含关键词:" + keywords : "经营范围未包含关键词:" + keywords);
+                } else {
+                    // No keyword supplied: extraction alone counts as success.
+                    result.setFlag(true);
+                    result.setMessage("未上传关键字匹配,用不到时候无视该提示");
+                }
+            }
+        } catch (Exception e) {
+            result.setMessage("OCR解析失败:" + e.getMessage());
+            log.error("OCR解析失败", e);
+        }
+        return result;
+    }
+
+    // ====================== 核心私有方法 ======================
+
+    /**
+     * Calls the Tencent Cloud OCR action {@code ExtractDocBasic} (service {@code ocr}, version
+     * {@code 2018-11-19}) for the given image and returns the raw JSON response.
+     *
+     * <p>The request body is built with Jackson rather than string concatenation, so quotes or
+     * backslashes inside {@code imageUrl} are escaped instead of corrupting the payload.
+     *
+     * @param imageUrl URL of the image to recognise
+     * @return the raw response string returned by the SDK client
+     * @throws TencentCloudSDKException if the SDK call fails
+     */
+    private static String callTencentOcrApi(String imageUrl) throws TencentCloudSDKException {
+        // NOTE(review): these API credentials are committed in source. They should be rotated and
+        // loaded from external configuration; left unchanged here so existing deployments keep working.
+        Credential cred = new Credential("AKIDviPyMZbRp24udCcpqjQxHOK4cx88ze6N", "97tVwEJE81sY0StDPPGukQ2ZvkU3QceY");
+        HttpProfile httpProfile = new HttpProfile();
+        httpProfile.setEndpoint("ocr.tencentcloudapi.com");
+        ClientProfile clientProfile = new ClientProfile();
+        clientProfile.setHttpProfile(httpProfile);
+        CommonClient client = new CommonClient("ocr", "2018-11-19", cred, "", clientProfile);
+
+        // Serialise through Jackson so the URL is JSON-escaped.
+        String params = new ObjectMapper().createObjectNode().put("ImageUrl", imageUrl).toString();
+        String resp = client.call("ExtractDocBasic", params);
+        log.info("OCR原始响应:{}", resp);
+        return resp;
+    }
+
+    /**
+     * Determines the certificate type from the OCR response.
+     *
+     * <p>Reads the value of the recognised line whose key is {@code 标题} (title) and hands it to
+     * {@code LicenseTypeEnum.matchByDescription}.
+     *
+     * @param ocrJson raw OCR response JSON
+     * @return the matched type, or {@code LicenseTypeEnum.UNKNOWN} when the JSON cannot be parsed,
+     *         no title line exists, or the match yields {@code null}
+     */
+    private static LicenseTypeEnum getCertificateType(String ocrJson) {
+        try {
+            JsonNode structuralList = new ObjectMapper().readTree(ocrJson).path("Response").path("StructuralList");
+
+            // Reuse the shared lookup instead of repeating the block/group/line traversal.
+            String title = getFieldValue(structuralList, "标题");
+            if (title != null) {
+                LicenseTypeEnum matched = LicenseTypeEnum.matchByDescription(title);
+                if (matched != null) {
+                    return matched;
+                }
+            }
+        } catch (Exception e) {
+            log.error("提取证件标题失败", e);
+        }
+        return LicenseTypeEnum.UNKNOWN;
+    }
+
+    /**
+     * Looks up the value of the first recognised line whose key matches the given field name.
+     *
+     * <p>Walks {@code StructuralList -> Groups -> Lines} and compares each line's trimmed
+     * {@code Key.AutoName} with {@code fieldName}.
+     *
+     * @param structuralList the {@code StructuralList} node of the OCR response
+     * @param fieldName key text to search for
+     * @return the trimmed {@code Value.AutoContent} of the first match, or {@code null} when either
+     *         argument is {@code null} or no line matches
+     */
+    private static String getFieldValue(JsonNode structuralList, String fieldName) {
+        if (structuralList != null && fieldName != null) {
+            for (JsonNode section : structuralList) {
+                for (JsonNode cluster : section.path("Groups")) {
+                    for (JsonNode entry : cluster.path("Lines")) {
+                        String key = entry.path("Key").path("AutoName").asText().trim();
+                        if (!fieldName.equals(key)) {
+                            continue;
+                        }
+                        return entry.path("Value").path("AutoContent").asText().trim();
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+    // ====================== 各证件类型封装ContainsResult ======================
+
+    /**
+     * Copies business-licence fields from the OCR response into {@code result}.
+     *
+     * <p>Each value is looked up by its printed label; a label that is not found leaves the
+     * corresponding property {@code null}. Any exception is logged and written to the message.
+     *
+     * @param result target object to populate
+     * @param ocrJson raw OCR response JSON
+     */
+    private static void wrapBusinessLicenseResult(ContainsResult result, String ocrJson) {
+        try {
+            JsonNode fields = new ObjectMapper().readTree(ocrJson).path("Response").path("StructuralList");
+
+            // Label on the licence -> ContainsResult property
+            result.setBusinessStr(getFieldValue(fields, "经营范围")); // business scope
+            result.setRegNum(getFieldValue(fields, "统一社会信用代码")); // unified social credit code
+            result.setPerson(getFieldValue(fields, "法定代表人")); // legal representative
+            result.setAddress(getFieldValue(fields, "住所")); // registered address
+            result.setPeriod(getFieldValue(fields, "有效期至")); // valid until; may be absent on a business licence
+            result.setSetDate(getFieldValue(fields, "成立日期")); // date of establishment
+
+        } catch (Exception ex) {
+            log.error("封装营业执照结果失败", ex);
+            result.setMessage("封装营业执照结果失败:" + ex.getMessage());
+        }
+    }
+
+    /**
+     * Copies drug-distribution-licence fields from the OCR response into {@code result}.
+     *
+     * <p>Each value is looked up by its printed label; a label that is not found leaves the
+     * corresponding property {@code null}. Any exception is logged and written to the message.
+     *
+     * @param result target object to populate
+     * @param ocrJson raw OCR response JSON
+     */
+    private static void wrapDrugLicenseResult(ContainsResult result, String ocrJson) {
+        try {
+            JsonNode fields = new ObjectMapper().readTree(ocrJson).path("Response").path("StructuralList");
+
+            // Label on the licence -> ContainsResult property
+            result.setBusinessStr(getFieldValue(fields, "经营范围")); // business scope
+            result.setRegNum(getFieldValue(fields, "统一社会信用代码")); // unified social credit code
+            result.setPerson(getFieldValue(fields, "法定代表人(负责人)")); // legal representative / person in charge
+            result.setAddress(getFieldValue(fields, "经营地址")); // business address
+            result.setPeriod(getFieldValue(fields, "有效期至")); // valid until
+            result.setSetDate(getFieldValue(fields, "日期")); // stored as the issue date
+
+        } catch (Exception ex) {
+            log.error("封装药品经营许可证结果失败", ex);
+            result.setMessage("封装药品经营许可证结果失败:" + ex.getMessage());
+        }
+    }
+
+    /**
+     * Copies food-business-licence fields from the OCR response into {@code result}.
+     *
+     * <p>Only business scope, credit code, validity and business address are mapped. Any
+     * exception is logged and written to the message.
+     *
+     * @param result target object to populate
+     * @param ocrJson raw OCR response JSON
+     */
+    private static void wrapFoodLicenseResult(ContainsResult result, String ocrJson) {
+        try {
+            JsonNode fields = new ObjectMapper().readTree(ocrJson).path("Response").path("StructuralList");
+
+            result.setBusinessStr(getFieldValue(fields, "经营范围")); // business scope
+            result.setRegNum(getFieldValue(fields, "统一社会信用代码")); // unified social credit code
+            result.setPeriod(getFieldValue(fields, "有效期至")); // valid until
+            result.setAddress(getFieldValue(fields, "经营地址")); // business address
+
+        } catch (Exception ex) {
+            log.error("封装食品经营许可证结果失败", ex);
+            result.setMessage("封装食品经营许可证结果失败:" + ex.getMessage());
+        }
+    }
+
+    /**
+     * Copies medical-device certificate fields from the OCR response into {@code result}.
+     *
+     * <p>Maps business scope, validity and business address, then sets the message to the detected
+     * type code. Any exception is logged and written to the message.
+     *
+     * @param result target object to populate
+     * @param ocrJson raw OCR response JSON
+     * @param certType detected certificate type; its code appears in the messages
+     */
+    private static void wrapMedicalDeviceResult(ContainsResult result, String ocrJson, LicenseTypeEnum certType) {
+        try {
+            JsonNode fields = new ObjectMapper().readTree(ocrJson).path("Response").path("StructuralList");
+
+            result.setBusinessStr(getFieldValue(fields, "经营范围")); // business scope
+            result.setPeriod(getFieldValue(fields, "有效期至")); // valid until
+            result.setAddress(getFieldValue(fields, "经营地址")); // business address
+            result.setMessage("识别到" + certType.getCode());
+
+        } catch (Exception ex) {
+            log.error("封装{}结果失败", certType.getCode(), ex);
+            result.setMessage("封装" + certType.getCode() + "结果失败:" + ex.getMessage());
+        }
+    }
+
+    // ====================== 兼容原有方法 ======================
+
+    /**
+     * Legacy entry point: runs OCR on the image and returns only the business scope.
+     *
+     * <p>The scope is read straight from the OCR response, independent of certificate-type
+     * detection. Routing through {@code extractAndWrapResult} would return {@code null} for any
+     * document whose type is not recognised, which the earlier implementation of this method did
+     * not do.
+     *
+     * @param imageUrl URL of the image to recognise
+     * @return the business scope text, or {@code null} when the call fails or no scope is found
+     */
+    public static String ExtractDocBasic(String imageUrl) {
+        try {
+            String ocrJson = callTencentOcrApi(imageUrl);
+            return ocrJson == null ? null : extractBusinessScope(ocrJson);
+        } catch (Exception e) {
+            // Boundary catch: this method has never thrown to its callers.
+            log.error("文档抽取失败", e);
+            return null;
+        }
+    }
+
+    /**
+     * Legacy helper: extracts the business scope ({@code 经营范围}) from a raw OCR response.
+     *
+     * @param jsonResponse raw OCR response JSON; may be {@code null}
+     * @return the trimmed business scope, or {@code null} when the input is {@code null} or blank,
+     *         cannot be parsed, or contains no matching line
+     */
+    public static String extractBusinessScope(String jsonResponse) {
+        if (jsonResponse == null || jsonResponse.trim().isEmpty()) {
+            return null;
+        }
+        try {
+            ObjectMapper mapper = new ObjectMapper();
+            JsonNode rootNode = mapper.readTree(jsonResponse);
+            JsonNode structuralList = rootNode.path("Response").path("StructuralList");
+            return getFieldValue(structuralList, "经营范围");
+        } catch (Exception e) {
+            // Report through the class logger instead of printStackTrace.
+            log.error("提取经营范围失败", e);
+            return null;
+        }
+    }
+
+    // ====================== Usage example ======================
+    /**
+     * Manual smoke test: runs the extraction for two placeholder URLs and logs the results.
+     * The URLs are placeholders and must be replaced before running.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        // Case 1: drug licence extraction plus keyword check
+        ContainsResult drugLicence = extractAndWrapResult("你的药品许可证URL", "处方药");
+        log.info("药品许可证提取结果:\n{}", drugLicence.toString());
+
+        // Case 2: business licence extraction without keywords
+        ContainsResult businessLicence = extractAndWrapResult("你的营业执照URL", null);
+        log.info("\n营业执照提取结果:\n{}", businessLicence.toString());
+    }
+
+
+
+
 }