|
|
@@ -3,6 +3,7 @@ package com.fs.common.utils.txocr;
|
|
|
import cn.hutool.json.JSONObject;
|
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
+import com.fs.common.enums.LicenseTypeEnum;
|
|
|
import com.google.gson.Gson;
|
|
|
import com.google.gson.JsonObject;
|
|
|
import com.tencentcloudapi.common.AbstractModel;
|
|
|
@@ -95,31 +96,6 @@ public class TxOcrClient {
|
|
|
return null;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
- /**
|
|
|
- * 文档抽取(基础版)
|
|
|
- * @param imageUrl
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String ExtractDocBasic(String imageUrl){
|
|
|
- try {
|
|
|
- Credential cred = new Credential("AKIDviPyMZbRp24udCcpqjQxHOK4cx88ze6N", "97tVwEJE81sY0StDPPGukQ2ZvkU3QceY");
|
|
|
- HttpProfile httpProfile = new HttpProfile();
|
|
|
- httpProfile.setEndpoint("ocr.tencentcloudapi.com");
|
|
|
- ClientProfile clientProfile = new ClientProfile();
|
|
|
- clientProfile.setHttpProfile(httpProfile);
|
|
|
- CommonClient client = new CommonClient("ocr", "2018-11-19", cred, "", clientProfile);
|
|
|
- String params = "{\"ImageUrl\":\"" + imageUrl + "\"}";
|
|
|
- String resp = client.call("ExtractDocBasic", params);
|
|
|
- log.info("文档抽取成功!结果:{}", resp);
|
|
|
- String ocrStr = extractBusinessScope(resp);
|
|
|
- return ocrStr;
|
|
|
- } catch (TencentCloudSDKException e) {
|
|
|
- log.info("文档抽取失败:{}", e.getMessage());
|
|
|
- }
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
/**
|
|
|
* BizLicenseOCR
|
|
|
* 本接口支持快速精准识别营业执照上的字段,包括统一社会信用代码、公司名称、主体类型、法定代表人、注册资本、组成形式、成立日期、营业期限和经营范围等字段。
|
|
|
@@ -258,48 +234,6 @@ public class TxOcrClient {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * 从OCR结果中提取经营范围字符串
|
|
|
- * @param jsonResponse OCR返回的JSON字符串
|
|
|
- * @return 经营范围内容
|
|
|
- */
|
|
|
- public static String extractBusinessScope(String jsonResponse) {
|
|
|
- try {
|
|
|
- ObjectMapper mapper = new ObjectMapper();
|
|
|
- JsonNode rootNode = mapper.readTree(jsonResponse);
|
|
|
- JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
-
|
|
|
- // 遍历所有结构化文本块
|
|
|
- for (JsonNode block : structuralList) {
|
|
|
- JsonNode groups = block.path("Groups");
|
|
|
- for (JsonNode group : groups) {
|
|
|
- JsonNode lines = group.path("Lines");
|
|
|
- for (JsonNode line : lines) {
|
|
|
- JsonNode key = line.path("Key");
|
|
|
- String keyName = key.path("AutoName").asText();
|
|
|
-
|
|
|
- // 查找"经营范围"相关的键
|
|
|
- if ("经营范围".equals(keyName)) {
|
|
|
- JsonNode value = line.path("Value");
|
|
|
- return value.path("AutoContent").asText();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- public static void main(String[] args) {
|
|
|
-// ExtractDocBasic("https://bjyjb-1362704775.cos.ap-chongqing.myqcloud.com/fs/20251118/d175ed9771324c56a126779ef89bb1e3.jpg");
|
|
|
- IdCardVerification("郭顺", "111");
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
/**
|
|
|
* ClassifyDetectOCR
|
|
|
* 支持身份证、护照、名片、银行卡、行驶证、驾驶证、港澳台通行证、户口本、港澳台来往内地通行证、港澳台居住证、不动产证、营业执照的智能分类。
|
|
|
@@ -332,4 +266,267 @@ public class TxOcrClient {
|
|
|
// }
|
|
|
|
|
|
|
|
|
+ /**
|
|
|
+ * 核心方法:提取OCR结果并封装为ContainsResult
|
|
|
+ * @param imageUrl 图片URL
|
|
|
+ * @param keywords 校验关键词(可传null)
|
|
|
+ * @return 结构化的ContainsResult
|
|
|
+ */
|
|
|
+ public static ContainsResult extractAndWrapResult(String imageUrl, String keywords) {
|
|
|
+ ContainsResult result = new ContainsResult();
|
|
|
+ // 初始化基础字段
|
|
|
+ result.setImageUrl(imageUrl);
|
|
|
+ result.setKeywords(keywords);
|
|
|
+ result.setFlag(false); // 默认未匹配关键词
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 1. 调用OCR接口获取原始JSON
|
|
|
+ String ocrJson = callTencentOcrApi(imageUrl);
|
|
|
+ if (ocrJson == null) {
|
|
|
+ result.setMessage("OCR接口调用失败,返回空结果");
|
|
|
+ log.warn(result.getMessage());
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 识别证件类型
|
|
|
+ LicenseTypeEnum certType = getCertificateType(ocrJson);
|
|
|
+ log.info("识别到证件类型:{}(编码:{})", certType.getCode(), certType.getCode());
|
|
|
+ result.setMessage("识别到证件类型:" + certType.getCode());
|
|
|
+
|
|
|
+ // 3. 按证件类型提取字段并封装到ContainsResult
|
|
|
+ switch (certType) {
|
|
|
+ case BUSINESS_LICENSE:
|
|
|
+ wrapBusinessLicenseResult(result, ocrJson);
|
|
|
+ break;
|
|
|
+ case DRUG_LICENSE:
|
|
|
+ wrapDrugLicenseResult(result, ocrJson);
|
|
|
+ break;
|
|
|
+ case FOOD_LICENSE:
|
|
|
+ wrapFoodLicenseResult(result, ocrJson);
|
|
|
+ break;
|
|
|
+ case MEDICAL_DEVICE_2:
|
|
|
+ case MEDICAL_DEVICE_3:
|
|
|
+ wrapMedicalDeviceResult(result, ocrJson, certType);
|
|
|
+ break;
|
|
|
+ case UNKNOWN:
|
|
|
+ result.setMessage("无法识别证件类型");
|
|
|
+ log.warn(result.getMessage());
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 4. 校验关键词(如果传入关键词)
|
|
|
+ if (keywords != null && result.getBusinessStr() != null) {
|
|
|
+ boolean contains = result.getBusinessStr().contains(keywords);
|
|
|
+ result.setFlag(contains);
|
|
|
+ result.setMessage(contains ? "经营范围包含关键词:" + keywords : "经营范围未包含关键词:" + keywords);
|
|
|
+ }else if(keywords == null && result.getBusinessStr() != null){
|
|
|
+ result.setFlag(true);
|
|
|
+ result.setMessage("未上传关键字匹配,用不到时候无视该提示");
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ result.setMessage("OCR解析失败:" + e.getMessage());
|
|
|
+ log.error("OCR解析失败", e);
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ // ====================== 核心私有方法 ======================
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 调用腾讯云OCR接口
|
|
|
+ */
|
|
|
+ private static String callTencentOcrApi(String imageUrl) throws TencentCloudSDKException {
|
|
|
+ Credential cred = new Credential("AKIDviPyMZbRp24udCcpqjQxHOK4cx88ze6N", "97tVwEJE81sY0StDPPGukQ2ZvkU3QceY");
|
|
|
+ HttpProfile httpProfile = new HttpProfile();
|
|
|
+ httpProfile.setEndpoint("ocr.tencentcloudapi.com");
|
|
|
+ ClientProfile clientProfile = new ClientProfile();
|
|
|
+ clientProfile.setHttpProfile(httpProfile);
|
|
|
+ CommonClient client = new CommonClient("ocr", "2018-11-19", cred, "", clientProfile);
|
|
|
+
|
|
|
+ String params = "{\"ImageUrl\":\"" + imageUrl + "\"}";
|
|
|
+ String resp = client.call("ExtractDocBasic", params);
|
|
|
+ log.info("OCR原始响应:{}", resp);
|
|
|
+ return resp;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 识别证件类型
|
|
|
+ */
|
|
|
+ private static LicenseTypeEnum getCertificateType(String ocrJson) {
|
|
|
+ try {
|
|
|
+ ObjectMapper mapper = new ObjectMapper();
|
|
|
+ JsonNode rootNode = mapper.readTree(ocrJson);
|
|
|
+ JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
+
|
|
|
+ // 提取标题字段
|
|
|
+ for (JsonNode block : structuralList) {
|
|
|
+ JsonNode groups = block.path("Groups");
|
|
|
+ for (JsonNode group : groups) {
|
|
|
+ JsonNode lines = group.path("Lines");
|
|
|
+ for (JsonNode line : lines) {
|
|
|
+ String fieldName = line.path("Key").path("AutoName").asText().trim();
|
|
|
+ if ("标题".equals(fieldName)) {
|
|
|
+ String title = line.path("Value").path("AutoContent").asText().trim();
|
|
|
+ return LicenseTypeEnum.matchByDescription(title);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("提取证件标题失败", e);
|
|
|
+ }
|
|
|
+ return LicenseTypeEnum.UNKNOWN;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 通用字段提取:根据字段名获取值
|
|
|
+ */
|
|
|
+ private static String getFieldValue(JsonNode structuralList, String fieldName) {
|
|
|
+ if (structuralList == null || fieldName == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ for (JsonNode block : structuralList) {
|
|
|
+ JsonNode groups = block.path("Groups");
|
|
|
+ for (JsonNode group : groups) {
|
|
|
+ JsonNode lines = group.path("Lines");
|
|
|
+ for (JsonNode line : lines) {
|
|
|
+ String name = line.path("Key").path("AutoName").asText().trim();
|
|
|
+ if (fieldName.equals(name)) {
|
|
|
+ return line.path("Value").path("AutoContent").asText().trim();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ // ====================== 各证件类型封装ContainsResult ======================
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 封装营业执照结果到ContainsResult
|
|
|
+ */
|
|
|
+ private static void wrapBusinessLicenseResult(ContainsResult result, String ocrJson) {
|
|
|
+ try {
|
|
|
+ ObjectMapper mapper = new ObjectMapper();
|
|
|
+ JsonNode rootNode = mapper.readTree(ocrJson);
|
|
|
+ JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
+
|
|
|
+ // 映射ContainsResult字段
|
|
|
+ result.setBusinessStr(getFieldValue(structuralList, "经营范围")); // 营业执照经营范围
|
|
|
+ result.setRegNum(getFieldValue(structuralList, "统一社会信用代码")); // 统一社会信用代码
|
|
|
+ result.setPerson(getFieldValue(structuralList, "法定代表人")); // 法人
|
|
|
+ result.setAddress(getFieldValue(structuralList, "住所")); // 地址
|
|
|
+ result.setPeriod(getFieldValue(structuralList, "有效期至")); // 有效期(营业执照可能无此字段,返回null)
|
|
|
+ result.setSetDate(getFieldValue(structuralList, "成立日期")); // 注册日期
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("封装营业执照结果失败", e);
|
|
|
+ result.setMessage("封装营业执照结果失败:" + e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 封装药品经营许可证结果到ContainsResult
|
|
|
+ */
|
|
|
+ private static void wrapDrugLicenseResult(ContainsResult result, String ocrJson) {
|
|
|
+ try {
|
|
|
+ ObjectMapper mapper = new ObjectMapper();
|
|
|
+ JsonNode rootNode = mapper.readTree(ocrJson);
|
|
|
+ JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
+
|
|
|
+ // 映射ContainsResult字段
|
|
|
+ result.setBusinessStr(getFieldValue(structuralList, "经营范围")); // 药品许可证经营范围
|
|
|
+ result.setRegNum(getFieldValue(structuralList, "统一社会信用代码")); // 统一社会信用代码
|
|
|
+ result.setPerson(getFieldValue(structuralList, "法定代表人(负责人)")); // 法人/负责人
|
|
|
+ result.setAddress(getFieldValue(structuralList, "经营地址")); // 经营地址
|
|
|
+ result.setPeriod(getFieldValue(structuralList, "有效期至")); // 有效期至
|
|
|
+ result.setSetDate(getFieldValue(structuralList, "日期")); // 签发日期
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("封装药品经营许可证结果失败", e);
|
|
|
+ result.setMessage("封装药品经营许可证结果失败:" + e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 封装食品经营许可证结果(可扩展)
|
|
|
+ */
|
|
|
+ private static void wrapFoodLicenseResult(ContainsResult result, String ocrJson) {
|
|
|
+ try {
|
|
|
+ ObjectMapper mapper = new ObjectMapper();
|
|
|
+ JsonNode rootNode = mapper.readTree(ocrJson);
|
|
|
+ JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
+
|
|
|
+ result.setBusinessStr(getFieldValue(structuralList, "经营范围"));
|
|
|
+ result.setRegNum(getFieldValue(structuralList, "统一社会信用代码"));
|
|
|
+ result.setPeriod(getFieldValue(structuralList, "有效期至"));
|
|
|
+ result.setAddress(getFieldValue(structuralList, "经营地址"));
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("封装食品经营许可证结果失败", e);
|
|
|
+ result.setMessage("封装食品经营许可证结果失败:" + e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 封装医疗器械备案证书结果(可扩展)
|
|
|
+ */
|
|
|
+ private static void wrapMedicalDeviceResult(ContainsResult result, String ocrJson, LicenseTypeEnum certType) {
|
|
|
+ try {
|
|
|
+ ObjectMapper mapper = new ObjectMapper();
|
|
|
+ JsonNode rootNode = mapper.readTree(ocrJson);
|
|
|
+ JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
+
|
|
|
+ result.setBusinessStr(getFieldValue(structuralList, "经营范围"));
|
|
|
+ result.setPeriod(getFieldValue(structuralList, "有效期至"));
|
|
|
+ result.setAddress(getFieldValue(structuralList, "经营地址"));
|
|
|
+ result.setMessage("识别到" + certType.getCode());
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("封装{}结果失败", certType.getCode(), e);
|
|
|
+ result.setMessage("封装" + certType.getCode() + "结果失败:" + e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // ====================== 兼容原有方法 ======================
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 原有方法:仅提取经营范围
|
|
|
+ */
|
|
|
+ public static String ExtractDocBasic(String imageUrl) {
|
|
|
+ ContainsResult result = extractAndWrapResult(imageUrl, null);
|
|
|
+ return result.getBusinessStr();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 原有方法:从JSON提取经营范围
|
|
|
+ */
|
|
|
+ public static String extractBusinessScope(String jsonResponse) {
|
|
|
+ try {
|
|
|
+ ObjectMapper mapper = new ObjectMapper();
|
|
|
+ JsonNode rootNode = mapper.readTree(jsonResponse);
|
|
|
+ JsonNode structuralList = rootNode.path("Response").path("StructuralList");
|
|
|
+ return getFieldValue(structuralList, "经营范围");
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // ====================== 测试示例 ======================
|
|
|
+ public static void main(String[] args) {
|
|
|
+ // 测试1:药品经营许可证提取 + 关键词校验
|
|
|
+ String drugUrl = "你的药品许可证URL";
|
|
|
+ ContainsResult drugResult = TxOcrClient.extractAndWrapResult(drugUrl, "处方药");
|
|
|
+ log.info("药品许可证提取结果:\n{}", drugResult.toString());
|
|
|
+
|
|
|
+ // 测试2:营业执照提取
|
|
|
+ String businessUrl = "你的营业执照URL";
|
|
|
+ ContainsResult businessResult = TxOcrClient.extractAndWrapResult(businessUrl, null);
|
|
|
+ log.info("\n营业执照提取结果:\n{}", businessResult.toString());
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
}
|