更新时间:2024-11-07 GMT+08:00
分享

识别结果后处理

提取特定字段导入Excel

本示例调用身份证识别API,并从获取到的JSON结果中,提取所需的字段,填入至Excel。

  • 前提条件
    • 开通身份证识别
    • 参考本地调用,安装OCR Python SDK,并执行pip install xlsxwriter命令安装依赖包。
    • 登录访问密钥页面,获取AK、SK。可以新增访问密钥,或使用已有的访问密钥。访问密钥为credentials.csv文件,包含AK/SK信息。

  • 代码示例
    # -*- coding: utf-8 -*-
    import base64
    import xlsxwriter
    
    from huaweicloudsdkcore.auth.credentials import BasicCredentials
    from huaweicloudsdkocr.v1.region.ocr_region import OcrRegion
    from huaweicloudsdkcore.exceptions import exceptions
    from huaweicloudsdkocr.v1 import *
    from huaweicloudsdkcore.http.http_config import HttpConfig
    
    
    def recognize_id_card_request():
        """Send the module-level image to the ID card recognition API.

        Reads the globals ``client`` and ``image_base64`` that the script entry
        point sets up before calling this function.

        :return: the SDK response on success; ``None`` when the SDK raises
            ``ClientRequestException`` (the error details are printed instead)
        """
        try:
            request = RecognizeIdCardRequest()
            request.body = IdCardRequestBody(image=image_base64)
            return client.recognize_id_card(request)
        except exceptions.ClientRequestException as e:
            # Print the fields needed to diagnose the failed request, one per line.
            for detail in (e.status_code, e.request_id, e.error_code, e.error_msg):
                print(detail)
    
    
    def get_credential():
        """Build the SDK credentials object from the module-level ``ak`` and ``sk``."""
        basic_credentials = BasicCredentials(ak, sk)
        return basic_credentials
    
    
    def get_client():
        """Create an ``OcrClient`` for the CN_NORTH_4 region.

        Uses the module-level ``credentials`` and the SDK's default HTTP
        configuration with SSL verification switched off.
        """
        http_config = HttpConfig.get_default_config()
        # NOTE(review): certificate verification is disabled here; confirm this
        # is acceptable before reusing the sample outside a test environment.
        http_config.ignore_ssl_verification = True

        builder = OcrClient.new_builder(OcrClient)
        builder = builder.with_credentials(credentials)
        builder = builder.with_region(OcrRegion.CN_NORTH_4)
        builder = builder.with_http_config(http_config)
        return builder.build()
    
    
    def image_to_base64(imagepath):
        """Read a local image file and return its content as base64 text.

        :param imagepath: path of the file to read (opened in binary mode)
        :return: the base64 encoding of the file content, as a ``str``
        """
        with open(imagepath, "rb") as image_file:
            encoded = base64.b64encode(image_file.read())
        return encoded.decode("utf-8")
    
    
    def response_to_execl(save_file, data):
        """Write the key/value pairs of ``data["result"]`` into an Excel file.

        Keys are written to column A and the matching values to column B, one
        pair per row starting at row 1.

        :param save_file: path of the .xlsx file to create
        :param data: response dictionary; only ``data["result"]`` is read
        :raises KeyError: if ``data`` has no ``"result"`` entry
        """
        import json

        # Split the API result into parallel key and value columns.
        result = data["result"]
        keys_list = list(result)
        # xlsxwriter cannot write dict/list cells (it raises TypeError), so
        # nested values are stored as JSON text; scalars are left untouched.
        values_list = [
            json.dumps(value, ensure_ascii=False, default=str)
            if isinstance(value, (dict, list)) else value
            for value in result.values()
        ]

        options = {'in_memory': True}
        # The context manager saves and closes the workbook on exit, so no
        # explicit close() call is needed (a second close only emits a warning).
        with xlsxwriter.Workbook(save_file, options) as workbook:
            worksheet = workbook.add_worksheet()
            # set_column() works on whole columns, so use column ranges.
            worksheet.set_column('A:A', 23)
            worksheet.set_column('B:B', 100)
            worksheet.write_column('A1', keys_list)
            worksheet.write_column('B1', values_list)
    
    
    if __name__ == '__main__':
        # Fill in the access key pair (AK and SK).
        ak = "填写AK"
        sk = "填写SK"

        # Init Auth Info
        credentials = get_credential()

        # Create OcrClient
        client = get_client()

        image_base64 = image_to_base64(r"图片的路径,例如D:\local\test.png")

        # Request the ID card service. The helper returns None after printing
        # the error details when the request fails, so stop here instead of
        # crashing with AttributeError on None.to_dict().
        response = recognize_id_card_request()
        if response is None:
            raise SystemExit(1)
        response = response.to_dict()

        # Save the recognized fields to the Excel file.
        response_to_execl(r"excel路径,例如D:\local\test.xlsx", response)
    

使用PDF进行文字识别

本示例将PDF文件转换为图片,并调用通用文字识别API(示例代码使用RecognizeGeneralTextRequest),获取识别结果。

  • 前提条件
    • 开通通用文字识别
    • 参考本地调用,安装OCR Python SDK,并依次执行pip install fitz命令、pip install PyMuPDF==1.18.0命令和pip install Pillow命令安装依赖包(示例代码通过PIL保存图片,因此需要安装Pillow)。
    • 登录访问密钥页面,获取AK、SK。可以新增访问密钥,或使用已有的访问密钥。访问密钥为credentials.csv文件,包含AK/SK信息。

  • 代码示例
    # -*- coding: utf-8 -*-
    import os
    import base64
    import fitz
    import io
    from PIL import Image
    from glob import glob
    
    
    from huaweicloudsdkcore.auth.credentials import BasicCredentials
    from huaweicloudsdkocr.v1.region.ocr_region import OcrRegion
    from huaweicloudsdkcore.exceptions import exceptions
    from huaweicloudsdkocr.v1 import *
    from huaweicloudsdkcore.http.http_config import HttpConfig
    
    
    class CovertPdfToJpg:
        """Render every page of a PDF file to a ``.jpg`` image on disk.

        The class calls the camelCase PyMuPDF names (``preRotate``,
        ``getPixmap``, ``pageCount``, ``getPNGData``), so it needs a PyMuPDF
        release that still provides them.
        """

        def __init__(self, file_path, save_root):
            """
            :param file_path: path of the PDF file to convert
            :param save_root: directory that receives the generated images
            """
            self.file_path = file_path
            self.save_root = save_root

        @staticmethod
        def open_pdf(file):
            """Open ``file`` with ``fitz.open`` and return the document."""
            return fitz.open(file)

        @staticmethod
        def get_trans(doc, page, min_side=0, max_side=0, rotate=0.0):
            """Build the scale/rotation matrix used to render one page.

            When ``max_side > min_side > 0`` the page is scaled so that its
            shorter side equals ``min_side``; if the longer side would then
            exceed ``max_side``, the page is scaled so that the longer side
            equals ``max_side`` instead. Otherwise the scale stays 1.

            :param doc: opened document
            :param page: zero-based page index
            :param min_side: target length of the shorter page side
            :param max_side: upper bound for the longer page side
            :param rotate: rotation passed to ``preRotate``
            :return: the ``fitz.Matrix`` to render the page with
            """
            region = doc[page].rect
            scale = 1
            if max_side > min_side > 0:
                scale = min_side / min(region.width, region.height)
                if max(region.width, region.height) * scale > max_side:
                    scale = max_side / max(region.width, region.height)
            trans = fitz.Matrix(scale, scale).preRotate(rotate)
            return trans

        def page2pix(self, doc, page, trans):
            """Render page ``page`` of ``doc`` with matrix ``trans`` (no alpha channel)."""
            return doc[page].getPixmap(matrix=trans, alpha=False)

        @staticmethod
        def _image_name(file_path, page_index):
            """Return the output file name for a zero-based page index.

            Only a trailing ``.pdf`` extension (any letter case) is removed
            from the base name; the page number in the result is 1-based.
            """
            name = os.path.basename(file_path)
            stem, ext = os.path.splitext(name)
            if ext.lower() != '.pdf':
                stem = name
            return '{}_{}.jpg'.format(stem, page_index + 1)

        def pdf_to_jpg(self, width=1024, height=1400):
            """Convert each page of the PDF into a ``.jpg`` file under ``save_root``.

            :param width: passed to ``get_trans`` as ``min_side``
            :param height: passed to ``get_trans`` as ``max_side``
            """
            doc = self.open_pdf(self.file_path)
            try:
                save_dir = self.save_root
                os.makedirs(save_dir, exist_ok=True)
                print("document", len(doc), doc.pageCount)
                for i in range(len(doc)):
                    trans = self.get_trans(doc, i, width, height, rotate=0)
                    try:
                        pdf = self.page2pix(doc, i, trans)
                    except Exception as err:
                        # Best effort: skip a page that cannot be rendered, but
                        # report it instead of dropping it silently.
                        print("skip page", i + 1, err)
                        continue
                    image = Image.open(io.BytesIO(pdf.getPNGData()))
                    target = os.path.join(save_dir, self._image_name(self.file_path, i))
                    print(target)
                    image.save(target)
            finally:
                # Release the file handle even if a page fails to save.
                doc.close()
            return
    
    
    def recognize_general_text_request():
        """Send the module-level image to the general text recognition API.

        Reads the globals ``client`` and ``image_base64`` and prints the SDK
        response. When the SDK raises ``ClientRequestException`` the error
        details are printed instead. Nothing is returned in either case.
        """
        try:
            request = RecognizeGeneralTextRequest()
            request.body = GeneralTextRequestBody(image=image_base64)
            print(client.recognize_general_text(request))
        except exceptions.ClientRequestException as e:
            # Print the fields needed to diagnose the failed request, one per line.
            for detail in (e.status_code, e.request_id, e.error_code, e.error_msg):
                print(detail)
    
    
    def get_credential():
        """Return ``BasicCredentials`` built from the module-level ``ak`` and ``sk``."""
        access_credentials = BasicCredentials(ak, sk)
        return access_credentials
    
    
    def get_client():
        """Create an ``OcrClient`` bound to the CN_NORTH_4 region.

        Combines the module-level ``credentials`` with the SDK's default HTTP
        configuration, with SSL verification switched off.
        """
        http_config = HttpConfig.get_default_config()
        # NOTE(review): certificate verification is disabled here; confirm this
        # is acceptable before reusing the sample outside a test environment.
        http_config.ignore_ssl_verification = True

        builder = OcrClient.new_builder(OcrClient)
        builder = builder.with_credentials(credentials)
        builder = builder.with_region(OcrRegion.CN_NORTH_4)
        builder = builder.with_http_config(http_config)
        return builder.build()
    
    
    def url_to_base64(imagepath):
        """Return the base64 text of the local file at ``imagepath``.

        Despite the function name, the argument is a filesystem path that is
        opened and read in binary mode, not a URL.
        """
        with open(imagepath, "rb") as source:
            raw_bytes = source.read()
        return base64.b64encode(raw_bytes).decode("utf-8")
    
    
    if __name__ == '__main__':
        # Fill in the access key pair (AK and SK).
        ak = "填写AK"
        sk = "填写SK"

        # Build the credentials first, then the OCR client that uses them.
        credentials = get_credential()
        client = get_client()

        pdf_file = r"本地pdf文件路径,例如D:\local\test.pdf"
        image_dir = r"转换后的图片路径,例如D:\local"

        # Render every PDF page to a .jpg file inside image_dir.
        CovertPdfToJpg(pdf_file, image_dir).pdf_to_jpg()

        # Recognize each .jpg found in image_dir; the request helper reads the
        # image from the module-level image_base64.
        for image_file in glob(os.path.join(image_dir, "*.jpg")):
            image_base64 = url_to_base64(image_file)
            recognize_general_text_request()

    如果运行过程中出现“AttributeError: 'Document' object has no attribute 'pageCount'”报错,说明当前安装的PyMuPDF版本不再提供pageCount属性,可以先执行pip uninstall PyMuPDF命令卸载原有的PyMuPDF依赖包,再执行pip install PyMuPDF==1.18.0命令安装1.18.0版本解决。

相关文档