更新时间:2022-07-26 GMT+08:00
PDF 转图片方法
# -*- coding: utf-8 -*-
import os
import base64
import fitz
import io
from PIL import Image
from glob import glob


def _first_attr(obj, *names):
    """Return the first attribute of ``obj`` that exists among ``names``.

    PyMuPDF renamed its camelCase API to snake_case, and the old spellings
    may be missing in newer releases. Listing the new name first and the
    legacy name second keeps this sample usable on both.

    Raises:
        AttributeError: if none of ``names`` exists on ``obj``.
    """
    for name in names:
        attr = getattr(obj, name, None)
        if attr is not None:
            return attr
    raise AttributeError(
        "%s has none of the attributes: %s"
        % (type(obj).__name__, ", ".join(names))
    )


class CovertPdfToJpg:
    """Render every page of a PDF file to a JPEG image.

    Attributes are set by the constructor:
        file_path: path of the PDF file to convert.
        save_root: directory the JPEG files are written to.
    """

    def __init__(self, file_path, save_root):
        self.file_path = file_path
        self.save_root = save_root

    @staticmethod
    def open_pdf(file):
        """Open ``file`` with PyMuPDF and return the document object."""
        return fitz.open(file)

    @staticmethod
    def get_trans(doc, page, min_side=0, max_side=0, rotate=0.0):
        """Build the scaling/rotation matrix used to render one page.

        When ``max_side > min_side > 0`` the page is scaled so that its
        shorter edge equals ``min_side``; if the longer edge would then
        exceed ``max_side``, the scale is reduced so the longer edge equals
        ``max_side`` instead. Otherwise the scale is 1.

        Args:
            doc: opened PyMuPDF document.
            page: zero-based page index.
            min_side: target length of the shorter page edge.
            max_side: upper bound for the longer page edge.
            rotate: rotation passed to the matrix pre-rotation call.

        Returns:
            The ``fitz.Matrix`` combining the scale and the rotation.
        """
        region = doc[page].rect
        scale = 1
        if max_side > min_side > 0:
            scale = min_side / min(region.width, region.height)
            if max(region.width, region.height) * scale > max_side:
                scale = max_side / max(region.width, region.height)
        matrix = fitz.Matrix(scale, scale)
        return _first_attr(matrix, "prerotate", "preRotate")(rotate)

    def page2pix(self, doc, page, trans):
        """Render page ``page`` of ``doc`` to a pixmap using matrix ``trans``.

        The pixmap is requested without an alpha channel.
        """
        # Note from the original author: the page index must be resolved
        # beforehand, otherwise problems may occur.
        render = _first_attr(doc[page], "get_pixmap", "getPixmap")
        return render(matrix=trans, alpha=False)

    def pdf_to_jpg(self, width=1024, height=1400):
        """Convert every page of ``self.file_path`` to a JPEG file.

        Files are written to ``self.save_root`` (created if missing) as
        ``<pdf name without extension>_<page number>.jpg``, with page
        numbers starting at 1. A page that fails to render is reported and
        skipped; the remaining pages are still converted.

        Args:
            width: forwarded to ``get_trans`` as ``min_side``.
            height: forwarded to ``get_trans`` as ``max_side``.
        """
        doc = self.open_pdf(self.file_path)
        try:
            save_dir = os.fspath(self.save_root)
            os.makedirs(save_dir, exist_ok=True)

            # Strip only a trailing ".pdf" (any letter case) so that names
            # containing ".pdf" elsewhere are left intact.
            stem = os.path.basename(self.file_path)
            if stem.lower().endswith('.pdf'):
                stem = stem[:-len('.pdf')]

            page_total = len(doc)
            print("document", page_total, page_total)
            for i in range(page_total):
                trans = self.get_trans(doc, i, width, height, rotate=0)
                try:
                    pix = self.page2pix(doc, i, trans)
                except Exception as exc:
                    # Best effort: keep converting the other pages, but do
                    # not hide the failure.
                    print("skip page", i + 1, "render failed:", exc)
                    continue

                # Newer PyMuPDF exposes tobytes(); older releases only have
                # getPNGData().
                to_bytes = getattr(pix, "tobytes", None)
                if to_bytes is not None:
                    png_data = to_bytes("png")
                else:
                    png_data = pix.getPNGData()
                image = Image.open(io.BytesIO(png_data))

                target = os.path.join(
                    save_dir, stem + '_' + str(i + 1) + '.jpg')
                print(target)
                image.save(target)
        finally:
            # Release the file handle even when a page or a save fails.
            doc.close()
        return
父主题: API使用类