Updated on 2023-10-26 GMT+08:00
How Do I Convert a PDF to an Image?
# -*- coding: utf-8 -*-
import os
import base64
import fitz
import io
from PIL import Image
from glob import glob


class CovertPdfToJpg:
    """Render every page of a PDF file to a JPEG image with PyMuPDF and Pillow.

    The class name keeps its original spelling so existing callers keep working.

    Args:
        file_path: Path of the PDF file to convert.
        save_root: Directory that receives the generated ``.jpg`` files; it is
            created if it does not exist.

    Note:
        Uses the snake_case PyMuPDF API (``prerotate``, ``get_pixmap``,
        ``page_count``). The camelCase aliases used by older samples are
        deprecated upstream and missing from recent PyMuPDF releases.
    """

    def __init__(self, file_path, save_root):
        self.file_path = file_path
        self.save_root = save_root

    @staticmethod
    def open_pdf(file):
        """Open ``file`` with PyMuPDF and return the document object."""
        return fitz.open(file)

    @staticmethod
    def get_trans(doc, page, min_side=0, max_side=0, rotate=0.0):
        """Create the scale/rotation matrix used to render one page.

        Args:
            doc: Opened PyMuPDF document.
            page: Zero-based page index.
            min_side: Target length of the page's shorter side, in pixels.
            max_side: Upper bound for the page's longer side, in pixels.
            rotate: Rotation in degrees applied to the matrix.

        Returns:
            A ``fitz.Matrix``. Scaling is only applied when
            ``max_side > min_side > 0``; otherwise the scale factor is 1.
        """
        region = doc[page].rect
        scale = 1
        if max_side > min_side > 0:
            # Scale so the shorter side reaches min_side ...
            scale = min_side / min(region.width, region.height)
            # ... unless that would push the longer side beyond max_side.
            if max(region.width, region.height) * scale > max_side:
                scale = max_side / max(region.width, region.height)
        return fitz.Matrix(scale, scale).prerotate(rotate)

    def page2pix(self, doc, page, trans):
        """Render page ``page`` of ``doc`` to a pixmap using matrix ``trans``.

        The pixmap is rendered without an alpha channel.
        """
        return doc[page].get_pixmap(matrix=trans, alpha=False)

    def pdf_to_jpg(self, width=1024, height=1400):
        """Convert each page of ``self.file_path`` to ``<name>_<n>.jpg``.

        Files are written to ``self.save_root`` and numbered from 1. A page
        that cannot be rendered is reported and skipped; the remaining pages
        are still converted.

        Args:
            width: Passed to ``get_trans`` as ``min_side``.
            height: Passed to ``get_trans`` as ``max_side``.
        """
        doc = self.open_pdf(self.file_path)
        try:
            save_dir = self.save_root
            os.makedirs(save_dir, exist_ok=True)
            print("document", len(doc), doc.page_count)

            # Strip only a trailing ".pdf" (any case), not every occurrence
            # of ".pdf" inside the file name.
            stem = os.path.basename(self.file_path)
            if stem.lower().endswith('.pdf'):
                stem = stem[:-len('.pdf')]

            for i in range(len(doc)):
                trans = self.get_trans(doc, i, width, height, rotate=0)
                try:
                    pix = self.page2pix(doc, i, trans)
                except Exception as exc:
                    # Best effort: keep converting the other pages, but do not
                    # hide the failure.
                    print("failed to render page", i + 1, "of",
                          self.file_path, ":", exc)
                    continue
                # get_pixmap renders RGB by default and alpha is disabled, so
                # the samples can be handed to Pillow as packed RGB bytes
                # without a PNG encode/decode round-trip.
                image = Image.frombytes(
                    "RGB", (pix.width, pix.height), pix.samples)
                out_path = os.path.join(
                    save_dir, stem + '_' + str(i + 1) + '.jpg')
                print(out_path)
                image.save(out_path)
        finally:
            # Release the file handle even if rendering or saving fails.
            doc.close()
Parent topic: API
API FAQs
- How Do I View the API Usage?
- Why Is the Actual Number of API Calls Inconsistent with the Record Displayed on the Management Console?
- Does OCR Support Batch Processing?
- Can the OCR Results Be Converted into Word, TXT, or PDF Files?
- How Do I Obtain the Base64 Code of an Image?
- What Is the Number of Concurrent OCR API Calls?
- How Do I Use OCR APIs?
- Can the Region Where OCR Is Called Be Different from the Region Where OBS Resources Are Located?
- Can the Input Data of OCR Be Stored Locally?
- How Do I Improve the Recognition Speed?
- How Do I Convert a PDF to an Image?
- Can OCR Read Images from Non-Huawei Cloud Storage?
Feedback
Was this page helpful?
Provide feedback. Thank you very much for your feedback. We will continue working to improve the documentation. See the reply and handling status in My Cloud VOC.
The system is busy. Please try again later.
For any further questions, feel free to contact us through the chatbot.
Chatbot | More