Extract Images from PDF using Python
Contents
[
Hide
]
This code snippet extracts embedded images from a PDF document for separate analysis, editing, or reuse in other documents:
- Load the PDF Document
- Access the Image Resource
- Save the Image to a File
import aspose.pdf as apdf
from io import FileIO
from os import path
import json
from aspose.pycore import cast, is_assignable
# Build the input (PDF) and output (image) paths under the data directory.
# NOTE(review): `self`, `infile` and `outfile` are not defined in this
# snippet — it is presumably lifted from a method of an example class.
path_infile = path.join(self.dataDir, infile)
path_outfile = path.join(self.dataDir, outfile)

# Load the PDF document.
document = apdf.Document(path_infile)

# Access the image resource.
# NOTE(review): Aspose.PDF collections are documented as 1-based, so this
# should be the first image on the first page — confirm against the API docs.
xImage = document.pages[1].resources.images[1]

# Save the image to a file. FileIO is a raw, unbuffered binary stream, so
# mode "w" writes bytes. The `with` block closes the file even if save() fails.
with FileIO(path_outfile, "w") as output_image:
    xImage.save(output_image)