Extract Images from PDF File using Python

Use this page when you need to reuse embedded graphics, archive image assets, or process image content outside the PDF.

  1. Load the source PDF with ap.Document(infile).
  2. Select the target page and image resource index.
  3. Save the image object to an output stream.
import aspose.pdf as ap
from io import FileIO


def extract_image(infile, outfile, *, page_number=1, image_index=1):
    """Save one embedded image from a PDF page to a file.

    Args:
        infile: Source PDF, passed unchanged to ``ap.Document``.
        outfile: Path of the image file to create. It is opened in binary
            write mode, so an existing file at that path is overwritten.
        page_number: Index into ``document.pages`` selecting the page to
            read. Defaults to 1, the page the original sample always used.
        image_index: Index into that page's ``resources.images`` collection.
            Defaults to 1, the image the original sample always used.

    Any error raised by the library for an invalid page or image index is
    propagated to the caller unchanged.
    """
    document = ap.Document(infile)
    # Resolve the image before opening the output file, so a bad index
    # fails without leaving an empty output file behind.
    x_image = document.pages[page_number].resources.images[image_index]
    with FileIO(outfile, "wb") as output_image:
        x_image.save(output_image)

Extract Images from Specific Region in PDF

This example extracts images located within a specified rectangular region on a PDF page and saves them as separate files.

  1. Load the source PDF.
  2. Create ImagePlacementAbsorber and accept it on the target page.
  3. Define the target rectangle.
  4. Iterate through the image placements and check whether each image's bounds fit inside the region.
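  5. Save each matched image to its own output file; the text "index" in the output file name is replaced with a running counter (1, 2, ...).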
import aspose.pdf as ap
from io import FileIO


def extract_image_from_specific_region(infile, outfile):
    """Save the images on page 1 whose corners fall inside a fixed region.

    The region is the rectangle built from (0, 0, 590, 590). An image is
    saved only when both its lower-left and its upper-right corner are
    reported as contained in that rectangle.

    Args:
        infile: Source PDF, passed unchanged to ``ap.Document``.
        outfile: Output path template. Every occurrence of the text
            "index" in it is replaced with the running number of the saved
            image (1, 2, ...). If the template does not contain "index",
            every match is written to the same path.

    NOTE(review): the trailing ``True`` arguments to ``ap.Rectangle`` and
    ``Rectangle.contains`` are kept exactly as in the sample; their meaning
    is not visible here - confirm against the Aspose.PDF API reference.
    """
    pdf = ap.Document(infile)
    region = ap.Rectangle(0, 0, 590, 590, True)
    collector = ap.ImagePlacementAbsorber()
    pdf.pages[1].accept(collector)

    saved_count = 0
    for placement in collector.image_placements:
        bounds = placement.rectangle
        lower_left = ap.Point(bounds.llx, bounds.lly)
        upper_right = ap.Point(bounds.urx, bounds.ury)

        # Skip placements with either corner outside the region.
        if not region.contains(lower_left, True):
            continue
        if not region.contains(upper_right, True):
            continue

        # Number output files by matches only, starting at 1.
        saved_count += 1
        target_path = outfile.replace("index", str(saved_count))
        with FileIO(target_path, "wb") as image_stream:
            placement.image.save(image_stream)