Find whether PDF contains images or text

[ ]


A PDF file can contain both text and images. Sometimes a user needs to find out whether a PDF file contains only text or only images. We can also determine whether it contains both or neither.

 /// <summary>
 /// Inspects the PDF "FilledForm.pdf" located under <c>_dataDir</c> and writes to the
 /// console whether it contains text only, images only, both, or neither.
 /// </summary>
 public static void CheckIfPdfContainsTextOrImages()
 {
     // The stream receives the extracted text; both the stream and the extractor
     // are disposed when the check is finished.
     using (MemoryStream ms = new MemoryStream())
     using (PdfExtractor extractor = new PdfExtractor())
     {
         // Bind the input PDF document to the extractor
         extractor.BindPdf(_dataDir + "FilledForm.pdf");

         // Extract the text and copy it into the stream. Without these two calls the
         // stream stays empty and the document would always be reported as text-free.
         extractor.ExtractText();
         extractor.GetText(ms);

         // Any output at all is treated as "the document contains text"
         bool containsText = ms.Length >= 1;

         // Extract the images before querying for them; a single HasNextImage call is
         // enough to learn whether at least one image is present.
         extractor.ExtractImage();
         bool containsImage = extractor.HasNextImage();

         // Report which of the four possible combinations applies
         if (containsText && !containsImage)
         {
             Console.WriteLine("PDF contains text only");
         }
         else if (!containsText && containsImage)
         {
             Console.WriteLine("PDF contains image only");
         }
         else if (containsText && containsImage)
         {
             Console.WriteLine("PDF contains both text and image");
         }
         else
         {
             Console.WriteLine("PDF contains neither text or nor image");
         }
     }
 }