Extract Vector Data from a PDF file using C#
Access to Vector Data from PDF document
Since the 24.2 release, the Aspose.PDF for .NET library allows vector data extraction from a PDF file. The next code snippet creates a new Document object from an input file, initializes a 'GraphicsAbsorber' (the GraphicsAbsorber returns the vector data) to handle graphic elements, and then visits the first page of the document (`document.Pages[1]`; page numbering starts at 1) to extract and analyze these elements. It retrieves various properties of the second graphic element (`elements[1]`), such as its associated operators, rectangle, and position.
// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET
/// <summary>
/// Opens "input.pdf" from the working-documents directory, runs a
/// GraphicsAbsorber over its first page and reads the operators, rectangle
/// and position of the second graphic element found on that page.
/// </summary>
private static void ProcessGraphicsInPDF()
{
    // The path to the documents directory
    var dataDir = RunExamples.GetDataDir_AsposePdf_WorkingDocuments();

    // Open PDF document
    using (var document = new Aspose.Pdf.Document(dataDir + "input.pdf"))
    {
        // Instantiate a new GraphicsAbsorber object to process graphic elements
        using (var grAbsorber = new Aspose.Pdf.Vector.GraphicsAbsorber())
        {
            // Visit the first page of the document to extract graphic elements
            // (page numbering starts at 1, so Pages[1] is the first page)
            grAbsorber.Visit(document.Pages[1]);

            // Retrieve the list of graphic elements from the GraphicsAbsorber
            var elements = grAbsorber.Elements;

            // The element list is indexed from 0, so index 1 is the second element.
            // Stop here if the page does not contain at least two graphic elements,
            // otherwise the indexer below would throw an out-of-range exception.
            if (elements.Count < 2)
            {
                return;
            }

            var secondElement = elements[1];

            // Access the operators associated with the second graphic element
            var operations = secondElement.Operators;

            // Retrieve the rectangle associated with the second graphic element
            var rectangle = secondElement.Rectangle;

            // Get the position of the second graphic element
            var position = secondElement.Position;
        }
    }
}
Extract Vector Data from PDF document
To extract vector data from a PDF document, we can use the SVG extractor:
// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET
/// <summary>
/// Opens "VectorGraphics.pdf" from the working-documents directory and asks
/// its first page to save its vector graphics to "VectorGraphics_out.svg"
/// in the same directory.
/// </summary>
private static void SaveVectorGraphicsFromPage()
{
    // Resolve the folder that holds the sample documents
    var workingDir = RunExamples.GetDataDir_AsposePdf_WorkingDocuments();

    // Load the source PDF; the document is released when the block ends
    using (var pdf = new Aspose.Pdf.Document(workingDir + "VectorGraphics.pdf"))
    {
        // Pages[1] is the first page of the document
        var firstPage = pdf.Pages[1];

        // Write the page's vector graphics out as an SVG file
        var svgTarget = workingDir + "VectorGraphics_out.svg";
        firstPage.TrySaveVectorGraphics(svgTarget);
    }
}
Extract all subpaths to images separately
// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET
/// <summary>
/// Opens "VectorGraphics.pdf" from the working-documents directory and runs
/// an SvgExtractor, configured with ExtractEverySubPathToSvg enabled, over
/// its first page, writing the results into the "SvgOutput/" directory.
/// </summary>
private static void ExtractAllSubpathsToImagesSeparately()
{
    // Resolve the folder that holds the sample documents
    var workingDir = RunExamples.GetDataDir_AsposePdf_WorkingDocuments();

    // Directory that receives the generated SVG files
    var outputFolder = workingDir + "SvgOutput/";

    // Configure the extraction so that every subpath is extracted to its own SVG
    var extractionOptions = new Aspose.Pdf.Vector.SvgExtractionOptions();
    extractionOptions.ExtractEverySubPathToSvg = true;

    // Load the source PDF; the document is released when the block ends
    using (var pdf = new Aspose.Pdf.Document(workingDir + "VectorGraphics.pdf"))
    {
        // Pages[1] is the first page of the document
        var firstPage = pdf.Pages[1];

        // Build the extractor with the options above and run it on the page
        var svgExtractor = new Aspose.Pdf.Vector.SvgExtractor(extractionOptions);
        svgExtractor.Extract(firstPage, outputFolder);
    }
}
Extract list of elements to single image
// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET
/// <summary>
/// Opens "VectorGraphics.pdf" from the working-documents directory and passes
/// a caller-populated list of graphic elements, together with the first page,
/// to an SvgExtractor that writes "SvgOutput/VectorGraphics_out.svg".
/// </summary>
private static void ExtractListOfElementsToSingleImage()
{
    // Resolve the folder that holds the sample documents
    var workingDir = RunExamples.GetDataDir_AsposePdf_WorkingDocuments();

    // Graphic elements to be written into the single SVG.
    // Example: populate this list with the elements you need (implement your logic here)
    var selectedElements = new List<Aspose.Pdf.Vector.GraphicElement>();

    // Load the source PDF; the document is released when the block ends
    using (var pdf = new Aspose.Pdf.Document(workingDir + "VectorGraphics.pdf"))
    {
        // Pages[1] is the first page of the document
        var firstPage = pdf.Pages[1];

        // Default-configured extractor
        var extractor = new Aspose.Pdf.Vector.SvgExtractor();

        // Write the selected elements of the page to one SVG file
        var svgTarget = Path.Combine(workingDir, "SvgOutput", "VectorGraphics_out.svg");
        extractor.Extract(selectedElements, firstPage, svgTarget);
    }
}
Extract single element
// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET
/// <summary>
/// Opens "VectorGraphics.pdf" from the working-documents directory, collects
/// the graphic elements of its first page with a GraphicsAbsorber, and saves
/// the third child of the second element (expected to be an XFormPlacement)
/// to "SvgOutput/VectorGraphics_out.svg".
/// Does nothing when the page does not have the expected element structure.
/// </summary>
private static void ExtractSingleElement()
{
    // The path to the documents directory
    var dataDir = RunExamples.GetDataDir_AsposePdf_WorkingDocuments();

    // Open PDF document
    using (var document = new Aspose.Pdf.Document(dataDir + "VectorGraphics.pdf"))
    // Create a GraphicsAbsorber object to extract graphic elements.
    // It is wrapped in 'using' so it is disposed once extraction is done.
    using (var graphicsAbsorber = new Aspose.Pdf.Vector.GraphicsAbsorber())
    {
        // Get the first page of the document
        var page = document.Pages[1];

        // Process the page to extract graphic elements
        graphicsAbsorber.Visit(page);

        // Index 1 below requires at least two top-level elements on the page
        var elements = graphicsAbsorber.Elements;
        if (elements.Count < 2)
        {
            return;
        }

        // The 'as' cast yields null when the element is not an XFormPlacement,
        // so check it before dereferencing instead of risking a NullReferenceException
        var xFormPlacement = elements[1] as Aspose.Pdf.Vector.XFormPlacement;
        if (xFormPlacement == null || xFormPlacement.Elements.Count < 3)
        {
            return;
        }

        // Extract the nested graphic element and save it as SVG
        xFormPlacement.Elements[2].SaveToSvg(Path.Combine(dataDir, "SvgOutput", "VectorGraphics_out.svg"));
    }
}