Integrate PDF Tables with Data Sources in Python
Contents
[
Hide
]
Create PDF from DataFrame
The create_pdf_from_dataframe function builds a new PDF and inserts a table generated from a pandas DataFrame. This approach is useful for reporting workflows where your data already exists in tabular form.
The function performs the following steps:
- Create an empty PDF document with ap.Document().
- Add a page to the document.
- Convert the DataFrame into an Aspose.PDF table by calling create_table_from_dataframe(df, max_rows).
- Add the table to the page with page.paragraphs.add(table).
- Save the PDF to the output path.
from os import path
import sys
import pandas as pd
import aspose.pdf as ap
from config import set_license, initialize_data_dir
def create_pdf_from_dataframe(
    outfile: str, df: pd.DataFrame, max_rows: int = 20
) -> None:
    """Build a new PDF whose page holds a table generated from ``df``.

    Args:
        outfile: Destination passed to ``Document.save``.
        df: DataFrame forwarded to ``create_table_from_dataframe``.
        max_rows: Row limit forwarded to ``create_table_from_dataframe``.
    """
    # Convert the DataFrame first; this does not depend on the document.
    pdf_table = create_table_from_dataframe(df, max_rows)

    # Start from an empty document and give it a page to hold the table.
    pdf = ap.Document()
    target_page = pdf.pages.add()

    # Place the table on the newly added page, then write the file.
    target_page.paragraphs.add(pdf_table)
    pdf.save(outfile)
Create Table from DataFrame
The create_table_from_dataframe function converts a DataFrame into an Aspose.PDF Table object that you can add to any page.
It does the following:
- Create an empty ap.Table() instance.
- Set table and cell borders for consistent formatting.
- Add a header row using DataFrame column names.
- Add data rows from df.head(max_rows).
- Return the populated table object.
from os import path
import sys
import pandas as pd
import aspose.pdf as ap
from config import set_license, initialize_data_dir
def create_table_from_dataframe(df: pd.DataFrame, max_rows: int = 20) -> ap.Table:
    """Convert a pandas DataFrame into an Aspose.PDF ``Table``.

    The first table row holds the column names (as strings) on a light gray
    background; each following row holds the stringified values of one row
    of ``df.head(max_rows)``.

    Args:
        df: Source DataFrame.
        max_rows: Value passed to ``df.head`` to limit the data rows.

    Returns:
        The populated table, ready to be added to a page's paragraphs.
    """
    # Light gray 1-unit borders: a full frame around the table and a
    # bottom rule under every cell.
    outer_border = ap.BorderInfo(ap.BorderSide.ALL, 1, ap.Color.light_gray)
    cell_border = ap.BorderInfo(ap.BorderSide.BOTTOM, 1, ap.Color.light_gray)

    pdf_table = ap.Table()
    pdf_table.border = outer_border
    pdf_table.default_cell_border = cell_border

    # Header: one shaded cell per column name.
    heading = pdf_table.rows.add()
    heading.is_row_broken = False  # Keep the header row from being split across pages
    for label in map(str, df.columns):
        heading_cell = heading.cells.add(label)
        heading_cell.background_color = ap.Color.light_gray

    # Body: one table row per DataFrame record, index excluded.
    limited = df.head(max_rows)
    for record in limited.itertuples(index=False):
        body_row = pdf_table.rows.add()
        for text in map(str, record):
            body_row.cells.add(text)

    return pdf_table