Using LightCells API with Python via .NET
Event Driven Architecture
Aspose.Cells for Python via .NET implements LightCells API through an event-driven model that processes cells individually rather than loading the entire workbook into memory. Key features:
- Processes one cell at a time during read/write operations
- Discards processed cells immediately after handling
- Particularly memory-efficient for XLSX files (50%+ memory savings)
- Effective for XLS files (20-40% memory reduction)
Writing Large Excel Files
To create massive spreadsheets efficiently:
- Implement the
LightCellsDataProvider
abstract base class - Use sequential processing of rows and cells
- Handle data generation through callback methods
Python.NET Implementation Example
This example creates a 10,000x30 cell spreadsheet:
import os
from aspose.cells import Workbook, OoxmlSaveOptions, LightCellsDataProvider
class TestDataProvider(LightCellsDataProvider):
def __init__(self, workbook, max_rows, max_columns):
self._workbook = workbook
self.max_rows = max_rows
self.max_columns = max_columns
self._row = -1
self._column = -1
def is_gather_string(self):
return False
def next_cell(self):
self._column += 1
if self._column < self.max_columns:
return self._column
else:
self._column = -1
return -1
def next_row(self):
self._row += 1
if self._row < self.max_rows:
self._column = -1
return self._row
else:
return -1
def start_cell(self, cell):
cell.put_value(self._row + self._column)
if self._row != 1:
cell.formula = "=Rand() + A2"
def start_row(self, row):
pass
def start_sheet(self, sheet_index):
return sheet_index == 0
def run():
# The path to the documents directory
current_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(current_dir, "data")
if not os.path.exists(data_dir):
os.makedirs(data_dir)
# Specify matrix dimensions
rows_count = 10000
cols_count = 30
workbook = Workbook()
ooxml_save_options = OoxmlSaveOptions()
ooxml_save_options.light_cells_data_provider = TestDataProvider(
workbook, rows_count, cols_count
)
# Save workbook with light cells processing
output_path = os.path.join(data_dir, "output.out.xlsx")
workbook.save(output_path, ooxml_save_options)
if __name__ == "__main__":
run()
Reading Large Excel Files
For efficient processing of massive files:
- Implement the
LightCellsDataHandler
abstract base class - Process rows and cells through event callbacks
- Access cell data incrementally
Python.NET Implementation Example
This example analyzes cell statistics:
import os
from aspose.cells import LoadOptions, Workbook, LightCellsDataHandler, CellValueType
def run():
# The path to the documents directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(current_dir, "data")
opts = LoadOptions()
v = LightCellsDataHandlerVisitCells()
opts.light_cells_data_handler = v
wb = Workbook(os.path.join(data_dir, "LargeBook1.xlsx"), opts)
sheet_count = wb.worksheets.count
print(f"Total sheets: {sheet_count}, cells: {v.cell_count}, strings: {v.string_count}, formulas: {v.formula_count}")
class LightCellsDataHandlerVisitCells(LightCellsDataHandler):
def __init__(self):
self._cell_count = 0
self._formula_count = 0
self._string_count = 0
@property
def cell_count(self):
return self._cell_count
@property
def formula_count(self):
return self._formula_count
@property
def string_count(self):
return self._string_count
def start_sheet(self, sheet):
print(f"Processing sheet[{sheet.name}]")
return True
def start_row(self, row_index):
return True
def process_row(self, row):
return True
def start_cell(self, column_index):
return True
def process_cell(self, cell):
self._cell_count += 1
if cell.is_formula:
self._formula_count += 1
elif cell.type == CellValueType.IS_STRING:
self._string_count += 1
return False
Key Considerations
- Process cells in strict row/column order
- Maintain state between callback methods
- Use appropriate memory management for very large datasets
- Prefer XLSX format for optimal memory efficiency
- Handle temporary storage for XLS files when needed