Using LightCells API
Event Driven Architecture
Aspose.Cells provides the LightCells API, mainly designed to manipulate cell data one by one without building a complete data model block (using the Cell collection etc.) into memory. It works in an event-driven mode.
To save workbooks, provide the cell content cell by cell when saving, and the component saves it to the output file directly.
When reading template files, the component parses every cell and provides their value one by one.
In both procedures, one Cell object is processed and then discarded, the Workbook object does not hold the collection. In this mode, therefore, memory is saved when importing and exporting Microsoft Excel file that has a large data set which would otherwise use a lot of memory.
Even though the LightCells API processes the cells in the same way for XLSX and XLS files (it does not actually load all cells in memory but processes one cell and then discards it), it saves memory more effectively for XLSX files than XLS files because of the different data models and structures of the two formats.
However, for XLS files, to save more memory, developers can specify a temporary location for saving temporary data generated during the Save process. Commonly, using LightCells API to save XLSX file may save 50% or more memory than using the common way, saving XLS may save about 20-40% memory.
Writing Large Excel Files
Aspose.Cells provides an interface, LightCellsDataProvider, that needs to be implemented in your program. The interface represents Data provider for saving large spreadsheet files in light-weight mode.
When saving a workbook by this mode, startSheet(int) is checked when saving every worksheet in the workbook. For one sheet, if startSheet(int) is true, then all the data and properties of rows and cells of this sheet to be saved is provided by this implementation. In the first place, nextRow() is called to get the next row index to be saved. If a valid row index is returned (the row index must be in ascending order for the rows to be saved), then a Row object representing this row is provided for implementation to set its properties by startRow(Row).
For one row, nextCell() is checked first. If a valid column index is returned (the column index must be in ascending order for all cells of one row to be saved), then a Cell object representing this cell is provided to set the data and properties by startCell(Cell). After the data of this cell is set, this cell is saved directly to the generated spreadsheet file and the next cell will be checked and processed.
The following example shows ho the LightCells API works.
The following program creates a huge file with 100,000 records in a worksheet, filled with data. We have added some hyperlinks, string values, numeric values and also formulas to certain cells in the worksheet. Moreover, we have formatted a range of cells as well.
// For complete examples and data files, please go to https://github.com/aspose-cells/Aspose.Cells-for-Java | |
public class LightCellsDataProviderDemo implements LightCellsDataProvider { | |
private final int sheetCount; | |
private final int maxRowIndex; | |
private final int maxColIndex; | |
private int rowIndex; | |
private int colIndex; | |
private final Style style1; | |
private final Style style2; | |
public LightCellsDataProviderDemo(Workbook wb, int sheetCount, int rowCount, int colCount) { | |
// set the variables/objects | |
this.sheetCount = sheetCount; | |
this.maxRowIndex = rowCount - 1; | |
this.maxColIndex = colCount - 1; | |
// add new style object with specific formattings | |
style1 = wb.createStyle(); | |
Font font = style1.getFont(); | |
font.setName("MS Sans Serif"); | |
font.setSize(10); | |
font.setBold(true); | |
font.setItalic(true); | |
font.setUnderline(FontUnderlineType.SINGLE); | |
font.setColor(Color.fromArgb(0xffff0000)); | |
style1.setHorizontalAlignment(TextAlignmentType.CENTER); | |
// create another style | |
style2 = wb.createStyle(); | |
style2.setCustom("#,##0.00"); | |
font = style2.getFont(); | |
font.setName("Copperplate Gothic Bold"); | |
font.setSize(8); | |
style2.setPattern(BackgroundType.SOLID); | |
style2.setForegroundColor(Color.fromArgb(0xff0000ff)); | |
style2.setBorder(BorderType.TOP_BORDER, CellBorderType.THICK, Color.getBlack()); | |
style2.setVerticalAlignment(TextAlignmentType.CENTER); | |
} | |
public boolean isGatherString() { | |
return false; | |
} | |
public int nextCell() { | |
if (colIndex < maxColIndex) { | |
colIndex++; | |
return colIndex; | |
} | |
return -1; | |
} | |
public int nextRow() { | |
if (rowIndex < maxRowIndex) { | |
rowIndex++; | |
colIndex = -1; // reset column index | |
if (rowIndex % 1000 == 0) { | |
System.out.println("Row " + rowIndex); | |
} | |
return rowIndex; | |
} | |
return -1; | |
} | |
public void startCell(Cell cell) { | |
if (rowIndex % 50 == 0 && (colIndex == 0 || colIndex == 3)) { | |
// do not change the content of hyperlink. | |
return; | |
} | |
if (colIndex < 10) { | |
cell.putValue("test_" + rowIndex + "_" + colIndex); | |
cell.setStyle(style1); | |
} else { | |
if (colIndex == 19) { | |
cell.setFormula("=Rand() + test!L1"); | |
} else { | |
cell.putValue(rowIndex * colIndex); | |
} | |
cell.setStyle(style2); | |
} | |
} | |
public void startRow(Row row) { | |
row.setHeight(25); | |
} | |
public boolean startSheet(int sheetIndex) { | |
if (sheetIndex < sheetCount) { | |
// reset row/column index | |
rowIndex = -1; | |
colIndex = -1; | |
return true; | |
} | |
return false; | |
} | |
} |
// For complete examples and data files, please go to https://github.com/aspose-cells/Aspose.Cells-for-Java | |
public class Demo { | |
private static final String OUTPUT_FILE_PATH = Utils.getDataDir(LightCellsDataProviderDemo.class); | |
public static void main(String[] args) throws Exception { | |
// Instantiate a new Workbook | |
Workbook wb = new Workbook(); | |
// set the sheet count | |
int sheetCount = 1; | |
// set the number of rows for the big matrix | |
int rowCount = 100000; | |
// specify the worksheet | |
for (int k = 0; k < sheetCount; k++) { | |
Worksheet sheet = null; | |
if (k == 0) { | |
sheet = wb.getWorksheets().get(k); | |
sheet.setName("test"); | |
} else { | |
int sheetIndex = wb.getWorksheets().add(); | |
sheet = wb.getWorksheets().get(sheetIndex); | |
sheet.setName("test" + sheetIndex); | |
} | |
Cells cells = sheet.getCells(); | |
// set the columns width | |
for (int j = 0; j < 15; j++) { | |
cells.setColumnWidth(j, 15); | |
} | |
// traverse the columns for adding hyperlinks and merging | |
for (int i = 0; i < rowCount; i++) { | |
// The first 10 columns | |
for (int j = 0; j < 10; j++) { | |
if (j % 3 == 0) { | |
cells.merge(i, j, 1, 2, false, false); | |
} | |
if (i % 50 == 0) { | |
if (j == 0) { | |
sheet.getHyperlinks().add(i, j, 1, 1, "test!A1"); | |
} else if (j == 3) { | |
sheet.getHyperlinks().add(i, j, 1, 1, "http://www.google.com"); | |
} | |
} | |
} | |
// The second 10 columns | |
for (int j = 10; j < 20; j++) { | |
if (j == 12) { | |
cells.merge(i, j, 1, 3, false, false); | |
} | |
} | |
} | |
} | |
// Create an object with respect to LightCells data provider | |
LightCellsDataProviderDemo dataProvider = new LightCellsDataProviderDemo(wb, 1, rowCount, 20); | |
// Specify the XLSX file's Save options | |
OoxmlSaveOptions opt = new OoxmlSaveOptions(); | |
// Set the data provider for the file | |
opt.setLightCellsDataProvider(dataProvider); | |
// Save the big file | |
wb.save(OUTPUT_FILE_PATH + "/DemoTest.xlsx", opt); | |
} | |
} |
Reading Large Excel Files
Aspose.Cells provide an interface, LightCellsDataHandler, that needs to be implemented in your program. The interface represents the data provider for reading large spreadsheet files in a light-weight mode.
When reading a workbook in this mode, startSheet() is checked when reading every worksheet in the workbook. For a sheet, if startSheet() returns true, then all the data and properties of the cells in the sheet’s rows and columns is checked and processed. For every row, startRow() is called to check whether it needs to be processed. If a row needs to be processed, the row’s properties are read first and developers can access its properties with processRow().
If the row’s cells also need to be processed, then processRow() returns true and startCell() is called for every existing cell in the row to check whether it needs to be processed. If it does, processCell() is called.
The following sample code illustrates this process. The program reads a large file with millions of records. It takes a little time to read each sheet in the workbook. The sample code reads the file and retrieves the total number of cells, strings count and formulas count for each worksheet.
// For complete examples and data files, please go to https://github.com/aspose-cells/Aspose.Cells-for-Java | |
public class LightCellsTest1 { | |
public static void main(String[] args) throws Exception { | |
String dataDir = Utils.getDataDir(LightCellsTest1.class); | |
LoadOptions opts = new LoadOptions(); | |
LightCellsDataHandlerVisitCells v = new LightCellsDataHandlerVisitCells(); | |
opts.setLightCellsDataHandler((LightCellsDataHandler) v); | |
Workbook wb = new Workbook(dataDir + "LargeBook1.xlsx", opts); | |
int sheetCount = wb.getWorksheets().getCount(); | |
System.out.println("Total sheets: " + sheetCount + ", cells: " + v.cellCount + ", strings: " + v.stringCount | |
+ ", formulas: " + v.formulaCount); | |
} | |
} |
A class that implements LightCellsDataHandler interface
// For complete examples and data files, please go to https://github.com/aspose-cells/Aspose.Cells-for-Java | |
public class LightCellsDataHandlerVisitCells implements LightCellsDataHandler { | |
public int cellCount; | |
public int formulaCount; | |
public int stringCount; | |
public LightCellsDataHandlerVisitCells() { | |
this.cellCount = 0; | |
this.formulaCount = 0; | |
this.stringCount = 0; | |
} | |
public int cellCount() { | |
return cellCount; | |
} | |
public int formulaCount() { | |
return formulaCount; | |
} | |
public int stringCount() { | |
return stringCount; | |
} | |
public boolean startSheet(Worksheet sheet) { | |
System.out.println("Processing sheet[" + sheet.getName() + "]"); | |
return true; | |
} | |
public boolean startRow(int rowIndex) { | |
return true; | |
} | |
public boolean processRow(Row row) { | |
return true; | |
} | |
public boolean startCell(int column) { | |
return true; | |
} | |
public boolean processCell(Cell cell) { | |
this.cellCount = this.cellCount + 1; | |
if (cell.isFormula()) { | |
this.formulaCount = this.formulaCount + 1; | |
} else if (cell.getType() == CellValueType.IS_STRING) { | |
this.stringCount = this.stringCount + 1; | |
} | |
return false; | |
} | |
} |