Load Html to Excel with StreamProvider
When loading HTML files that contain external resources, we often face the following two issues:
- When the html stream is loaded, the images and external resources referenced by the html file cannot be obtained through relative paths.
- External resource paths referenced in HTML files need to be mapped to their actual locations.
This article explains how to implement the IStreamProvider interface and assign it to the HtmlLoadOptions.StreamProvider property. By implementing this interface, you will be able to load external resources when the HTML is loaded from a stream or when those resources are referenced by relative paths.
The following is the main code showing the usage of the HtmlLoadOptions.StreamProvider property:
/// <summary>
/// Stream provider that supplies the external resources referenced by an HTML document.
/// A small set of known paths is remapped to local files; any other path is either
/// downloaded (when it is a URL) or opened directly (when it names an existing file).
/// </summary>
internal class HtmlAttachedStreamProvider : Aspose.Cells.IStreamProvider
{
    /// <summary>
    /// Tells whether <paramref name="picPath"/> starts with one of the URL schemes
    /// handled by this provider: http, https, file or ftp.
    /// </summary>
    /// <param name="picPath">Resource path taken from the HTML document; may be null or empty.</param>
    /// <returns><c>true</c> when the path starts with a supported scheme; otherwise <c>false</c>.</returns>
    internal static bool IsHRef(string picPath)
    {
        // A missing path is simply "not a URL"; calling StartsWith on null would throw.
        if (string.IsNullOrEmpty(picPath))
        {
            return false;
        }

        // URI schemes are case-insensitive, and an ordinal comparison keeps the check
        // independent of the current culture.
        const System.StringComparison schemeComparison = System.StringComparison.OrdinalIgnoreCase;

        return picPath.StartsWith("http://", schemeComparison)
            || picPath.StartsWith("https://", schemeComparison)
            || picPath.StartsWith("file://", schemeComparison)
            || picPath.StartsWith("ftp://", schemeComparison);
    }

    /// <summary>
    /// Downloads the resource at <paramref name="href"/> into memory.
    /// </summary>
    /// <param name="href">URL of the resource to fetch.</param>
    /// <returns>
    /// A <see cref="MemoryStream"/> positioned at its start that holds the whole response body,
    /// or <c>null</c> when the request fails for any reason.
    /// </returns>
    internal static Stream GetStreamFromHref(string href)
    {
        try
        {
            WebRequest request = WebRequest.Create(href);

            // Dispose the response as well as its stream so the underlying connection is
            // released on both the success and the failure path.
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            {
                // No initial capacity: the length of the response is not always known.
                MemoryStream dstStream = new MemoryStream();
                responseStream.CopyTo(dstStream);
                dstStream.Position = 0;
                return dstStream;
            }
        }
        catch
        {
            // Deliberate best-effort: a resource that cannot be downloaded is reported
            // to the caller as "no stream" instead of aborting the load.
            return null;
        }
    }

    /// <summary>
    /// Resolves <c>options.DefaultPath</c> to a readable stream and stores it in <c>options.Stream</c>.
    /// </summary>
    /// <remarks>
    /// Known paths are remapped to fixed local files. Any other path is downloaded when it is a
    /// URL, or opened when it names an existing file; otherwise <c>options.Stream</c> is left untouched.
    /// NOTE(review): presumably invoked by the HTML loader once per external resource — confirm
    /// against the Aspose.Cells documentation.
    /// </remarks>
    /// <param name="options">Carries the requested path in and the opened stream out.</param>
    public void InitStream(StreamProviderOptions options)
    {
        string absolutePath = null;

        // Sample mapping from paths used inside the HTML to files on the local disk.
        switch (options.DefaultPath)
        {
            case "/Files/Image1.png":
                absolutePath = @"D:/filetemp/G1.png";
                break;
            case "/Files/Image2.png":
                absolutePath = @"D:/filetemp/E1.png";
                break;
            case "https://www.aspose.com/templates/aspose/img/products/cells/aspose_cells-for-net.svg":
                absolutePath = @"D:/filetemp/F1.png";
                break;
            default:
                break;
        }

        if (absolutePath != null)
        {
            // A mapped file is expected to exist; File.OpenRead throws when it does not.
            options.Stream = File.OpenRead(absolutePath);
            return;
        }

        if (IsHRef(options.DefaultPath))
        {
            // May assign null when the download fails.
            options.Stream = GetStreamFromHref(options.DefaultPath);
        }
        else if (File.Exists(options.DefaultPath))
        {
            options.Stream = File.OpenRead(options.DefaultPath);
        }
    }

    /// <summary>
    /// Closes the stream previously stored in <c>options.Stream</c>, if there is one.
    /// </summary>
    /// <param name="options">Options object whose stream should be closed.</param>
    public void CloseStream(StreamProviderOptions options)
    {
        if (options.Stream != null)
        {
            options.Stream.Close();
        }
    }
}
/// <summary>
/// Sample entry point: loads "html1.html" with the custom stream provider attached
/// and saves the resulting workbook as "dest.xlsx".
/// </summary>
static void Main(string[] args)
{
    // Attach the custom provider to the load options used for the HTML file.
    HtmlLoadOptions loadOptions = new HtmlLoadOptions
    {
        StreamProvider = new HtmlAttachedStreamProvider()
    };

    Workbook book = new Workbook(@"html1.html", loadOptions);
    book.Save("dest.xlsx");
}