PowerShell script to create Excel spreadsheet from HTML file











up vote
6
down vote

favorite
1












This script is designed to create an Excel spreadsheet from each HTML table from auto-generated index.html file.



It needs to be very robust but unfortunately I don't have many example files to test it against. Any feedback on style, possible points of failure, or vulnerabilities would be very appreciated.



Powershell



function Main
{

# find each file called "index.html" in the folders below the directory of the script
$Directory = $PSScriptRoot
$htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html

# for each html file, create an excel file in the directory of the script
# called Exceptions Log <date of HTML file creation>.xlsx
ForEach($htmlFile in $htmlFiles)
{
$fileCreationDate = (Get-ChildItem $htmlFile).CreationTime
$fileString = $fileCreationDate.ToString("yyyy-mm-dd hh.mm.ss")
$outputFile = "$DirectoryExceptions Log $fileString.xlsx"

Write-Host "Parsing HTML file: $htmlFile"
$document = ParseHtml $htmlFile

Write-Host "Creating Excel file: $outputFile"
CreateExcelFromHtml $document $outputFile
}

}


# load local HTML file and return COM object that we can query
function ParseHtml([string]$fileIn)
{
$html = New-Object -ComObject "HTMLFile"
$source = Get-Content $fileIn -Raw
$html.IHTMLDocument2_write($source)
return $html
}


# create excel spreadsheet from html document
function CreateExcelFromHtml($document, [string]$excelFile)
{
$excel = New-Object -ComObject Excel.Application
$excel.Visible = $false # $true
$excel.DisplayAlerts = $false
$workbook = $excel.Workbooks.Add()
$sheet = $workbook.ActiveSheet

Write-Host "Filling in Excel file"
$row = 1;
# iterate over tables
$tables = $document.getElementsByTagName("table")
$captions = $document.getElementsByTagName("caption")

for($t=0; $t -lt $tables.length; $t++){

# create 2 row space between tables
$row += 2

#iterate over rows
$rows = $tables[$t].getElementsByTagName("tr")
for($i=0; $i -lt $rows.length; $i++)
{

# iterate over cells
# every other cell is a blank cell, however cells with content are always nested in a "font" tag
$cells = $rows[$i].getElementsByTagName("font")
for($j=0; $j -lt $cells.length; $j++)
{
# load each cell contents into an excel cell
$sheet.Cells.Item($row + $i, $j+1) = $cells[$j].innerText.Trim()
}
}

# add corrective action column to first table
if($t -eq 0){
$j += 1 # increment table size
$sheet.Cells.Item($row, $j) = "Corrective Action";
}

# get table title from corresponding caption
$title = [string]$captions[$t].innerText

StyleTable $sheet $title $row $i $j

$row += $i

}

Write-Host "Formating Excel file"
$sheet.UsedRange.ColumnWidth = 18

Write-Host "Saving Excel file"
$workbook.SaveAs($excelFile, 51)
$workbook.Close();
$excel.Quit()

}


Function StyleTable($sheet, $tableTitle, $startRow, $numberOfRows, $endColumn){

# give the table a title and style it
$titleCell = $sheet.Cells($startRow - 1, 1)
$titleCell.Value = $tableTitle
$titleCell.Font.Name = "Cambria"
$titleCell.Font.Size = 14
$titleCell.Font.Bold = $true
$titleCell.Font.ThemeColor = 5

# give the table a border
$tableRange = $sheet.Range($sheet.Cells($startRow, 1), `
$sheet.Cells($startRow + $numberOfRows - 1, $endColumn))

$tableRange.Borders.Weight = 2

# make the table header bold and blue
$headerRange = $sheet.Range($sheet.Cells($startRow, 1), `
$sheet.Cells($startRow, $endColumn))

$headerRange.Interior.ColorIndex = 37
$headerRange.Font.Bold = $true
}


Main


Example index.html file




<html>
<head>
<title>FooBar</title>
</head>

<body>
<center>
<p>
<b><font size=6>Bar</font>
</b><br>
<b><font size=4>Foo</font>
</b><br>
<font size=3>FooBar</font>
</p>

<table>
<caption><font size=3><b>Title</b></font></caption>
<thead>
<th align=left valign=bottom><font size=2><u>A</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>B</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>C</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>D</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>E</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>F</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>G</u></font></th><th>&nbsp;&nbsp;</th>
</thead>

<tbody>
<tr>
<td valign=top><font size=2>1</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>2</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>3</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>4</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>5</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>6</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>7</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
</tr>

</tbody>
</table>

<br>
<table>
<caption><font size=3><b>Title 2</b></font></caption>
<thead>
<th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>FoobBar</u></font></th><th>&nbsp;&nbsp;</th>
</thead>

<tbody>
<tr>
<td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>FizzBuzz<br>
&nbsp;<br>
&nbsp;</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>FooBar<br>
&nbsp;<br>
&nbsp;</td><td>&nbsp;&nbsp;</td>
</tr>

</tbody>
</table>

<br>
<table>
<caption><font size=3><b>Title 3</b></font></caption>
<thead>
<th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>FizzBuzz</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>FooBar</u></font></th><th>&nbsp;&nbsp;</th>
</thead>

<tbody>
<tr>
<td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>FooBar</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>FizzBuzz</td><td>&nbsp;&nbsp;</td>
</tr>

</tbody>
</table>

<br>
<p>
<b><font size=4>Something</font>
</b><br>
</p>

<table>
<caption><font size=3><b>Title 4</b></font></caption>
<thead>
<th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
<th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
</thead>

<tbody>
<tr>
<td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
</tr>

<tr>
<b><td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
</b><td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
<td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
</tr>

</tbody>
</table>

<br>
</center>
</body>
</html>



Output



enter image description here










share|improve this question




























    up vote
    6
    down vote

    favorite
    1












    This script is designed to create an Excel spreadsheet from each HTML table from auto-generated index.html file.



    It needs to be very robust but unfortunately I don't have many example files to test it against. Any feedback on style, possible points of failure, or vulnerabilities would be very appreciated.



    Powershell



    function Main
    {

    # find each file called "index.html" in the folders below the directory of the script
    $Directory = $PSScriptRoot
    $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html

    # for each html file, create an excel file in the directory of the script
    # called Exceptions Log <date of HTML file creation>.xlsx
    ForEach($htmlFile in $htmlFiles)
    {
    $fileCreationDate = (Get-ChildItem $htmlFile).CreationTime
    $fileString = $fileCreationDate.ToString("yyyy-mm-dd hh.mm.ss")
    $outputFile = "$DirectoryExceptions Log $fileString.xlsx"

    Write-Host "Parsing HTML file: $htmlFile"
    $document = ParseHtml $htmlFile

    Write-Host "Creating Excel file: $outputFile"
    CreateExcelFromHtml $document $outputFile
    }

    }


    # load local HTML file and return COM object that we can query
    function ParseHtml([string]$fileIn)
    {
    $html = New-Object -ComObject "HTMLFile"
    $source = Get-Content $fileIn -Raw
    $html.IHTMLDocument2_write($source)
    return $html
    }


    # create excel spreadsheet from html document
    function CreateExcelFromHtml($document, [string]$excelFile)
    {
    $excel = New-Object -ComObject Excel.Application
    $excel.Visible = $false # $true
    $excel.DisplayAlerts = $false
    $workbook = $excel.Workbooks.Add()
    $sheet = $workbook.ActiveSheet

    Write-Host "Filling in Excel file"
    $row = 1;
    # iterate over tables
    $tables = $document.getElementsByTagName("table")
    $captions = $document.getElementsByTagName("caption")

    for($t=0; $t -lt $tables.length; $t++){

    # create 2 row space between tables
    $row += 2

    #iterate over rows
    $rows = $tables[$t].getElementsByTagName("tr")
    for($i=0; $i -lt $rows.length; $i++)
    {

    # iterate over cells
    # every other cell is a blank cell, however cells with content are always nested in a "font" tag
    $cells = $rows[$i].getElementsByTagName("font")
    for($j=0; $j -lt $cells.length; $j++)
    {
    # load each cell contents into an excel cell
    $sheet.Cells.Item($row + $i, $j+1) = $cells[$j].innerText.Trim()
    }
    }

    # add corrective action column to first table
    if($t -eq 0){
    $j += 1 # increment table size
    $sheet.Cells.Item($row, $j) = "Corrective Action";
    }

    # get table title from corresponding caption
    $title = [string]$captions[$t].innerText

    StyleTable $sheet $title $row $i $j

    $row += $i

    }

    Write-Host "Formating Excel file"
    $sheet.UsedRange.ColumnWidth = 18

    Write-Host "Saving Excel file"
    $workbook.SaveAs($excelFile, 51)
    $workbook.Close();
    $excel.Quit()

    }


    Function StyleTable($sheet, $tableTitle, $startRow, $numberOfRows, $endColumn){

    # give the table a title and style it
    $titleCell = $sheet.Cells($startRow - 1, 1)
    $titleCell.Value = $tableTitle
    $titleCell.Font.Name = "Cambria"
    $titleCell.Font.Size = 14
    $titleCell.Font.Bold = $true
    $titleCell.Font.ThemeColor = 5

    # give the table a border
    $tableRange = $sheet.Range($sheet.Cells($startRow, 1), `
    $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))

    $tableRange.Borders.Weight = 2

    # make the table header bold and blue
    $headerRange = $sheet.Range($sheet.Cells($startRow, 1), `
    $sheet.Cells($startRow, $endColumn))

    $headerRange.Interior.ColorIndex = 37
    $headerRange.Font.Bold = $true
    }


    Main


    Example index.html file




    <html>
    <head>
    <title>FooBar</title>
    </head>

    <body>
    <center>
    <p>
    <b><font size=6>Bar</font>
    </b><br>
    <b><font size=4>Foo</font>
    </b><br>
    <font size=3>FooBar</font>
    </p>

    <table>
    <caption><font size=3><b>Title</b></font></caption>
    <thead>
    <th align=left valign=bottom><font size=2><u>A</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>B</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>C</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>D</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>E</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>F</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>G</u></font></th><th>&nbsp;&nbsp;</th>
    </thead>

    <tbody>
    <tr>
    <td valign=top><font size=2>1</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>2</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>3</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>4</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>5</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>6</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>7</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
    </tr>

    </tbody>
    </table>

    <br>
    <table>
    <caption><font size=3><b>Title 2</b></font></caption>
    <thead>
    <th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>FoobBar</u></font></th><th>&nbsp;&nbsp;</th>
    </thead>

    <tbody>
    <tr>
    <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>FizzBuzz<br>
    &nbsp;<br>
    &nbsp;</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>FooBar<br>
    &nbsp;<br>
    &nbsp;</td><td>&nbsp;&nbsp;</td>
    </tr>

    </tbody>
    </table>

    <br>
    <table>
    <caption><font size=3><b>Title 3</b></font></caption>
    <thead>
    <th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>FizzBuzz</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>FooBar</u></font></th><th>&nbsp;&nbsp;</th>
    </thead>

    <tbody>
    <tr>
    <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>FooBar</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>FizzBuzz</td><td>&nbsp;&nbsp;</td>
    </tr>

    </tbody>
    </table>

    <br>
    <p>
    <b><font size=4>Something</font>
    </b><br>
    </p>

    <table>
    <caption><font size=3><b>Title 4</b></font></caption>
    <thead>
    <th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
    <th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
    </thead>

    <tbody>
    <tr>
    <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
    </tr>

    <tr>
    <b><td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
    </b><td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
    <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
    </tr>

    </tbody>
    </table>

    <br>
    </center>
    </body>
    </html>



    Output



    enter image description here










    share|improve this question


























      up vote
      6
      down vote

      favorite
      1









      up vote
      6
      down vote

      favorite
      1






      1





      This script is designed to create an Excel spreadsheet from each HTML table from auto-generated index.html file.



      It needs to be very robust but unfortunately I don't have many example files to test it against. Any feedback on style, possible points of failure, or vulnerabilities would be very appreciated.



      Powershell



      function Main
      {

      # find each file called "index.html" in the folders below the directory of the script
      $Directory = $PSScriptRoot
      $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html

      # for each html file, create an excel file in the directory of the script
      # called Exceptions Log <date of HTML file creation>.xlsx
      ForEach($htmlFile in $htmlFiles)
      {
      $fileCreationDate = (Get-ChildItem $htmlFile).CreationTime
      $fileString = $fileCreationDate.ToString("yyyy-mm-dd hh.mm.ss")
      $outputFile = "$DirectoryExceptions Log $fileString.xlsx"

      Write-Host "Parsing HTML file: $htmlFile"
      $document = ParseHtml $htmlFile

      Write-Host "Creating Excel file: $outputFile"
      CreateExcelFromHtml $document $outputFile
      }

      }


      # load local HTML file and return COM object that we can query
      function ParseHtml([string]$fileIn)
      {
      $html = New-Object -ComObject "HTMLFile"
      $source = Get-Content $fileIn -Raw
      $html.IHTMLDocument2_write($source)
      return $html
      }


      # create excel spreadsheet from html document
      function CreateExcelFromHtml($document, [string]$excelFile)
      {
      $excel = New-Object -ComObject Excel.Application
      $excel.Visible = $false # $true
      $excel.DisplayAlerts = $false
      $workbook = $excel.Workbooks.Add()
      $sheet = $workbook.ActiveSheet

      Write-Host "Filling in Excel file"
      $row = 1;
      # iterate over tables
      $tables = $document.getElementsByTagName("table")
      $captions = $document.getElementsByTagName("caption")

      for($t=0; $t -lt $tables.length; $t++){

      # create 2 row space between tables
      $row += 2

      #iterate over rows
      $rows = $tables[$t].getElementsByTagName("tr")
      for($i=0; $i -lt $rows.length; $i++)
      {

      # iterate over cells
      # every other cell is a blank cell, however cells with content are always nested in a "font" tag
      $cells = $rows[$i].getElementsByTagName("font")
      for($j=0; $j -lt $cells.length; $j++)
      {
      # load each cell contents into an excel cell
      $sheet.Cells.Item($row + $i, $j+1) = $cells[$j].innerText.Trim()
      }
      }

      # add corrective action column to first table
      if($t -eq 0){
      $j += 1 # increment table size
      $sheet.Cells.Item($row, $j) = "Corrective Action";
      }

      # get table title from corresponding caption
      $title = [string]$captions[$t].innerText

      StyleTable $sheet $title $row $i $j

      $row += $i

      }

      Write-Host "Formating Excel file"
      $sheet.UsedRange.ColumnWidth = 18

      Write-Host "Saving Excel file"
      $workbook.SaveAs($excelFile, 51)
      $workbook.Close();
      $excel.Quit()

      }


      Function StyleTable($sheet, $tableTitle, $startRow, $numberOfRows, $endColumn){

      # give the table a title and style it
      $titleCell = $sheet.Cells($startRow - 1, 1)
      $titleCell.Value = $tableTitle
      $titleCell.Font.Name = "Cambria"
      $titleCell.Font.Size = 14
      $titleCell.Font.Bold = $true
      $titleCell.Font.ThemeColor = 5

      # give the table a border
      $tableRange = $sheet.Range($sheet.Cells($startRow, 1), `
      $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))

      $tableRange.Borders.Weight = 2

      # make the table header bold and blue
      $headerRange = $sheet.Range($sheet.Cells($startRow, 1), `
      $sheet.Cells($startRow, $endColumn))

      $headerRange.Interior.ColorIndex = 37
      $headerRange.Font.Bold = $true
      }


      Main


      Example index.html file




      <html>
      <head>
      <title>FooBar</title>
      </head>

      <body>
      <center>
      <p>
      <b><font size=6>Bar</font>
      </b><br>
      <b><font size=4>Foo</font>
      </b><br>
      <font size=3>FooBar</font>
      </p>

      <table>
      <caption><font size=3><b>Title</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>A</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>B</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>C</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>D</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>E</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>F</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>G</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>1</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>2</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>3</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>4</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>5</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>6</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>7</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      <table>
      <caption><font size=3><b>Title 2</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>FoobBar</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FizzBuzz<br>
      &nbsp;<br>
      &nbsp;</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FooBar<br>
      &nbsp;<br>
      &nbsp;</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      <table>
      <caption><font size=3><b>Title 3</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>FizzBuzz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>FooBar</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FooBar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FizzBuzz</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      <p>
      <b><font size=4>Something</font>
      </b><br>
      </p>

      <table>
      <caption><font size=3><b>Title 4</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <b><td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      </b><td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      </center>
      </body>
      </html>



      Output



      enter image description here










      share|improve this question















      This script is designed to create an Excel spreadsheet from each HTML table from auto-generated index.html file.



      It needs to be very robust but unfortunately I don't have many example files to test it against. Any feedback on style, possible points of failure, or vulnerabilities would be very appreciated.



      Powershell



      function Main
      {

      # find each file called "index.html" in the folders below the directory of the script
      $Directory = $PSScriptRoot
      $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html

      # for each html file, create an excel file in the directory of the script
      # called Exceptions Log <date of HTML file creation>.xlsx
      ForEach($htmlFile in $htmlFiles)
      {
      $fileCreationDate = (Get-ChildItem $htmlFile).CreationTime
      $fileString = $fileCreationDate.ToString("yyyy-mm-dd hh.mm.ss")
      $outputFile = "$DirectoryExceptions Log $fileString.xlsx"

      Write-Host "Parsing HTML file: $htmlFile"
      $document = ParseHtml $htmlFile

      Write-Host "Creating Excel file: $outputFile"
      CreateExcelFromHtml $document $outputFile
      }

      }


      # load local HTML file and return COM object that we can query
      function ParseHtml([string]$fileIn)
      {
      $html = New-Object -ComObject "HTMLFile"
      $source = Get-Content $fileIn -Raw
      $html.IHTMLDocument2_write($source)
      return $html
      }


      # create excel spreadsheet from html document
      function CreateExcelFromHtml($document, [string]$excelFile)
      {
      $excel = New-Object -ComObject Excel.Application
      $excel.Visible = $false # $true
      $excel.DisplayAlerts = $false
      $workbook = $excel.Workbooks.Add()
      $sheet = $workbook.ActiveSheet

      Write-Host "Filling in Excel file"
      $row = 1;
      # iterate over tables
      $tables = $document.getElementsByTagName("table")
      $captions = $document.getElementsByTagName("caption")

      for($t=0; $t -lt $tables.length; $t++){

      # create 2 row space between tables
      $row += 2

      #iterate over rows
      $rows = $tables[$t].getElementsByTagName("tr")
      for($i=0; $i -lt $rows.length; $i++)
      {

      # iterate over cells
      # every other cell is a blank cell, however cells with content are always nested in a "font" tag
      $cells = $rows[$i].getElementsByTagName("font")
      for($j=0; $j -lt $cells.length; $j++)
      {
      # load each cell contents into an excel cell
      $sheet.Cells.Item($row + $i, $j+1) = $cells[$j].innerText.Trim()
      }
      }

      # add corrective action column to first table
      if($t -eq 0){
      $j += 1 # increment table size
      $sheet.Cells.Item($row, $j) = "Corrective Action";
      }

      # get table title from corresponding caption
      $title = [string]$captions[$t].innerText

      StyleTable $sheet $title $row $i $j

      $row += $i

      }

      Write-Host "Formating Excel file"
      $sheet.UsedRange.ColumnWidth = 18

      Write-Host "Saving Excel file"
      $workbook.SaveAs($excelFile, 51)
      $workbook.Close();
      $excel.Quit()

      }


      Function StyleTable($sheet, $tableTitle, $startRow, $numberOfRows, $endColumn){

      # give the table a title and style it
      $titleCell = $sheet.Cells($startRow - 1, 1)
      $titleCell.Value = $tableTitle
      $titleCell.Font.Name = "Cambria"
      $titleCell.Font.Size = 14
      $titleCell.Font.Bold = $true
      $titleCell.Font.ThemeColor = 5

      # give the table a border
      $tableRange = $sheet.Range($sheet.Cells($startRow, 1), `
      $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))

      $tableRange.Borders.Weight = 2

      # make the table header bold and blue
      $headerRange = $sheet.Range($sheet.Cells($startRow, 1), `
      $sheet.Cells($startRow, $endColumn))

      $headerRange.Interior.ColorIndex = 37
      $headerRange.Font.Bold = $true
      }


      Main


      Example index.html file




      <html>
      <head>
      <title>FooBar</title>
      </head>

      <body>
      <center>
      <p>
      <b><font size=6>Bar</font>
      </b><br>
      <b><font size=4>Foo</font>
      </b><br>
      <font size=3>FooBar</font>
      </p>

      <table>
      <caption><font size=3><b>Title</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>A</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>B</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>C</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>D</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>E</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>F</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>G</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>1</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>2</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>3</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>4</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>5</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>6</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>7</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Do</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Re</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Mi</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fa</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>So</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>La</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Ti</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      <table>
      <caption><font size=3><b>Title 2</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>FoobBar</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FizzBuzz<br>
      &nbsp;<br>
      &nbsp;</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FooBar<br>
      &nbsp;<br>
      &nbsp;</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      <table>
      <caption><font size=3><b>Title 3</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>FizzBuzz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>FooBar</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FooBar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>FizzBuzz</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      <p>
      <b><font size=4>Something</font>
      </b><br>
      </p>

      <table>
      <caption><font size=3><b>Title 4</b></font></caption>
      <thead>
      <th align=left valign=bottom><font size=2><u>Foo</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Bar</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Fizz</u></font></th><th>&nbsp;&nbsp;</th>
      <th align=left valign=bottom><font size=2><u>Buzz</u></font></th><th>&nbsp;&nbsp;</th>
      </thead>

      <tbody>
      <tr>
      <td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
      </tr>

      <tr>
      <b><td valign=top><font size=2>Foo</td><td>&nbsp;&nbsp;</td>
      </b><td valign=top><font size=2>Bar</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Fizz</td><td>&nbsp;&nbsp;</td>
      <td valign=top><font size=2>Buzz</td><td>&nbsp;&nbsp;</td>
      </tr>

      </tbody>
      </table>

      <br>
      </center>
      </body>
      </html>



      Output



      enter image description here







      html excel powershell






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Aug 29 '16 at 14:32









      janos

      96.5k12122349




      96.5k12122349










      asked Aug 26 '16 at 2:23









      Charles Clayton

      202212




      202212






















          2 Answers
          2






          active

          oldest

          votes

















          up vote
          6
          down vote



          +50










          Your code looks fine. Just some notes:



          Excel



          The main weakness of your design as I see it is its reliance on the Excel application. Since you say robustness is important, Excel would be the main weak point. Microsoft says don't use Office as a server. From my own bitter experience, I think they are right to say that.



          It's not so bad in your case because you are doing the same thing over and over, so there should be no surprises, but you never know. Excel can be flakey. It can pop up dialogs that will halt processing. It's especially flakey if you are processing multiple documents simultaneously because Excel was never designed to handle that case.



          As an alternative, you could use a library to produce your Excel, such as EPPlus. I've never used that, so I can't endorse it, but it looks good. There may be others out there too. Remember that it's easy to use .Net libraries from PowerShell.



          HTML



          Your HTML is very nearly XHTML. It looks like just the <br> tags would need to be fixed. If you have control over the thing that produces the HTML, then you could change it to produce XHTML instead, and then you could consume that with Import-XML. I'm guessing that's not the case, but I thought it worth mentioning.



          Error Handling



          You don't have any error handling. If you want your code to be robust you will have to think about what it should do when things go wrong.



          Write-Host



          Write-Host "Formating Excel file"


          If you want to capture these messages in a log, then it would be better to use Write-Output.



          Line Continuation



          $tableRange = $sheet.Range($sheet.Cells($startRow, 1), ` 
          $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))


          I don't believe you need to use the backtick character there.



          -Include



          $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html


          One oddity of PowerShell is that -Filter is much faster than -Include. If the number of folders and files you are searching is small, then it doesn't matter, but if you are searching through lots of them, it can make a huge difference.



          (What is the difference in functionality between the two? None, except that -Include is more general and will work in non-filesystems, but that isn't relevant here.)






          share|improve this answer





















          • There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
            – Adamar
            Aug 31 '16 at 0:07






          • 1




            @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
            – Dangph
            Aug 31 '16 at 12:40


















          up vote
          1
          down vote













          There is a much simpler approach. Excel has native HTML conversion capabilities that work fine. Using the Microsoft.Office.Interop.Excel in powershell:



          function SaveAs-Excel2010{
          param([string] $infile, [string] $outfile, [string] $tabname = $null)

          $Excel = New-Object -Com Excel.Application
          $Excel.DisplayAlerts = $false
          $Workbook = $Excel.Workbooks.Open($infile)

          if ($tabname) {
          $Sheet = $Workbook.Worksheets.Item(1)
          $Sheet.Name = $tabname;
          }
          $Workbook.SaveAs($outfile, $xlFixedFormat)
          $Workbook.Close()
          $Excel.quit()
          }

          #sample call
          SaveAs-Excel2010 $htmlFile $xlsxFile $myTabName





          share|improve this answer










          New contributor




          Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
          Check out our Code of Conduct.


















            Your Answer





            StackExchange.ifUsing("editor", function () {
            return StackExchange.using("mathjaxEditing", function () {
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
            });
            });
            }, "mathjax-editing");

            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "196"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














             

            draft saved


            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f139670%2fpowershell-script-to-create-excel-spreadsheet-from-html-file%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            2 Answers
            2






            active

            oldest

            votes








            2 Answers
            2






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes








            up vote
            6
            down vote



            +50










            Your code looks fine. Just some notes:



            Excel



            The main weakness of your design as I see it is its reliance on the Excel application. Since you say robustness is important, Excel would be the main weak point. Microsoft says don't use Office as a server. From my own bitter experience, I think they are right to say that.



            It's not so bad in your case because you are doing the same thing over and over, so there should be no surprises, but you never know. Excel can be flakey. It can pop up dialogs that will halt processing. It's especially flakey if you are processing multiple documents simultaneously because Excel was never designed to handle that case.



            As an alternative, you could use a library to produce your Excel, such as EPPlus. I've never used that, so I can't endorse it, but it looks good. There may be others out there too. Remember that it's easy to use .Net libraries from PowerShell.



            HTML



            Your HTML is very nearly XHTML. It looks like just the <br> tags would need to be fixed. If you have control over the thing that produces the HTML, then you could change it to produce XHTML instead, and then you could consume that with Import-XML. I'm guessing that's not the case, but I thought it worth mentioning.



            Error Handling



            You don't have any error handling. If you want your code to be robust you will have to think about what it should do when things go wrong.



            Write-Host



            Write-Host "Formating Excel file"


            If you want to capture these messages in a log, then it would be better to use Write-Output.



            Line Continuation



            $tableRange = $sheet.Range($sheet.Cells($startRow, 1), ` 
            $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))


            I don't believe you need to use the backtick character there.



            -Include



            $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html


            One oddity of PowerShell is that -Filter is much faster than -Include. If the number of folders and files you are searching is small, then it doesn't matter, but if you are searching through lots of them, it can make a huge difference.



            (What is the difference in functionality between the two? None, except that -Include is more general and will work in non-filesystems, but that isn't relevant here.)






            share|improve this answer





















            • There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
              – Adamar
              Aug 31 '16 at 0:07






            • 1




              @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
              – Dangph
              Aug 31 '16 at 12:40















            up vote
            6
            down vote



            +50










            Your code looks fine. Just some notes:



            Excel



            The main weakness of your design as I see it is its reliance on the Excel application. Since you say robustness is important, Excel would be the main weak point. Microsoft says don't use Office as a server. From my own bitter experience, I think they are right to say that.



            It's not so bad in your case because you are doing the same thing over and over, so there should be no surprises, but you never know. Excel can be flakey. It can pop up dialogs that will halt processing. It's especially flakey if you are processing multiple documents simultaneously because Excel was never designed to handle that case.



            As an alternative, you could use a library to produce your Excel, such as EPPlus. I've never used that, so I can't endorse it, but it looks good. There may be others out there too. Remember that it's easy to use .Net libraries from PowerShell.



            HTML



            Your HTML is very nearly XHTML. It looks like just the <br> tags would need to be fixed. If you have control over the thing that produces the HTML, then you could change it to produce XHTML instead, and then you could consume that with Import-XML. I'm guessing that's not the case, but I thought it worth mentioning.



            Error Handling



            You don't have any error handling. If you want your code to be robust you will have to think about what it should do when things go wrong.



            Write-Host



            Write-Host "Formating Excel file"


            If you want to capture these messages in a log, then it would be better to use Write-Output.



            Line Continuation



            $tableRange = $sheet.Range($sheet.Cells($startRow, 1), ` 
            $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))


            I don't believe you need to use the backtick character there.



            -Include



            $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html


            One oddity of PowerShell is that -Filter is much faster than -Include. If the number of folders and files you are searching is small, then it doesn't matter, but if you are searching through lots of them, it can make a huge difference.



            (What is the difference in functionality between the two? None, except that -Include is more general and will work in non-filesystems, but that isn't relevant here.)






            share|improve this answer





















            • There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
              – Adamar
              Aug 31 '16 at 0:07






            • 1




              @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
              – Dangph
              Aug 31 '16 at 12:40













            up vote
            6
            down vote



            +50







            up vote
            6
            down vote



            +50




            +50




            Your code looks fine. Just some notes:



            Excel



            The main weakness of your design as I see it is its reliance on the Excel application. Since you say robustness is important, Excel would be the main weak point. Microsoft says don't use Office as a server. From my own bitter experience, I think they are right to say that.



            It's not so bad in your case because you are doing the same thing over and over, so there should be no surprises, but you never know. Excel can be flakey. It can pop up dialogs that will halt processing. It's especially flakey if you are processing multiple documents simultaneously because Excel was never designed to handle that case.



            As an alternative, you could use a library to produce your Excel, such as EPPlus. I've never used that, so I can't endorse it, but it looks good. There may be others out there too. Remember that it's easy to use .Net libraries from PowerShell.



            HTML



            Your HTML is very nearly XHTML. It looks like just the <br> tags would need to be fixed. If you have control over the thing that produces the HTML, then you could change it to produce XHTML instead, and then you could consume that with Import-XML. I'm guessing that's not the case, but I thought it worth mentioning.



            Error Handling



            You don't have any error handling. If you want your code to be robust you will have to think about what it should do when things go wrong.



            Write-Host



            Write-Host "Formating Excel file"


            If you want to capture these messages in a log, then it would be better to use Write-Output.



            Line Continuation



            $tableRange = $sheet.Range($sheet.Cells($startRow, 1), ` 
            $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))


            I don't believe you need to use the backtick character there.



            -Include



            $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html


            One oddity of PowerShell is that -Filter is much faster than -Include. If the number of folders and files you are searching is small, then it doesn't matter, but if you are searching through lots of them, it can make a huge difference.



            (What is the difference in functionality between the two? None, except that -Include is more general and will work in non-filesystems, but that isn't relevant here.)






            share|improve this answer












            Your code looks fine. Just some notes:



            Excel



            The main weakness of your design as I see it is its reliance on the Excel application. Since you say robustness is important, Excel would be the main weak point. Microsoft says don't use Office as a server. From my own bitter experience, I think they are right to say that.



            It's not so bad in your case because you are doing the same thing over and over, so there should be no surprises, but you never know. Excel can be flakey. It can pop up dialogs that will halt processing. It's especially flakey if you are processing multiple documents simultaneously because Excel was never designed to handle that case.



            As an alternative, you could use a library to produce your Excel, such as EPPlus. I've never used that, so I can't endorse it, but it looks good. There may be others out there too. Remember that it's easy to use .Net libraries from PowerShell.



            HTML



            Your HTML is very nearly XHTML. It looks like just the <br> tags would need to be fixed. If you have control over the thing that produces the HTML, then you could change it to produce XHTML instead, and then you could consume that with Import-XML. I'm guessing that's not the case, but I thought it worth mentioning.



            Error Handling



            You don't have any error handling. If you want your code to be robust you will have to think about what it should do when things go wrong.



            Write-Host



            Write-Host "Formating Excel file"


            If you want to capture these messages in a log, then it would be better to use Write-Output.



            Line Continuation



            $tableRange = $sheet.Range($sheet.Cells($startRow, 1), ` 
            $sheet.Cells($startRow + $numberOfRows - 1, $endColumn))


            I don't believe you need to use the backtick character there.



            -Include



            $htmlFiles = Get-ChildItem -path $Directory -Recurse -Include index.html


            One oddity of PowerShell is that -Filter is much faster than -Include. If the number of folders and files you are searching is small, then it doesn't matter, but if you are searching through lots of them, it can make a huge difference.



            (What is the difference in functionality between the two? None, except that -Include is more general and will work in non-filesystems, but that isn't relevant here.)







            share|improve this answer












            share|improve this answer



            share|improve this answer










            answered Aug 27 '16 at 4:01









            Dangph

            1,523510




            1,523510












            • There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
              – Adamar
              Aug 31 '16 at 0:07






            • 1




              @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
              – Dangph
              Aug 31 '16 at 12:40


















            • There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
              – Adamar
              Aug 31 '16 at 0:07






            • 1




              @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
              – Dangph
              Aug 31 '16 at 12:40
















            There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
            – Adamar
            Aug 31 '16 at 0:07




            There is no Import-Xml. The only close alternative it Import-CliXml, and that doesn't work like you'd expect. Instead you have to do something like this: [xml]$XmlDocument = Get-Content -Path C:Cars.xml after which u can use it like objects. See here.
            – Adamar
            Aug 31 '16 at 0:07




            1




            1




            @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
            – Dangph
            Aug 31 '16 at 12:40




            @Adamar, yes, sorry, that's what I meant. XML is a native datatype in PowerShell. Once you cast to an [xml] object, you can pipe that through Select-Xml and use XPath expressions to extract the data you want. My point was simply that working with XML may be more straightforward than working with HTML.
            – Dangph
            Aug 31 '16 at 12:40












            up vote
            1
            down vote













            There is a much simpler approach. Excel has native HTML conversion capabilities that work fine. Using the Microsoft.Office.Interop.Excel in powershell:



            function SaveAs-Excel2010{
            param([string] $infile, [string] $outfile, [string] $tabname = $null)

            $Excel = New-Object -Com Excel.Application
            $Excel.DisplayAlerts = $false
            $Workbook = $Excel.Workbooks.Open($infile)

            if ($tabname) {
            $Sheet = $Workbook.Worksheets.Item(1)
            $Sheet.Name = $tabname;
            }
            $Workbook.SaveAs($outfile, $xlFixedFormat)
            $Workbook.Close()
            $Excel.quit()
            }

            #sample call
            SaveAs-Excel2010 $htmlFile $xlsxFile $myTabName





            share|improve this answer










            New contributor




            Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
            Check out our Code of Conduct.






















              up vote
              1
              down vote













              There is a much simpler approach. Excel has native HTML conversion capabilities that work fine. Using the Microsoft.Office.Interop.Excel in powershell:



              function SaveAs-Excel2010{
              param([string] $infile, [string] $outfile, [string] $tabname = $null)

              $Excel = New-Object -Com Excel.Application
              $Excel.DisplayAlerts = $false
              $Workbook = $Excel.Workbooks.Open($infile)

              if ($tabname) {
              $Sheet = $Workbook.Worksheets.Item(1)
              $Sheet.Name = $tabname;
              }
              $Workbook.SaveAs($outfile, $xlFixedFormat)
              $Workbook.Close()
              $Excel.quit()
              }

              #sample call
              SaveAs-Excel2010 $htmlFile $xlsxFile $myTabName





              share|improve this answer










              New contributor




              Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
              Check out our Code of Conduct.




















                up vote
                1
                down vote










                up vote
                1
                down vote









                There is a much simpler approach. Excel has native HTML conversion capabilities that work fine. Using the Microsoft.Office.Interop.Excel in powershell:



                function SaveAs-Excel2010{
                param([string] $infile, [string] $outfile, [string] $tabname = $null)

                $Excel = New-Object -Com Excel.Application
                $Excel.DisplayAlerts = $false
                $Workbook = $Excel.Workbooks.Open($infile)

                if ($tabname) {
                $Sheet = $Workbook.Worksheets.Item(1)
                $Sheet.Name = $tabname;
                }
                $Workbook.SaveAs($outfile, $xlFixedFormat)
                $Workbook.Close()
                $Excel.quit()
                }

                #sample call
                SaveAs-Excel2010 $htmlFile $xlsxFile $myTabName





                share|improve this answer










                New contributor




                Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.









                There is a much simpler approach. Excel has native HTML conversion capabilities that work fine. Using the Microsoft.Office.Interop.Excel in powershell:



                function SaveAs-Excel2010{
                param([string] $infile, [string] $outfile, [string] $tabname = $null)

                $Excel = New-Object -Com Excel.Application
                $Excel.DisplayAlerts = $false
                $Workbook = $Excel.Workbooks.Open($infile)

                if ($tabname) {
                $Sheet = $Workbook.Worksheets.Item(1)
                $Sheet.Name = $tabname;
                }
                $Workbook.SaveAs($outfile, $xlFixedFormat)
                $Workbook.Close()
                $Excel.quit()
                }

                #sample call
                SaveAs-Excel2010 $htmlFile $xlsxFile $myTabName






                share|improve this answer










                New contributor




                Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.









                share|improve this answer



                share|improve this answer








                edited Nov 16 at 17:49









                Sᴀᴍ Onᴇᴌᴀ

                7,71061748




                7,71061748






                New contributor




                Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.









                answered Nov 16 at 17:20









                Chris B

                111




                111




                New contributor




                Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.





                New contributor





                Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.






                Chris B is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.






























                     

                    draft saved


                    draft discarded



















































                     


                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f139670%2fpowershell-script-to-create-excel-spreadsheet-from-html-file%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    Morgemoulin

                    Scott Moir

                    Souastre