I am using the following PowerShell script to insert a large XML file (3.5 GB) into my local SQL Server table 'files_index', since the file is over the 2 GB limit for loading XML directly in SQL Server.
The table structure is as follows, with the PowerShell script after it. The file contains around 5,000,000 rows, and while the script works and does the job, it takes a long time to insert (currently around 15 minutes).
Any suggestions for speeding up the process? I have tried playing with the batch size, but it doesn't seem to make much difference. I got this PowerShell script here on Stack Overflow a while ago and am just trying to streamline it. Thanks for any assistance or suggestions.
CREATE TABLE [dbo].[files_index]
(
[Product_ID] [int] NOT NULL,
[path] [varchar](100) NULL,
[Updated] [varchar](50) NULL,
[Quality] [varchar](50) NULL,
[Supplier_id] [int] NULL,
[Prod_ID] [varchar](100) NULL,
[Catid] [int] NULL,
[On_Market] [int] NULL,
[Model_Name] [varchar](250) NULL,
[Product_View] [varchar](250) NULL,
[HighPic] [varchar](250) NULL,
[HighPicSize] [int] NULL,
[HighPicWidth] [int] NULL,
[HighPicHeight] [int] NULL,
[Date_Added] [varchar](150) NULL,
CONSTRAINT [PK_files_index]
PRIMARY KEY CLUSTERED ([Product_ID] ASC)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF,
IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON,
ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
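For context, each product in the index file is a single <file> element whose attribute names match the columns above. A representative element looks roughly like this (the values are invented for illustration):

<file path="export/freexml.int/EN/12345.xml" Product_ID="12345" Updated="20180301120000"
      Quality="ICECAT" Supplier_id="5" Prod_ID="ABC-123" Catid="151" On_Market="1"
      Model_Name="Example Model 5000" Product_View="513"
      HighPic="http://images.example.com/high/12345.jpg" HighPicSize="169114"
      HighPicWidth="1659" HighPicHeight="1276" Date_Added="20180301000000"/>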
PowerShell script:
Set-ExecutionPolicy Unrestricted -Scope LocalMachine
[String]$global:connectionString = "Data Source=Apps2\Apps2;Initial Catalog=DTEDATA;Integrated Security=SSPI";
[System.Data.DataTable]$global:dt = New-Object System.Data.DataTable;
[System.Xml.XmlTextReader]$global:xmlReader = New-Object System.Xml.XmlTextReader("C:\Scripts\icecat\files.index.xml");
[Int32]$global:batchSize = 100000;  # rows buffered in the DataTable before each WriteToServer call
function Add-FileRow {
    param([System.Xml.XmlTextReader]$xmlReader)
    # Append a new row to the buffer DataTable, copying one attribute per column.
    $newRow = $dt.NewRow();
    $null = $dt.Rows.Add($newRow);
    $newRow["Product_ID"] = $xmlReader.GetAttribute("Product_ID");
    $newRow["path"] = $xmlReader.GetAttribute("path");
    $newRow["Updated"] = $xmlReader.GetAttribute("Updated");
    $newRow["Quality"] = $xmlReader.GetAttribute("Quality");
    $newRow["Supplier_id"] = $xmlReader.GetAttribute("Supplier_id");
    $newRow["Prod_ID"] = $xmlReader.GetAttribute("Prod_ID");
    $newRow["Catid"] = $xmlReader.GetAttribute("Catid");
    $newRow["On_Market"] = $xmlReader.GetAttribute("On_Market");
    $newRow["Model_Name"] = $xmlReader.GetAttribute("Model_Name");
    $newRow["Product_View"] = $xmlReader.GetAttribute("Product_View");
    $newRow["HighPic"] = $xmlReader.GetAttribute("HighPic");
    $newRow["HighPicSize"] = $xmlReader.GetAttribute("HighPicSize");
    $newRow["HighPicWidth"] = $xmlReader.GetAttribute("HighPicWidth");
    $newRow["HighPicHeight"] = $xmlReader.GetAttribute("HighPicHeight");
    $newRow["Date_Added"] = $xmlReader.GetAttribute("Date_Added");
}
# Initialize the DataTable schema by selecting zero rows from the target table;
# WHERE 0 = 1 returns no data, so Fill() copies only the column definitions.
$da = New-Object System.Data.SqlClient.SqlDataAdapter("SELECT * FROM files_index WHERE 0 = 1", $global:connectionString);
$null = $da.Fill($global:dt);
$bcp = New-Object System.Data.SqlClient.SqlBulkCopy($global:connectionString);
$bcp.DestinationTableName = "dbo.files_index";
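# Note: with no explicit ColumnMappings, SqlBulkCopy maps source to destination
# columns by ordinal, which is safe here only because the DataTable schema was
# filled from SELECT * against the destination table itself.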
$recordCount = 0;
while($xmlReader.Read())
{
    if(($xmlReader.NodeType -eq [System.Xml.XmlNodeType]::Element) -and
        ($xmlReader.Name -eq "file"))
    {
        Add-FileRow -xmlReader $xmlReader;
        $recordCount += 1;
        # Flush the buffered rows to the server once per batch.
        if(($recordCount % $global:batchSize) -eq 0)
        {
            $bcp.WriteToServer($dt);
            $dt.Rows.Clear();
            Write-Host "$recordCount file elements processed so far";
        }
    }
}
# Flush any remaining rows from the final, partial batch.
if($dt.Rows.Count -gt 0)
{
    $bcp.WriteToServer($dt);
}
$bcp.Close();
$xmlReader.Close();
Write-Host "$recordCount file elements imported";
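In case it helps: below is the direction I was planning to try next, a minimal, untested sketch using SqlBulkCopy's standard tuning members (TableLock, BulkCopyTimeout, BatchSize, EnableStreaming); the values are placeholders, not recommendations.

# Untested sketch: take a table-level bulk update lock instead of row locks,
# and lift the default 30-second timeout.
$options = [System.Data.SqlClient.SqlBulkCopyOptions]::TableLock;
$bcp = New-Object System.Data.SqlClient.SqlBulkCopy($global:connectionString, $options);
$bcp.DestinationTableName = "dbo.files_index";
$bcp.BulkCopyTimeout = 0;        # 0 = wait indefinitely (default is 30 seconds)
$bcp.BatchSize = 50000;          # rows per internal batch; placeholder value
$bcp.EnableStreaming = $true;    # only pays off with an IDataReader source

TableLock takes a bulk-update lock for the duration of the load, which usually helps large inserts at the cost of concurrent access; EnableStreaming would only matter if I switched from the buffered DataTable to an IDataReader source.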