これでかなり遠くまで行けるはずです。完全にテストされていないため、コードを読んで理解し、適切な変更を加えて動作させてください。
function
を削除しました 代わりにすべてのコードをループにインライン化すると、関数は私の好みには大きすぎました。これで、何が起こっているかをより明確に確認できるはずです。
コメントで説明されているように、実質的にまったく同じコードが 2 回ありますが、自己参照を追加する小さな追加ステップがあるため、プライマリ ID とセカンダリ ID を介してすべての製品を同じ方法でクエリできます。
$connectionString = "Data Source=Apps2\Apps2;Initial Catalog=ICECAT;Integrated Security=SSPI"
$batchSize = 50000
# set up [files_index] datatable & read schema from DB
$files_index_table = New-Object System.Data.DataTable
$files_index_adapter = New-Object System.Data.SqlClient.SqlDataAdapter("SELECT * FROM files_index WHERE 0 = 1", $connectionString)
$files_index_adapter.Fill($files_index_table) | Out-Null
$files_index_bcp = New-Object SqlBulkCopy($connectionString)
$files_index_bcp.DestinationTableName = "dbo.files_index"
$files_index_count = 0
# set up [product_ids] datatable & read schema from DB
$product_ids_table = New-Object System.Data.DataTable
$product_ids_adapter = New-Object System.Data.SqlClient.SqlDataAdapter("SELECT * FROM product_ids WHERE 0 = 1", $connectionString)
$product_ids_adapter.Fill($product_ids_table) | Out-Null
$product_ids_bcp = New-Object System.Data.SqlClient.SqlBulkCopy($connectionString)
$product_ids_bcp.DestinationTableName = "dbo.product_ids"
$product_ids_count = 0
# main import loop
$xmlReader = New-Object System.Xml.XmlTextReader("C:\Scripts\icecat\files.index.xml")
while ($xmlReader.Read()) {
# skip any XML nodes that aren't elements
if ($xmlReader.NodeType -ne [System.Xml.XmlNodeType]::Element) { continue }
# handle <file> elements
if ($xmlReader.Name -eq "file") {
$files_index_count++
# remember current product ID, we'll need it when we hit the next <M_Prod_ID> element
$curr_product_id = $xmlReader.GetAttribute("Product_ID")
$is_new_file = $true
$newRow = $files_index_table.NewRow()
$newRow["Product_ID"] = $xmlReader.GetAttribute("Product_ID")
$newRow["path"] = $xmlReader.GetAttribute("path")
$newRow["Updated"] = $xmlReader.GetAttribute("Updated")
$newRow["Quality"] = $xmlReader.GetAttribute("Quality")
$newRow["Supplier_id"] = $xmlReader.GetAttribute("Supplier_id")
$newRow["Prod_ID"] = $xmlReader.GetAttribute("Prod_ID")
$newRow["Catid"] = $xmlReader.GetAttribute("Catid")
$newRow["On_Market"] = $xmlReader.GetAttribute("On_Market")
$newRow["Model_Name"] = $xmlReader.GetAttribute("Model_Name")
$newRow["Product_View"] = $xmlReader.GetAttribute("Product_View")
$newRow["HighPic"] = $xmlReader.GetAttribute("HighPic")
$newRow["HighPicSize"] = $xmlReader.GetAttribute("HighPicSize")
$newRow["HighPicWifiles_index_tableh"] = $xmlReader.GetAttribute("HighPicWifiles_index_tableh")
$newRow["HighPicHeight"] = $xmlReader.GetAttribute("HighPicHeight")
$newRow["Date_Added"] = $xmlReader.GetAttribute("Date_Added")
$files_index_table.Rows.Add($newRow) | Out-Null
if ($files_index_table.Rows.Count -eq $batchSize) {
$files_index_bcp.WriteToServer($files_index_table)
$files_index_table.Rows.Clear()
Write-Host "$files_index_count <file> elements processed so far"
}
# handle <M_Prod_ID> elements
} elseif ($xmlReader.Name -eq "M_Prod_ID") {
$product_ids_count++
# add self-reference row to the [product_ids] table
# only for the first <M_Prod_ID> per <file> we need to do this
if ($is_new_file) {
$newRow = $product_ids_table.NewRow()
$newRow["Product_ID"] = $curr_product_id # from above
$newRow["Alternative_ID"] = $curr_product_id
$product_ids_table.Rows.Add($newRow) | Out-Null
$is_new_file = $false
}
$newRow = $product_ids_table.NewRow()
$newRow["Product_ID"] = $curr_product_id # from above
$newRow["Alternative_ID"] = $xmlReader.Value
$product_ids_table.Rows.Add($newRow) | Out-Null
if ($product_ids_table.Rows.Count -eq $batchSize) {
$product_ids_bcp.WriteToServer($files_index_table)
$product_ids_table.Rows.Clear()
Write-Host "$product_ids_count <M_Prod_ID> elements processed so far"
}
}
}
# write any remaining rows to the server
if ($files_index_table.Rows.Count -gt 0) {
$files_index_bcp.WriteToServer($files_index_table)
$files_index_table.Rows.Clear()
}
Write-Host "$files_index_count <file> elements processed overall"
if ($product_ids_table.Rows.Count -gt 0) {
$product_ids_bcp.WriteToServer($product_ids_table)
$product_ids_table.Rows.Clear()
}
Write-Host "$product_ids_count <M_Prod_ID> elements processed overall"