<#
.SYNOPSIS
Extracts SAML-style metadata fields from the XML files in a directory and writes them to a CSV file.

.DESCRIPTION
For every *.xml file directly inside DirectoryPath, the script reads the
entityID attribute of the EntityDescriptor element, derives a short name from
it, reads the Location attribute of the first AssertionConsumerService element,
and records one row per file (including files that failed to parse).

.PARAMETER DirectoryPath
Directory containing the XML files to process. Required.

.PARAMETER OutputCsvPath
Path of the CSV file to write. Defaults to "xml_extraction_results.csv".

.PARAMETER DebugOutput
When present, prints additional per-file diagnostic information.
#>
param(
    [Parameter(Mandatory = $true)]
    [string] $DirectoryPath,

    [Parameter(Mandatory = $false)]
    [string] $OutputCsvPath = "xml_extraction_results.csv",

    [Parameter(Mandatory = $false)]
    [switch] $DebugOutput
)
# Validate the input directory. -LiteralPath is used so that characters such as
# '[' and ']' in the directory name are taken literally instead of being
# interpreted as wildcard patterns.
if (-not (Test-Path -LiteralPath $DirectoryPath -PathType Container)) {
    Write-Error "Directory not found: $DirectoryPath"
    exit 1
}

# Collect the XML files located directly in the directory (no recursion).
# @(...) guarantees an array, so .Count is reliable for zero or one match.
$xmlFiles = @(Get-ChildItem -LiteralPath $DirectoryPath -Filter "*.xml" -File)
if ($xmlFiles.Count -eq 0) {
    # Nothing to do is not treated as a failure, hence exit code 0.
    Write-Warning "No XML files found in directory: $DirectoryPath"
    exit 0
}
Write-Host "Found $($xmlFiles.Count) XML files to process..." -ForegroundColor Yellow
# Returns the host portion of an entityID: a leading "https://" or "http://"
# (matched case-insensitively, since URI schemes are case-insensitive) is
# removed, then everything from the first "/" onward is dropped. A "/" at
# position 0 is not treated as a separator, so the value is returned unchanged.
function Get-EntityIdDomain {
    param([string]$EntityId)

    $remainder = $EntityId
    foreach ($scheme in 'https://', 'http://') {
        if ($remainder.StartsWith($scheme, [System.StringComparison]::OrdinalIgnoreCase)) {
            $remainder = $remainder.Substring($scheme.Length)
            break
        }
    }

    $slashIndex = $remainder.IndexOf([char]'/')
    if ($slashIndex -gt 0) {
        return $remainder.Substring(0, $slashIndex)
    }
    return $remainder
}

# Returns the part of a domain before its first "." (the whole value when there
# is no dot, or when the dot is the first character).
function Get-DomainShortName {
    param([string]$Domain)

    $dotIndex = $Domain.IndexOf([char]'.')
    if ($dotIndex -gt 0) {
        return $Domain.Substring(0, $dotIndex)
    }
    return $Domain
}

# One result row is collected per file. A generic list is used because
# appending to an array with += copies the whole array on every iteration.
$results = New-Object 'System.Collections.Generic.List[object]'

foreach ($file in $xmlFiles) {
    Write-Host "Processing: $($file.Name)" -ForegroundColor Cyan
    try {
        # Per-file state; reset explicitly so values never leak from the previous file.
        $entityID = $null
        $location = $null
        $shortName = $null

        # -LiteralPath: file names such as "metadata[1].xml" must not be treated
        # as wildcard patterns. -ErrorAction Stop: surface the real read error
        # (e.g. access denied) in the catch block below.
        $rawContent = Get-Content -LiteralPath $file.FullName -Raw -Encoding UTF8 -ErrorAction Stop

        # Check for empty content before touching $rawContent, which is $null
        # for a zero-length file.
        if ([string]::IsNullOrWhiteSpace($rawContent)) {
            throw "File appears to be empty or contains only whitespace"
        }
        if ($DebugOutput) {
            Write-Host " Raw content length: $($rawContent.Length)" -ForegroundColor Gray
            Write-Host " First 200 chars: $($rawContent.Substring(0, [Math]::Min(200, $rawContent.Length)))" -ForegroundColor Gray
        }

        # Strip surrounding whitespace and a leading byte-order mark (U+FEFF) if
        # one survived decoding. Only the BOM character itself is removed; no
        # fixed number of characters is ever cut from the document.
        $cleanContent = $rawContent.Trim().TrimStart([char]0xFEFF).TrimStart()

        $xmlContent = New-Object System.Xml.XmlDocument
        $xmlContent.LoadXml($cleanContent)
        if ($DebugOutput) {
            Write-Host " XML loaded successfully. Root element: $($xmlContent.DocumentElement.Name)" -ForegroundColor Gray
            Write-Host " Child nodes count: $($xmlContent.DocumentElement.ChildNodes.Count)" -ForegroundColor Gray
            Write-Host " XML OuterXml preview: $($xmlContent.OuterXml.Substring(0, [Math]::Min(300, $xmlContent.OuterXml.Length)))" -ForegroundColor Gray
        }

        # Namespace manager for the SAML 2.0 metadata namespace, shared by both lookups below.
        $nsManager = New-Object System.Xml.XmlNamespaceManager($xmlContent.NameTable)
        $nsManager.AddNamespace("md", "urn:oasis:names:tc:SAML:2.0:metadata")

        # EntityDescriptor: try the un-namespaced element first, then the SAML metadata namespace.
        $entityDescriptor = $xmlContent.SelectSingleNode("//EntityDescriptor")
        if (-not $entityDescriptor) {
            $entityDescriptor = $xmlContent.SelectSingleNode("//md:EntityDescriptor", $nsManager)
        }
        if ($entityDescriptor) {
            if ($DebugOutput) {
                Write-Host " Found EntityDescriptor node" -ForegroundColor Gray
            }
            if ($entityDescriptor.HasAttribute("entityID")) {
                $entityID = $entityDescriptor.GetAttribute("entityID")
            }
        }

        # Derive ShortName: host part of the entityID, truncated at its first dot.
        if ($entityID) {
            try {
                $domain = Get-EntityIdDomain $entityID
                if ($domain) {
                    $shortName = Get-DomainShortName $domain
                }
                if ($DebugOutput) {
                    Write-Host " Extracted domain: '$domain', ShortName: '$shortName' from EntityID: '$entityID'" -ForegroundColor Gray
                }
            } catch {
                # A failure here is not fatal for the file; the row is still
                # recorded, just without a ShortName.
                if ($DebugOutput) {
                    Write-Host " Error extracting ShortName: $($_.Exception.Message)" -ForegroundColor Yellow
                }
                $shortName = $null
            }
        }

        # AssertionConsumerService: the first matching element, same lookup order as above.
        $assertionConsumerService = $xmlContent.SelectSingleNode("//AssertionConsumerService")
        if (-not $assertionConsumerService) {
            $assertionConsumerService = $xmlContent.SelectSingleNode("//md:AssertionConsumerService", $nsManager)
        }
        if ($assertionConsumerService) {
            if ($DebugOutput) {
                # -join is used instead of Join-String, which only exists in PowerShell 6.2+.
                $acsAttributes = ($assertionConsumerService.Attributes | ForEach-Object { "$($_.Name)='$($_.Value)'" }) -join ', '
                Write-Host " Found AssertionConsumerService node. Attributes: $($assertionConsumerService.Attributes.Count)" -ForegroundColor Gray
                Write-Host " ACS attributes: $acsAttributes" -ForegroundColor Gray
            }
            if ($assertionConsumerService.HasAttribute("Location")) {
                $location = $assertionConsumerService.GetAttribute("Location")
            }
        }

        # Success row.
        $result = [PSCustomObject]@{
            FileName                         = $file.Name
            FilePath                         = $file.FullName
            FileSize                         = $file.Length
            EntityID                         = $entityID
            ShortName                        = $shortName
            AssertionConsumerServiceLocation = $location
            ProcessedSuccessfully            = $true
            ErrorMessage                     = $null
            RootElement                      = $xmlContent.DocumentElement.Name
        }
        $results.Add($result)

        # Per-file progress output.
        if ($entityID) {
            Write-Host " ✓ EntityID: $entityID" -ForegroundColor Green
            if ($shortName) {
                Write-Host " ✓ ShortName: $shortName" -ForegroundColor Green
            }
        } else {
            Write-Host " ⚠ EntityID not found" -ForegroundColor Yellow
        }
        if ($location) {
            Write-Host " ✓ Location: $location" -ForegroundColor Green
        } else {
            Write-Host " ⚠ AssertionConsumerService Location not found" -ForegroundColor Yellow
        }
    } catch {
        Write-Host " ✗ Error processing file: $($_.Exception.Message)" -ForegroundColor Red
        if ($DebugOutput) {
            Write-Host " Full error: $($_.Exception.ToString())" -ForegroundColor Red
        }
        # Failure row: same columns as a success row so the CSV stays rectangular.
        $result = [PSCustomObject]@{
            FileName                         = $file.Name
            FilePath                         = $file.FullName
            FileSize                         = if ($file.Length) { $file.Length } else { 0 }
            EntityID                         = $null
            ShortName                        = $null
            AssertionConsumerServiceLocation = $null
            ProcessedSuccessfully            = $false
            ErrorMessage                     = $_.Exception.Message
            RootElement                      = $null
        }
        $results.Add($result)
    }
}
# Export the collected rows to CSV, then print a summary of the run.
try {
    # -ErrorAction Stop turns any export failure into a terminating error so it
    # is reported by the catch block below.
    $results | Export-Csv -Path $OutputCsvPath -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
    Write-Host "`nResults exported to: $OutputCsvPath" -ForegroundColor Green

    # Every filtered set is wrapped in @(...): when exactly one [pscustomobject]
    # matches, Where-Object returns a scalar whose .Count is empty on Windows
    # PowerShell 5.1, which would blank the totals and skip the error listing.
    # $null is placed on the left of -ne so the comparison is always scalar.
    $failedResults  = @($results | Where-Object { -not $_.ProcessedSuccessfully })
    $successCount   = @($results | Where-Object { $_.ProcessedSuccessfully }).Count
    $errorCount     = $failedResults.Count
    $foundEntityID  = @($results | Where-Object { $null -ne $_.EntityID }).Count
    $foundShortName = @($results | Where-Object { $null -ne $_.ShortName }).Count
    $foundLocation  = @($results | Where-Object { $null -ne $_.AssertionConsumerServiceLocation }).Count

    Write-Host "`n--- Summary ---" -ForegroundColor Magenta
    Write-Host "Total files processed: $(@($results).Count)"
    Write-Host "Successfully processed: $successCount"
    Write-Host "Errors encountered: $errorCount"
    Write-Host "Files with EntityID: $foundEntityID"
    Write-Host "Files with ShortName: $foundShortName"
    Write-Host "Files with ACS Location: $foundLocation"

    if ($errorCount -gt 0) {
        Write-Host "`nFiles with errors:" -ForegroundColor Red
        foreach ($failed in $failedResults) {
            Write-Host " $($failed.FileName): $($failed.ErrorMessage)" -ForegroundColor Red
        }
    }
} catch {
    Write-Error "Failed to export CSV: $($_.Exception.Message)"
    exit 1
}