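# xml  (stray code-fence label from the page this script was copied from; commented out so param() remains the first statement)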

<#
.SYNOPSIS
    Extracts entityID and AssertionConsumerService Location values from the
    XML files in a directory and writes one row per file to a CSV.

.PARAMETER DirectoryPath
    Directory whose *.xml files are processed (required).

.PARAMETER OutputCsvPath
    Path of the CSV file to write. Defaults to "xml_extraction_results.csv".

.PARAMETER DebugOutput
    When set, prints extra diagnostics (content previews, matched nodes,
    full exception text) while each file is processed.
#>
param(
    [Parameter(Mandatory = $true)]
    [string]$DirectoryPath,

    [Parameter()]
    [string]$OutputCsvPath = 'xml_extraction_results.csv',

    [Parameter()]
    [switch]$DebugOutput
)

# Abort early when the input directory does not exist.
if (-not (Test-Path $DirectoryPath -PathType Container)) {
    Write-Error "Directory not found: $DirectoryPath"
    exit 1
}

# Collect the XML files directly inside the directory (no recursion).
# - @( ... ) guarantees an array, so .Count is reliable for zero or one match.
# - The provider-level -Filter "*.xml" can also match longer extensions such as
#   ".xmlx" on Windows, so the extension is re-checked exactly (the comparison
#   is case-insensitive).
$xmlFiles = @(
    Get-ChildItem -Path $DirectoryPath -Filter "*.xml" -File |
        Where-Object { $_.Extension -eq '.xml' }
)

# Nothing to do is not an error: warn and exit successfully.
if ($xmlFiles.Count -eq 0) {
    Write-Warning "No XML files found in directory: $DirectoryPath"
    exit 0
}

Write-Host "Found $($xmlFiles.Count) XML files to process..." -ForegroundColor Yellow

# Accumulates one result object per file (success or failure).
$results = @()

# Process every XML file: parse it, read the EntityDescriptor entityID, derive a
# short name from that entityID, read the AssertionConsumerService Location, and
# append exactly one result object (success or failure) to $results.
foreach ($file in $xmlFiles) {
    Write-Host "Processing: $($file.Name)" -ForegroundColor Cyan

    try {
        # -LiteralPath: file names containing [ or ] must not be treated as wildcards.
        # -ErrorAction Stop: a read failure should reach the catch block with its
        # real message instead of falling through as "empty file".
        $rawContent = Get-Content -LiteralPath $file.FullName -Raw -Encoding UTF8 -ErrorAction Stop

        # Validate before the debug preview below, which calls methods on the string.
        if ([string]::IsNullOrWhiteSpace($rawContent)) {
            throw "File appears to be empty or contains only whitespace"
        }

        if ($DebugOutput) {
            Write-Host "  Raw content length: $($rawContent.Length)" -ForegroundColor Gray
            Write-Host "  First 200 chars: $($rawContent.Substring(0, [Math]::Min(200, $rawContent.Length)))" -ForegroundColor Gray
        }

        # A byte-order mark that survives decoding is a single U+FEFF character in
        # a .NET string, so strip it by character (never by a fixed offset), then
        # trim surrounding whitespace so the XML declaration is at position 0.
        $cleanContent = $rawContent.TrimStart([char]0xFEFF).Trim()

        $xmlContent = New-Object System.Xml.XmlDocument
        # Hardening: do not resolve external DTDs/entities referenced by the file.
        $xmlContent.XmlResolver = $null
        $xmlContent.LoadXml($cleanContent)

        if ($DebugOutput) {
            Write-Host "  XML loaded successfully. Root element: $($xmlContent.DocumentElement.Name)" -ForegroundColor Gray
            Write-Host "  Child nodes count: $($xmlContent.DocumentElement.ChildNodes.Count)" -ForegroundColor Gray
            Write-Host "  XML OuterXml preview: $($xmlContent.OuterXml.Substring(0, [Math]::Min(300, $xmlContent.OuterXml.Length)))" -ForegroundColor Gray
        }

        # One namespace manager per document, shared by both lookups below.
        $nsManager = New-Object System.Xml.XmlNamespaceManager($xmlContent.NameTable)
        $nsManager.AddNamespace("md", "urn:oasis:names:tc:SAML:2.0:metadata")

        $entityID = $null
        $location = $null
        $shortName = $null

        # EntityDescriptor: try the un-namespaced name first, then the SAML
        # metadata namespace.
        $entityDescriptor = $xmlContent.SelectSingleNode("//EntityDescriptor")
        if ($null -eq $entityDescriptor) {
            $entityDescriptor = $xmlContent.SelectSingleNode("//md:EntityDescriptor", $nsManager)
        }

        if ($null -ne $entityDescriptor) {
            if ($DebugOutput) {
                Write-Host "  Found EntityDescriptor node" -ForegroundColor Gray
            }
            # Guard with HasAttribute so a missing attribute stays $null rather
            # than becoming the empty string GetAttribute would return.
            if ($entityDescriptor.HasAttribute("entityID")) {
                $entityID = $entityDescriptor.GetAttribute("entityID")
            }
        }

        # ShortName = text before the first '.' of the host portion of entityID.
        if ($entityID) {
            try {
                # Drop a leading http:// or https:// (case-sensitive), if present.
                $domain = $entityID -creplace '^https?://', ''

                # Keep only what precedes the first '/', when there is one past index 0.
                $firstSlashIndex = $domain.IndexOf("/")
                if ($firstSlashIndex -gt 0) {
                    $domain = $domain.Substring(0, $firstSlashIndex)
                }

                # An empty host (e.g. entityID is just "https://") yields no short name.
                if ($domain) {
                    $firstDotIndex = $domain.IndexOf(".")
                    if ($firstDotIndex -gt 0) {
                        $shortName = $domain.Substring(0, $firstDotIndex)
                    } else {
                        $shortName = $domain
                    }
                }

                if ($DebugOutput) {
                    Write-Host "  Extracted domain: '$domain', ShortName: '$shortName' from EntityID: '$entityID'" -ForegroundColor Gray
                }
            } catch {
                # Best effort: a short-name failure must not fail the whole file.
                if ($DebugOutput) {
                    Write-Host "  Error extracting ShortName: $($_.Exception.Message)" -ForegroundColor Yellow
                }
                $shortName = $null
            }
        }

        # AssertionConsumerService: same two-step lookup; only the first match is used.
        $assertionConsumerService = $xmlContent.SelectSingleNode("//AssertionConsumerService")
        if ($null -eq $assertionConsumerService) {
            $assertionConsumerService = $xmlContent.SelectSingleNode("//md:AssertionConsumerService", $nsManager)
        }

        if ($null -ne $assertionConsumerService) {
            if ($DebugOutput) {
                # -join instead of Join-String, which Windows PowerShell 5.1 lacks.
                $attributeSummary = @(
                    $assertionConsumerService.Attributes |
                        ForEach-Object { "$($_.Name)='$($_.Value)'" }
                ) -join ', '
                Write-Host "  Found AssertionConsumerService node. Attributes: $($assertionConsumerService.Attributes.Count)" -ForegroundColor Gray
                Write-Host "  ACS attributes: $attributeSummary" -ForegroundColor Gray
            }
            if ($assertionConsumerService.HasAttribute("Location")) {
                $location = $assertionConsumerService.GetAttribute("Location")
            }
        }

        # Success row.
        $result = [PSCustomObject]@{
            FileName = $file.Name
            FilePath = $file.FullName
            FileSize = $file.Length
            EntityID = $entityID
            ShortName = $shortName
            AssertionConsumerServiceLocation = $location
            ProcessedSuccessfully = $true
            ErrorMessage = $null
            RootElement = $xmlContent.DocumentElement.Name
        }

        $results += $result

        # Per-file progress report.
        if ($entityID) {
            Write-Host "  ✓ EntityID: $entityID" -ForegroundColor Green
            if ($shortName) {
                Write-Host "  ✓ ShortName: $shortName" -ForegroundColor Green
            }
        } else {
            Write-Host "  ⚠ EntityID not found" -ForegroundColor Yellow
        }

        if ($location) {
            Write-Host "  ✓ Location: $location" -ForegroundColor Green
        } else {
            Write-Host "  ⚠ AssertionConsumerService Location not found" -ForegroundColor Yellow
        }

    } catch {
        Write-Host "  ✗ Error processing file: $($_.Exception.Message)" -ForegroundColor Red

        if ($DebugOutput) {
            Write-Host "  Full error: $($_.Exception.ToString())" -ForegroundColor Red
        }

        # Failure row: same columns as the success row so the CSV stays uniform.
        $result = [PSCustomObject]@{
            FileName = $file.Name
            FilePath = $file.FullName
            FileSize = if ($file.Length) { $file.Length } else { 0 }
            EntityID = $null
            ShortName = $null
            AssertionConsumerServiceLocation = $null
            ProcessedSuccessfully = $false
            ErrorMessage = $_.Exception.Message
            RootElement = $null
        }

        $results += $result
    }
}

# Export results to CSV. Only the export itself is inside the try block, so a
# problem while printing the summary is never misreported as a CSV failure.
try {
    # -ErrorAction Stop: make any write error terminating so it reaches the catch
    # instead of being followed by a misleading success message.
    $results | Export-Csv -Path $OutputCsvPath -NoTypeInformation -Encoding UTF8 -ErrorAction Stop
} catch {
    Write-Error "Failed to export CSV: $($_.Exception.Message)"
    exit 1
}

Write-Host "`nResults exported to: $OutputCsvPath" -ForegroundColor Green

# Summary counts. @( ... ) forces an array so .Count is correct for zero or one
# match; $null goes on the left of comparisons so it is a scalar test.
$failedResults  = @($results | Where-Object { -not $_.ProcessedSuccessfully })
$successCount   = @($results | Where-Object { $_.ProcessedSuccessfully }).Count
$errorCount     = $failedResults.Count
$foundEntityID  = @($results | Where-Object { $null -ne $_.EntityID }).Count
$foundShortName = @($results | Where-Object { $null -ne $_.ShortName }).Count
$foundLocation  = @($results | Where-Object { $null -ne $_.AssertionConsumerServiceLocation }).Count

Write-Host "`n--- Summary ---" -ForegroundColor Magenta
Write-Host "Total files processed: $(@($results).Count)"
Write-Host "Successfully processed: $successCount"
Write-Host "Errors encountered: $errorCount"
Write-Host "Files with EntityID: $foundEntityID"
Write-Host "Files with ShortName: $foundShortName"
Write-Host "Files with ACS Location: $foundLocation"

# List each failed file with the message captured when it was processed.
if ($errorCount -gt 0) {
    Write-Host "`nFiles with errors:" -ForegroundColor Red
    foreach ($failure in $failedResults) {
        Write-Host "  $($failure.FileName): $($failure.ErrorMessage)" -ForegroundColor Red
    }
}
# (page-footer residue from the site this script was copied from; not part of the script)
# Facebook
# Twitter
# LinkedIn
# Book a call