From 17ed0617914c3884d17c7203e15e9baf3287365f Mon Sep 17 00:00:00 2001 From: ztrhgf Date: Thu, 12 Feb 2026 13:47:52 +0100 Subject: [PATCH 1/3] issue 112 fixes (rework how long JSON paths are managed) --- src/EntraExporter.psd1 | 2 + src/Export-Entra.ps1 | 5 +- src/command/Get-AzurePIMDirectoryRoles.ps1 | 5 +- src/command/Get-AzurePIMGroups.ps1 | 5 +- src/command/Get-AzurePIMResources.ps1 | 20 ++++---- .../Get-AzureResourceAccessPolicies.ps1 | 13 +++-- src/command/Get-AzureResourceIAMData.ps1 | 47 ++++++++----------- src/internal/Invoke-FilePathCheck.ps1 | 11 +++++ ...ibilityScheduleRequestIdSimplification.ps1 | 19 ++++++++ 9 files changed, 72 insertions(+), 55 deletions(-) create mode 100644 src/internal/Invoke-FilePathCheck.ps1 create mode 100644 src/internal/Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1 diff --git a/src/EntraExporter.psd1 b/src/EntraExporter.psd1 index 38203a3..32eb7d2 100644 --- a/src/EntraExporter.psd1 +++ b/src/EntraExporter.psd1 @@ -73,6 +73,8 @@ 'internal\Search-AzGraph2.ps1' 'internal\Get-MgGraphAllPages.ps1' 'internal\Get-AzureDirectoryObject.ps1' + 'internal\Invoke-FilePathCheck.ps1' + 'internal/Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1' 'command\Get-AccessPackageAssignmentPolicies.ps1' 'command\Get-AccessPackageAssignments.ps1' 'command\Get-AccessPackageResourceScopes.ps1' diff --git a/src/Export-Entra.ps1 b/src/Export-Entra.ps1 index 8d4875e..d6c0ace 100644 --- a/src/Export-Entra.ps1 +++ b/src/Export-Entra.ps1 @@ -523,9 +523,8 @@ $outputFileName = Join-Path (Join-Path -Path $outputFileName -ChildPath $itemId) -ChildPath "$itemId.json" } - if ($outputFileName.Length -gt 255 -and (Get-ItemPropertyValue HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1) { - Write-Warning "Output file path '$outputFileName' is longer than 255 characters. Enable long path support to continue!" 
- return + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue } $item | Select-Object * -ExcludeProperty RequestId | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) diff --git a/src/command/Get-AzurePIMDirectoryRoles.ps1 b/src/command/Get-AzurePIMDirectoryRoles.ps1 index 5ceaa05..324e1d7 100644 --- a/src/command/Get-AzurePIMDirectoryRoles.ps1 +++ b/src/command/Get-AzurePIMDirectoryRoles.ps1 @@ -229,9 +229,8 @@ $outputFileName = Join-Path -Path $rootFolder -ChildPath "$itemId.json" - if ($outputFileName.Length -gt 255 -and (Get-ItemPropertyValue HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1) { - Write-Warning "Output file path '$outputFileName' is longer than 255 characters. Enable long path support to continue!" - return + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue } $item | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) diff --git a/src/command/Get-AzurePIMGroups.ps1 b/src/command/Get-AzurePIMGroups.ps1 index f50fc9c..2a6f326 100644 --- a/src/command/Get-AzurePIMGroups.ps1 +++ b/src/command/Get-AzurePIMGroups.ps1 @@ -129,9 +129,8 @@ $outputFileName = Join-Path -Path $rootFolder -ChildPath "$itemId.json" - if ($outputFileName.Length -gt 255 -and (Get-ItemPropertyValue HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1) { - Write-Warning "Output file path '$outputFileName' is longer than 255 characters. Enable long path support to continue!" 
- return + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue } # Hide warning for depth when converting to JSON diff --git a/src/command/Get-AzurePIMResources.ps1 b/src/command/Get-AzurePIMResources.ps1 index f18e9e8..e2b4e92 100644 --- a/src/command/Get-AzurePIMResources.ps1 +++ b/src/command/Get-AzurePIMResources.ps1 @@ -269,35 +269,31 @@ } #endregion functions - $joinChar = "&" - Get-PIMManagementGroupEligibleAssignment | % { $item = $_ - $itemId = $item.roleEligibilityScheduleRequestId -replace "/", $joinChar + $itemId = Invoke-RoleEligibilityScheduleRequestIdSimplification -id $item.roleEligibilityScheduleRequestId $outputFileName = Join-Path -Path (Join-Path -Path $rootFolder -ChildPath "ManagementGroups") -ChildPath "$itemId.json" - if ($outputFileName.Length -gt 255 -and (Get-ItemPropertyValue HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1) { - Write-Warning "Output file path '$outputFileName' is longer than 255 characters. Enable long path support to continue!" - return + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue } - $item | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $item | ConvertTo-Json -Depth 100 | Out-File (New-Item -Path $outputFileName -Force) } Get-PIMSubscriptionEligibleAssignment | ? { $_ } | % { $item = $_ - $itemId = $item.roleEligibilityScheduleRequestId -replace "/", $joinChar + $itemId = Invoke-RoleEligibilityScheduleRequestIdSimplification -id $item.roleEligibilityScheduleRequestId $outputFileName = Join-Path -Path (Join-Path -Path $rootFolder -ChildPath "Subscriptions") -ChildPath "$itemId.json" - if ($outputFileName.Length -gt 255 -and (Get-ItemPropertyValue HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1) { - Write-Warning "Output file path '$outputFileName' is longer than 255 characters. Enable long path support to continue!" 
- return + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue } - $item | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $item | ConvertTo-Json -Depth 100 | Out-File (New-Item -Path $outputFileName -Force) } } \ No newline at end of file diff --git a/src/command/Get-AzureResourceAccessPolicies.ps1 b/src/command/Get-AzureResourceAccessPolicies.ps1 index 6b875a3..7c5397f 100644 --- a/src/command/Get-AzureResourceAccessPolicies.ps1 +++ b/src/command/Get-AzureResourceAccessPolicies.ps1 @@ -53,17 +53,16 @@ Search-AzGraph2 -query $query } - $joinChar = "&" + $joinChar = [System.IO.Path]::DirectorySeparatorChar Get-AzureResourceAccessPolicy | % { $result = $_ - $scopeId = $result.subscriptionId - $id = $result.id -replace "/", $joinChar + $id = $result.id + $id = $id -replace "/subscriptions/", "" + $id = $id -replace "/", $joinChar - $outputPath = Join-Path -Path (Join-Path -Path $rootFolder -ChildPath "Subscriptions") -ChildPath $scopeId + $outputFileName = Join-Path -Path $rootFolder -ChildPath "$id.json" - $outputFileName = Join-Path -Path $outputPath -ChildPath "$id.json" - - $result | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $result | ConvertTo-Json -Depth 100 | Out-File (New-Item -Path $outputFileName -Force) } } \ No newline at end of file diff --git a/src/command/Get-AzureResourceIAMData.ps1 b/src/command/Get-AzureResourceIAMData.ps1 index 5c605fb..2745c61 100644 --- a/src/command/Get-AzureResourceIAMData.ps1 +++ b/src/command/Get-AzureResourceIAMData.ps1 @@ -72,7 +72,7 @@ authorizationresources $kqlResult = Search-AzGraph2 -query $query # there can be duplicates with different createdOn/updatedOn, keep just the latest one - $kqlResult = $kqlResult | Group-Object -Property ($property | ? {$_ -notin "createdOn", "updatedOn"}) | % {if ($_.count -eq 1) {$_.group} else {$_.group | sort updatedOn | select -First 1}} + $kqlResult = $kqlResult | Group-Object -Property ($property | ? 
{$_ -notin "createdOn", "updatedOn"}) | % {if ($_.count -eq 1) {$_.group} else {$_.group | Sort-Object updatedOn | select -First 1}} if (!$kqlResult) { return } #endregion run the query @@ -80,43 +80,36 @@ authorizationresources # get the principal name from its id $idToNameList = Get-AzureDirectoryObject -id ($kqlResult.principalId | select -Unique) - $joinChar = "&" # output the final results $kqlResult | select @{n = 'PrincipalName'; e = { $id = $_.PrincipalId; $result = $idToNameList | ? Id -EQ $id; if ($result.DisplayName) { $result.DisplayName } else { $result.mailNickname } } }, PrincipalId, PrincipalType, RoleDefinitionName, RoleDefinitionId, Scope, @{ n = 'ScopeType'; e = { _scopeType $_.scope } }, ManagementGroupId, SubscriptionId, SubscriptionName, ResourceGroup, CreatedOn, UpdatedOn | % { $item = $_ - switch ($item.scopeType) { - 'root' { - $outputPath = Join-Path -Path $assignmentsFolder -ChildPath "Root" - } - 'managementGroup' { - $outputPath = Join-Path -Path (Join-Path -Path $assignmentsFolder -ChildPath "ManagementGroups") -ChildPath $item.ManagementGroupId - } - 'subscription' { - $outputPath = Join-Path -Path (Join-Path -Path $assignmentsFolder -ChildPath "Subscriptions") -ChildPath $item.SubscriptionId - } - 'resourceGroup' { - $outputPath = Join-Path -Path (Join-Path -Path (Join-Path -Path $assignmentsFolder -ChildPath "Subscriptions") -ChildPath $item.SubscriptionId) -ChildPath $item.ResourceGroup - } - 'resource' { - # $folder = ($item.Scope.Split("/")[-3..-1] -join $joinChar) - $folder = $item.Scope -replace "/", $joinChar - $outputPath = Join-Path -Path (Join-Path -Path (Join-Path -Path (Join-Path -Path $assignmentsFolder -ChildPath "Subscriptions") -ChildPath $item.SubscriptionId) -ChildPath $item.ResourceGroup) -ChildPath $folder - } - default { - Write-Warning "Undefined scope type $($item.scopeType)" - return - } + if ($item.scopeType -eq 'root') + { + $outputPath = Join-Path -Path $assignmentsFolder -ChildPath "root" + } + else + { + 
$joinChar = [System.IO.Path]::DirectorySeparatorChar + + # simplify the scope to create more readable file names and avoid too long path issues + $folder = $item.Scope + $folder = $folder -replace "/providers/Microsoft.Management/", "" + + # replace remaining "/" with directory separator char to create folder structure based on the scope + $folder = $folder -replace "/", $joinChar + + $outputPath = Join-Path -Path $assignmentsFolder -ChildPath $folder } + $joinChar = "&" $itemId = $item.principalId + $joinChar + ($item.roleDefinitionId).split("/")[-1] $outputFileName = Join-Path -Path $outputPath -ChildPath "$itemId.json" - if ($outputFileName.Length -gt 255 -and (Get-ItemPropertyValue HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1) { - Write-Warning "Output file path '$outputFileName' is longer than 255 characters. Enable long path support to continue!" - return + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue } if (Test-Path $outputFileName -ErrorAction SilentlyContinue) { diff --git a/src/internal/Invoke-FilePathCheck.ps1 b/src/internal/Invoke-FilePathCheck.ps1 new file mode 100644 index 0000000..699746f --- /dev/null +++ b/src/internal/Invoke-FilePathCheck.ps1 @@ -0,0 +1,11 @@ +function Invoke-FilePathCheck { + [CmdletBinding()] + param ( + [Parameter(Mandatory = $true)] + [string] $FilePath + ) + + if ($env:OS -eq "Windows_NT" -and $FilePath.Length -gt 255 -and (Get-ItemPropertyValue "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1){ + throw "Output file path '$FilePath' is longer than 255 characters. Enable long path support to continue!" 
+ } +} \ No newline at end of file diff --git a/src/internal/Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1 b/src/internal/Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1 new file mode 100644 index 0000000..020bdd9 --- /dev/null +++ b/src/internal/Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1 @@ -0,0 +1,19 @@ +function Invoke-RoleEligibilityScheduleRequestIdSimplification { + [CmdletBinding()] + param ( + [Parameter(Mandatory = $true)] + [string] $id + ) + + $joinChar = [System.IO.Path]::DirectorySeparatorChar + + # simplify the id to create more readable file names and avoid too long path issues + $id = $id -replace "/subscriptions/", "" + $id = $id -replace "/providers/Microsoft.Management/managementGroups/", "" + + $id = $id -replace "/providers/Microsoft.Authorization/roleEligibilityScheduleRequests", "" + # replace remaining "/" with directory separator char to create folder structure based on scope and assignment id + $id = $id -replace "/", $joinChar + + $id +} \ No newline at end of file From 4fbcfbdfe1756b34e438b6d15be0a5526d96aa04 Mon Sep 17 00:00:00 2001 From: ztrhgf Date: Fri, 10 Apr 2026 16:46:11 +0200 Subject: [PATCH 2/3] process schema item & children one by one and flush to disk immediately optimization for larger tenants --- src/Export-Entra.ps1 | 255 +++++++++++++------------- src/internal/Invoke-FilePathCheck.ps1 | 5 +- 2 files changed, 128 insertions(+), 132 deletions(-) diff --git a/src/Export-Entra.ps1 b/src/Export-Entra.ps1 index d6c0ace..3779828 100644 --- a/src/Export-Entra.ps1 +++ b/src/Export-Entra.ps1 @@ -221,12 +221,80 @@ } } + function _flushResultsToDisk { + <# + Writes each result item to disk immediately and records the item's Id in the + parent-ID index so that it can be used for children lookups later. + This keeps memory usage low: full objects are discarded after writing. 
+ #> + param( + [array]$batchResults, + [hashtable]$parentIdIndex + ) + + foreach ($item in $batchResults) { + # --- populate parent-id index (only RequestId + Id needed afterwards) --- + $normalizedId = _normalizeRequestId $item.RequestId + if (!$parentIdIndex.ContainsKey($normalizedId)) { + $parentIdIndex[$normalizedId] = [System.Collections.Generic.List[string]]::new() + } + $indexId = Get-ObjectProperty $item 'Id' + if ($indexId) { + if (!$parentIdIndex[$normalizedId].Contains($indexId)) { + $parentIdIndex[$normalizedId].Add($indexId) + } + } + + # --- write item to disk --- + if (!(Get-ObjectProperty $item 'Id')) { + <# + In some special cases it can happen that 'id' property is missing like: + + isEnabled : True + notifyReviewers : True + remindersEnabled : False + requestDurationInDays : 14 + version : 0 + reviewers : {...} + RequestId : C:/temp/bkp3/Policies/AdminConsentRequestPolicy + #> + + $itemId = ($item.RequestId -split "/")[-1] + # remove the random number added to avoid duplicated ids in batch requests + $itemId = _normalizeRequestId $itemId + + Write-Verbose ($item | convertto-json -WarningAction SilentlyContinue) + Write-Verbose "Result without 'id' property, using '$itemId' instead (RequestId '$($item.RequestId)')!" + } else { + $itemId = $item.id + } + + if (!$item.RequestId) { + $item + Write-Warning "Item without RequestId. Shouldn't happen!" 
+ } + + $outputFileName = $item.RequestId -replace "/", "\" + # remove the random number added to avoid duplicated ids in batch requests + $outputFileName = _normalizeRequestId $outputFileName + + if ($outputFileName -notmatch "\.json$") { + $outputFileName = Join-Path (Join-Path -Path $outputFileName -ChildPath $itemId) -ChildPath "$itemId.json" + } + + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { + continue + } + + $item | Select-Object * -ExcludeProperty RequestId | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + } + } + function _processChildrenRecursive { param( [array]$schemaItems, [string]$basePath, [array]$parentIds, - [ref]$results, [ref]$batchRequestStableApi, [ref]$batchRequestBetaApi ) @@ -318,7 +386,7 @@ param( [ref]$batchRequestStableApi, [ref]$batchRequestBetaApi, - [ref]$results, + [ref]$parentIdIndex, [array]$requestedExportSchema ) @@ -328,7 +396,8 @@ $batchResults = Invoke-GraphBatchRequest -batchRequest $batchRequestStableApi.Value -separateErrors -ErrorAction SilentlyContinue -ErrorVariable requestErrors -WarningAction SilentlyContinue if ($batchResults) { - $results.Value.AddRange(@($batchResults)) + # flush to disk immediately and record IDs for parent-child lookups + _flushResultsToDisk -batchResults @($batchResults) -parentIdIndex $parentIdIndex.Value } _processBatchErrors -requestErrors $requestErrors -requestedExportSchema $requestedExportSchema @@ -341,7 +410,8 @@ $batchResults = Invoke-GraphBatchRequest -batchRequest $batchRequestBetaApi.Value -graphVersion beta -separateErrors -ErrorAction SilentlyContinue -ErrorVariable requestErrors -WarningAction SilentlyContinue if ($batchResults) { - $results.Value.AddRange(@($batchResults)) + # flush to disk immediately and record IDs for parent-child lookups + _flushResultsToDisk -batchResults @($batchResults) -parentIdIndex $parentIdIndex.Value } _processBatchErrors -requestErrors $requestErrors -requestedExportSchema $requestedExportSchema @@ -350,16 
+420,20 @@ } #endregion helper functions - #region process all schema items recursively - $results = [System.Collections.Generic.List[Object]]::new() + #region process all schema items one by one + # Each root schema item is fully processed (request -> flush to disk -> all children at all + # depths) before moving to the next one. $parentIdIndex and $script:childrenToProcess are + # reset per root item so lookups stay scoped to the current item's result set. $batchRequestStableApi = [System.Collections.Generic.List[Object]]::new() - $batchRequestBetaApi = [System.Collections.Generic.List[Object]]::new() - $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() + $batchRequestBetaApi = [System.Collections.Generic.List[Object]]::new() $requestedExportSchema = $ExportSchema | ? { Compare-Object $_.Tag $Type -ExcludeDifferent -IncludeEqual } - # process root level items foreach ($item in $requestedExportSchema) { + # reset per-item state so child lookups don't match results from a previous schema item + $parentIdIndex = @{} + $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() + $outputFileName = Join-Path -Path $Path -ChildPath $item.Path Write-Host "➡️ $($item.GraphUri)" @@ -369,43 +443,24 @@ continue } - $command = Get-ObjectProperty $item 'Command' - $graphUri = Get-ObjectProperty $item 'GraphUri' + $command = Get-ObjectProperty $item 'Command' + $graphUri = Get-ObjectProperty $item 'GraphUri' $apiVersion = Get-ObjectProperty $item 'ApiVersion' - $ignoreError = Get-ObjectProperty $item 'IgnoreError' - $children = Get-ObjectProperty $item 'Children' + $children = Get-ObjectProperty $item 'Children' if (!$apiVersion) { $apiVersion = 'v1.0' } - if($command) { + if ($command) { $commandParams = @{} switch ($command) { - 'Get-AzureResourceIAMData' { - $commandParams.RootFolder = $outputFileName - } - - 'Get-AzurePIMDirectoryRoles' { - $commandParams.RootFolder = $outputFileName - } - - 'Get-AzurePIMResources' { - 
$commandParams.RootFolder = $outputFileName - } - - 'Get-AzurePIMGroups' { - $commandParams.RootFolder = $outputFileName - } - - 'Get-AzureResourceAccessPolicies' { - $commandParams.RootFolder = $outputFileName - } - - default { - Write-Warning "Unknown command '$command'" - } + 'Get-AzureResourceIAMData' { $commandParams.RootFolder = $outputFileName } + 'Get-AzurePIMDirectoryRoles' { $commandParams.RootFolder = $outputFileName } + 'Get-AzurePIMResources' { $commandParams.RootFolder = $outputFileName } + 'Get-AzurePIMGroups' { $commandParams.RootFolder = $outputFileName } + 'Get-AzureResourceAccessPolicies' { $commandParams.RootFolder = $outputFileName } + default { Write-Warning "Unknown command '$command'" } } - # invoke the command with splatting & $command @commandParams } else { @@ -421,113 +476,51 @@ $request = New-GraphBatchRequest -Url $uri -Id $id -header @{ ConsistencyLevel = 'eventual' } - if ($apiVersion -eq 'beta') { - $batchRequestBetaApi.Add($request) - } - else { - $batchRequestStableApi.Add($request) - } + if ($apiVersion -eq 'beta') { $batchRequestBetaApi.Add($request) } + else { $batchRequestStableApi.Add($request) } + + # execute and flush this item's results to disk before processing its children + _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -parentIdIndex ([ref]$parentIdIndex) -requestedExportSchema @($item) } - # track children for later processing + # queue children for this item if ($children) { $script:childrenToProcess.Add(@{ - Children = $children - BasePath = Join-Path -Path $Path -ChildPath $item.Path + Children = $children + BasePath = Join-Path -Path $Path -ChildPath $item.Path ParentPath = $item.Path }) } - } - - # execute root level batch requests - _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -results ([ref]$results) -requestedExportSchema $requestedExportSchema - # process 
children recursively - while ($script:childrenToProcess.Count -gt 0) { - $currentBatch = $script:childrenToProcess - $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() + # drain all children (and grandchildren) for this item before moving to the next root item + while ($script:childrenToProcess.Count -gt 0) { + $currentBatch = $script:childrenToProcess + $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() - foreach ($childGroup in $currentBatch) { - Write-Verbose "Looking for results for parent with path '$($childGroup.ParentPath)'" + foreach ($childGroup in $currentBatch) { + Write-Verbose "Looking for results for parent with path '$($childGroup.ParentPath)'" - $parentResult = $results | Where-Object { - $normalizedRequestId = _normalizeRequestId $_.RequestId - $normalizedRequestId -eq ($childGroup.BasePath -replace "\\", "/") -or - $normalizedRequestId -like ("$($childGroup.ParentPath)*" -replace "\\", "/") - } + $basePathNormalized = $childGroup.BasePath -replace "\\", "/" + $parentPathPattern = "$($childGroup.ParentPath)*" -replace "\\", "/" - if (!$parentResult) { - Write-Verbose "Parent '$($childGroup.ParentPath)' doesn't contain any data, skipping children retrieval" - continue - } + # look up parent IDs from the index (keyed by normalized RequestId) + $parentIds = $parentIdIndex.Keys | Where-Object { + $_ -eq $basePathNormalized -or $_ -like $parentPathPattern + } | ForEach-Object { $parentIdIndex[$_] } | ForEach-Object { $_ } | Select-Object -Unique - # there can be multiple parent items with same Path, remove duplicates just in case - $parentIds = $parentResult.Id | select -Unique - Write-Verbose "Processing children results for parent '$($childGroup.ParentPath)' ($($parentIds.count))" - - _processChildrenRecursive -schemaItems $childGroup.Children -basePath $childGroup.BasePath -parentIds $parentIds -results ([ref]$results) -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi 
([ref]$batchRequestBetaApi) - } - - # execute batch requests for this level - _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -results ([ref]$results) -requestedExportSchema $requestedExportSchema - } - #endregion process all schema items recursively - - #region output results - foreach ($item in $results) { - if (!(Get-ObjectProperty $item 'Id')){ - <# - In some special cases it can happen that 'id' property is missing like: - - isEnabled : True - notifyReviewers : True - remindersEnabled : False - requestDurationInDays : 14 - version : 0 - reviewers : {@{query=/v1.0/groups/b3dbfaaa-4447-4ebe-8d28-c885c851828b/transitiveMembers/microsoft.graph.user; queryType=MicrosoftGraph; queryRoot=}, @{query=/beta/roleManagement/directory/roleAssignments?$filter=roleDefinitionId eq '62e90394-69f5-4237-9190-012177145e10'; queryType=MicrosoftGraph; queryRoot=}} - RequestId : C:/temp/bkp3/Policies/AdminConsentRequestPolicy - - tenantId : 6abd85ef-c27c-4e71-b000-4c68074a6f7b - isServiceProvider : True - isInMultiTenantOrganization : False - inboundTrust : - b2bCollaborationOutbound : - b2bCollaborationInbound : - b2bDirectConnectOutbound : - b2bDirectConnectInbound : - tenantRestrictions : - automaticUserConsentSettings : @{inboundAllowed=; outboundAllowed=} - RequestId : C:/temp/bkp3/Policies/CrossTenantAccessPolicy/Partners - #> - - $itemId = ($item.RequestId -split "/")[-1] - # remove the random number added to avoid duplicated ids in batch requests - $itemId = _normalizeRequestId $itemId - - Write-Verbose ($item | convertto-json -WarningAction SilentlyContinue) - Write-Verbose "Result without 'id' property, using '$itemId' instead (RequestId '$($item.RequestId)')!" - } else { - $itemId = $item.id - } - - if (!$item.RequestId) { - $item - Write-Warning "Item without RequestId. Shouldn't happen!" 
- } + if (!$parentIds) { + Write-Verbose "Parent '$($childGroup.ParentPath)' doesn't contain any data, skipping children retrieval" + continue + } - $outputFileName = $item.RequestId -replace "/", "\" - # remove the random number added to avoid duplicated ids in batch requests - $outputFileName = _normalizeRequestId $outputFileName + Write-Verbose "Processing children results for parent '$($childGroup.ParentPath)' ($(@($parentIds).count))" - if ($outputFileName -notmatch "\.json$") { - $outputFileName = Join-Path (Join-Path -Path $outputFileName -ChildPath $itemId) -ChildPath "$itemId.json" - } + _processChildrenRecursive -schemaItems $childGroup.Children -basePath $childGroup.BasePath -parentIds $parentIds -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) + } - if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { - continue + # execute and flush this level's child requests to disk immediately + _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -parentIdIndex ([ref]$parentIdIndex) -requestedExportSchema $requestedExportSchema } - - $item | Select-Object * -ExcludeProperty RequestId | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) } - #endregion output results -} \ No newline at end of file + #endregion process all schema items one by one +} diff --git a/src/internal/Invoke-FilePathCheck.ps1 b/src/internal/Invoke-FilePathCheck.ps1 index 699746f..d70a96e 100644 --- a/src/internal/Invoke-FilePathCheck.ps1 +++ b/src/internal/Invoke-FilePathCheck.ps1 @@ -6,6 +6,9 @@ function Invoke-FilePathCheck { ) if ($env:OS -eq "Windows_NT" -and $FilePath.Length -gt 255 -and (Get-ItemPropertyValue "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -ErrorAction SilentlyContinue) -ne 1){ - throw "Output file path '$FilePath' is longer than 255 characters. Enable long path support to continue!" 
+ Write-Warning "Output file path '$FilePath' is longer than 255 characters. Enable long path support to enable export!" + return $false + } else { + return $true } } \ No newline at end of file From 2396dab03657627fef2193979afd3f9adb10a2de Mon Sep 17 00:00:00 2001 From: ztrhgf Date: Mon, 13 Apr 2026 12:22:35 +0200 Subject: [PATCH 3/3] 3.0.1 --- src/EntraExporter.psd1 | 61 +- src/Export-Entra.ps1 | 460 +++++++++------ src/Get-EEDefaultSchema.ps1 | 6 +- src/command/Get-AzurePIMDirectoryRoles.ps1 | 7 +- src/command/Get-AzurePIMGroups.ps1 | 3 +- src/command/Get-AzurePIMResources.ps1 | 4 +- .../Get-AzureResourceAccessPolicies.ps1 | 2 +- src/command/Get-AzureResourceIAMData.ps1 | 6 +- src/internal/ConvertTo-OrderedDictionary.ps1 | 352 +++++++++++- src/internal/Invoke-GraphBatchRequest.ps1 | 543 ++++++++++-------- src/internal/SaveAs-SortedJSON.ps1 | 28 + 11 files changed, 1001 insertions(+), 471 deletions(-) create mode 100644 src/internal/SaveAs-SortedJSON.ps1 diff --git a/src/EntraExporter.psd1 b/src/EntraExporter.psd1 index 32eb7d2..6378711 100644 --- a/src/EntraExporter.psd1 +++ b/src/EntraExporter.psd1 @@ -1,31 +1,31 @@ @{ # Script module or binary module file associated with this manifest. - RootModule = 'EntraExporter.psm1' + RootModule = 'EntraExporter.psm1' # Version number of this module. - ModuleVersion = '3.0.0' - + ModuleVersion = '3.0.1' + # Supported PSEditions - CompatiblePSEditions = 'Core','Desktop' + CompatiblePSEditions = 'Core', 'Desktop' # ID used to uniquely identify this module - GUID = 'd6c15273-d343-4556-a30d-b333eca3c1ab' + GUID = 'd6c15273-d343-4556-a30d-b333eca3c1ab' # Author of this module - Author = 'Microsoft Identity' + Author = 'Microsoft Identity' # Company or vendor of this module - CompanyName = 'Microsoft Corporation' + CompanyName = 'Microsoft Corporation' # Copyright statement for this module - Copyright = 'Microsoft Corporation. All rights reserved.' + Copyright = 'Microsoft Corporation. All rights reserved.' 
# Description of the functionality provided by this module - Description = 'This module exports an Entra tenant''s identity related configuration settings and objects and writes them to json files.' + Description = 'This module exports an Entra tenant''s identity related configuration settings and objects and writes them to json files.' # Minimum version of the Windows PowerShell engine required by this module - PowerShellVersion = '5.1' + PowerShellVersion = '5.1' # Name of the Windows PowerShell host required by this module # PowerShellHostName = '' @@ -43,7 +43,7 @@ # ProcessorArchitecture = '' # Modules that must be imported into the global environment prior to importing this module - RequiredModules = @( + RequiredModules = @( @{ ModuleName = 'Az.Accounts'; Guid = '17a2feff-488b-47f9-8729-e2cec094624c'; ModuleVersion = '3.0.2' }, @{ ModuleName = 'Microsoft.Graph.Authentication'; Guid = '883916f2-9184-46ee-b1f8-b6a2fb784cee'; ModuleVersion = '2.8.0' } ) @@ -51,7 +51,7 @@ # RequiredAssemblies = @() # Script files (.ps1) that are run in the caller's environment prior to importing this module. 
- ScriptsToProcess = @("EntraExporterEnums.ps1") + ScriptsToProcess = @("EntraExporterEnums.ps1") # Type files (.ps1xml) to be loaded when importing this module # TypesToProcess = @() @@ -60,7 +60,7 @@ # FormatsToProcess = @() # Modules to import as nested modules of the module specified in RootModule/ModuleToProcess - NestedModules = @( + NestedModules = @( 'internal\New-FinalUri.ps1' 'internal\Get-ObjectProperty.ps1' 'internal\ConvertTo-OrderedDictionary.ps1' @@ -74,7 +74,8 @@ 'internal\Get-MgGraphAllPages.ps1' 'internal\Get-AzureDirectoryObject.ps1' 'internal\Invoke-FilePathCheck.ps1' - 'internal/Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1' + 'internal\Invoke-RoleEligibilityScheduleRequestIdSimplification.ps1' + 'internal\SaveAs-SortedJSON.ps1' 'command\Get-AccessPackageAssignmentPolicies.ps1' 'command\Get-AccessPackageAssignments.ps1' 'command\Get-AccessPackageResourceScopes.ps1' @@ -92,7 +93,7 @@ ) # Functions to export from this module, for best performance, do not use wildcards and do not delete the entry, use an empty array if there are no functions to export. - FunctionsToExport = @( + FunctionsToExport = @( 'Connect-EntraExporter' 'Export-Entra' 'Get-EERequiredScopes' @@ -100,13 +101,13 @@ ) # Cmdlets to export from this module, for best performance, do not use wildcards and do not delete the entry, use an empty array if there are no cmdlets to export. - CmdletsToExport = @() + CmdletsToExport = @() # Variables to export from this module - VariablesToExport = @() + VariablesToExport = @() # Aliases to export from this module, for best performance, do not use wildcards and do not delete the entry, use an empty array if there are no aliases to export. - AliasesToExport = @() + AliasesToExport = @() # DSC resources to export from this module # DscResourcesToExport = @() @@ -118,24 +119,36 @@ # FileList = @() # Private data to pass to the module specified in RootModule/ModuleToProcess. 
This may also contain a PSData hashtable with additional module metadata used by PowerShell. - PrivateData = @{ + PrivateData = @{ PSData = @{ # Tags applied to this module. These help with module discovery in online galleries. - Tags = 'Microsoft', 'Identity', 'Azure', 'Entra', 'AzureAD', 'AAD', 'PSEdition_Desktop', 'Windows', 'Export', 'Backup', 'DR' + Tags = 'Microsoft', 'Identity', 'Azure', 'Entra', 'AzureAD', 'AAD', 'PSEdition_Desktop', 'Windows', 'Export', 'Backup', 'DR' # A URL to the license for this module. - LicenseUri = 'https://raw.githubusercontent.com/microsoft/entraexporter/main/LICENSE' + LicenseUri = 'https://raw.githubusercontent.com/microsoft/entraexporter/main/LICENSE' # A URL to the main website for this project. - ProjectUri = 'https://github.com/microsoft/entraexporter' + ProjectUri = 'https://github.com/microsoft/entraexporter' # A URL to an icon representing this module. # IconUri = '' # ReleaseNotes of this module ReleaseNotes = ' + 3.0.1 + FIXED + - path processing on linux + - sort issues on linux machines (replaced "sort" with "Sort-Object") + + CHANGED + - sort JSONs properties before export to avoid clutter in git history when just order of the object properties was changed + - optimized batch request processing by running multiple batch requests in parallel (PowerShell Core only!) + - added ThrottleLimit parameter to Export-Entra to control the number of concurrent batch requests (PowerShell Core only) + - RAM optimization: one schema item (and children) processed at once. Instead of all parent schema (and all children items) items at once. 
+ - IAM and AccessPolicies export: creating directory structure to mimic the scope (to minimize long path issues) + - reworked how PIM assignments for Resources are being exported to avoid long path issues 3.0.0 CHANGED - Replaced sequential API calls with batch requests where possible to significantly improve performance @@ -160,7 +173,7 @@ - Removed internal function Invoke-Graph (all calls are made via batching now) - Removed module Strict Mode restrictions ' - + } # End of PSData hashtable } # End of PrivateData hashtable @@ -171,4 +184,4 @@ # Default prefix for commands exported from this module. Override the default prefix using Import-Module -Prefix. # DefaultCommandPrefix = '' - } +} diff --git a/src/Export-Entra.ps1 b/src/Export-Entra.ps1 index 3779828..04e1b51 100644 --- a/src/Export-Entra.ps1 +++ b/src/Export-Entra.ps1 @@ -1,4 +1,5 @@ -function Export-Entra { +function Export-Entra +{ <# .SYNOPSIS Exports Entra's configuration and settings for a tenant. @@ -24,6 +25,15 @@ .PARAMETER CloudUsersAndGroupsOnly Excludes synched on-premises users and groups from the export. Only cloud-managed users and groups will be included. + .PARAMETER ExportSchema + Specifies the schema to use for the export. If not specified, the default schema will be used (generated by Get-EEDefaultSchema). + + .PARAMETER ThrottleLimit + Specifies the maximum number of concurrent batch requests when running the export (PowerShell Core only). + Adjust based on your environment and needs. Higher values may speed up the export but can lead to throttling by the Graph API and higher resource consumption, while lower values may reduce the chances of throttling but will take longer to complete. + + Default is 10. + .EXAMPLE Export-Entra -Path 'C:\EntraBackup\' @@ -101,45 +111,57 @@ # Specifies the schema to use for the export. If not specified, the default schema will be used. 
[Parameter(Mandatory = $false, ParameterSetName = 'AllTypes')] [Parameter(Mandatory = $false, ParameterSetName = 'SelectTypes')] - [object]$ExportSchema + [object]$ExportSchema, + + [int]$ThrottleLimit = 10 ) $mgContext = Get-MgContext - if (!$mgContext) { + if (!$mgContext) + { throw 'No active connection. Run Connect-EntraExporter or Connect-MgGraph to sign in and then retry.' } if ($All) { $Type = @('All') } $global:Type = $Type #Used in places like Groups where Config flag will limit the resultset to just dynamic groups. - if (!$ExportSchema) { + if (!$ExportSchema) + { $ExportSchema = Get-EEDefaultSchema } $authScope = $mgContext.AuthType - if ($authScope -eq "Delegated") { + if ($authScope -eq "Delegated") + { $schemaScopeType = "DelegatedPermission" - } else { + } + else + { $schemaScopeType = "ApplicationPermission" } # modify schema filter property if needed - foreach ($entry in $ExportSchema) { + foreach ($entry in $ExportSchema) + { $graphUri = Get-ObjectProperty $entry "GraphUri" # filter out synced users or groups - if ($CloudUsersAndGroupsOnly -and ($graphUri -in "users","groups")) { - if([string]::IsNullOrEmpty($entry.Filter)){ + if ($CloudUsersAndGroupsOnly -and ($graphUri -in "users", "groups")) + { + if ([string]::IsNullOrEmpty($entry.Filter)) + { $entry.Filter = "onPremisesSyncEnabled ne true" } - else { + else + { $entry.Filter = $entry.Filter + " and (onPremisesSyncEnabled ne true)" } } } #region helper functions - function _randomizeRequestId { + function _randomizeRequestId + { <# Adds a random number to the request ID to avoid duplicates in batch requests. @@ -158,7 +180,8 @@ $requestId + "%%%" + (Get-Random) + "%%%" } - function _normalizeRequestId { + function _normalizeRequestId + { <# Removes the randomization string (added to the request ID to avoid duplicates in batch requests). 
#> @@ -173,19 +196,23 @@ $requestId -replace "\%\%\%\d+\%\%\%", "" } - function _processBatchErrors { + function _processBatchErrors + { param( [array]$requestErrors, [array]$requestedExportSchema ) - foreach ($err in $requestErrors) { - if ($err.Exception.Source -eq "BatchRequest") { + foreach ($err in $requestErrors) + { + if ($err.Exception.Source -eq "BatchRequest") + { # batch request errors # it happens that before starting to retrieve app details, the app is deleted # in this case we get 404 error which we can safely ignore - if ($err.TargetObject.response.status -in 400,404) { + if ($err.TargetObject.response.status -in 400, 404) + { Write-Verbose "Ignoring request with id '$($err.TargetObject.request.id)' as it returned status code $($err.TargetObject.response.status)" continue } @@ -197,7 +224,8 @@ # ignore errors specified in the schema $requestedExportSchema.IgnoreError | select -Unique | % { - if ($err.Exception.Message -like "*$_*") { + if ($err.Exception.Message -like "*$_*") + { Write-Verbose "Ignoring request with id '$($err.TargetObject.request.id)' as it returned error to ignore '$_'" continue } @@ -205,7 +233,8 @@ # ignore custom errors "The request did not have a subscription or a valid tenant level resource provider", "The filter 'applicableToScope eq ''' is not supported" | % { - if ($err.Exception.Message -like "*$_*") { + if ($err.Exception.Message -like "*$_*") + { Write-Verbose "Ignoring request with id '$($err.TargetObject.request.id)' as it returned error to ignore '$_'" continue } @@ -213,7 +242,9 @@ Write-Error $err break - } else { + } + else + { # other non-batch-related errors Write-Error $err break @@ -221,88 +252,23 @@ } } - function _flushResultsToDisk { - <# - Writes each result item to disk immediately and records the item's Id in the - parent-ID index so that it can be used for children lookups later. - This keeps memory usage low: full objects are discarded after writing. 
- #> - param( - [array]$batchResults, - [hashtable]$parentIdIndex - ) - - foreach ($item in $batchResults) { - # --- populate parent-id index (only RequestId + Id needed afterwards) --- - $normalizedId = _normalizeRequestId $item.RequestId - if (!$parentIdIndex.ContainsKey($normalizedId)) { - $parentIdIndex[$normalizedId] = [System.Collections.Generic.List[string]]::new() - } - $indexId = Get-ObjectProperty $item 'Id' - if ($indexId) { - if (!$parentIdIndex[$normalizedId].Contains($indexId)) { - $parentIdIndex[$normalizedId].Add($indexId) - } - } - - # --- write item to disk --- - if (!(Get-ObjectProperty $item 'Id')) { - <# - In some special cases it can happen that 'id' property is missing like: - - isEnabled : True - notifyReviewers : True - remindersEnabled : False - requestDurationInDays : 14 - version : 0 - reviewers : {...} - RequestId : C:/temp/bkp3/Policies/AdminConsentRequestPolicy - #> - - $itemId = ($item.RequestId -split "/")[-1] - # remove the random number added to avoid duplicated ids in batch requests - $itemId = _normalizeRequestId $itemId - - Write-Verbose ($item | convertto-json -WarningAction SilentlyContinue) - Write-Verbose "Result without 'id' property, using '$itemId' instead (RequestId '$($item.RequestId)')!" - } else { - $itemId = $item.id - } - - if (!$item.RequestId) { - $item - Write-Warning "Item without RequestId. Shouldn't happen!" 
- } - - $outputFileName = $item.RequestId -replace "/", "\" - # remove the random number added to avoid duplicated ids in batch requests - $outputFileName = _normalizeRequestId $outputFileName - - if ($outputFileName -notmatch "\.json$") { - $outputFileName = Join-Path (Join-Path -Path $outputFileName -ChildPath $itemId) -ChildPath "$itemId.json" - } - - if (!(Invoke-FilePathCheck -FilePath $outputFileName)) { - continue - } - - $item | Select-Object * -ExcludeProperty RequestId | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) - } - } - - function _processChildrenRecursive { + function _processChildrenRecursive + { param( [array]$schemaItems, [string]$basePath, [array]$parentIds, + [ref]$results, [ref]$batchRequestStableApi, [ref]$batchRequestBetaApi ) - foreach ($item in $schemaItems) { + foreach ($item in $schemaItems) + { Write-Host "⏩→ $($item.GraphUri)" - if (!$item.$schemaScopeType) { + if (!$item.$schemaScopeType) + { Write-Verbose " - Skipping as it doesn't support '$schemaScopeType'" continue } @@ -314,19 +280,23 @@ $children = Get-ObjectProperty $item 'Children' if (!$apiVersion) { $apiVersion = 'v1.0' } - if ($command) { + if ($command) + { $commandParams = @{} # define how the command should be invoked - switch ($command) { - {$command -in 'Get-AccessPackageAssignmentPolicies', 'Get-AccessPackageAssignments', 'Get-AccessPackageResourceScopes'} { + switch ($command) + { + { $command -in 'Get-AccessPackageAssignmentPolicies', 'Get-AccessPackageAssignments', 'Get-AccessPackageResourceScopes' } + { $commandParams = @{ - Parents = $parentIds + Parents = $parentIds BasePath = $basePath } } - default { + default + { Write-Warning "Unknown command '$command'" } } @@ -334,13 +304,17 @@ # invoke the command with splatting & $command @commandParams } - else { + else + { $uri = New-FinalUri -RelativeUri $graphUri -Select (Get-ObjectProperty $item 'Select') -QueryParameters (Get-ObjectProperty $item 'QueryParameters') -Filter 
(Get-ObjectProperty $item 'Filter') $parentIds | % { - if ($item.Path -match "\.json$") { + if ($item.Path -match "\.json$") + { $outputFileName = Join-Path -Path $basePath -ChildPath $item.Path - } else { + } + else + { $outputFileName = Join-Path -Path $basePath -ChildPath $_ $outputFileName = Join-Path -Path $outputFileName -ChildPath $item.Path } @@ -352,52 +326,60 @@ Write-Verbose "Adding request '$uri' with id '$id' to the batch" - $request = New-GraphBatchRequest -Url $uri -Id $id -placeholder $_ -header @{ ConsistencyLevel = 'eventual' } + $request = New-GraphBatchRequest -url $uri -id $id -placeholder $_ -header @{ ConsistencyLevel = 'eventual' } - if ($apiVersion -eq 'beta') { + if ($apiVersion -eq 'beta') + { $batchRequestBetaApi.Value.Add($request) } - else { + else + { $batchRequestStableApi.Value.Add($request) } } } # recursively process children if they exist - if ($children) { + if ($children) + { # for grandchildren, we need to collect the parent IDs from the results - $childBasePath = if ($item.Path -match "\.json$") { + $childBasePath = if ($item.Path -match "\.json$") + { $basePath - } else { + } + else + { Join-Path -Path $basePath -ChildPath $item.Path } # we'll process these after the current batch is executed and results are available $script:childrenToProcess.Add(@{ - Children = $children - BasePath = $childBasePath - ParentPath = "$($item.Path)*" - }) + Children = $children + BasePath = $childBasePath + ParentPath = "$($item.Path)*" + }) } } } - function _executeBatchRequests { + function _executeBatchRequests + { param( [ref]$batchRequestStableApi, [ref]$batchRequestBetaApi, - [ref]$parentIdIndex, + [ref]$results, [array]$requestedExportSchema ) # execute v1.0 API batch requests - if ($batchRequestStableApi.Value.Count -gt 0) { + if ($batchRequestStableApi.Value.Count -gt 0) + { Write-Verbose "Processing $($batchRequestStableApi.Value.count) v1.0 API requests" - $batchResults = Invoke-GraphBatchRequest -batchRequest 
$batchRequestStableApi.Value -separateErrors -ErrorAction SilentlyContinue -ErrorVariable requestErrors -WarningAction SilentlyContinue + $batchResults = Invoke-GraphBatchRequest -batchRequest $batchRequestStableApi.Value -throttleLimit $ThrottleLimit -separateErrors -ErrorAction SilentlyContinue -ErrorVariable requestErrors -WarningAction SilentlyContinue - if ($batchResults) { - # flush to disk immediately and record IDs for parent-child lookups - _flushResultsToDisk -batchResults @($batchResults) -parentIdIndex $parentIdIndex.Value + if ($batchResults) + { + $results.Value.AddRange(@($batchResults)) } _processBatchErrors -requestErrors $requestErrors -requestedExportSchema $requestedExportSchema @@ -405,122 +387,224 @@ } # execute beta API batch requests - if ($batchRequestBetaApi.Value.Count -gt 0) { + if ($batchRequestBetaApi.Value.Count -gt 0) + { Write-Verbose "Processing $($batchRequestBetaApi.Value.count) beta API requests" - $batchResults = Invoke-GraphBatchRequest -batchRequest $batchRequestBetaApi.Value -graphVersion beta -separateErrors -ErrorAction SilentlyContinue -ErrorVariable requestErrors -WarningAction SilentlyContinue + $batchResults = Invoke-GraphBatchRequest -batchRequest $batchRequestBetaApi.Value -graphVersion beta -throttleLimit $ThrottleLimit -separateErrors -ErrorAction SilentlyContinue -ErrorVariable requestErrors -WarningAction SilentlyContinue - if ($batchResults) { - # flush to disk immediately and record IDs for parent-child lookups - _flushResultsToDisk -batchResults @($batchResults) -parentIdIndex $parentIdIndex.Value + if ($batchResults) + { + $results.Value.AddRange(@($batchResults)) } _processBatchErrors -requestErrors $requestErrors -requestedExportSchema $requestedExportSchema $batchRequestBetaApi.Value.Clear() } } - #endregion helper functions - #region process all schema items one by one - # Each root schema item is fully processed (request -> flush to disk -> all children at all - # depths) before moving to the next one. 
$parentIdIndex and $script:childrenToProcess are - # reset per root item so lookups stay scoped to the current item's result set. - $batchRequestStableApi = [System.Collections.Generic.List[Object]]::new() - $batchRequestBetaApi = [System.Collections.Generic.List[Object]]::new() + function _saveResultsToDisk + { + param( + [System.Collections.Generic.List[Object]]$results + ) - $requestedExportSchema = $ExportSchema | ? { Compare-Object $_.Tag $Type -ExcludeDifferent -IncludeEqual } + foreach ($item in $results) + { + if (!(Get-ObjectProperty $item 'Id')) + { + <# + In some special cases it can happen that 'id' property is missing like: - foreach ($item in $requestedExportSchema) { - # reset per-item state so child lookups don't match results from a previous schema item - $parentIdIndex = @{} - $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() + isEnabled : True + notifyReviewers : True + remindersEnabled : False + requestDurationInDays : 14 + version : 0 + reviewers : {...} + RequestId : C:/temp/bkp3/Policies/AdminConsentRequestPolicy + #> + + $itemId = ($item.RequestId -split "/")[-1] + # remove the random number added to avoid duplicated ids in batch requests + $itemId = _normalizeRequestId $itemId + + Write-Verbose ($item | ConvertTo-Json) + Write-Warning "Result without 'id' property, using '$itemId' instead (RequestId '$($item.RequestId)')!" + } + else + { + $itemId = $item.id + } + + if (!$item.RequestId) + { + $item + throw "Item without RequestId. Shouldn't happen!" 
+ } - $outputFileName = Join-Path -Path $Path -ChildPath $item.Path + $outputFileName = $item.RequestId -replace '[\\/]', [IO.Path]::DirectorySeparatorChar # replacement text is literal, so it must not be regex-escaped + # remove the random number added to avoid duplicated ids in batch requests + $outputFileName = _normalizeRequestId $outputFileName + + if ($outputFileName -notmatch "\.json$") + { + $outputFileName = Join-Path (Join-Path -Path $outputFileName -ChildPath $itemId) -ChildPath "$itemId.json" + } + + if (!(Invoke-FilePathCheck -FilePath $outputFileName)) + { + continue + } + + $item | SaveAs-SortedJSON -Path $outputFileName + } + } + #endregion helper functions + + #region process schema items - one parent at a time + $requestedExportSchema = $ExportSchema | Where-Object { Compare-Object $_.Tag $Type -ExcludeDifferent -IncludeEqual } + + foreach ($schemaItem in $requestedExportSchema) + { + $outputFileName = Join-Path -Path $Path -ChildPath $schemaItem.Path - Write-Host "➡️ $($item.GraphUri)" + Write-Host "➡️ $($schemaItem.GraphUri)" - if (!$item.$schemaScopeType) { - Write-Verbose "Skipping as it doesn't support '$schemaScopeType'" + if (!$schemaItem.$schemaScopeType) + { + Write-Verbose "Skipping '$($schemaItem.Path)' as it doesn't support '$schemaScopeType'" continue } - $command = Get-ObjectProperty $item 'Command' - $graphUri = Get-ObjectProperty $item 'GraphUri' - $apiVersion = Get-ObjectProperty $item 'ApiVersion' - $children = Get-ObjectProperty $item 'Children' + $command = Get-ObjectProperty $schemaItem 'Command' + $graphUri = Get-ObjectProperty $schemaItem 'GraphUri' + $apiVersion = Get-ObjectProperty $schemaItem 'ApiVersion' + $children = Get-ObjectProperty $schemaItem 'Children' if (!$apiVersion) { $apiVersion = 'v1.0' } - if ($command) { + if ($command) + { + # Commands handle their own I/O, just invoke and move on $commandParams = @{} - switch ($command) { - 'Get-AzureResourceIAMData' { $commandParams.RootFolder = $outputFileName } - 'Get-AzurePIMDirectoryRoles' { $commandParams.RootFolder = $outputFileName } -
'Get-AzurePIMResources' { $commandParams.RootFolder = $outputFileName } - 'Get-AzurePIMGroups' { $commandParams.RootFolder = $outputFileName } + switch ($command) + { + 'Get-AzureResourceIAMData' { $commandParams.RootFolder = $outputFileName } + 'Get-AzurePIMDirectoryRoles' { $commandParams.RootFolder = $outputFileName } + 'Get-AzurePIMResources' { $commandParams.RootFolder = $outputFileName } + 'Get-AzurePIMGroups' { $commandParams.RootFolder = $outputFileName } 'Get-AzureResourceAccessPolicies' { $commandParams.RootFolder = $outputFileName } default { Write-Warning "Unknown command '$command'" } } & $command @commandParams + continue } - else { - $uri = New-FinalUri -RelativeUri $graphUri -Select (Get-ObjectProperty $item 'Select') -QueryParameters (Get-ObjectProperty $item 'QueryParameters') -Filter (Get-ObjectProperty $item 'Filter') - # batch request id cannot contain '\' character - $id = $outputFileName -replace '\\', '/' + # --- Graph URI path: execute parent request, save, then process children --- + $parentResults = [System.Collections.Generic.List[Object]]::new() + $batchRequestStableApi = [System.Collections.Generic.List[Object]]::new() + $batchRequestBetaApi = [System.Collections.Generic.List[Object]]::new() + $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() - # to avoid duplicated ids in batch request if there are multiple $ExportSchema items with the same path ('Groups' in this case) - $id = _randomizeRequestId $id + $uri = New-FinalUri -RelativeUri $graphUri -Select (Get-ObjectProperty $schemaItem 'Select') -QueryParameters (Get-ObjectProperty $schemaItem 'QueryParameters') -Filter (Get-ObjectProperty $schemaItem 'Filter') - Write-Verbose "Adding request '$uri' with id '$id' to the batch" + # batch request id cannot contain '\' character + $id = $outputFileName -replace '\\', '/' + # to avoid duplicated ids in batch request if there are multiple schema items with the same path + $id = _randomizeRequestId $id - $request = 
New-GraphBatchRequest -Url $uri -Id $id -header @{ ConsistencyLevel = 'eventual' } + Write-Verbose "Adding request '$uri' with id '$id' to the batch" - if ($apiVersion -eq 'beta') { $batchRequestBetaApi.Add($request) } - else { $batchRequestStableApi.Add($request) } + $request = New-GraphBatchRequest -url $uri -id $id -header @{ ConsistencyLevel = 'eventual' } + if ($apiVersion -eq 'beta') { $batchRequestBetaApi.Add($request) } + else { $batchRequestStableApi.Add($request) } - # execute and flush this item's results to disk before processing its children - _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -parentIdIndex ([ref]$parentIdIndex) -requestedExportSchema @($item) - } + # Execute and immediately save parent results to disk + _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -results ([ref]$parentResults) -requestedExportSchema @($schemaItem) + _saveResultsToDisk -results $parentResults - # queue children for this item - if ($children) { - $script:childrenToProcess.Add(@{ - Children = $children - BasePath = Join-Path -Path $Path -ChildPath $item.Path - ParentPath = $item.Path - }) + # Queue children using the parent IDs collected above + if ($children) + { + $parentIds = $parentResults.Id | Select-Object -Unique + if ($parentIds) + { + $script:childrenToProcess.Add(@{ + Children = $children + BasePath = Join-Path -Path $Path -ChildPath $schemaItem.Path + ParentPath = $schemaItem.Path + ParentIds = @($parentIds) + }) + } } - # drain all children (and grandchildren) for this item before moving to the next root item - while ($script:childrenToProcess.Count -gt 0) { - $currentBatch = $script:childrenToProcess - $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() - - foreach ($childGroup in $currentBatch) { - Write-Verbose "Looking for results for parent with path '$($childGroup.ParentPath)'" 
+ # Free parent results - no longer needed + $parentResults = $null - $basePathNormalized = $childGroup.BasePath -replace "\\", "/" - $parentPathPattern = "$($childGroup.ParentPath)*" -replace "\\", "/" + # Process children level by level, chunked to max 5000 parent IDs per batch execution + while ($script:childrenToProcess.Count -gt 0) + { + $currentLevel = $script:childrenToProcess + $script:childrenToProcess = [System.Collections.Generic.List[Object]]::new() - # look up parent IDs from the index (keyed by normalized RequestId) - $parentIds = $parentIdIndex.Keys | Where-Object { - $_ -eq $basePathNormalized -or $_ -like $parentPathPattern - } | ForEach-Object { $parentIdIndex[$_] } | ForEach-Object { $_ } | Select-Object -Unique + foreach ($childGroup in $currentLevel) + { + $allParentIds = $childGroup.ParentIds - if (!$parentIds) { - Write-Verbose "Parent '$($childGroup.ParentPath)' doesn't contain any data, skipping children retrieval" + if (!$allParentIds -or $allParentIds.Count -eq 0) + { + Write-Verbose "No parent IDs for '$($childGroup.ParentPath)', skipping children" continue } - Write-Verbose "Processing children results for parent '$($childGroup.ParentPath)' ($(@($parentIds).count))" + Write-Verbose "Processing children for '$($childGroup.ParentPath)' ($($allParentIds.Count) parents)" + + # Chunk parent IDs into batches of 5000 to cap memory usage + $chunkSize = 5000 + for ($i = 0; $i -lt $allParentIds.Count; $i += $chunkSize) + { + $parentIdChunk = $allParentIds[$i..[Math]::Min($i + $chunkSize - 1, $allParentIds.Count - 1)] + + $chunkResults = [System.Collections.Generic.List[Object]]::new() + $batchStableChunk = [System.Collections.Generic.List[Object]]::new() + $batchBetaChunk = [System.Collections.Generic.List[Object]]::new() + + _processChildrenRecursive ` + -schemaItems $childGroup.Children ` + -basePath $childGroup.BasePath ` + -parentIds $parentIdChunk ` + -results ([ref]$chunkResults) ` + -batchRequestStableApi ([ref]$batchStableChunk) ` + 
-batchRequestBetaApi ([ref]$batchBetaChunk) + + # Execute batch for this chunk + _executeBatchRequests ` + -batchRequestStableApi ([ref]$batchStableChunk) ` + -batchRequestBetaApi ([ref]$batchBetaChunk) ` + -results ([ref]$chunkResults) ` + -requestedExportSchema @($schemaItem) + + # Resolve ParentIds for any grandchildren queued during this chunk + # (_processChildrenRecursive adds them without a ParentIds key) + foreach ($pendingGrandchild in $script:childrenToProcess | Where-Object { !$_.ContainsKey('ParentIds') }) + { + $matchBase = $pendingGrandchild.BasePath -replace '\\', '/' + $matchPattern = ($pendingGrandchild.ParentPath + '*') -replace '\\', '/' + $pendingGrandchild.ParentIds = @( + $chunkResults | Where-Object { + $nId = _normalizeRequestId $_.RequestId + $nId -eq $matchBase -or $nId -like $matchPattern + } | Select-Object -ExpandProperty Id -Unique + ) + } - _processChildrenRecursive -schemaItems $childGroup.Children -basePath $childGroup.BasePath -parentIds $parentIds -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) + # Save chunk results to disk immediately, then free memory + _saveResultsToDisk -results $chunkResults + $chunkResults = $null + } } - - # execute and flush this level's child requests to disk immediately - _executeBatchRequests -batchRequestStableApi ([ref]$batchRequestStableApi) -batchRequestBetaApi ([ref]$batchRequestBetaApi) -parentIdIndex ([ref]$parentIdIndex) -requestedExportSchema $requestedExportSchema } } - #endregion process all schema items one by one -} + #endregion process schema items - one parent at a time +} \ No newline at end of file diff --git a/src/Get-EEDefaultSchema.ps1 b/src/Get-EEDefaultSchema.ps1 index 3a9f6e7..cff4477 100644 --- a/src/Get-EEDefaultSchema.ps1 +++ b/src/Get-EEDefaultSchema.ps1 @@ -940,9 +940,9 @@ function Get-EEDefaultSchema { QueryParameters = @{ expand = 'principals' } ApiVersion = 'beta' Tag = @('All', 'Config', 'RoleManagement', 
'CloudPCRoles') - DelegatedPermission = 'DeviceManagementRBAC.Read.All' - ApplicationPermission = 'DeviceManagementRBAC.Read.All' - } + DelegatedPermission = 'RoleManagement.Read.All' + ApplicationPermission = 'RoleManagement.Read.All' + } # RoleManagement - Entitlement Management Role Definitions @{ GraphUri = 'roleManagement/entitlementManagement/roleDefinitions' diff --git a/src/command/Get-AzurePIMDirectoryRoles.ps1 b/src/command/Get-AzurePIMDirectoryRoles.ps1 index 324e1d7..354c5a0 100644 --- a/src/command/Get-AzurePIMDirectoryRoles.ps1 +++ b/src/command/Get-AzurePIMDirectoryRoles.ps1 @@ -33,7 +33,7 @@ if ($skipAssignmentSettings) { $_ | select *, @{n = 'PrincipalName'; e = { $_.principal.displayName } }, @{n = 'RoleName'; e = { $_.roleDefinition.displayName } } } else { - $rules = Get-PIMDirectoryRoleAssignmentSetting -roleId $_.roleDefinitionId -dontBeautify + $rules = Get-PIMDirectoryRoleAssignmentSetting -roleId $_.roleDefinition.templateId -dontBeautify $_ | select *, @{n = 'PrincipalName'; e = { $_.principal.displayName } }, @{n = 'RoleName'; e = { $_.roleDefinition.displayName } }, @{n = 'Policy'; e = { $rules } } } @@ -92,6 +92,9 @@ $response = Invoke-MgGraphRequest -Uri "v1.0/policies/roleManagementPolicyAssignments?`$filter=scopeType eq 'DirectoryRole' and roleDefinitionId eq '$roleID' and scopeId eq '/' " | Get-MgGraphAllPages $policyID = $response.policyID Write-Verbose "policyID = $policyID" + if (!$policyID) { + throw "PIM assignment settings for $roleID role wasn't found?!" 
+ } # get the rules $response = Invoke-MgGraphRequest -Uri "v1.0/policies/roleManagementPolicies/$policyID/rules" | Get-MgGraphAllPages @@ -233,6 +236,6 @@ continue } - $item | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $item | SaveAs-SortedJSON -Path $outputFileName } } \ No newline at end of file diff --git a/src/command/Get-AzurePIMGroups.ps1 b/src/command/Get-AzurePIMGroups.ps1 index 2a6f326..cd98901 100644 --- a/src/command/Get-AzurePIMGroups.ps1 +++ b/src/command/Get-AzurePIMGroups.ps1 @@ -133,7 +133,6 @@ continue } - # Hide warning for depth when converting to JSON - $item | ConvertTo-Json -depth 10 -WarningAction SilentlyContinue | Out-File (New-Item -Path $outputFileName -Force) + $item | SaveAs-SortedJSON -Path $outputFileName } } \ No newline at end of file diff --git a/src/command/Get-AzurePIMResources.ps1 b/src/command/Get-AzurePIMResources.ps1 index e2b4e92..64b01c3 100644 --- a/src/command/Get-AzurePIMResources.ps1 +++ b/src/command/Get-AzurePIMResources.ps1 @@ -280,7 +280,7 @@ continue } - $item | ConvertTo-Json -Depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $item | SaveAs-SortedJSON -Path $outputFileName } Get-PIMSubscriptionEligibleAssignment | ? 
{ $_ } | % { @@ -294,6 +294,6 @@ continue } - $item | ConvertTo-Json -Depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $item | SaveAs-SortedJSON -Path $outputFileName } } \ No newline at end of file diff --git a/src/command/Get-AzureResourceAccessPolicies.ps1 b/src/command/Get-AzureResourceAccessPolicies.ps1 index 7c5397f..da0f81c 100644 --- a/src/command/Get-AzureResourceAccessPolicies.ps1 +++ b/src/command/Get-AzureResourceAccessPolicies.ps1 @@ -63,6 +63,6 @@ $outputFileName = Join-Path -Path $rootFolder -ChildPath "$id.json" - $result | ConvertTo-Json -Depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $result | SaveAs-SortedJSON -Path $outputFileName } } \ No newline at end of file diff --git a/src/command/Get-AzureResourceIAMData.ps1 b/src/command/Get-AzureResourceIAMData.ps1 index 2745c61..7cae33c 100644 --- a/src/command/Get-AzureResourceIAMData.ps1 +++ b/src/command/Get-AzureResourceIAMData.ps1 @@ -118,7 +118,7 @@ authorizationresources $outputFileName = $outputFileName + ".replace" } - $item | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $item | SaveAs-SortedJSON -Path $outputFileName } #endregion IAM Role assignments export @@ -129,7 +129,7 @@ authorizationresources $roleId = $result.name $outputPath = Join-Path -Path $definitionsFolder -ChildPath "BuiltInRole" $outputFileName = Join-Path -Path $outputPath -ChildPath "$roleId.json" - $result | select * -ExcludeProperty RequestName | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $result | SaveAs-SortedJSON -Path $outputFileName } #endregion export built-in RBAC (IAM) roles @@ -159,7 +159,7 @@ ResourceContainers $outputFileName = Join-Path -Path $outputPath -ChildPath "$roleId.json" - $result | Select-Object * -ExcludeProperty RequestName | ConvertTo-Json -depth 100 | Out-File (New-Item -Path $outputFileName -Force) + $result | SaveAs-SortedJSON -Path $outputFileName } #endregion export custom RBAC (IAM) roles 
#endregion IAM Role definitions export diff --git a/src/internal/ConvertTo-OrderedDictionary.ps1 b/src/internal/ConvertTo-OrderedDictionary.ps1 index ef7a7d4..d564475 100644 --- a/src/internal/ConvertTo-OrderedDictionary.ps1 +++ b/src/internal/ConvertTo-OrderedDictionary.ps1 @@ -1,37 +1,349 @@ function ConvertTo-OrderedDictionary { + <# + .SYNOPSIS + Converts an input object into a recursively ordered structure. + + .DESCRIPTION + Recursively processes dictionaries, arrays, enumerables, and custom objects to produce deterministic ordering. + Dictionaries and object properties are ordered by name, and collection items are ordered by a stable helper sort key. + Scalar-like values are returned unchanged. + + .PARAMETER InputObject + Object to convert. Supports pipeline input. If null is provided, null is returned. + + .OUTPUTS + System.Object + + .EXAMPLE + $result = @{ b = 2; a = 1 } | ConvertTo-OrderedDictionary + $result + + returns the same data with keys ordered deterministically. + + .EXAMPLE + ConvertTo-OrderedDictionary -InputObject $graphResponse + + Converts nested API response objects into a stable ordered representation useful for compare/export scenarios. + + .EXAMPLE + $inputObject = [PSCustomObject]@{ + service = 'contoso-app' + users = @( + [PSCustomObject]@{ id = 3; displayName = 'Cecil'; department = 'Ops' } + [PSCustomObject]@{ department = 'HR'; displayName = 'Anna'; id = 1 } + [PSCustomObject]@{ displayName = 'Boris'; id = 2; department = 'IT' } + ) + metadata = [PSCustomObject]@{ z = 'last'; a = 'first' } + } + + $ordered = ConvertTo-OrderedDictionary -InputObject $inputObject + + Keeps property names ordered (for example metadata.a before metadata.z) and also stabilizes ordering + in users array items that contain unsorted property declarations. 
+ + OUTPUT: + metadata service users + -------- ------- ----- + @{a=first; z=last} contoso-app {@{department=HR; displayName=Anna; id=1}, @{department=IT; displayName=Boris; id=2}, @{department=Ops; displayName=Cecil; id=3}} + + .EXAMPLE + $inputObject = [PSCustomObject]@{ + groups = @( + [PSCustomObject]@{ + name = 'team-b' + members = @( + [PSCustomObject]@{ role = 'Owner'; userId = 20; upn = 'b-owner@contoso.com' } + [PSCustomObject]@{ upn = 'b-member@contoso.com'; userId = 21; role = 'Member' } + ) + } + [PSCustomObject]@{ + members = @( + [PSCustomObject]@{ userId = 10; upn = 'a-owner@contoso.com'; role = 'Owner' } + [PSCustomObject]@{ role = 'Member'; upn = 'a-member@contoso.com'; userId = 11 } + ) + name = 'team-a' + } + ) + } + + ConvertTo-OrderedDictionary -InputObject $inputObject | ConvertTo-Json -Depth 5 + + Recursively orders group and member properties and makes nested array output deterministic for reliable compare/export. + + OUTPUT: + { + "groups": [ + { + "members": [ + { + "role": "Member", + "upn": "a-member@contoso.com", + "userId": 11 + }, + { + "role": "Owner", + "upn": "a-owner@contoso.com", + "userId": 10 + } + ], + "name": "team-a" + }, + { + "members": [ + { + "role": "Member", + "upn": "b-member@contoso.com", + "userId": 21 + }, + { + "role": "Owner", + "upn": "b-owner@contoso.com", + "userId": 20 + } + ], + "name": "team-b" + } + ] + } + + .NOTES + Intended for deterministic output in diffing, testing, and serialization workflows. + #> + [CmdletBinding()] param ( - [Parameter(Mandatory = $true, ValueFromPipeline = $true)] + [Parameter(ValueFromPipeline = $true)] + [AllowNull()] $InputObject ) - process + begin { - if($InputObject){ - if($InputObject -is [array]){ - $outputArray = @() - foreach($item in $InputObject){ - $outputArray += ConvertTo-OrderedDictionary $item + function Get-OrderedDictionarySortKey + { + <# + .SYNOPSIS + Builds a stable sort key for mixed object items. 
+ + .DESCRIPTION + Extracts up to three scalar values from an input dictionary or object properties and joins them + into a deterministic string key, so collection sorting remains stable between runs. + + .PARAMETER Item + Item used to generate the sort key. + + .OUTPUTS + System.String + #> + + param($Item) + + if ($null -eq $Item) { return '' } + + if ( + $Item.GetType().IsPrimitive -or + $Item -is [string] -or + $Item -is [decimal] -or + $Item -is [datetime] -or + $Item -is [datetimeoffset] -or + $Item -is [timespan] -or + $Item -is [guid] -or + $Item -is [enum] + ) + { + return [string]$Item + } + + $keyParts = [System.Collections.Generic.List[string]]::new() + + if ($Item -is [System.Collections.IDictionary]) + { + # use first scalar dictionary values (sorted by key) as a lightweight stable signature. + foreach ($key in ($Item.Keys | Sort-Object { [string]$_ })) + { + if ($keyParts.Count -ge 3) { break } + $val = $Item[$key] + if ( + $null -eq $val -or + $val.GetType().IsPrimitive -or + $val -is [string] -or + $val -is [decimal] -or + $val -is [datetime] -or + $val -is [datetimeoffset] -or + $val -is [timespan] -or + $val -is [guid] -or + $val -is [enum] + ) + { + $keyParts.Add([string]$val) + } } - return $outputArray - } - elseif($InputObject -is [hashtable]){ - $outputObject = [ordered]@{} - foreach ($Item in ($InputObject.GetEnumerator() | Sort-Object -Property Key)) { - if($Item){ - $value = Get-ObjectProperty $Item 'Value' - if($value -is [hashtable] -or $value -is [array]){ #if child is a hashtable or array, sort it too - $Item.Value = ConvertTo-OrderedDictionary $value + } + else + { + try + { + $props = @( + $Item.PSObject.Properties | + Where-Object { + $_.IsGettable -and + $_.MemberType -in [System.Management.Automation.PSMemberTypes]::NoteProperty, [System.Management.Automation.PSMemberTypes]::Property + } | + Sort-Object Name + ) + + # use first scalar property values (sorted by name) for deterministic ordering. 
+ foreach ($prop in $props) + { + if ($keyParts.Count -ge 3) { break } + try { $val = $prop.Value } catch { continue } + if ( + $null -eq $val -or + $val.GetType().IsPrimitive -or + $val -is [string] -or + $val -is [decimal] -or + $val -is [datetime] -or + $val -is [datetimeoffset] -or + $val -is [timespan] -or + $val -is [guid] -or + $val -is [enum] + ) + { + $keyParts.Add([string]$val) } } - $outputObject[$Item.Key] = $Item.Value } - return $outputObject + catch { } } + + return $keyParts -join "`0" + } + } + + process + { + if ($null -eq $InputObject) + { + return $null } - else { + + if ( + # keep scalar-like values as-is to avoid unnecessary wrapping. + $InputObject.GetType().IsPrimitive -or + $InputObject -is [string] -or + $InputObject -is [decimal] -or + $InputObject -is [datetime] -or + $InputObject -is [datetimeoffset] -or + $InputObject -is [timespan] -or + $InputObject -is [guid] -or + $InputObject -is [enum] + ) + { return $InputObject } + + if ($InputObject -is [System.Collections.IDictionary]) + { + $outputObject = [ordered]@{} + $sortedKeys = [System.Collections.Generic.List[Object]]::new() + + # insert keys using ordinal comparison to keep ordering deterministic and culture-independent. + foreach ($key in $InputObject.Keys) + { + $insertAt = $sortedKeys.Count + + for ($i = 0; $i -lt $sortedKeys.Count; $i++) + { + if ([string]::CompareOrdinal([string]$key, [string]$sortedKeys[$i]) -lt 0) + { + $insertAt = $i + break + } + } + + $sortedKeys.Insert($insertAt, $key) + } + + foreach ($key in $sortedKeys) + { + $outputObject[$key] = ConvertTo-OrderedDictionary -InputObject $InputObject[$key] + } + + return $outputObject + } + + if ($InputObject -is [System.Array]) + { + $outputArray = @() + # sort complex items using the helper key so output is stable between runs. 
+ foreach ($item in ($InputObject | Sort-Object { Get-OrderedDictionarySortKey $_ })) + { + $outputArray += ConvertTo-OrderedDictionary -InputObject $item + } + + return $outputArray + } + + if ($InputObject -is [System.Collections.IEnumerable] -and $InputObject -isnot [string]) + { + $outputArray = @() + # treat non-array enumerables the same way for consistent ordering. + foreach ($item in ($InputObject | Sort-Object { Get-OrderedDictionarySortKey $_ })) + { + $outputArray += ConvertTo-OrderedDictionary -InputObject $item + } + + return $outputArray + } + + $properties = @( + $InputObject.PSObject.Properties | + Where-Object { + $_.MemberType -in [System.Management.Automation.PSMemberTypes]::NoteProperty, [System.Management.Automation.PSMemberTypes]::Property -and + $_.IsGettable + } + ) + + if ($properties.Count -gt 0) + { + $outputObject = [ordered]@{} + $sortedProperties = [System.Collections.Generic.List[Object]]::new() + + # keep property ordering explicit (ordinal by name) before recursive conversion. 
+ foreach ($property in $properties) + { + $insertAt = $sortedProperties.Count + + for ($i = 0; $i -lt $sortedProperties.Count; $i++) + { + if ([string]::CompareOrdinal([string]$property.Name, [string]$sortedProperties[$i].Name) -lt 0) + { + $insertAt = $i + break + } + } + + $sortedProperties.Insert($insertAt, $property) + } + + foreach ($property in $sortedProperties) + { + try + { + $propertyValue = $property.Value + } + catch + { + continue + } + + $outputObject[$property.Name] = ConvertTo-OrderedDictionary -InputObject $propertyValue + } + + return [PSCustomObject]$outputObject + } + + return $InputObject } -} +} \ No newline at end of file diff --git a/src/internal/Invoke-GraphBatchRequest.ps1 b/src/internal/Invoke-GraphBatchRequest.ps1 index 39481bf..1559397 100644 --- a/src/internal/Invoke-GraphBatchRequest.ps1 +++ b/src/internal/Invoke-GraphBatchRequest.ps1 @@ -42,6 +42,17 @@ .PARAMETER separateErrors Switch to return batch request errors one by one instead of all at once. + .PARAMETER throttleLimit + Throttle limit for running batch requests in parallel. + + Parallel processing is ONLY APPLIED if: + - there are at least 21 requests (i.e. more than one batch to process (one batch contains at most 20 individual requests)) to avoid unnecessary overhead of parallelization + - and if the script is run in PowerShell Core (natively supports parallelization). + + Adjust the value based on your environment and needs. Higher values may speed up the export but can lead to throttling by the Graph API and higher resource consumption, while lower values may reduce the chances of throttling but will take longer to complete. + + By default 10, which means that at most 10 batch requests will be run at the same time. 
+ .EXAMPLE [System.Collections.Generic.List[object]] $batchRequest = @() @@ -159,31 +170,34 @@ [switch] $dontFollowNextLink, - [switch] $separateErrors + [switch] $separateErrors, + + [int] $throttleLimit = 10 ) begin { - if ($PSCmdlet.MyInvocation.PipelineLength -eq 1) { - Write-Verbose "Total number of requests to process is $($batchRequest.count)" - } + #region helper functions + function ConvertTo-FlatArray { + # flattens input in case, that primitive(s) and array(s) are entered at the same time + [CmdletBinding()] + param ( + [Parameter(Mandatory = $true)] + $inputArray + ) - if ($dontBeautifyResult -and $dontAddRequestId) { - Write-Verbose "'dontAddRequestId' parameter will be ignored, 'RequestId' property is not being added when 'dontBeautifyResult' parameter is used" + foreach ($item in $inputArray) { + if ($null -ne $item) { + # recurse for arrays + if ($item.GetType().BaseType -eq [System.Array]) { + ConvertTo-FlatArray $item + } else { + # output non-arrays + $item + } + } + } } - # api batch requests are limited to 20 requests - $chunkSize = 20 - # base graph api uri - $uri = "https://graph.microsoft.com" - # batch uri - $requestUri = "$uri/$graphVersion/`$batch" - # buffer to hold chunks of requests - $requestChunk = [System.Collections.Generic.List[Object]]::new() - # paginated or remotely failed requests that should be processed too, to get all the results - $extraRequestChunk = [System.Collections.Generic.List[Object]]::new() - # throttled requests that have to be repeated after given time - $throttledRequestChunk = [System.Collections.Generic.List[Object]]::new() - function _processChunk { <# .SYNOPSIS @@ -197,7 +211,34 @@ [CmdletBinding()] param ( [Parameter(Mandatory = $true)] - [System.Collections.ArrayList] $requestChunk + [PSObject[]] $requestChunk, + + [Parameter(Mandatory = $true)] + [string] $requestUri, + + [Parameter(Mandatory = $true)] + [string] $graphVersion, + + [Parameter(Mandatory = $true)] + [bool] $dontBeautifyResult, + + 
[Parameter(Mandatory = $true)] + [bool] $dontAddRequestId, + + [Parameter(Mandatory = $true)] + [bool] $dontFollowNextLink, + + [Parameter(Mandatory = $true)] + [bool] $separateErrors, + + [Parameter(Mandatory = $true)] + [System.Collections.Concurrent.ConcurrentBag[Object]] $extraReqBag, + + [Parameter(Mandatory = $true)] + [System.Collections.Concurrent.ConcurrentBag[Object]] $throttledReqBag, + + [Parameter(Mandatory = $true)] + [System.Collections.Concurrent.ConcurrentBag[Int32]] $retryAfterBag ) function Is-JSON { @@ -214,9 +255,9 @@ } switch -Regex ($InputString.TrimStart()) { - '^"' { return "String" } - '^{' { return "Object" } - '^\[' { return "Array" } + '^"' { return "String" } + '^{' { return "Object" } + '^\[' { return "Array" } '^true|^false' { return "Boolean" } '^null' { return "Null" } '^-?\d' { return "Number" } @@ -224,15 +265,14 @@ } } - $duplicityId = $requestChunk.id | Group-Object | ? { $_.Count -gt 1 } + $duplicityId = $requestChunk.id | Group-Object | Where-Object { $_.Count -gt 1 } if ($duplicityId) { - Write-Warning "Batch requests must have unique ids. Id(s): '$(($duplicityId.Name | select -Unique) -join ', ')' is there more than once" - return + throw "Batch requests must have unique ids. 
Id(s): '$(($duplicityId.Name | Select-Object -Unique) -join ', ')' is there more than once" } Write-Debug ($requestChunk | ConvertTo-Json -Depth 10) - Write-Host "Processing batch of $($requestChunk.count) request(s):`n$(($requestChunk | Sort-Object Url | % {" → $($_.Url)"} ) -join "`n")" + Write-Verbose "Processing batch of $($requestChunk.count) request(s):`n$(($requestChunk | Sort-Object Url | ForEach-Object {" - $($_.Id) - $($_.Url)"} ) -join "`n")" #region process given chunk of batch requests $start = Get-Date @@ -245,274 +285,325 @@ Write-Verbose $body - Invoke-MgRestMethod -Method Post -Uri $requestUri -Body $body -ContentType "application/json" -OutputType PSObject | % { - $responses = $_.responses + $reqError = $null + do { + try { + Invoke-MgRestMethod -Method Post -Uri $requestUri -Body $body -ContentType "application/json" -OutputType PSObject -ErrorAction Stop | ForEach-Object { + $responses = $_.responses - #region return the output - if ($dontBeautifyResult) { - # return original response + #region return the output + if ($dontBeautifyResult) { + # return original response - $responses - } else { - # return just actually requested data without batch-related properties and enhance the returned object with 'RequestId' property for easier filtering + $responses + } else { + # return just actually requested data without batch-related properties and enhance the returned object with 'RequestId' property for easier filtering - foreach ($response in $responses) { - $value, $noteProperty = $null - if ($response.body) { $noteProperty = $response.body | Get-Member -MemberType NoteProperty } + foreach ($response in $responses) { + $value, $noteProperty = $null + if ($response.body) { $noteProperty = $response.body | Get-Member -MemberType NoteProperty } - # there was some error, no real values were returned, skipping - if ($response.Status -in (400..509)) { - continue - } + # there was some error, no real values were returned, skipping + if ($response.Status 
-in (400..509)) { + continue + } - if ($response.body.value) { - # the result is stored in 'value' property - $value = $response.body.value - } elseif ($response.body -and $noteProperty.Name -contains '@odata.context' -and $noteProperty.Name -contains 'value') { - # the result is stored in 'value' property, but no results were returned, skipping - continue - } elseif ($response.body) { - # the result is in the 'body' property itself - $value = $response.body - } else { - # no results in 'body.value' nor 'body' property itself - continue - } + if ($response.body.value) { + # the result is stored in 'value' property + $value = $response.body.value + } elseif ($response.body -and $noteProperty.Name -contains '@odata.context' -and $noteProperty.Name -contains 'value') { + # the result is stored in 'value' property, but no results were returned, skipping + continue + } elseif ($response.body) { + # the result is in the 'body' property itself + $value = $response.body + } else { + # no results in 'body.value' nor 'body' property itself + continue + } - # return processed output - $primitiveTypeList = 'String', 'Int32', 'Int64', 'Boolean', 'Float', 'Double', 'Decimal', 'Char' + # return processed output + $primitiveTypeList = 'String', 'Int32', 'Int64', 'Boolean', 'Float', 'Double', 'Decimal', 'Char' - if ($value.gettype().name -in $primitiveTypeList -or $value[0].gettype().name -in $primitiveTypeList) { - # it is a primitive (or list of primitives) + if ($value.gettype().name -in $primitiveTypeList -or $value[0].gettype().name -in $primitiveTypeList) { + # it is a primitive (or list of primitives) - if ($dontAddRequestId) { - $value - } else { - [PSCustomObject]@{ - Value = $value - RequestId = $response.Id - } - } - } else { - # it is a complex object (hashtable, ..) + if ($dontAddRequestId) { + $value + } else { + [PSCustomObject]@{ + Value = $value + RequestId = $response.Id + } + } + } else { + # it is a complex object (hashtable, ..) 
- # properties to return - $property = @("*") - if (!$dontAddRequestId) { - $property += @{n = 'RequestId'; e = { $response.Id } } - } + # properties to return + $property = @("*") + if (!$dontAddRequestId) { + $property += @{n = 'RequestId'; e = { $response.Id } } + } - $value | select -Property $property -ExcludeProperty '@odata.context', '@odata.nextLink' + $value | Select-Object -Property $property -ExcludeProperty '@odata.context', '@odata.nextLink' + } + } } - } - } - #endregion return the output - - #region handle the responses based on their status code - # load the next pages, retry throttled requests, repeat failed requests, ... + #endregion return the output - $failedBatchJob = [System.Collections.Generic.List[Object]]::new() + #region handle the responses based on their status code + # load the next pages, retry throttled requests, repeat failed requests, ... - foreach ($response in $responses) { - # https://learn.microsoft.com/en-us/graph/errors#http-status-codes - if ($response.Status -in 200, 201, 204) { - # success + $failedBatchJob = [System.Collections.Generic.List[Object]]::new() - if ($response.body.'@odata.nextLink') { - # paginated (get remaining results by query returned NextLink URL) + foreach ($response in $responses) { + # https://learn.microsoft.com/en-us/graph/errors#http-status-codes + if ($response.Status -in 200, 201, 204) { + # success - if ($dontFollowNextLink) { - Write-Verbose "Batch result for request '$($response.Id)' is paginated. But 'dontFollowNextLink' switch is set, hence nextLink will not be followed" + if ($response.body.'@odata.nextLink') { + # paginated (get remaining results by query returned NextLink URL) - continue - } else { - Write-Verbose "Batch result for request '$($response.Id)' is paginated. Nextlink will be processed in the next batch" - } + if ($dontFollowNextLink) { + Write-Verbose "Batch result for request '$($response.Id)' is paginated. 
But 'dontFollowNextLink' switch is set, hence nextLink will not be followed" - $relativeNextLink = $response.body.'@odata.nextLink' -replace [regex]::Escape("https://graph.microsoft.com/$graphVersion/") - # make a request object copy, so I can modify it without interfering with the original object - $nextLinkRequest = $requestChunk | ? Id -EQ $response.Id | ConvertTo-Json -Depth 10 | ConvertFrom-Json - # replace original URL with the nextLink - $nextLinkRequest.URL = $relativeNextLink - # add the request for later processing - $extraRequestChunk.Add($nextLinkRequest) - } - } elseif ($response.Status -in 429, 509) { - # throttled (will be repeated after given time) + continue + } else { + Write-Verbose "Batch result for request '$($response.Id)' is paginated. Nextlink will be processed in the next batch" + } - $jobRetryAfter = $response.Headers.'Retry-After' - $throttledBatchRequest = $requestChunk | ? Id -EQ $response.Id + $relativeNextLink = $response.body.'@odata.nextLink' -replace [regex]::Escape("https://graph.microsoft.com/$graphVersion/") + # make a request object copy, so I can modify it without interfering with the original object + $nextLinkRequest = $requestChunk | Where-Object Id -EQ $response.Id | ConvertTo-Json -Depth 10 | ConvertFrom-Json + # replace original URL with the nextLink + $nextLinkRequest.URL = $relativeNextLink + # add the request for later processing + $extraReqBag.Add($nextLinkRequest) + } + } elseif ($response.Status -in 429, 509) { + # throttled (will be repeated after given time) - Write-Verbose "Batch request with Id: '$($throttledBatchRequest.Id)', Url:'$($throttledBatchRequest.Url)' was throttled, hence will be repeated after $jobRetryAfter seconds" + $jobRetryAfter = $response.Headers.'Retry-After' + $throttledBatchRequest = $requestChunk | Where-Object Id -EQ $response.Id - if ($jobRetryAfter -eq 0) { - # request can be repeated without any delay - #TIP for performance reasons adding to $extraRequestChunk batch (to avoid 
invocation of unnecessary batch job) - $extraRequestChunk.Add($throttledBatchRequest) - } else { - # request can be repeated after delay - # add the request for later processing - $throttledRequestChunk.Add($throttledBatchRequest) - } + Write-Verbose "Batch request with Id: '$($throttledBatchRequest.Id)', Url:'$($throttledBatchRequest.Url)' was throttled, hence will be repeated after $jobRetryAfter seconds" - # get highest retry-after wait time - if ($jobRetryAfter -gt $script:retryAfter) { - Write-Verbose "Setting $jobRetryAfter retry-after time" - $script:retryAfter = $jobRetryAfter - } - } elseif ($response.Status -in 500, 502, 503, 504) { - # some internal error on remote side (will be repeated) + if ($jobRetryAfter -eq 0) { + # request can be repeated without any delay + #TIP for performance reasons adding to $extraReqBag bag (to avoid invocation of unnecessary batch job) + $extraReqBag.Add($throttledBatchRequest) + } else { + # request can be repeated after delay + # add the request for later processing + $throttledReqBag.Add($throttledBatchRequest) + } - $problematicBatchRequest = $requestChunk | ? 
Id -EQ $response.Id + # get highest retry-after wait time + if ($jobRetryAfter -gt 0) { + Write-Verbose "Setting $jobRetryAfter retry-after time" + } + $retryAfterBag.Add([int]$jobRetryAfter) + } elseif ($response.Status -in 500, 502, 503, 504) { + # some internal error on remote side (will be repeated) - Write-Verbose "Batch request with Id: '$($problematicBatchRequest.Id)', Url:'$($problematicBatchRequest.Url)' had internal error '$($response.body.error.message)', Code: $($response.Status), hence will be repeated" + $problematicBatchRequest = $requestChunk | Where-Object Id -EQ $response.Id - $extraRequestChunk.Add($problematicBatchRequest) - } else { - # failed + Write-Verbose "Batch request with Id: '$($problematicBatchRequest.Id)', Url:'$($problematicBatchRequest.Url)' had internal error '$($response.body.error.message)', Code: $($response.Status), hence will be repeated" - $failedBatchRequest = $requestChunk | ? Id -EQ $response.Id + $extraReqBag.Add($problematicBatchRequest) + } else { + # failed - $innerErrorText = $null - if ($response.body.error.innerError.code) { - $innerErrorText = " (" + $response.body.error.innerError.code + ")" - } + $failedBatchRequest = $requestChunk | Where-Object Id -EQ $response.Id - $errorText = $null - if ($response.body.error.message) { - # sometimes the error message is not a plain string, but a JSON - if (Is-JSON -InputString $response.body.error.message) { - $errorText = $response.body.error.message | ConvertFrom-Json -ErrorAction Stop + $innerErrorText = $null + if ($response.body.error.innerError.code) { + $innerErrorText = " (" + $response.body.error.innerError.code + ")" + } - if ($errorText.Error.Message) { - $errorText = $errorText.Error.Message + "($($response.body.error.code))" - } elseif ($errorText.Message) { - $errorText = $errorText.Message + " ($($response.body.error.code))" - } else { + $errorText = $null + if ($response.body.error.message) { + # sometimes the error message is not a plain string, but a JSON + 
if (Is-JSON -InputString $response.body.error.message) { + $errorText = $response.body.error.message | ConvertFrom-Json -ErrorAction Stop + + if ($errorText.Error.Message) { + $errorText = $errorText.Error.Message + "($($response.body.error.code))" + } elseif ($errorText.Message) { + $errorText = $errorText.Message + " ($($response.body.error.code))" + } else { + $errorText = $response.body.error.code + } + } else { + # not a JSON, just a string + $errorText = $response.body.error.message + } + } elseif ($response.body.error.code) { $errorText = $response.body.error.code + } else { + # no error message, just the status code } - } else { - # not a JSON, just a string - $errorText = $response.body.error.message + + $failedBatchJob.Add( + @{ + Id = $response.Id + Url = $failedBatchRequest.Url + StatusCode = $response.Status + Error = "$($errorText)$innerErrorText" + Object = [ordered]@{ + request = $failedBatchRequest + response = $response + } + } + ) } - } elseif ($response.body.error.code) { - $errorText = $response.body.error.code - } else { - # no error message, just the status code } - $failedBatchJob.Add( - @{ - Id = $response.Id - Url = $failedBatchRequest.Url - StatusCode = $response.Status - Error = "$($errorText)$innerErrorText" - Object = [ordered]@{ - request = $failedBatchRequest - response = $response + # return error if critical failure occurred + if ($failedBatchJob) { + if ($separateErrors) { + # output errors one by one, so you can handle them separately if needed + $failedBatchJob | ForEach-Object { + #TIP only the first one will be returned if $ErrorActionPreference is set to stop! 
+ $errorMsg = "`nFailed batch request:`n$(" - Id: '$($_.Id)'", " - Url: '$($_.Url)'", " - StatusCode: '$($_.StatusCode)'", " - Error: '$($_.Error)'`n`n" -join "`n")" + $exception = New-Object System.InvalidOperationException $errorMsg + $exception.Source = "BatchRequest" + + Write-Error -ErrorRecord (New-Object System.Management.Automation.ErrorRecord($exception, $null, "InvalidOperation", $_.Object)) } - } - ) - } - } + } else { + #TIP all errors at once, because batch can contain non-related requests and if errorAction is set to stop, only the first error would be returned, which can be confusing + $errorMsg = "`nFollowing batch request(s) failed:`n`n$(($failedBatchJob | ForEach-Object { " - Id: '$($_.Id)'", " - Url: '$($_.Url)'", " - StatusCode: '$($_.StatusCode)'", " - Error: '$($_.Error)'" -join "`n" }) -join "`n`n")" + $exception = New-Object System.InvalidOperationException $errorMsg + $exception.Source = "BatchRequest" - # return error if critical failure occurred - if ($failedBatchJob) { - if ($separateErrors) { - # output errors one by one, so you can handle them separately if needed - $failedBatchJob | % { - #TIP only the first one will be returned if $ErrorActionPreference is set to stop! 
- $errorMsg = "`nFailed batch request:`n$(" - Id: '$($_.Id)'", " - Url: '$($_.Url)'", " - StatusCode: '$($_.StatusCode)'", " - Error: '$($_.Error)'`n`n" -join "`n")" - $exception = New-Object System.InvalidOperationException $errorMsg - $exception.Source = "BatchRequest" - - Write-Error -ErrorRecord (New-Object System.Management.Automation.ErrorRecord($exception, $null, "InvalidOperation", $_.Object)) + Write-Error -ErrorRecord (New-Object System.Management.Automation.ErrorRecord($exception, $null, "InvalidOperation", $failedBatchJob.Object)) + } } + #endregion handle the responses based on their status code + } + } catch { + if ($_.Exception.Source -eq "System.Net.Http" -and $_.Exception.InnerException.HResult -eq "-2146233083") { + $reqError = "timeOut" + Write-Warning "Network error occurred while trying to invoke Graph batch request ($($_.Exception.Message)). Retrying in 5 seconds..." + Start-Sleep -Seconds 5 } else { - #TIP all errors at once, because batch can contain non-related requests and if errorAction is set to stop, only the first error would be returned, which can be confusing - $errorMsg = "`nFollowing batch request(s) failed:`n`n$(($failedBatchJob | % { " - Id: '$($_.Id)'", " - Url: '$($_.Url)'", " - StatusCode: '$($_.StatusCode)'", " - Error: '$($_.Error)'" -join "`n" }) -join "`n`n")" - $exception = New-Object System.InvalidOperationException $errorMsg - $exception.Source = "BatchRequest" - - Write-Error -ErrorRecord (New-Object System.Management.Automation.ErrorRecord($exception, $null, "InvalidOperation", $failedBatchJob.Object)) + throw $_ } } - #endregion handle the responses based on their status code - } + } while ($reqError -eq "timeOut") $end = Get-Date Write-Verbose "It took $((New-TimeSpan -Start $start -End $end).TotalSeconds) seconds to process the batch" #endregion process given chunk of batch requests } + #endregion helper functions + + # flatten the batch request array + if ($batchRequest | Where-Object { $_ -and 
$_.GetType().BaseType -eq [System.Array] }) { + $batchRequest = ConvertTo-FlatArray -inputArray $batchRequest + } + + Write-Verbose "Total number of requests to process is $($batchRequest.count)" + + if ($dontBeautifyResult -and $dontAddRequestId) { + Write-Verbose "'dontAddRequestId' parameter will be ignored, 'RequestId' property is not being added when 'dontBeautifyResult' parameter is used" + } + + # api batch requests are limited to 20 requests + $chunkSize = 20 + # base graph api uri + $uri = "https://graph.microsoft.com" + # batch uri + $requestUri = "$uri/$graphVersion/`$batch" + # buffer to hold all incoming requests from pipeline + $allRequest = [System.Collections.Generic.List[Object]]::new() } process { + # flatten the batch request array + if ($batchRequest | Where-Object { $_ -and $_.GetType().BaseType -eq [System.Array] }) { + $batchRequest = ConvertTo-FlatArray -inputArray $batchRequest + } + # check url validity - $batchRequest.URL | % { + $batchRequest.URL | ForEach-Object { if ($_ -like "http*" -or $_ -like "*/beta/*" -or $_ -like "*/v1.0/*" -or $_ -like "*/graph.microsoft.com/*") { - Write-Warning "url '$_' has to be relative (without the whole 'https://graph.microsoft.com/' part)!" - return + throw "url '$_' has to be relative (without the whole 'https://graph.microsoft.com/' part)!" } } foreach ($request in $batchRequest) { - $requestChunk.Add($request) - - # check if the buffer has reached the required chunk size - if ($requestChunk.count -eq $chunkSize) { - [int] $script:retryAfter = 0 - _processChunk $requestChunk - - # clear the buffer - $requestChunk.Clear() - - # process requests that need to be repeated (paginated, failed on remote server,...) 
- if ($extraRequestChunk) { - Write-Verbose "Processing $($extraRequestChunk.count) paginated or server-side-failed request(s)" - - $PSBoundParameters['batchRequest'] = $extraRequestChunk - Invoke-GraphBatchRequest @PSBoundParameters - - $extraRequestChunk.Clear() - } - - # process throttled requests - if ($throttledRequestChunk) { - Write-Verbose "Processing $($throttledRequestChunk.count) throttled request(s) with $script:retryAfter seconds wait time" - - Start-Sleep -Seconds $script:retryAfter - - $PSBoundParameters['batchRequest'] = $throttledRequestChunk - Invoke-GraphBatchRequest @PSBoundParameters - - $throttledRequestChunk.Clear() - } - } + $allRequest.Add($request) } } end { - # process any remaining requests in the buffer + # process all accumulated requests in parallelized 20-item chunks + if ($allRequest.Count -gt 0) { + $extraReqBag = [System.Collections.Concurrent.ConcurrentBag[Object]]::new() + $throttledReqBag = [System.Collections.Concurrent.ConcurrentBag[Object]]::new() + $retryAfterBag = [System.Collections.Concurrent.ConcurrentBag[Int32]]::new() + + $requestChunkList = @(for ($i = 0; $i -lt $allRequest.Count; $i += $chunkSize) { + , @($allRequest | Select-Object -Skip $i -First $chunkSize) + }) + + # if there are at least two request chunks to process, run in parallel, otherwise run sequentially to avoid unnecessary overhead of parallelization + $useParallel = $PSVersionTable.PSEdition -eq "Core" -and $allRequest.Count -gt $chunkSize + + if ($useParallel) { + $processChunkDefinition = "function _processChunk { ${function:_processChunk} }" + + Write-Verbose "Running in parallel with throttle limit of $throttleLimit threads because both requirements are met (running in PSH Core, number of requests to process ($($allRequest.Count)))" + $requestChunkList | ForEach-Object -Parallel { + # recreate the function in the parallel runspace + . 
([ScriptBlock]::Create($using:processChunkDefinition)) + + $param = @{ + requestChunk = $_ + requestUri = $using:requestUri + graphVersion = $using:graphVersion + dontBeautifyResult = [bool]$using:dontBeautifyResult + dontAddRequestId = [bool]$using:dontAddRequestId + dontFollowNextLink = [bool]$using:dontFollowNextLink + separateErrors = [bool]$using:separateErrors + extraReqBag = $using:extraReqBag + throttledReqBag = $using:throttledReqBag + retryAfterBag = $using:retryAfterBag + } + if ($using:VerbosePreference) { + $param.Verbose = $true + } + _processChunk @param + } -ThrottleLimit $throttleLimit + } else { + Write-Verbose "Running sequentially as not being run in PowerShell Core and/or there are just $($allRequest.Count) requests to process" + $requestChunkList | ForEach-Object { + _processChunk -requestChunk $_ -requestUri $requestUri -graphVersion $graphVersion -dontBeautifyResult ([bool]$dontBeautifyResult) -dontAddRequestId ([bool]$dontAddRequestId) -dontFollowNextLink ([bool]$dontFollowNextLink) -separateErrors ([bool]$separateErrors) -extraReqBag $extraReqBag -throttledReqBag $throttledReqBag -retryAfterBag $retryAfterBag + } + } - if ($requestChunk.Count -gt 0) { [int] $script:retryAfter = 0 - _processChunk $requestChunk + if ($retryAfterBag.Count -gt 0) { + $script:retryAfter = ($retryAfterBag | Measure-Object -Maximum).Maximum + } # process requests that need to be repeated (paginated, failed on remote server,...) 
- if ($extraRequestChunk) { - Write-Verbose "Processing $($extraRequestChunk.count) paginated or server-side-failed request(s)" - $PSBoundParameters['batchRequest'] = $extraRequestChunk + if ($extraReqBag.Count -gt 0) { + Write-Warning "Processing $($extraReqBag.count) paginated or server-side-failed request(s)" + $PSBoundParameters['batchRequest'] = [Object[]]$extraReqBag Invoke-GraphBatchRequest @PSBoundParameters } # process throttled requests - if ($throttledRequestChunk) { - Write-Verbose "Processing $($throttledRequestChunk.count) throttled request(s) with $script:retryAfter seconds wait time" + if ($throttledReqBag.Count -gt 0) { + Write-Warning "Processing $($throttledReqBag.count) throttled request(s) with $script:retryAfter seconds wait time" Start-Sleep -Seconds $script:retryAfter - $PSBoundParameters['batchRequest'] = $throttledRequestChunk + $PSBoundParameters['batchRequest'] = [Object[]]$throttledReqBag Invoke-GraphBatchRequest @PSBoundParameters } }
diff --git a/src/internal/SaveAs-SortedJSON.ps1 b/src/internal/SaveAs-SortedJSON.ps1
new file mode 100644
index 0000000..cb6392a
--- /dev/null
+++ b/src/internal/SaveAs-SortedJSON.ps1
@@ -0,0 +1,44 @@
+function SaveAs-SortedJSON {
+    <#
+    .SYNOPSIS
+    Saves an object to a file as JSON.
+
+    .DESCRIPTION
+    Drops the RequestId and RequestName properties, pipes the object through
+    ConvertTo-OrderedDictionary and ConvertTo-Json (depth 100) and writes the
+    result to the file created (or overwritten) by New-Item -Force.
+
+    .PARAMETER Item
+    Object to save; accepts pipeline input. Falsy values (empty string, empty
+    collection, 0, $false) are skipped and no file is written for them.
+
+    .PARAMETER Path
+    Destination file path; has to end with a file extension.
+    #>
+    [CmdletBinding()]
+    param (
+        [Parameter(Mandatory = $true, ValueFromPipeline = $true, Position = 0)]
+        [object]$Item,
+
+        [Parameter(Mandatory = $true, Position = 1)]
+        [ValidateScript( {
+                if ($_ -match "\.\w+$") {
+                    $true
+                } else {
+                    throw "$_ is not a valid JSON file path. Enter in 'c:\destination\file.json' format"
+                }
+            })]
+        [string]$Path
+    )
+
+    process {
+        # the guard lives in 'process' because pipeline input is not bound in 'begin'
+        # and a 'return' issued in 'begin' does not prevent 'process' from running
+        if (-not $Item) {
+            return
+        }
+
+        # RequestId, RequestName are batch api request ids aka unrelated
+        $Item | Select-Object * -ExcludeProperty RequestId, RequestName | ConvertTo-OrderedDictionary | ConvertTo-Json -Depth 100 -WarningAction SilentlyContinue | Out-File (New-Item -Path $Path -Force)
+    }
+}
\ No newline at end of file