From 29a906834ae1d9c1d17c9a8825979f0faaae77e9 Mon Sep 17 00:00:00 2001 From: Ryan Wallace Date: Thu, 7 May 2026 20:36:53 -0700 Subject: [PATCH 1/2] Download remote URLs via open-uri before passing to ogr2ogr GDAL's curl-based fetcher fails with "Empty reply from server" against some ArcGIS endpoints (e.g. Cloudflare-fronted servers that only offer HTTP/1.1 over HTTPS). Fetch remote URLs with Ruby's open-uri and hand ogr2ogr a local tempfile instead. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../importers/esri_geo_json.rb | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/spatial_features/importers/esri_geo_json.rb b/lib/spatial_features/importers/esri_geo_json.rb index a734f11..4d6a31d 100644 --- a/lib/spatial_features/importers/esri_geo_json.rb +++ b/lib/spatial_features/importers/esri_geo_json.rb @@ -1,4 +1,6 @@ require 'digest/md5' +require 'open-uri' +require 'tempfile' require 'spatial_features/importers/geo_json' module SpatialFeatures @@ -14,11 +16,24 @@ def geojson private - def esri_json_to_geojson(url) - if URI.parse(url).relative? - `ogr2ogr -t_srs EPSG:4326 -f GeoJSON /dev/stdout "#{url}"` # It is a local file path + def esri_json_to_geojson(path_or_url) + if URI.parse(path_or_url).relative? + `ogr2ogr -t_srs EPSG:4326 -f GeoJSON /dev/stdout "#{path_or_url}"` # It is a local file path else - `ogr2ogr -t_srs EPSG:4326 -f GeoJSON /dev/stdout "#{url}" OGRGeoJSON` + # Download the URL ourselves rather than letting GDAL's curl fetch it. + # Servers that only offer HTTP/1.1 over HTTPS may cause GDAL's curl to fail, but Ruby's open-uri can handle it.
+ with_downloaded_file(path_or_url) do |path| + `ogr2ogr -t_srs EPSG:4326 -f GeoJSON /dev/stdout "#{path}"` + end + end + end + + def with_downloaded_file(url) + Tempfile.create(['esri_geojson', '.json']) do |tempfile| + tempfile.binmode + URI.open(url) { |io| IO.copy_stream(io, tempfile) } + tempfile.close + return yield(tempfile.path) end end end From 9d511cbfd8515033eb9b98dfd139e9e27984fcf5 Mon Sep 17 00:00:00 2001 From: Ryan Wallace Date: Thu, 7 May 2026 20:45:31 -0700 Subject: [PATCH 2/2] Paginate ArcGIS query responses past maxRecordCount ArcGIS query endpoints truncate each response at the service's maxRecordCount (commonly 1000 or 2000 features) and set exceededTransferLimit when more results exist. Walk the pages with resultOffset and merge them into a single FeatureCollection before handing to ogr2ogr. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../importers/esri_geo_json.rb | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/lib/spatial_features/importers/esri_geo_json.rb b/lib/spatial_features/importers/esri_geo_json.rb index 4d6a31d..24e0172 100644 --- a/lib/spatial_features/importers/esri_geo_json.rb +++ b/lib/spatial_features/importers/esri_geo_json.rb @@ -31,11 +31,52 @@ def esri_json_to_geojson(path_or_url) def with_downloaded_file(url) Tempfile.create(['esri_geojson', '.json']) do |tempfile| tempfile.binmode - URI.open(url) { |io| IO.copy_stream(io, tempfile) } + download_paginated(url, tempfile) tempfile.close return yield(tempfile.path) end end + + # ArcGIS query endpoints cap each response at the service's maxRecordCount + # (commonly 1000 or 2000 features) and signal exceededTransferLimit when + # more results are available. Walk the pages with resultOffset and merge + # them into a single FeatureCollection before handing to ogr2ogr. 
+ def download_paginated(url, io) + combined = nil + offset = 0 + + loop do + page = JSON.parse(URI.open(paginated_url(url, offset)).read) + page_features = page['features'] || [] + + if combined.nil? + combined = page + else + combined['features'].concat(page_features) + end + + break if page_features.empty? || !exceeded_transfer_limit?(page) + offset += page_features.length + end + + combined&.delete('exceededTransferLimit') + combined&.fetch('properties', {})&.delete('exceededTransferLimit') + io.write(JSON.dump(combined)) if combined + end + + def exceeded_transfer_limit?(page) + page['exceededTransferLimit'] || page.dig('properties', 'exceededTransferLimit') + end + + def paginated_url(url, offset) + return url if offset.zero? + uri = URI.parse(url) + params = URI.decode_www_form(uri.query || '') + params.reject! { |k, _| k == 'resultOffset' } + params << ['resultOffset', offset.to_s] + uri.query = URI.encode_www_form(params) + uri.to_s + end end end end