# Reverse fold for regular (non-plural) strings into a String Catalog (`Localizable.xcstrings`) — the catalog
# analogue of `PluralStrings.fold_translations!`. For each translatable key and target locale the stringUnit
# becomes `human ?? existing cell ?? AI ?? English`: a human value is written as `translated`; an AI value or
# the English fallback is written as `needs_review`.
#
# REUSE-AWARE: an existing cell whose value is present, differs from the English source and still passes the
# placeholder gate is left exactly as it is (its state is not inspected), so re-runs only ask the model for
# genuinely missing cells. A human translation always supersedes whatever the cell held before.
#
# Plain Ruby apart from `TranslationValidator.placeholders_match?(source, value)`, which must be loaded
# (translation_validator.rb) before a fold runs.
module CatalogStrings
  module_function

  # Mutates `catalog`; returns the number of (key, locale) cells resolved — human, reused and machine alike.
  #
  # @param catalog [Hash] the parsed `.xcstrings` document (`sourceLanguage`, `strings`).
  # @param translations_by_locale [Hash{String=>Hash{String=>String}}] locale => { key => human value }.
  # @param locales [Array<String>] target locales; the source locale and repeated entries are skipped.
  # @param ai_translator [#call, nil] `call(entries, locale) => { key => translation }`, entries being
  #   `[{ key:, source:, comment: }]`. nil ⇒ no model call; missing cells fall back to English.
  # @return [Integer]
  def fold_translations!(catalog, translations_by_locale:, locales:, ai_translator: nil)
    source = catalog['sourceLanguage'] || 'en'
    sources = translatable_sources(catalog, source)
    # `uniq`: a locale listed twice must not be folded, counted, or sent to the model twice.
    (locales.uniq - [source]).sum do |locale|
      fold_locale!(catalog, locale, sources, translations_by_locale[locale] || {}, ai_translator)
    end
  end

  # { key => { source:, comment: } } for every translatable key. The source text is the explicit value in the
  # source locale, or the key itself when there is none (key-as-source strings). Entries flagged
  # `shouldTranslate: false`, and entries whose source text is empty, are left out.
  def translatable_sources(catalog, source)
    (catalog['strings'] || {}).each_with_object({}) do |(key, body), acc|
      # Explicit comparison: a missing flag (nil) means "translate".
      next if body['shouldTranslate'] == false

      value = body.dig('localizations', source, 'stringUnit', 'value') || key
      acc[key] = { source: value, comment: body['comment'] } unless value.to_s.empty?
    end
  end
  private_class_method :translatable_sources

  # Fold one locale: settle the human/reused cells, ask the model only for what is left, then write.
  # Returns the number of cells resolved for this locale.
  def fold_locale!(catalog, locale, sources, human, ai_translator)
    plan = plan_locale(catalog, locale, sources, human)
    ai_reply = translate(ai_translator, plan[:fresh], locale)
    cells = plan[:cells].merge(machine_cells(plan[:fresh], ai_reply))
    cells.each { |key, unit| set_cell!(catalog, key, locale, unit) }
    cells.size
  end
  private_class_method :fold_locale!

  # { key => machine stringUnit } for the fresh entries. Keys are disjoint from the human/reused cells because
  # `plan_locale` puts every key in exactly one of the two groups.
  def machine_cells(fresh, ai_reply)
    fresh.to_h { |entry| [entry[:key], ai_cell(ai_reply[entry[:key]], entry[:source])] }
  end
  private_class_method :machine_cells

  # Split this locale's keys into ready `cells` ({ key => stringUnit }: human ⇒ `translated`, reusable existing
  # cell ⇒ kept as-is) and `fresh` ([{ key:, source:, comment: }] that still need a translation).
  def plan_locale(catalog, locale, sources, human)
    cells = {}
    fresh = []
    sources.each do |key, info|
      human_value = human[key]
      if !human_value.to_s.empty?
        cells[key] = cell('translated', human_value)
      elsif (reused = reusable_cell(catalog, key, locale, info[:source]))
        cells[key] = reused
      else
        fresh << { key: key, source: info[:source], comment: info[:comment] }
      end
    end
    { cells: cells, fresh: fresh }
  end
  private_class_method :plan_locale

  # The existing stringUnit to keep, or nil. A cell is kept when its value is present, is not merely the
  # English source (that is an unfilled fallback worth retrying) and passes the placeholder gate.
  def reusable_cell(catalog, key, locale, source)
    unit = catalog.dig('strings', key, 'localizations', locale, 'stringUnit')
    return nil if unit.nil?

    value = unit['value'].to_s
    return nil if value.empty? || value == source || !TranslationValidator.placeholders_match?(source, value)

    unit
  end
  private_class_method :reusable_cell

  # The model's reply for `fresh`, or {} when there is no translator, nothing to translate, or a nil reply.
  def translate(ai_translator, fresh, locale)
    return {} if ai_translator.nil? || fresh.empty?

    ai_translator.call(fresh, locale) || {}
  end
  private_class_method :translate

  # A `needs_review` cell holding the AI translation, or the English source when the reply is missing, empty,
  # or fails the placeholder gate. Gating here mirrors `reusable_cell`: a reply that would be rejected on the
  # next run is never written in the first place.
  def ai_cell(translation, source)
    candidate = translation.to_s
    usable = !candidate.empty? && TranslationValidator.placeholders_match?(source, candidate)
    cell('needs_review', usable ? translation : source)
  end
  private_class_method :ai_cell

  # Store `unit` as the locale's stringUnit. A reused cell arrives as the very object already in the catalog;
  # it is skipped so the localization entry (including any sibling keys next to `stringUnit`) stays untouched.
  def set_cell!(catalog, key, locale, unit)
    localizations = (catalog['strings'][key]['localizations'] ||= {})
    return if localizations.dig(locale, 'stringUnit').equal?(unit)

    localizations[locale] = { 'stringUnit' => unit }
  end
  private_class_method :set_cell!

  # Build a stringUnit hash.
  def cell(state, value)
    { 'state' => state, 'value' => value }
  end
  private_class_method :cell
end
# Unit suite for CatalogStrings.fold_translations! — the regular-string reverse fold into a String Catalog.
# Covers provenance (human => translated; machine / English fallback => needs_review), the reuse rule (a valid
# existing cell is kept without a model call; an English-fallback or placeholder-broken cell is retried),
# key-as-source strings, shouldTranslate, and the single batched AI call per locale. The AI tier is a stub
# lambda, so nothing here touches the network.
class CatalogStringsFoldTest < Minitest::Test
  def test_human_translation_is_used_and_marked_translated
    cat = catalog_of('a' => english('Save'))
    count = run_fold(cat, human: { 'fr' => { 'a' => 'Enregistrer' } })

    assert_equal 1, count
    assert_fr_unit cat, 'a', 'translated', 'Enregistrer'
  end

  def test_ai_fills_missing_and_marks_needs_review
    cat = catalog_of('a' => english('Save'))
    translator, = stub_translator({ 'a' => 'Enregistrer' })
    run_fold(cat, ai: translator)

    assert_fr_unit cat, 'a', 'needs_review', 'Enregistrer'
  end

  def test_english_fallback_when_no_human_and_no_ai
    cat = catalog_of('a' => english('Save'))
    run_fold(cat)

    assert_fr_unit cat, 'a', 'needs_review', 'Save'
  end

  def test_existing_machine_cell_is_reused_without_calling_the_model
    cat = catalog_of('a' => english('Save', locs: { 'fr' => localization('needs_review', 'Enregistrer') }))
    translator, log = stub_translator({})
    run_fold(cat, ai: translator)

    assert_empty log, 'a reusable machine cell must not trigger a model call'
    assert_fr_unit cat, 'a', 'needs_review', 'Enregistrer'
  end

  def test_english_fallback_cell_is_retried_not_reused
    # A cell that merely repeats the English source is an unfilled fallback from an earlier run: retry it.
    cat = catalog_of('a' => english('Save', locs: { 'fr' => localization('needs_review', 'Save') }))
    translator, log = stub_translator({ 'a' => 'Enregistrer' })
    run_fold(cat, ai: translator)

    requested_keys = log.first[:entries].map { |requested| requested[:key] }
    assert_equal(['a'], requested_keys)
    assert_fr_unit cat, 'a', 'needs_review', 'Enregistrer'
  end

  def test_placeholder_broken_cell_is_retried
    cat = catalog_of('a' => english('%1$d posts', locs: { 'fr' => localization('needs_review', 'articles') }))
    translator, = stub_translator({ 'a' => '%1$d articles' })
    run_fold(cat, ai: translator)

    assert_fr_unit cat, 'a', 'needs_review', '%1$d articles'
  end

  def test_human_supersedes_existing_machine_cell
    cat = catalog_of('a' => english('Save', locs: { 'fr' => localization('needs_review', 'old machine value') }))
    run_fold(cat, human: { 'fr' => { 'a' => 'Enregistrer' } })

    assert_fr_unit cat, 'a', 'translated', 'Enregistrer'
  end

  def test_key_as_source_string_uses_the_key_as_english
    key = '%1$@ on %2$@'
    cat = catalog_of(key => {}) # no English localization: the key itself is the source text
    translator, log = stub_translator({})
    run_fold(cat, ai: translator)

    assert_equal key, log.first[:entries].first[:source]
    assert_fr_unit cat, key, 'needs_review', key
  end

  def test_should_translate_false_is_skipped
    cat = catalog_of(
      'a' => english('Save'),
      'b' => english('WordPress').merge('shouldTranslate' => false)
    )
    count = run_fold(cat)

    assert_equal 1, count
    assert_nil unit_at(cat, 'b', 'fr'), 'shouldTranslate:false entries get no translations'
  end

  def test_source_locale_is_not_folded
    cat = catalog_of('a' => english('Save'))
    english_before = cat.dig('strings', 'a', 'localizations', 'en')
    run_fold(cat, locales: %w[en fr])

    assert_same english_before, cat.dig('strings', 'a', 'localizations', 'en')
  end

  def test_ai_called_once_per_locale_with_batched_entries
    cat = catalog_of('a' => english('Save'), 'b' => english('Posts: %1$d', comment: 'count'))
    translator, log = stub_translator({ 'a' => 'Enregistrer', 'b' => 'Articles : %1$d' })
    run_fold(cat, ai: translator)

    assert_equal 1, log.size
    assert_equal 'fr', log.first[:locale]
    expected_entries = [
      { key: 'a', source: 'Save', comment: nil },
      { key: 'b', source: 'Posts: %1$d', comment: 'count' }
    ]
    assert_equal(expected_entries, log.first[:entries])
  end

  def test_counts_cells_across_locales
    cat = catalog_of('a' => english('Save'))

    assert_equal 2, run_fold(cat, locales: %w[en fr de])
  end

  private

  # One localization entry wrapping a stringUnit.
  def localization(state, value)
    { 'stringUnit' => { 'state' => state, 'value' => value } }
  end

  # A catalog entry with an explicit English value, an optional comment and optional existing localizations.
  def english(text, comment: nil, locs: {})
    localizations = { 'en' => localization('translated', text) }.merge(locs)
    comment ? { 'localizations' => localizations, 'comment' => comment } : { 'localizations' => localizations }
  end

  # A minimal catalog document around `strings`.
  def catalog_of(strings)
    { 'sourceLanguage' => 'en', 'version' => '1.0', 'strings' => strings }
  end

  # The stringUnit stored for (key, locale), or nil.
  def unit_at(cat, key, locale)
    cat.dig('strings', key, 'localizations', locale, 'stringUnit')
  end

  # Asserts the French stringUnit for `key` is exactly { state, value }.
  def assert_fr_unit(cat, key, state, value)
    assert_equal({ 'state' => state, 'value' => value }, unit_at(cat, key, 'fr'))
  end

  # [translator, log]: a stub that always answers `reply` and appends each (entries, locale) call to `log`.
  def stub_translator(reply)
    log = []
    translator = lambda do |entries, locale|
      log << { entries: entries, locale: locale }
      reply
    end
    [translator, log]
  end

  # Runs the fold with the suite's defaults (targets en + fr, no human translations, no AI).
  def run_fold(cat, human: {}, locales: %w[en fr], ai: nil)
    CatalogStrings.fold_translations!(cat, translations_by_locale: human, locales: locales, ai_translator: ai)
  end
end
+ desc 'Download step: pull GlotPress translations into the *.lproj dirs (then run localize_catalog)' + lane :download_catalog_strings do |options| + locales = catalog_target_locales(options[:locales]) + ios_download_strings_files_from_glotpress( + project_url: GLOTPRESS_APP_STRINGS_PROJECT_URL, + locales: locales, + download_dir: File.join(PROJECT_ROOT_FOLDER, 'WordPress', 'Resources') + ) + UI.success("Downloaded GlotPress translations for #{locales.values.uniq.size} locale(s). Next: run localize_catalog (same locales:).") + end + + # LOCALIZE (fold + AI-fill) — fill the per-locale translations into the EXISTING Localizable.xcstrings: fold + # the downloaded GlotPress translations in (human => translated), then AI-fill the cells they leave empty + # (=> needs_review). It does NOT scan the code and does NOT download — so each stage is its own invocation: + # generate_strings_catalog (scan) -> download_catalog_strings (download) -> localize_catalog (this). + # Running the scan separately means you can refresh the catalog without ever touching the AI. + # + # Uploading the AI drafts back to GlotPress as needs-review (the eventual "step 4") is a separate step, not + # done here — it builds on the existing GlotPress import integration (cf. gp_update_metadata_source). + # + # STAGED, NOT SHIPPED: Localizable.xcstrings isn't the runtime store yet (the app still ships + # Localizable.strings), so this only pre-populates it for the cutover — it changes nothing users see. + # + # MANUAL ONLY — not wired into download_localized_strings or any CI step: it calls the translation API (cost) + # and commits a large catalog. Set ANTHROPIC_API_KEY for the AI rung; scope a cheap run with `locales:fr`. 
+ desc 'Localize step: fold downloaded GlotPress translations + AI-fill into the existing Localizable.xcstrings (run generate_strings_catalog first)' + lane :localize_catalog do |options| + UI.user_error!("#{LOCALIZABLE_CATALOG} not found — run generate_strings_catalog first") unless File.exist?(LOCALIZABLE_CATALOG) + locales = catalog_target_locales(options[:locales]) + + # Fold the downloaded human translations in (=> translated), then AI-fill the cells they leave empty. + catalog = JSON.parse(File.read(LOCALIZABLE_CATALOG)) + written = CatalogStrings.fold_translations!( + catalog, + translations_by_locale: catalog_translations_by_locale(File.join(PROJECT_ROOT_FOLDER, 'WordPress', 'Resources')), + locales: locales.values.uniq, + ai_translator: catalog_ai_translator + ) + File.write(LOCALIZABLE_CATALOG, "#{JSON.pretty_generate(catalog)}\n") + UI.success("Built #{File.basename(LOCALIZABLE_CATALOG)}: folded #{written} cell(s) across #{locales.values.uniq.size} locale(s).") + + git_add(path: LOCALIZABLE_CATALOG, shell_escape: false) + git_commit(path: [LOCALIZABLE_CATALOG], message: 'Update Localizable.xcstrings translations (staged)', allow_nothing_to_commit: true) + end + ################################################# # Helpers ################################################# @@ -106,6 +157,15 @@ def catalog_excluded?(path) File.basename(path) == 'AppLocalizedString.swift' end + # Run `xcstringstool ` quietly via argv (no shell, so source paths with spaces are safe), capturing + # output and surfacing it only on failure. Used instead of fastlane's `sh` for these bulk calls: each passes + # hundreds of file paths (or a `--stringsdata` pair per file), so `sh` would echo a massive command line AND + # print a "Step: shell command" banner per call. Open3 keeps the run silent and banner-free. 
+ def run_xcstringstool(*args) + output, status = Open3.capture2e('xcrun', 'xcstringstool', *args) + UI.user_error!("xcstringstool #{args.first} failed:\n#{output}") unless status.success? + end + # xcstringstool extract -> one .stringsdata per source file (basename-disambiguated). Chunked to stay under # the OS argument limit; each chunk gets its own output subdir (see below), which sync then consumes together. # `--SwiftUI-Text` (extract `Text("literal")`) is OFF by default and gated behind `swiftui:`. The app has @@ -124,10 +184,12 @@ def extract_stringsdata(files:, output_dir:, swiftui: false) # source basename and only disambiguates collisions WITHIN a single invocation — so two same-named files # in different chunks (e.g. the two NSDate+Helpers.swift / SupportDataProvider.swift) would otherwise # overwrite each other in a shared dir and silently drop strings. - files.each_slice(400).with_index do |chunk, index| + batches = files.each_slice(400).to_a + batches.each_with_index do |chunk, index| chunk_dir = File.join(output_dir, "chunk-#{index}") FileUtils.mkdir_p(chunk_dir) - sh('xcrun', 'xcstringstool', 'extract', *chunk, *flags, '--output-directory', chunk_dir) + UI.message("Extracting strings… (batch #{index + 1}/#{batches.size})") + run_xcstringstool('extract', *chunk, *flags, '--output-directory', chunk_dir) end end @@ -144,7 +206,8 @@ def sync_localizable_catalog(stringsdata_dir:) stringsdata = stringsdata_files(stringsdata_dir) UI.user_error!('xcstringstool produced no .stringsdata') if stringsdata.empty? 
- sh('xcrun', 'xcstringstool', 'sync', LOCALIZABLE_CATALOG, *stringsdata.flat_map { |f| ['--stringsdata', f] }) + UI.message("Syncing #{stringsdata.count} extracted file(s) into #{File.basename(LOCALIZABLE_CATALOG)}…") + run_xcstringstool('sync', LOCALIZABLE_CATALOG, *stringsdata.flat_map { |f| ['--stringsdata', f] }) JSON.parse(File.read(LOCALIZABLE_CATALOG))['strings'].count end @@ -179,7 +242,7 @@ def current_english_values(stringsdata_dir) fresh = File.join(tmp, 'Localizable.xcstrings') File.write(fresh, "#{JSON.pretty_generate('sourceLanguage' => 'en', 'strings' => {}, 'version' => '1.0')}\n") stringsdata = stringsdata_files(stringsdata_dir) - sh('xcrun', 'xcstringstool', 'sync', fresh, *stringsdata.flat_map { |f| ['--stringsdata', f] }) + run_xcstringstool('sync', fresh, *stringsdata.flat_map { |f| ['--stringsdata', f] }) english_values(JSON.parse(File.read(fresh))) end end @@ -199,4 +262,46 @@ def report_catalog(path, extracted_count:, reconciled_count:) message += " Re-flagged #{reconciled_count} for review (English source changed)." if reconciled_count.positive? UI.success(message) end + + # The { glotpress => lproj } locale map to operate on: all ship locales, or the subset named in `locales:` + # (a comma-separated list of lproj codes, e.g. `locales:fr,de`) for a cheap scoped run. + def catalog_target_locales(spec) + return GLOTPRESS_TO_LPROJ_APP_LOCALE_CODES if spec.to_s.strip.empty? + + wanted = spec.to_s.split(',').map(&:strip) + selected = GLOTPRESS_TO_LPROJ_APP_LOCALE_CODES.select { |_glotpress, lproj| wanted.include?(lproj) } + UI.user_error!("No known ship locales among #{spec.inspect} (use lproj codes, e.g. fr,de,pt-BR)") if selected.empty? + selected + end + + # { lproj => { key => human value } } from the downloaded translation `.strings`. The flat plural keys present + # in these files aren't catalog keys, so the fold ignores them (they belong to Plurals.xcstrings). 
+ def catalog_translations_by_locale(dir) + Dir.glob(File.join(dir, '*.lproj', 'Localizable.strings')).each_with_object({}) do |path, acc| + locale = File.basename(File.dirname(path), '.lproj') + acc[locale] = Fastlane::Helper::Ios::L10nHelper.read_strings_file_as_hash(path: path) + end + end + + # The AI tier for the catalog fold, or nil when ANTHROPIC_API_KEY isn't set (the fold then fills only human + + # English). Returns `call(entries, locale) => { key => translation }` via AITranslator#translate_all, + # degrading to {} on a per-locale API failure so one locale can't abort the whole fold. + def catalog_ai_translator + if ENV['ANTHROPIC_API_KEY'].to_s.empty? + UI.important('ANTHROPIC_API_KEY not set — folding human + English only; undefined cells stay English (needs_review).') + return nil + end + + require_relative 'ai_translator' + translator = AITranslator.with_anthropic + lambda do |entries, locale| + translator.translate_all(entries, locale: locale) + rescue StandardError => e + UI.error("AI catalog translation failed for #{locale} (#{e.message}); leaving its undefined cells to English.") + {} + end + rescue LoadError => e + UI.important("AI translation tier unavailable (#{e.message}); folding human + English only.") + nil + end end