diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml index 7e754a2..f7ba73f 100644 --- a/.github/workflows/autoblack.yml +++ b/.github/workflows/autoblack.yml @@ -1,23 +1,23 @@ -# GitHub Action that uses Black to reformat the Python code in an incoming pull request. -# If all Python code in the pull request is compliant with Black then this Action does nothing. -# Othewrwise, Black is run and its changes are committed back to the incoming pull request. -# https://github.com/cclauss/autoblack +# GitHub Action that uses Black to reformat the Python code in an +# incoming pull request. If all Python code in the pull request is +# compliant with Black then this Action does nothing. Otherwise, +# Black is run and its changes are committed back to the incoming pull +# request. https://github.com/cclauss/autoblack +--- name: autoblack on: [pull_request] jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.13 - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - name: Set up Python 3.14 + uses: actions/setup-python@v6 with: python-version: 3.14 - name: Install click, black and isort - run: pip install 'click==8.2.1' 'black==25.11.0' 'isort==5.13.2' - - name: Run isort --check . - run: isort --check . + run: pip install 'click==8.2.1' 'black==25.11.0' 'isort==8.0.1' - name: Run black --check --diff . run: black --check --diff . - name: If needed, commit black changes to the pull request diff --git a/.github/workflows/isort-and-black-checks.yml b/.github/workflows/isort-and-black-checks.yml deleted file mode 100644 index 1273de1..0000000 --- a/.github/workflows/isort-and-black-checks.yml +++ /dev/null @@ -1,32 +0,0 @@ -# GitHub Action that uses Black to reformat the Python code in an incoming pull request. -# If all Python code in the pull request is compliant with Black then this Action does nothing. 
-# Othewrwise, Black is run and its changes are committed back to the incoming pull request. -# https://github.com/cclauss/autoblack - -name: isort and black check -on: [pull_request] -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.14 - uses: actions/setup-python@v5 - with: - python-version: 3.13 - - name: Install click, black and isort - run: pip install 'click==8.0.4' 'black==25.1.0' 'isort==5.13.2' - - name: Run isort --check . - run: isort --check . - - name: Run black --check . - run: black --check . - # - name: If needed, commit black changes to the pull request - # if: failure() - # run: | - # black . - # git config --global user.name 'autoblack' - # git config --global user.email 'rocky@users.noreply.github.com' - # git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY - # git checkout $GITHUB_HEAD_REF - # git commit -am "fixup: Format Python code with Black" - # git push diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e4d9cc..cc6612c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,23 @@ +--- default_language_version: python: python repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: - - id: check-merge-conflict - - id: debug-statements - stages: [commit] - - id: end-of-file-fixer - stages: [commit] - - id: trailing-whitespace -- repo: https://github.com/psf/black + - id: check-merge-conflict + - id: debug-statements + stages: [commit] + - id: end-of-file-fixer + stages: [commit] + - id: trailing-whitespace + - repo: https://github.com/psf/black rev: 25.11.0 hooks: - - id: black - language_version: python3 - exclude: 'mathicsscript/version.py' -- repo: https://github.com/pycqa/flake8 + - id: black + language_version: python3 + exclude: 'pymathics/icu/version.py' + - repo: https://github.com/pycqa/flake8 rev: 
3.9.2 hooks: - id: flake8 diff --git a/pymathics/icu/__init__.py b/pymathics/icu/__init__.py index 5e78af5..74fe247 100644 --- a/pymathics/icu/__init__.py +++ b/pymathics/icu/__init__.py @@ -26,7 +26,7 @@ = {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ} """ -from pymathics.icu.__main__ import Alphabet, AlphabeticOrder, Language +from pymathics.icu.__main__ import Alphabet, AlphabeticOrder from pymathics.icu.version import __version__ pymathics_version_data = { @@ -39,7 +39,6 @@ __all__ = [ "Alphabet", "AlphabeticOrder", - "Language", "pymathics_version_data", "__version__", ] diff --git a/pymathics/icu/__main__.py b/pymathics/icu/__main__.py index 45aac4c..dfe7500 100644 --- a/pymathics/icu/__main__.py +++ b/pymathics/icu/__main__.py @@ -4,13 +4,17 @@ Languages - Human-Language Alphabets and Locales via PyICU. """ -from typing import List, Optional +from dataclasses import dataclass +from typing import Any, Final, Optional -from icu import Collator, Locale, LocaleData +from icu import Collator, Locale, LocaleData, UCollAttribute, UCollAttributeValue +from mathics.builtin.system import LANGUAGE from mathics.core.atoms import Integer, String -from mathics.core.builtin import Builtin, Predefined +from mathics.core.builtin import Builtin from mathics.core.convert.expression import to_mathics_list from mathics.core.evaluation import Evaluation +from mathics.core.symbols import Symbol, SymbolFalse, SymbolTrue +from mathics.core.systemsymbols import SymbolAutomatic available_locales = Locale.getAvailableLocales() language2locale = { @@ -18,11 +22,124 @@ for locale_name, availableLocale in available_locales.items() } -# The current value of $Language -LANGUAGE = "English" +StringAutomatic: Final[String] = String("System`Automatic") +LowerFirst: Final[set[String]] = {String("System`LowerFirst"), String("LowerFirst")} +StringUpperFirst: Final[String] = String("UpperFirst") +SymbolLanguage: Final[String] = 
Symbol("System`$Language") -def eval_alphabet(language_name: String) -> Optional[List[String]]: +@dataclass(frozen=True) +class AlphabeticOrderOptions: + """ + Stores options associated with the AlphabeticOrder[] builtin. + + Once initialized, this structure is immutable (frozen). + """ + + lowercase_ordering: Optional[bool] = None + """'True' if lowercase should sort first, 'False' if uppercase should sort first, + and 'None' if we should use the natural alphabet ordering case.""" + + ignore_case: bool = False + """whether to ignore upper versus lower case""" + + ignore_diacritics: bool = False + """whether to ignore diacritics for ordering""" + + ignore_punctuation: bool = False + """whether to ignore punctuation for ordering""" + + language: str = LANGUAGE + """what language or alphabet to assume""" + + @classmethod + def from_dict( + cls, options: dict[str, Any], evaluation: Evaluation + ) -> Optional["AlphabeticOrderOptions"]: + """Factory method that normalizes, type-checks, and builds the frozen structure + from a raw dict[str, Any]. 
+ """ + key_mapping = { + "System`CaseOrdering": "lowercase_ordering", + "System`IgnoreCase": "ignore_case", + "System`IgnoreDiacritics": "ignore_diacritics", + "System`IgnorePunctuation": "ignore_punctuation", + "System`Language": "language", + } + + # This will hold our cleaned, type-converted parameters + processed_args: dict[str, Any] = { + "lowercase_ordering": None, + "ignore_case": False, + "ignore_diacritics": False, + "ignore_punctuation": False, + "language": LANGUAGE, + } + + # Iterate through the user-provided options dictionary + for raw_key, option_value in options.items(): + normalized_key = key_mapping.get(raw_key) + + if not normalized_key: + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + + # Type parsing and validation based on the target field name + if normalized_key in ( + "ignore_case", + "ignore_diacritics", + "ignore_punctuation", + ): + if option_value not in (SymbolTrue, SymbolFalse): + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + processed_args[normalized_key] = option_value.value + + elif normalized_key == "language": + if option_value is SymbolLanguage: + option_value = String(LANGUAGE) + + if not isinstance(option_value, String): + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + processed_args[normalized_key] = option_value + + elif normalized_key == "lowercase_ordering": + if (option_value is SymbolAutomatic) or option_value == "Automatic": + processed_args[normalized_key] = None + elif option_value in LowerFirst: + processed_args[normalized_key] = True + elif option_value == StringUpperFirst: + processed_args[normalized_key] = False + else: + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + + # Initialize and return the frozen dataclass using our verified arguments + 
return cls(**processed_args) + + +def eval_alphabet(language_name: String) -> Optional[list[String]]: py_language_name = language_name.value locale = language2locale.get(py_language_name, py_language_name) @@ -32,7 +149,9 @@ def eval_alphabet(language_name: String) -> Optional[List[String]]: return to_mathics_list(*alphabet_set, elements_conversion_fn=String) -def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> int: +def eval_alphabetic_order( + string1: str, string2: str, language_name, options: AlphabeticOrderOptions +) -> int: """ Compare two strings using locale-sensitive alphabetic order. @@ -43,6 +162,52 @@ def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> """ locale_str = language_to_locale(language_name) collator = Collator.createInstance(Locale(locale_str)) + + # Configure Case and Diacritic (Accent) rules via Collator Strength + # - PRIMARY: Only looks at the base letter (ignores case AND accents). + # - SECONDARY: Looks at base letters + accents (ignores case). + # - TERTIARY: Looks at base letters + accents + case (Default strict sorting). + + if options.ignore_case and options.ignore_diacritics: + # Ignore both accent variations and case sizes + collator.setStrength(Collator.PRIMARY) + + elif options.ignore_case and not options.ignore_diacritics: + # Ignore upper vs lower case, but treat 'e' and 'é' as different letters + collator.setStrength(Collator.SECONDARY) + + elif not options.ignore_case and options.ignore_diacritics: + # Ignore accents, but treat 'A' and 'a' as different letters. + # ICU handles this by setting strength to PRIMARY but turning on Case Level. + collator.setStrength(Collator.PRIMARY) + collator.setAttribute(UCollAttribute.CASE_LEVEL, UCollAttributeValue.ON) + + else: + # Default: strict matching on both case and diacritics + collator.setStrength(Collator.TERTIARY) + + # Configure Punctuation ignoring + # In ICU, ignoring punctuation is called "Alternate Handling". 
Turning it + # to SHIFTED moves punctuation tokens to the very end of the weight table, + # effectively ignoring them during normal alphanumeric string comparison. + if options.ignore_punctuation: + collator.setAttribute( + UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.SHIFTED + ) + else: + collator.setAttribute( + UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.NON_IGNORABLE + ) + + if options.lowercase_ordering: + collator.setAttribute( + UCollAttribute.CASE_FIRST, UCollAttributeValue.LOWER_FIRST + ) + elif options.lowercase_ordering is False: + collator.setAttribute( + UCollAttribute.CASE_FIRST, UCollAttributeValue.UPPER_FIRST + ) + comparison = collator.compare(string1, string2) if comparison < 0: return 1 @@ -107,11 +272,11 @@ class Alphabet(Builtin): """ messages = { - "nalph": "The alphabet `` is not known or not available.", + "nalph": "The alphabet `1` is not known or not available.", } rules = { - "Alphabet[]": """Alphabet[Pymathics`$Language]""", + "Alphabet[]": """Alphabet[$Language]""", } summary_text = "lowercase letters in an alphabet" @@ -133,6 +298,11 @@ class AlphabeticOrder(Builtin):