diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml index 7e754a2..f7ba73f 100644 --- a/.github/workflows/autoblack.yml +++ b/.github/workflows/autoblack.yml @@ -1,23 +1,23 @@ -# GitHub Action that uses Black to reformat the Python code in an incoming pull request. -# If all Python code in the pull request is compliant with Black then this Action does nothing. -# Othewrwise, Black is run and its changes are committed back to the incoming pull request. -# https://github.com/cclauss/autoblack +# GitHub Action that uses Black to reformat the Python code in an +# incoming pull request. If all Python code in the pull request is +# compliant with Black then this Action does nothing. Otherwise, +# Black is run and its changes are committed back to the incoming pull +# request. https://github.com/cclauss/autoblack +--- name: autoblack on: [pull_request] jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.13 - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - name: Set up Python 3.14 + uses: actions/setup-python@v6 with: python-version: 3.14 - name: Install click, black and isort - run: pip install 'click==8.2.1' 'black==25.11.0' 'isort==5.13.2' - - name: Run isort --check . - run: isort --check . + run: pip install 'click==8.2.1' 'black==25.11.0' 'isort==8.0.1' - name: Run black --check --diff . run: black --check --diff . - name: If needed, commit black changes to the pull request diff --git a/.github/workflows/isort-and-black-checks.yml b/.github/workflows/isort-and-black-checks.yml deleted file mode 100644 index 1273de1..0000000 --- a/.github/workflows/isort-and-black-checks.yml +++ /dev/null @@ -1,32 +0,0 @@ -# GitHub Action that uses Black to reformat the Python code in an incoming pull request. -# If all Python code in the pull request is compliant with Black then this Action does nothing. 
-# Othewrwise, Black is run and its changes are committed back to the incoming pull request. -# https://github.com/cclauss/autoblack - -name: isort and black check -on: [pull_request] -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.14 - uses: actions/setup-python@v5 - with: - python-version: 3.13 - - name: Install click, black and isort - run: pip install 'click==8.0.4' 'black==25.1.0' 'isort==5.13.2' - - name: Run isort --check . - run: isort --check . - - name: Run black --check . - run: black --check . - # - name: If needed, commit black changes to the pull request - # if: failure() - # run: | - # black . - # git config --global user.name 'autoblack' - # git config --global user.email 'rocky@users.noreply.github.com' - # git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY - # git checkout $GITHUB_HEAD_REF - # git commit -am "fixup: Format Python code with Black" - # git push diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e4d9cc..cc6612c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,23 @@ +--- default_language_version: python: python repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: - - id: check-merge-conflict - - id: debug-statements - stages: [commit] - - id: end-of-file-fixer - stages: [commit] - - id: trailing-whitespace -- repo: https://github.com/psf/black + - id: check-merge-conflict + - id: debug-statements + stages: [commit] + - id: end-of-file-fixer + stages: [commit] + - id: trailing-whitespace + - repo: https://github.com/psf/black rev: 25.11.0 hooks: - - id: black - language_version: python3 - exclude: 'mathicsscript/version.py' -- repo: https://github.com/pycqa/flake8 + - id: black + language_version: python3 + exclude: 'pymathics/icu/version.py' + - repo: https://github.com/pycqa/flake8 rev: 
3.9.2 hooks: - id: flake8 diff --git a/pymathics/icu/__init__.py b/pymathics/icu/__init__.py index 5e78af5..74fe247 100644 --- a/pymathics/icu/__init__.py +++ b/pymathics/icu/__init__.py @@ -26,7 +26,7 @@ = {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ} """ -from pymathics.icu.__main__ import Alphabet, AlphabeticOrder, Language +from pymathics.icu.__main__ import Alphabet, AlphabeticOrder from pymathics.icu.version import __version__ pymathics_version_data = { @@ -39,7 +39,6 @@ __all__ = [ "Alphabet", "AlphabeticOrder", - "Language", "pymathics_version_data", "__version__", ] diff --git a/pymathics/icu/__main__.py b/pymathics/icu/__main__.py index 45aac4c..dfe7500 100644 --- a/pymathics/icu/__main__.py +++ b/pymathics/icu/__main__.py @@ -4,13 +4,17 @@ Languages - Human-Language Alphabets and Locales via PyICU. """ -from typing import List, Optional +from dataclasses import dataclass +from typing import Any, Final, Optional -from icu import Collator, Locale, LocaleData +from icu import Collator, Locale, LocaleData, UCollAttribute, UCollAttributeValue +from mathics.builtin.system import LANGUAGE from mathics.core.atoms import Integer, String -from mathics.core.builtin import Builtin, Predefined +from mathics.core.builtin import Builtin from mathics.core.convert.expression import to_mathics_list from mathics.core.evaluation import Evaluation +from mathics.core.symbols import Symbol, SymbolFalse, SymbolTrue +from mathics.core.systemsymbols import SymbolAutomatic available_locales = Locale.getAvailableLocales() language2locale = { @@ -18,11 +22,124 @@ for locale_name, availableLocale in available_locales.items() } -# The current value of $Language -LANGUAGE = "English" +StringAutomatic: Final[String] = String("System`Automatic") +LowerFirst: Final[set[String]] = {String("System`LowerFirst"), String("LowerFirst")} +StringUpperFirst: Final[String] = String("UpperFirst") +SymbolLanguage: Final[String] = 
Symbol("System`$Language") -def eval_alphabet(language_name: String) -> Optional[List[String]]: +@dataclass(frozen=True) +class AlphabeticOrderOptions: + """ + Stores options associated with AlphabeticOrder[] builtin. + + Once initialized, this structure is immutable or frozen. + """ + + lowercase_ordering: Optional[bool] = None + """'True' if ordering should be lowercase first, 'False' if it should be uppercase first, + and 'None' if we should use the natural alphabet ordering case.""" + + ignore_case: bool = False + """whether to ignore upper versus lower case""" + + ignore_diacritics: bool = False + """whether to ignore diacritics for ordering""" + + ignore_punctuation: bool = False + """whether to ignore punctuation for ordering""" + + language: str = LANGUAGE + """what language or alphabet to assume""" + + @classmethod + def from_dict( + cls, options: dict[str, Any], evaluation: Evaluation + ) -> Optional["AlphabeticOrderOptions"]: + """Factory method that normalizes, type-checks, and builds the frozen structure + from a raw dict[str, Any]. 
+ """ + key_mapping = { + "System`CaseOrdering": "lowercase_ordering", + "System`IgnoreCase": "ignore_case", + "System`IgnoreDiacritics": "ignore_diacritics", + "System`IgnorePunctuation": "ignore_punctuation", + "System`Language": "language", + } + + # This will hold our cleaned, type-converted parameters + processed_args: dict[str, Any] = { + "lowercase_ordering": None, + "ignore_case": False, + "ignore_diacritics": False, + "ignore_punctuation": False, + "language": LANGUAGE, + } + + # Iterate through the user-provided options dictionary + for raw_key, option_value in options.items(): + normalized_key = key_mapping.get(raw_key) + + if not normalized_key: + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + + # Type parsing and validation based on the target field name + if normalized_key in ( + "ignore_case", + "ignore_diacritics", + "ignore_punctuation", + ): + if option_value not in (SymbolTrue, SymbolFalse): + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + processed_args[normalized_key] = option_value.value + + elif normalized_key == "language": + if option_value is SymbolLanguage: + option_value = String(LANGUAGE) + + if not isinstance(option_value, String): + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + processed_args[normalized_key] = option_value + + elif normalized_key == "lowercase_ordering": + if (option_value is SymbolAutomatic) or option_value == "Automatic": + processed_args[normalized_key] = None + elif option_value in LowerFirst: + processed_args[normalized_key] = True + elif option_value == StringUpperFirst: + processed_args[normalized_key] = False + else: + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + + # Initialize and return the frozen dataclass using our verified arguments + 
return cls(**processed_args) + + +def eval_alphabet(language_name: String) -> Optional[list[String]]: py_language_name = language_name.value locale = language2locale.get(py_language_name, py_language_name) @@ -32,7 +149,9 @@ def eval_alphabet(language_name: String) -> Optional[List[String]]: return to_mathics_list(*alphabet_set, elements_conversion_fn=String) -def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> int: +def eval_alphabetic_order( + string1: str, string2: str, language_name, options: AlphabeticOrderOptions +) -> int: """ Compare two strings using locale-sensitive alphabetic order. @@ -43,6 +162,52 @@ def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> """ locale_str = language_to_locale(language_name) collator = Collator.createInstance(Locale(locale_str)) + + # Configure Case and Diacritic (Accent) rules via Collator Strength + # - PRIMARY: Only looks at the base letter (ignores case AND accents). + # - SECONDARY: Looks at base letters + accents (ignores case). + # - TERTIARY: Looks at base letters + accents + case (Default strict sorting). + + if options.ignore_case and options.ignore_diacritics: + # Ignore both accent variations and case sizes + collator.setStrength(Collator.PRIMARY) + + elif options.ignore_case and not options.ignore_diacritics: + # Ignore upper vs lower case, but treat 'e' and 'é' as different letters + collator.setStrength(Collator.SECONDARY) + + elif not options.ignore_case and options.ignore_diacritics: + # Ignore accents, but treat 'A' and 'a' as different letters. + # ICU handles this by setting strength to PRIMARY but turning on Case Level. + collator.setStrength(Collator.PRIMARY) + collator.setAttribute(UCollAttribute.CASE_LEVEL, UCollAttributeValue.ON) + + else: + # Default: strict matching on both case and diacritics + collator.setStrength(Collator.TERTIARY) + + # Configure Punctuation ignoring + # In ICU, ignoring punctuation is called "Alternate Handling". 
Turning it + # to SHIFTED moves punctuation tokens to the very end of the weight table, + # effectively ignoring them during normal alphanumeric string comparison. + if options.ignore_punctuation: + collator.setAttribute( + UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.SHIFTED + ) + else: + collator.setAttribute( + UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.NON_IGNORABLE + ) + + if options.lowercase_ordering: + collator.setAttribute( + UCollAttribute.CASE_FIRST, UCollAttributeValue.LOWER_FIRST + ) + elif options.lowercase_ordering is False: + collator.setAttribute( + UCollAttribute.CASE_FIRST, UCollAttributeValue.UPPER_FIRST + ) + comparison = collator.compare(string1, string2) if comparison < 0: return 1 @@ -107,11 +272,11 @@ class Alphabet(Builtin): """ messages = { - "nalph": "The alphabet `` is not known or not available.", + "nalph": "The alphabet `1` is not known or not available.", } rules = { - "Alphabet[]": """Alphabet[Pymathics`$Language]""", + "Alphabet[]": """Alphabet[$Language]""", } summary_text = "lowercase letters in an alphabet" @@ -133,6 +298,11 @@ class AlphabeticOrder(Builtin):
gives 1 if $string_1$ appears before $string_2$ in alphabetical order, -1 if it is after, and 0 if it is identical. + The alphabetic order of two characters: + >> AlphabeticOrder["e", "f"] + = 1 + + The alphabetic order of two strings: >> AlphabeticOrder["apple", "banana"] = 1 @@ -143,6 +313,16 @@ class AlphabeticOrder(Builtin): >> AlphabeticOrder["A", "a"] = -1 + However, you can choose which case comes first using the 'CaseOrdering' option: + >> AlphabeticOrder["a", "A", CaseOrdering -> "LowerFirst"] + = 1 + + >> AlphabeticOrder["a", "A", CaseOrdering -> "UpperFirst"] + = -1 + + >> AlphabeticOrder["a", "A"] == AlphabeticOrder["a", "A", CaseOrdering -> "LowerFirst"] + = True + Longer words follow their prefixes: >> AlphabeticOrder["Papagayo", "Papa", "Spanish"] = -1 @@ -153,70 +333,62 @@ class AlphabeticOrder(Builtin): >> AlphabeticOrder["Papá", "Papagayo", "Spanish"] = 1 + + The alphabetic ordering is determined by the value of :$Language: + doc/reference-of-built-in-symbols/global-system-information/$language/. 
However, \ + you can specify the language as the third argument: + >> AlphabeticOrder["ñ", "n", "Spanish"] + = -1 + + Option 'IgnorePunctuation' specifies whether to remove punctuation characters before comparing the strings: + + >> AlphabeticOrder["Name-1", "Name.1", "Spanish", IgnorePunctuation -> True] + = 0 + + >> AlphabeticOrder["it's", "its", "English", IgnorePunctuation -> False] + = 1 + + >> AlphabeticOrder["it's", "its", "English", IgnorePunctuation -> True] + = 0 """ - summary_text = "compare strings according to an alphabet" + eval_error = Builtin.generic_argument_error + expected_args = range(1, 4) + options = { + "System`CaseOrdering": "Automatic", + "System`IgnoreCase": "False", + "System`IgnoreDiacritics": "False", + "System`IgnorePunctuation": "False", + "System`Language": "$Language", + } + summary_text = "return -1, 0, 1 comparing the alphabetic order of two strings" - def eval(self, string1: String, string2: String, evaluation: Evaluation): - """AlphabeticOrder[string1_String, string2_String]""" - return Integer(eval_alphabetic_order(string1.value, string2.value)) + def eval( + self, string1: String, string2: String, evaluation: Evaluation, options: dict + ): + """AlphabeticOrder[string1_String, string2_String, OptionsPattern[%(name)s]]""" + lang = String(LANGUAGE) + return self.eval_with_lang(string1, string2, lang, options, evaluation) def eval_with_lang( - self, string1: String, string2: String, lang: String, evaluation: Evaluation + self, + string1: String, + string2: String, + lang: String, + options: dict, + evaluation: Evaluation, ): - """AlphabeticOrder[string1_String, string2_String, lang_String]""" + """AlphabeticOrder[string1_String, string2_String, lang_String, OptionsPattern[%(name)s]]""" + + alphabetic_order_options = AlphabeticOrderOptions.from_dict(options, evaluation) + if alphabetic_order_options is None: + return + return Integer( eval_alphabetic_order( string1.value, string2.value, lang.value, + alphabetic_order_options, ) ) - - -## 
FIXME: move to mathics-core. Will have to change references to Pymathics`$Language to $Language -class Language(Predefined): - """ - - :WMA link: - https://reference.wolfram.com/language/ref/\\$Language.html - -
-
'\\$Language' -
is a settable global variable for the default language used in Mathics3. -
- - See the language in effect used for functions like 'Alphabet[]': - - By setting its value, The letters of 'Alphabet[]' are changed: - - >> $Language = "German"; Alphabet[] - = ... - - #> $Language = "English" - = English - - See also - :Alphabet: - /doc/mathics3-modules/icu-international-components-for-unicode/languages-human-language-alphabets-and-locales-via-pyicu/alphabet/. - """ - - name = "$Language" - messages = { - "notstr": "`1` is not a string. Only strings can be set as the value of $Language.", - } - - summary_text = "settable global variable giving the default language" - value = f'"{LANGUAGE}"' - # Rules has to come after "value" - rules = { - "Pymathics`$Language": value, - } - - def eval_set(self, value, evaluation: Evaluation): - """Set[Pymathics`$Language, value_]""" - if isinstance(value, String): - evaluation.definitions.set_ownvalue("$Language", value) - else: - evaluation.message("Pymathics`$Language", "notstr", value) - return value diff --git a/pymathics/icu/version.py b/pymathics/icu/version.py index e2fcf46..cd2e848 100644 --- a/pymathics/icu/version.py +++ b/pymathics/icu/version.py @@ -5,4 +5,4 @@ # well as importing into Python. That's why there is no # space around "=" below. # fmt: off -__version__="10.0.0" # noqa +__version__="10.0.1.dev0" # noqa