From 2b94c322042eb8b6b36ef97bf8fa2b730467a3b6 Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Sun, 31 May 2026 20:00:19 -0400 Subject: [PATCH 1/3] Integrate linter rules into YAML checker so they can more easily be run from commandline; also allows calling from within ci/cd --- README.md | 24 + pyproject.toml | 2 +- src/dayamlchecker/__init__.py | 16 + src/dayamlchecker/accessibility.py | 636 ++++++- .../data/interview_linter_prompts.yml | 66 + .../data/plain_language_replacements.yml | 594 +++++++ src/dayamlchecker/messages.py | 423 +++++ src/dayamlchecker/style.py | 1461 +++++++++++++++++ src/dayamlchecker/yaml_structure.py | 234 ++- tests/test_style.py | 549 +++++++ tests/test_yaml_structure.py | 112 ++ 11 files changed, 4111 insertions(+), 6 deletions(-) create mode 100644 src/dayamlchecker/data/interview_linter_prompts.yml create mode 100644 src/dayamlchecker/data/plain_language_replacements.yml create mode 100644 src/dayamlchecker/style.py create mode 100644 tests/test_style.py diff --git a/README.md b/README.md index 32ec87a..eeb9459 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,30 @@ python3 -m dayamlchecker --accessibility-error-on-widget combobox path/to/interv Some accessibility checks are behind runtime options while the rules are still being evaluated. Right now `combobox` failures are default-off and can be enabled with `--accessibility-error-on-widget combobox`. +## Style checks + +Assembly Line style checks are opt-in. Enable them with `--style` to run deterministic style findings ported from ALLinter without duplicating the checker’s existing YAML, accessibility, or URL coverage. + +```bash +python3 -m dayamlchecker --style --no-url-check path/to/interview.yml +python3 -m dayamlchecker --style-llm --openai-api-key "$OPENAI_API_KEY" path/to/interview.yml +OPENAI_BASE_URL=https://api.openai.com/v1 OPENAI_API_KEY=... python3 -m dayamlchecker --style-llm path/to/interview.yml +``` + +`--style-llm` also enables `--style`. 
It reads `OPENAI_BASE_URL`, `OPENAI_API_KEY`, and `OPENAI_MODEL` from the environment when flags are not provided. The checker only emits sanitized configuration/request errors for LLM-backed style rules and does not print the credential values. + +For Python callers, use the module helper instead of shelling out: + +```python +from dayamlchecker import RuntimeOptions, find_style_findings_from_string + +findings = find_style_findings_from_string( + interview_yaml, + input_file="interview.yml", + runtime_options=RuntimeOptions(style_include_llm=True), +) +``` + ## URL checks The main `dayamlchecker` CLI also runs the URL checker by default. Broken URLs in question files fail the command; broken URLs in related `data/templates` files are warnings by default. Use `--no-url-check` to skip it, or tune it with flags such as `--url-check-timeout`, `--url-check-ignore-urls`, `--url-check-skip-templates`, `--template-url-severity`, and `--unreachable-url-severity`. diff --git a/pyproject.toml b/pyproject.toml index 0bfb7fd..3b45c4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ authors = [ package-dir = { "" = "src" } [tool.setuptools.package-data] -dayamlchecker = ["py.typed"] +dayamlchecker = ["py.typed", "data/*.yml"] [build-system] requires = ["setuptools >= 77.0.3"] diff --git a/src/dayamlchecker/__init__.py b/src/dayamlchecker/__init__.py index e69de29..c077a01 100644 --- a/src/dayamlchecker/__init__.py +++ b/src/dayamlchecker/__init__.py @@ -0,0 +1,16 @@ +from dayamlchecker.messages import Finding, FindingClass +from dayamlchecker.yaml_structure import ( + RuntimeOptions, + find_errors, + find_errors_from_string, + find_style_findings_from_string, +) + +__all__ = [ + "Finding", + "FindingClass", + "RuntimeOptions", + "find_errors", + "find_errors_from_string", + "find_style_findings_from_string", +] diff --git a/src/dayamlchecker/accessibility.py b/src/dayamlchecker/accessibility.py index fbb0c81..5474046 100644 --- 
a/src/dayamlchecker/accessibility.py +++ b/src/dayamlchecker/accessibility.py @@ -12,9 +12,13 @@ "hint", "help", "label", + "note", + "html", "datatype", "choices", + "value", "validation code", + "validation message", "show if", "hide if", "js show if", @@ -24,6 +28,10 @@ "js enable if", "js disable if", "required", + "min", + "max", + "minlength", + "maxlength", "no label", "field", "__line__", @@ -40,6 +48,43 @@ "aqui", "aquí", } +NON_DESCRIPTIVE_FIELD_LABELS = { + "yes", + "no", + "maybe", + "n/a", + "na", + "other", + "unknown", +} +AMBIGUOUS_BUTTON_TEXT = { + "continue", + "go", + "here", + "next", + "ok", + "submit", +} +GENERIC_VALIDATION_MESSAGES = { + "error", + "invalid", + "invalid input", + "invalid value", + "not valid", +} +COLOR_WORDS = { + "red", + "green", + "blue", + "yellow", + "orange", + "purple", + "pink", + "gray", + "grey", + "black", + "white", +} @dataclass(frozen=True, slots=True) @@ -118,7 +163,15 @@ def find_accessibility_findings( ) -> list[AccessibilityFinding]: options = options or AccessibilityLintOptions() findings: list[FindingDraft] = [] + findings.extend(_check_yesno_shortcuts(doc, source_code, document_start_line)) findings.extend(_check_multifield_no_label_usage(doc, document_start_line)) + findings.extend(_check_field_labels(doc, document_start_line)) + findings.extend(_check_choice_labels(doc, document_start_line)) + findings.extend(_check_duplicate_field_labels(doc, document_start_line)) + findings.extend(_check_required_fields(doc, document_start_line)) + findings.extend(_check_validation_guidance(doc, document_start_line)) + findings.extend(_check_generic_validation_messages(doc, document_start_line)) + findings.extend(_check_ambiguous_button_text(doc, document_start_line)) findings.extend( _check_combobox_usage(doc, source_code, document_start_line, options=options) ) @@ -147,6 +200,26 @@ def find_accessibility_findings( findings.extend( _check_html_heading_order(section, source_code, document_start_line) ) + 
findings.extend( + _check_color_only_instructions(section, source_code, document_start_line) + ) + findings.extend( + _check_inline_color_styling(section, source_code, document_start_line) + ) + findings.extend( + _check_new_tab_links(section, source_code, document_start_line) + ) + findings.extend(_check_svg_names(section, source_code, document_start_line)) + findings.extend(_check_tables(section, source_code, document_start_line)) + findings.extend( + _check_positive_tabindex(section, source_code, document_start_line) + ) + findings.extend( + _check_clickable_non_controls(section, source_code, document_start_line) + ) + findings.extend( + _check_ambiguous_link_destinations(doc, source_code, document_start_line) + ) unique_findings: list[AccessibilityFinding] = [] seen: set[tuple[str, str, int]] = set() for finding in findings: @@ -213,6 +286,30 @@ def _check_combobox_usage( return findings +def _check_yesno_shortcuts( + doc: dict[str, Any], source_code: str, document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for shortcut in ("yesno", "noyes", "yesnomaybe", "noyesmaybe"): + value = doc.get(shortcut) + if value is None: + continue + line_number = _absolute_line_number( + source_code, + document_start_line, + _find_top_level_key_line(source_code, shortcut) or doc.get("__line__", 1), + f"{shortcut}:", + ) + findings.append( + draft( + MessageId.ACCESSIBILITY_YESNO_SHORTCUT, + line_number=line_number, + shortcut=shortcut, + ) + ) + return findings + + def _check_multifield_no_label_usage( doc: dict[str, Any], document_start_line: int ) -> list[FindingDraft]: @@ -249,6 +346,218 @@ def _check_multifield_no_label_usage( return findings +def _check_field_labels( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + fields = _iter_fields(doc) + for field in _iter_fields(doc): + if not _field_collects_user_input(field): + continue + datatype = str(field.get("datatype") or 
"").strip().lower() + if datatype in {"button", "buttons", "note"}: + continue + field_name = _extract_field_variable(field) or "" + label = _extract_field_label(field) + field_line = _field_line_number(doc, field, document_start_line) + if not label: + if len(fields) > 1: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_FIELD_MISSING_LABEL, + line_number=field_line, + field_name=field_name, + ) + ) + continue + normalized = _normalize_human_text(label) + if normalized in NON_DESCRIPTIVE_FIELD_LABELS or _looks_like_emoji_or_punctuation_only( + label + ): + findings.append( + draft( + MessageId.ACCESSIBILITY_NON_DESCRIPTIVE_FIELD_LABEL, + line_number=field_line, + snippet=label, + ) + ) + return findings + + +def _check_choice_labels( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for field in _iter_fields(doc): + for label, line_hint in _iter_choice_labels_with_lines(field.get("choices")): + line_number = _field_line_number(doc, field, document_start_line, line_hint) + if not label.strip(): + findings.append( + draft( + MessageId.ACCESSIBILITY_BLANK_CHOICE_LABEL, + line_number=line_number, + location=_extract_field_label(field) + or _extract_field_variable(field) + or "choices", + ) + ) + continue + normalized = _normalize_human_text(label) + if normalized in NON_DESCRIPTIVE_FIELD_LABELS or _looks_like_emoji_or_punctuation_only( + label + ): + findings.append( + draft( + MessageId.ACCESSIBILITY_NON_DESCRIPTIVE_CHOICE_LABEL, + line_number=line_number, + location=_extract_field_label(field) + or _extract_field_variable(field) + or "choices", + snippet=label, + ) + ) + return findings + + +def _check_duplicate_field_labels( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + labels: dict[str, list[tuple[int, str]]] = {} + for field in _iter_fields(doc): + if not _field_collects_user_input(field): + continue + label = _extract_field_label(field) + normalized = 
_normalize_human_text(label) + if not normalized: + continue + labels.setdefault(normalized, []).append( + ( + _field_line_number(doc, field, document_start_line), + label, + ) + ) + + findings: list[FindingDraft] = [] + for entries in labels.values(): + if len(entries) < 2: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_DUPLICATE_FIELD_LABEL, + line_number=entries[0][0], + labels=", ".join(label for _, label in entries[:3]), + ) + ) + return findings + + +def _check_required_fields( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for field in _iter_fields(doc): + if not _is_truthy(field.get("required")): + continue + label = _extract_field_label(field) + hint = str(field.get("hint") or "") + help_text = str(field.get("help") or "") + combined = f"{label} {hint} {help_text}".lower() + if "required" in combined or "*" in f"{label}{hint}{help_text}": + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_REQUIRED_FIELD_NOT_INDICATED, + line_number=_field_line_number(doc, field, document_start_line), + snippet=label or _extract_field_variable(field) or "", + ) + ) + return findings + + +def _check_validation_guidance( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for field in _iter_fields(doc): + if "code" in field: + continue + has_constraint = bool( + str(field.get("validation code") or "").strip() + or field.get("min") + or field.get("max") + or field.get("minlength") + or field.get("maxlength") + or field.get("required") + ) + if not has_constraint: + continue + validation_messages = _collect_validation_messages(field) + has_guidance = bool( + str(field.get("hint") or "").strip() + or str(field.get("help") or "").strip() + or str(field.get("note") or "").strip() + or validation_messages + ) + if has_guidance: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_VALIDATION_WITHOUT_GUIDANCE, + 
line_number=_field_line_number(doc, field, document_start_line), + snippet=_extract_field_label(field) + or _extract_field_variable(field) + or "", + ) + ) + return findings + + +def _check_generic_validation_messages( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for field in _iter_fields(doc): + for message, line_hint in _collect_validation_messages(field): + normalized = _normalize_human_text(message) + if normalized not in GENERIC_VALIDATION_MESSAGES: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_GENERIC_VALIDATION_MESSAGE, + line_number=_field_line_number( + doc, field, document_start_line, line_hint + ), + snippet=message, + ) + ) + return findings + + +def _check_ambiguous_button_text( + doc: dict[str, Any], document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + labels = _iter_choice_labels(doc.get("buttons")) + labels.append(str(doc.get("continue button label") or "")) + for field in _iter_fields(doc): + datatype = str(field.get("datatype") or "").strip().lower() + if datatype in {"button", "buttons"}: + labels.extend(_iter_choice_labels(field.get("choices"))) + for label in labels: + normalized = _normalize_human_text(label) + if normalized not in AMBIGUOUS_BUTTON_TEXT: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_AMBIGUOUS_BUTTON_TEXT, + line_number=document_start_line + doc.get("__line__", 1) - 1, + snippet=label, + ) + ) + return findings + + def _check_tagged_pdf_for_docx( doc: dict[str, Any], source_code: str, document_start_line: int ) -> list[FindingDraft]: @@ -544,6 +853,236 @@ def _check_non_descriptive_link_text( return findings +def _check_color_only_instructions( + section: TextSection, source_code: str, document_start_line: int +) -> list[FindingDraft]: + plain_text = _normalize_human_text(section.value) + if not any(color in plain_text.split() for color in COLOR_WORDS): + return [] + color_reference_re = 
re.compile( + r"\b(red|green|blue|yellow|orange|purple|pink|gray|grey|black|white)\b", + re.IGNORECASE, + ) + if not color_reference_re.search(section.value): + return [] + if re.search(r"\b(color|colou?r|highlight|highlighted|shade|shaded)\b", section.value, re.IGNORECASE): + line_number = _absolute_line_number( + source_code, + document_start_line, + section.key_line, + color_reference_re.search(section.value).group(0), # type: ignore[union-attr] + ) + return [ + draft( + MessageId.ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS, + line_number=line_number, + section_location=section.location, + snippet=_short_snippet(section.value), + ) + ] + return [] + + +def _check_inline_color_styling( + section: TextSection, source_code: str, document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + patterns = [ + r"style\s*=\s*([\"']).*?\bcolor\s*:", + r"\bclass\s*=\s*([\"']).*?\b(text|bg)-(danger|warning|success|primary|secondary|info)\b", + r"]*\bcolor\s*=", + ] + for pattern in patterns: + match = re.search(pattern, section.value, re.IGNORECASE | re.DOTALL) + if not match: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_INLINE_COLOR_STYLING, + line_number=_absolute_line_number( + source_code, + document_start_line, + section.key_line, + match.group(0), + ), + section_location=section.location, + snippet=_short_snippet(match.group(0)), + ) + ) + break + return findings + + +def _check_ambiguous_link_destinations( + doc: dict[str, Any], source_code: str, document_start_line: int +) -> list[FindingDraft]: + links_by_text: dict[str, list[tuple[str, TextSection, str]]] = {} + for section in _iter_text_sections(doc, source_code): + for link in _extract_links_from_text(section.value): + text = _normalize_human_text(link["text"]) + if not text or not link["target"].strip(): + continue + links_by_text.setdefault(text, []).append( + (link["target"].strip(), section, link["snippet"]) + ) + + findings: list[FindingDraft] = [] + for text, entries 
in links_by_text.items(): + targets = {target for target, _, _ in entries} + if len(targets) < 2: + continue + _, section, snippet = entries[0] + findings.append( + draft( + MessageId.ACCESSIBILITY_AMBIGUOUS_LINK_DESTINATIONS, + line_number=_absolute_line_number( + source_code, document_start_line, section.key_line, snippet + ), + snippet=_short_snippet(snippet), + ) + ) + return findings + + +def _check_new_tab_links( + section: TextSection, source_code: str, document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for link in _extract_links_from_text(section.value): + attrs = link["attrs"].lower() + if 'target="_blank"' not in attrs and "target='_blank'" not in attrs: + continue + combined = " ".join( + [link["text"], link["aria_label"], link["title"], section.value] + ).lower() + if any(marker in combined for marker in ("new tab", "new window", "opens in")): + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_NEW_TAB_WITHOUT_WARNING, + line_number=_absolute_line_number( + source_code, + document_start_line, + section.key_line, + link["snippet"], + ), + section_location=section.location, + snippet=_short_snippet(link["snippet"]), + ) + ) + return findings + + +def _check_svg_names( + section: TextSection, source_code: str, document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for match in re.finditer(r"]*>.*?", section.value, re.IGNORECASE | re.DOTALL): + snippet = match.group(0) + if re.search(r"\baria-label\s*=\s*([\"']).+?\1", snippet, re.IGNORECASE): + continue + if re.search(r"\baria-labelledby\s*=\s*([\"']).+?\1", snippet, re.IGNORECASE): + continue + if re.search(r"]*>.*?", snippet, re.IGNORECASE | re.DOTALL): + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_SVG_MISSING_ACCESSIBLE_NAME, + line_number=_absolute_line_number( + source_code, document_start_line, section.key_line, " list[FindingDraft]: + findings: list[FindingDraft] = [] + for match in re.finditer( + 
r"]*>.*?", section.value, re.IGNORECASE | re.DOTALL + ): + snippet = match.group(0) + has_headers = bool(re.search(r" 1 + line_number = _absolute_line_number( + source_code, document_start_line, section.key_line, " list[FindingDraft]: + findings: list[FindingDraft] = [] + for match in re.finditer( + r"\btabindex\s*=\s*([\"']?)(\d+)\1", section.value, re.IGNORECASE + ): + if int(match.group(2)) <= 0: + continue + findings.append( + draft( + MessageId.ACCESSIBILITY_POSITIVE_TABINDEX, + line_number=_absolute_line_number( + source_code, + document_start_line, + section.key_line, + match.group(0), + ), + section_location=section.location, + snippet=_short_snippet(match.group(0)), + ) + ) + return findings + + +def _check_clickable_non_controls( + section: TextSection, source_code: str, document_start_line: int +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for match in re.finditer( + r"<(div|span|p|li)\b[^>]*\bonclick\s*=", section.value, re.IGNORECASE + ): + tag_name = match.group(1).lower() + findings.append( + draft( + MessageId.ACCESSIBILITY_CLICKABLE_NON_CONTROL_HTML, + line_number=_absolute_line_number( + source_code, + document_start_line, + section.key_line, + match.group(0), + ), + section_location=section.location, + tag_name=tag_name, + snippet=_short_snippet(match.group(0)), + ) + ) + return findings + + def _find_top_level_key_line(source_code: str, key: str) -> Optional[int]: key_re = re.compile(rf"^{re.escape(key)}\s*:", re.MULTILINE) match = key_re.search(source_code) @@ -609,15 +1148,99 @@ def _extract_field_variable(field: dict[str, Any]) -> str: def _extract_field_label(field: dict[str, Any]) -> str: explicit = str(field.get("label") or "").strip() - if explicit: + if explicit and not _is_no_label_marker(explicit): return explicit for key in field.keys(): key_text = str(key).strip() - if key_text and key_text not in FIELD_NON_LABEL_KEYS: + if ( + key_text + and key_text not in FIELD_NON_LABEL_KEYS + and not 
_is_no_label_marker(key_text) + ): return key_text return "" +def _field_line_number( + doc: dict[str, Any], + field: dict[str, Any], + document_start_line: int, + line_hint: Optional[int] = None, +) -> int: + relative_line = line_hint or field.get("__line__", doc.get("__line__", 1)) + return document_start_line + relative_line - 1 + + +def _is_no_label_marker(value: str) -> bool: + normalized = value.strip().lower() + return normalized in {"no label", "n/a", "na", "none"} + + +def _looks_like_emoji_or_punctuation_only(value: str) -> bool: + stripped = value.strip() + if not stripped: + return False + return not any(char.isalnum() for char in stripped) + + +def _iter_choice_labels(choice_value: Any) -> list[str]: + return [label for label, _ in _iter_choice_labels_with_lines(choice_value)] + + +def _field_collects_user_input(field: dict[str, Any]) -> bool: + if "code" in field: + return False + datatype = str(field.get("datatype") or "").strip().lower() + if datatype in {"button", "buttons", "note"}: + return False + if "field" in field: + return True + return any(str(key).strip() not in FIELD_NON_LABEL_KEYS for key in field.keys()) + + +def _iter_choice_labels_with_lines(choice_value: Any) -> list[tuple[str, Optional[int]]]: + labels: list[tuple[str, Optional[int]]] = [] + if isinstance(choice_value, dict): + for key, value in choice_value.items(): + if isinstance(value, dict): + labels.append((str(value.get("label") or key or ""), value.get("__line__"))) + else: + labels.append((str(key or ""), None)) + return labels + if not isinstance(choice_value, list): + return labels + for item in choice_value: + if isinstance(item, str): + labels.append((item, None)) + elif isinstance(item, dict): + if "label" in item: + labels.append((str(item.get("label") or ""), item.get("__line__"))) + elif len(item) == 1: + key = next(iter(item.keys())) + labels.append((str(key or ""), item.get("__line__"))) + return labels + + +def _collect_validation_messages(field: dict[str, Any]) 
-> list[tuple[str, Optional[int]]]: + messages: list[tuple[str, Optional[int]]] = [] + validation_message = field.get("validation message") + if isinstance(validation_message, str) and validation_message.strip(): + messages.append((validation_message.strip(), field.get("__line__"))) + elif isinstance(validation_message, list): + for item in validation_message: + if isinstance(item, str) and item.strip(): + messages.append((item.strip(), field.get("__line__"))) + elif isinstance(item, dict): + for value in item.values(): + if isinstance(value, str) and value.strip(): + messages.append((value.strip(), item.get("__line__"))) + elif isinstance(validation_message, dict): + for value in validation_message.values(): + if isinstance(value, str) and value.strip(): + messages.append((value.strip(), validation_message.get("__line__"))) + return messages + + def _is_truthy(value: Any) -> bool: if isinstance(value, bool): return value @@ -858,6 +1481,7 @@ def _extract_links_from_text(text: str) -> list[dict[str, str]]: "target": str(target), "aria_label": "", "title": "", + "attrs": "", "snippet": f"[{link_text}]({target})", } ) @@ -877,7 +1501,15 @@ def _extract_links_from_text(text: str) -> list[dict[str, str]]: aria_label_match.group(2) if aria_label_match else "" ), "title": str(title_match.group(2) if title_match else ""), + "attrs": str(attrs), "snippet": snippet, } ) return links + + +def _short_snippet(value: str, *, limit: int = 120) -> str: + snippet = re.sub(r"\s+", " ", value).strip() + if len(snippet) <= limit: + return snippet + return f"{snippet[: limit - 1].rstrip()}…" diff --git a/src/dayamlchecker/data/interview_linter_prompts.yml b/src/dayamlchecker/data/interview_linter_prompts.yml new file mode 100644 index 0000000..b1c0819 --- /dev/null +++ b/src/dayamlchecker/data/interview_linter_prompts.yml @@ -0,0 +1,66 @@ +llm_rules: + - rule_id: tone-and-respect + default_severity: yellow + url: 
https://assemblyline.suffolklitlab.org/docs/style_guide/question_overview/ + system_prompt: | + You are a docassemble interview style reviewer. + Review, preview, check-your-answers, and edit-answer screens are summary/navigation screens. + On those screens, standard UI copy like "Review and edit your answers", "Review and edit your interview", "Edit answers", "Preview", "Back", and "Continue" is normally acceptable and should not be flagged unless it is itself disrespectful, misleading, or unusually harsh. + Return strict JSON only. + Use this schema: + { + "findings": [ + { + "rule_id": "tone-and-respect", + "severity": "yellow", + "message": "short issue summary", + "screen_id": "required screen id from input", + "problematic_text": "exact quote from that screen that triggered this finding" + } + ] + } + If there are no issues, return {"findings":[]}. + user_prompt: | + Review these interview screens for tone and respect risks only. + Focus on potentially judgmental, coercive, or disrespectful wording. + Treat review, preview, check-your-answers, and edit-answer screens as summary/navigation screens. + On those screens, do not flag neutral interface scaffolding such as repeated "Edit", "Edit answers", "Review", "Preview", "Back", or "Continue" labels, or standard headings like "Review and edit your answers/interview", unless the wording itself is disrespectful, misleading, or unusually harsh. + For review-style screens, only report a finding when the overall instruction would plausibly confuse, shame, pressure, or mislead a user. + For every finding, include: + 1) screen_id + 2) exact problematic_text quote from that screen + Screens JSON: + {screens_json} + - rule_id: plain-language-rewrite-opportunities + default_severity: yellow + url: https://assemblyline.suffolklitlab.org/docs/style_guide/readability/ + system_prompt: | + You are a plain-language reviewer for legal interviews. 
+ Review, preview, check-your-answers, and edit-answer screens are summary/navigation screens. + On those screens, standard UI copy like "Review and edit your answers", "Review and edit your interview", "Edit your answers about ...", "Edit answers", and "Preview" is normally acceptable and should not be flagged. + Do not produce one finding per repeated row label, edit link, or recap item on a review-style screen. + Return strict JSON only. + Use this schema: + { + "findings": [ + { + "rule_id": "plain-language-rewrite-opportunities", + "severity": "yellow", + "message": "short issue summary", + "screen_id": "required screen id from input", + "problematic_text": "exact quote from that screen that triggered this finding" + } + ] + } + If there are no issues, return {"findings":[]}. + user_prompt: | + Review these interview screens and identify only high-confidence plain-language rewrite opportunities. + Do not flag jargon that is legally required. + Treat review, preview, check-your-answers, and edit-answer screens as summary/navigation screens. + On those screens, ignore repeated row labels, edit links, and recap phrases such as "Edit your answers about ...", "Review ...", "Preview ...", and similar checklist text. Also ignore standard review-screen headings like "Review and edit your answers/interview" unless the full instruction is genuinely confusing or misleading. + Never emit one finding per repeated review-row label or edit link. For a review-style screen, either return no findings or at most one finding for a genuinely unclear overall instruction. 
+ For every finding, include: + 1) screen_id + 2) exact problematic_text quote from that screen + Screens JSON: + {screens_json} diff --git a/src/dayamlchecker/data/plain_language_replacements.yml b/src/dayamlchecker/data/plain_language_replacements.yml new file mode 100644 index 0000000..0b33964 --- /dev/null +++ b/src/dayamlchecker/data/plain_language_replacements.yml @@ -0,0 +1,594 @@ +# Plain-language replacements used by the interview linter. +# Sources reviewed on 2026-02-22: +# - RateMyPDF/FormFyxer wordlist: formfyxer/data/simplified_words.yml +# - https://github.com/GSA/plainlanguage.gov/blob/main/_pages/guidelines/words/use-simple-words-phrases.md +# +# Notes: +# - Values are suggestions only and should be applied in context. +# - A few high-noise terms (e.g., "address", "type") are intentionally excluded. + +a and/or b: a or b or both +a number of: some +absence of: '[no, none]' +abundance: '[enough, plenty, a lot, say how many]' +accede to: '[allow, agree to]' +accelerate: speed up +accentuate: stress +accommodation: '[where you live, home]' +accompany: go with +accompanying: with +accomplish: '[do, finish, carry out]' +accorded: given +according to our records: our records show +accordingly: '[so, in line with this]' +accrue: '[add, gain]' +accurate: '[exact, right]' +acknowledge: thank you for +acquaint yourself with: '[find out about, read]' +acquiesce: agree +acquire: '[buy, get]' +added: '[more, other]' +additional: '[extra, more, other]' +addressees: you +addressees are requested: (omit) +adequate: '[enough, sufficient]' +adjacent: next to +adjacent to: next to +adjustment: '[change, alteration]' +admissible: '[allowed, acceptable]' +advantageous: '[useful, helpful]' +adversely impact: '[hurt, set back]' +adversely impact on: '[hurt, set back]' +advise: '[recommend, tell, say]' +affix: '[add, write, fasten, stick on, fix to]' +afford an opportunity: '[allow, let]' +afforded: given +aforesaid: '[this, earlier in this document]' +aggregate: total 
+aircraft: plane +aligned: '[lined up, in line]' +alleviate: '[ease, reduce]' +allocate: '[divide, share, give]' +along the lines of: '[like, as in]' +alternative: '[a choice, the other]' +alternatively: '[or, on the other hand]' +ameliorate: '[improve, help]' +amendment: change +and/or: a or b or both +anticipate: expect +apparent: '[clear, plain, obvious, seeming]' +applicant: you +application: use +appoint: '[choose, name]' +appointed: '[chose, named]' +appreciable: '[many, large, great]' +apprise: '[inform, tell]' +appropriate: '[(omit), proper, right, suitable]' +appropriate to: suitable for +approximate: about +approximately: '[about, roughly]' +arrive onboard: arrive +as a consequence of: because +as a means of: to +as of the date of: from +as prescribed by: '[in, under]' +as regards: '[about, on the subject of]' +ascertain: '[find out, learn]' +assemble: '[build, gather, put together]' +assist: '[aid, help]' +assist, assistance: aid, help +assistance: '[aid, help]' +at an early date: '[soon, say when]' +at issue: being questioned or challenged +at its discretion: '[can, may, (omit)]' +at present: now +at the moment: '[now, (omit)]' +at the present time: '[now, (omit)]' +attain: meet +attempt: try +attend: '[come to, go to, be at]' +attributable to: '[due to, because of]' +authority: '[right, power]' +authorize: '[allow, let]' +axiomatic: '[obvious, goes without saying]' +be advised: (omit) +belated: late +believe: '[consider, think]' +beneficial: '[helpful, useful]' +benefit: help +bestow: '[give, award]' +breach: break +by means of: '[by, with]' +calculate: '[work out, decide]' +capability: ability +caveat: warning +cease: '[finish, stop, end]' +circumvent: '[get round, avoid, skirt, circle]' +clarification: '[explanation, help]' +close proximity: near +combat environment: combat +combine: '[mix, join, merge]' +combined: '[together, joint]' +commence: '[start, begin]' +communicate: '[talk, write, telephone, be specific]' +competent: '[able, can]' +compile: 
'[make, collect]' +complete: '[fill in, finish]' +completion: end +comply with: '[follow, keep to, meet]' +component: part +comprehend: '[understand, grasp]' +comprise: form, include, make up +comprise form: '[include, make up]' +comprises: '[is made up of, includes]' +compulsory: you must +conceal: hide +concerning: '[about, on]' +conclusion: end +concur: agree +condition: rule +consequently: so +considerable: '[great, important]' +consolidate: '[join, merge]' +constitutes: '[makes up, forms, is]' +constitutes is: '[forms, makes up]' +construe: interpret +consult: '[talk to, meet, ask]' +consumption: amount used +contains: has +contemplate: think about +contrary to: '[against, despite]' +convene: meet +correct: '[exact, right, put right]' +correspond: write +costs the sum of: costs +counter: against +courteous: polite +cumulative: '[added up, added together]' +currently: '[(omit), now]' +customary: '[usual, normal]' +decide: '[figure, find]' +deduct: '[take off, take away]' +deem: '[consider, think]' +deem to be: treat as +defer: '[put off, delay]' +deficiency: lack of +delete: '[cut, drop, cross out]' +demonstrate: '[show, prove]' +denote: show +depart: leave +depict: show +designate: '[choose, point out, show, name]' +designated: '[chose, named]' +desire: '[want, wish]' +despatch or dispatch: '[send, post]' +despite the fact that: '[though, although]' +determine: '[decide, work out, set, end, figure, find]' +detrimental: '[harmful, damaging]' +difficulties: problems +diminish: '[lessen, reduce]' +disburse: '[pay, pay out]' +discharge: carry out +disclose: '[tell, show]' +disconnect: '[cut off, unplug]' +discontinue: '[stop, end, drop]' +discrete: separate +discuss: talk about +disseminate: '[spread, give]' +documentation: '[papers, documents]' +domiciled in: living in +dominant: main +due to the fact that: '[due to, since, because, as]' +duration: '[time, life]' +during the period: during +during which time: while +dwelling: home +economical: '[cheap, good 
value]' +effect modifications: '[make changes]' +elaborate: '[explain, describe, develop]' +elect: '[choose, pick]' +eligible: '[allowed, qualified]' +eliminate: '[cut, drop, end]' +elucidate: '[explain, make clear]' +emphasise: stress +emphasize: stress +employ: use +empower: '[allow, let]' +enable: allow +enclosed: '[inside, with, I enclose]' +encounter: meet +endeavor: try +endeavour: try +enquire: ask +enquiry: question +ensure: make sure +entitlement: right +enumerate: count +envisage: '[expect, imagine]' +equipments: equipment +equitable: fair +equivalent: '[equal, the same]' +erroneous: wrong +establish: '[show, find out, set up, prove]' +evaluate: '[test, check]' +evidenced: showed +evident: clear +evince: '[show, prove]' +ex officio: because of his or her position +exceptionally: '[only when, in this case]' +excessive: '[too many, too much]' +exclude: leave out +excluding: '[apart from, except]' +exclusively: only +exemplify: '[show, demonstrate, represent]' +exempt from: free from +exhibit: '[show, exhibit]' +expedite: '[hurry, speed up, hasten]' +expeditious: '[fast, quick]' +expeditiously: '[as soon as possible, quickly]' +expend: spend +expenditure: spending +expertise: ability +expiration: end +expire: run out +extant: '[current, in force]' +extremity: limit +fabricate: '[make, make up]' +facilitate: '[help, make possible, ease]' +factor: reason +failed to: '[didn''t]' +failure to: if you do not +feasible: '[can be done, workable]' +females: women +finalise: '[end, finish]' +finalize: '[complete, finish]' +find: '[name, show]' +find enclosed: I enclose +following: '[after, per, under]' +for a period of: for +for example,______etc.: for example, such as +for the duration of: '[during, while]' +for the purpose of: '[to, for]' +for the reason that: because +forfeit: '[give up, lose]' +formulate: '[plan, devise]' +forthwith: '[now, at once]' +forward: '[send, forward]' +frequently: often +function: act, role, work +function act: '[role, work]' +furnish: 
'[give, send]' +further to: '[after, following]' +furthermore: '[then, also, and]' +generate: '[produce, give, make]' +give consideration to: '[consider, think about]' +grant: give +greatest: '[largest, most]' +has a requirement for: needs +has regard to: takes into account +have regard to: take into account +have the authority to: may +hence: '[so, therefore, as a result]' +henceforth: '[from now on, from today]' +hereby: '[now, by this, (omit)]' +herein: '[here, (omit)]' +hereinafter: '[after this, (omit)]' +hereof: of this +hereto: to this +heretofore: '[until now, previously]' +hereunder: below +herewith: '[with this, below, here, (omit)]' +hitherto: until now +hold in abeyance: '[wait, postpone]' +hope and trust: '[hope, trust]' +however: but +i, the undersigned: I +identical: same +identify: '[name, show]' +if and when: '[if, when]' +illustrate: '[show, explain]' +immediately: '[at once, now]' +impacted: '[affected, changed]' +implement: '[carry out, do, start]' +imply: '[suggest, hint at]' +in a number of cases: '[some, say how many]' +in a timely manner: '[on time, promptly]' +in accordance with: '[as under, in line with, because of, by]' +in addition: '[and, as well as, also, besides, too]' +in advance: before +in an effort to: to +in case of: if +in conjunction with: '[and, with]' +in connection with: '[for, about]' +in consequence: '[because, as a result]' +in excess of: more than +in lieu of: instead of +in order that: '[for, so, so that]' +in order to: to +in receipt of: '[get, have, receive]' +in regard to: '[concerning, on]' +in relation to: '[about, with, to]' +in respect of: '[about, for]' +in the absence of: without +in the amount of: for +in the course of: '[while, during]' +in the event of: if +in the event of/that: if +in the majority of instances: '[most, mostly]' +in the near future: soon +in the neighborhood of: '[about, around]' +in the neighbourhood of: '[about, around]' +in the process of: (omit) +in view of: since +in view of the above: 
so +in view of the fact that: '[as, because]' +inappropriate: '[wrong, unsuitable]' +inasmuch as: since +inception: '[start, beginning]' +incorporating: which includes +incumbent upon: must +incur: '[have to pay, owe]' +indicate: '[show, suggest, write down]' +indication: sign +inform: tell +initial: first +initially: at first +initiate: '[begin, start]' +inquire: '[ask, question, investigate]' +insert: put in +instances: cases +intend to: will +inter alia: (omit) +interface: '[meet, work with]' +interpose no objection: don't object +intimate: '[say, hint]' +irrespective of: '[despite, even if]' +is applicable to: applies to +is authorized to: may +is compulsory: you must +is in consonance with: '[agrees with, follows]' +is of the opinion: thinks +is responsible for: '[(omit), handles]' +issue: '[give, send, problem]' +it appears: seems +it is: (omit) +it is compulsory: you must +it is essential: '[must, need to]' +it is known that: I/we know that +it is mandatory: you must +it is requested: '[please, we request, I request]' +jeopardize: '[risk, threaten]' +large number of: '[many, most, say how many]' +liaison: discussion +limited number: limits +locality: '[place, area]' +locate: '[find, put]' +magnitude: size +maintain: '[keep, support]' +mandatory: you must +manner: way +manufacture: make +marginal: '[small, slight]' +material: relevant +materialise: '[happen, occur]' +materialize: '[happen, occur]' +maximum: '[largest, most]' +may in the future: '[may, might, could]' +merchandise: goods +methodology: method +middle initial: middle initial +minimize: decrease +minimum: '[least, smallest]' +mislay: lose +modification: change +modify: change +monitor: '[check, watch]' +moreover: '[and, also, as well]' +must issue: '[will pass, will send]' +necessitate: '[cause, need]' +negligible: very small +nevertheless: '[but, however, even so]' +not later than: '[before, by]' +not later than 10 may: by 10 May, before 11 May +not later than 1600: by 1600 +notify: '[tell, let 
you know, let us know]' +notwithstanding: '[even if, despite, still, yet]' +numerous: '[many, say how many]' +objective: '[aim, goal]' +obligate: '[bind, compel]' +obligatory: you must +observe: see +obtain: '[get, receive]' +occasioned by: '[caused by, because of]' +on a regular basis: (omit) +on behalf of: for +on numerous occasions: often +on request: if you ask +on the grounds that: because +on the occasion that: '[when, if]' +operate: '[use, work, run]' +optimum: '[best, ideal, greatest, most]' +option: '[choice, way]' +ordinarily: '[normally, usually]' +otherwise: or +outstanding: unpaid +owing to: because of +parameters: limits +partially: partly +participate: '[join in, take part]' +particulars: '[details, facts]' +per annum: a year +perceive: '[see, notice, understand]' +perform: do +permissible: allowed +permit: '[let, allow]' +personnel: '[people, staff]' +persons: '[people, anyone]' +pertaining to: about, of, on +pertaining to about: '[of, on]' +peruse: '[read, read carefully, look at]' +place: put +please: (omit) +please find enclosed: I enclose +portion: part +possess: '[have, own]' +possessions: belongings +practicable: practical +practically: '[almost, nearly]' +preclude: prevent +predominant: main +prescribe: '[set, fix]' +preserve: '[keep, protect]' +previous: '[earlier, before, last]' +previously: before +principal: main +prior to: before +prioritize: rank +proceed: go ahead +proceed do: '[go ahead, try]' +procure: '[get, obtain, arrange, (omit)]' +proficiency: skill +profusion of: '[plenty, too many, say how many]' +prohibit: '[ban, stop]' +projected: estimated +prolonged: long +promptly: '[quickly, at once]' +promulgate: '[advertise, announce, issue, publish]' +proportion: part +provide: give +provide give: '[offer, say]' +provided that: '[if, as long as]' +provides guidance for: guides +provisions: '[rules, terms]' +proximity: '[closeness, nearness]' +purchase: buy +pursuant to: '[under, because of, in line with]' +pursuant to by: '[following, 
per, under]' +reconsider: '[think again about, look again at]' +reduce: cut +reduction: cut +referred to as: called +refers to: '[talks about, mentions]' +reflect: '[say, show]' +regarding: '[about, on, of]' +regulation: rule +reimburse: '[repay, pay back]' +reiterate: '[repeat, restate]' +relating to: about +relative to: '[about, on]' +relocate: move +remain: stay +remainder: '[the rest, what is left]' +remittance: payment +remuneration: '[pay, payment, wages, salary]' +render: '[make, give, send]' +report: tell +represents: '[shows, stands for, is]' +request: '[ask, question, document asking for]' +require: '[need, want, force, must]' +requirement: need +requirements: '[needs, rules]' +reside: live +residence: '[home, where you live]' +restriction: limit +retain: keep +review: '[look at, look at again]' +revised: '[new, changed]' +said, some, such: the, this, that +said/such/same: '[the, this, that]' +scrutinize: '[read carefully, look at carefully]' +select: choose +selection: choice +set forth in: in +set up: '[prove, show]' +settle: pay +shall be issued: (organization) will send +shall issue: '[will pass, will send]' +similar to: like +similarly: '[also, in the same way]' +solely: only +solicit: '[ask for, request]' +specified: '[given, written, set]' +state of the art: latest +state-of-the-art: latest +statutory: '[legal, by law]' +subject: '[the, this, your]' +subject to: '[depending on, under, keeping to]' +submit: '[send, give]' +subsequent: '[later, next]' +subsequent to/upon: after +subsequently: '[after, later, then]' +substantial: '[large, great, a lot of, much]' +substantially: more or less +successfully complete: '[complete, pass]' +sufficient: enough +supplement: '[go with, add to]' +supplementary: '[extra, more]' +supply: '[give, sell, deliver]' +take action to: (omit) +terminate: '[stop, end]' +that being the case: if so +the month of: (omit) +the question as to whether: whether +the tenant: you +the undersigned: I +the use of: (omit) +there are: 
(omit) +there is: (omit) +thereafter: '[then, afterwards]' +thereby: '[by that, because of that]' +therefore: so +therein: '[in that, there]' +thereof: '[its, their, of that]' +thereto: to that +this activity, command: us, we +thus: so +time period: time +timely: prompt +to date: '[so far, up to now]' +to the extent that: '[if, when]' +transfer: '[change, move]' +transmit: send +ultimately: '[in the end, finally]' +unavailability: lack of +under the provisions of: under +undernoted: the following +undersigned: '[I, we]' +undertake: '[agree, promise, do]' +uniform: '[same, similar]' +unilateral: '[one-sided, one-way]' +unoccupied: empty +until such time: until +until such time as: until +utilisation: use +utilise: use +utilization: use +utilize: use +utilize, utilization: use +validate: confirm +variation: change +verify: '[check, confirm, make sure]' +viable: '[practical, workable]' +vice: '[instead of, versus]' +virtually: '[almost, (omit)]' +visualize: '[see, predict]' +warrant: '[call for, permit]' +ways and means: ways +we have pleasure in: we are glad to +whatsoever: '[whatever, what, any]' +whensoever: when +whereas: '[but, because, since]' +whether or not: whether +will issue: '[will pass, will send]' +with a view to: '[to, so that]' +with effect from: from +with reference to: about +with regard to: '[about, for]' +with respect to: '[about, for]' +with the exception of: except for +with the minimum of delay: '[quickly, say when]' +witnessed: saw +you are requested: please +your attention is drawn to: '[please see, please note]' +your office: you +zone: '[area, region]' diff --git a/src/dayamlchecker/messages.py b/src/dayamlchecker/messages.py index 31725a4..b886a0f 100644 --- a/src/dayamlchecker/messages.py +++ b/src/dayamlchecker/messages.py @@ -14,6 +14,7 @@ class Severity(StrEnum): class FindingClass(StrEnum): GENERAL = "general" ACCESSIBILITY = "accessibility" + STYLE = "style" class MessageId(StrEnum): @@ -65,6 +66,9 @@ class MessageId(StrEnum): 
FIELD_MODIFIER_DICT_KEYS = "field_modifier_dict_keys" FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING = "field_modifier_unknown_variable_string" FIELD_MODIFIER_CASE = "field_modifier_case" + MISSING_QUESTION_ID = "missing_question_id" + MULTIPLE_MANDATORY_BLOCKS = "multiple_mandatory_blocks" + MISSING_METADATA_FIELDS = "missing_metadata_fields" ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE = "accessibility_combobox_not_accessible" ACCESSIBILITY_NO_LABEL_MULTI_FIELD = "accessibility_no_label_multi_field" @@ -77,6 +81,83 @@ class MessageId(StrEnum): ACCESSIBILITY_HTML_HEADING_LEVEL_SKIP = "accessibility_html_heading_level_skip" ACCESSIBILITY_EMPTY_LINK_TEXT = "accessibility_empty_link_text" ACCESSIBILITY_NON_DESCRIPTIVE_LINK_TEXT = "accessibility_non_descriptive_link_text" + ACCESSIBILITY_YESNO_SHORTCUT = "accessibility_yesno_shortcut" + ACCESSIBILITY_FIELD_MISSING_LABEL = "accessibility_field_missing_label" + ACCESSIBILITY_NON_DESCRIPTIVE_FIELD_LABEL = ( + "accessibility_non_descriptive_field_label" + ) + ACCESSIBILITY_BLANK_CHOICE_LABEL = "accessibility_blank_choice_label" + ACCESSIBILITY_NON_DESCRIPTIVE_CHOICE_LABEL = ( + "accessibility_non_descriptive_choice_label" + ) + ACCESSIBILITY_DUPLICATE_FIELD_LABEL = "accessibility_duplicate_field_label" + ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS = ( + "accessibility_color_only_instructions" + ) + ACCESSIBILITY_INLINE_COLOR_STYLING = "accessibility_inline_color_styling" + ACCESSIBILITY_AMBIGUOUS_LINK_DESTINATIONS = ( + "accessibility_ambiguous_link_destinations" + ) + ACCESSIBILITY_NEW_TAB_WITHOUT_WARNING = ( + "accessibility_new_tab_without_warning" + ) + ACCESSIBILITY_SVG_MISSING_ACCESSIBLE_NAME = ( + "accessibility_svg_missing_accessible_name" + ) + ACCESSIBILITY_TABLE_MISSING_HEADERS = ( + "accessibility_table_missing_headers" + ) + ACCESSIBILITY_LAYOUT_TABLE_NEEDS_REVIEW = ( + "accessibility_layout_table_needs_review" + ) + ACCESSIBILITY_POSITIVE_TABINDEX = "accessibility_positive_tabindex" + ACCESSIBILITY_CLICKABLE_NON_CONTROL_HTML = ( 
+ "accessibility_clickable_non_control_html" + ) + ACCESSIBILITY_REQUIRED_FIELD_NOT_INDICATED = ( + "accessibility_required_field_not_indicated" + ) + ACCESSIBILITY_VALIDATION_WITHOUT_GUIDANCE = ( + "accessibility_validation_without_guidance" + ) + ACCESSIBILITY_GENERIC_VALIDATION_MESSAGE = ( + "accessibility_generic_validation_message" + ) + ACCESSIBILITY_AMBIGUOUS_BUTTON_TEXT = ( + "accessibility_ambiguous_button_text" + ) + + STYLE_SUBQUESTION_H1 = "style_subquestion_h1" + STYLE_CHOICES_WITHOUT_STABLE_VALUES = "style_choices_without_stable_values" + STYLE_REMOVE_LANGUAGE_EN = "style_remove_language_en" + STYLE_HARDCODED_USER_TEXT_IN_CODE = "style_hardcoded_user_text_in_code" + STYLE_MISSING_SCREEN_TITLE = "style_missing_screen_title" + STYLE_PLACEHOLDER_LANGUAGE = "style_placeholder_language" + STYLE_PLAIN_LANGUAGE_REPLACEMENT = "style_plain_language_replacement" + STYLE_VARIABLE_ROOT_NOT_SNAKE_CASE = "style_variable_root_not_snake_case" + STYLE_LONG_SENTENCE = "style_long_sentence" + STYLE_COMPOUND_QUESTION = "style_compound_question" + STYLE_OVERLONG_QUESTION_LABEL = "style_overlong_question_label" + STYLE_OVERLONG_FIELD_LABEL = "style_overlong_field_label" + STYLE_TOO_MANY_FIELDS_ON_SCREEN = "style_too_many_fields_on_screen" + STYLE_WALL_OF_TEXT = "style_wall_of_text" + STYLE_COMPLEX_SCREEN_MISSING_HELP = "style_complex_screen_missing_help" + STYLE_MISSING_EXIT_CRITERIA_SCREEN = "style_missing_exit_criteria_screen" + STYLE_MISSING_CUSTOM_THEME = "style_missing_custom_theme" + STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS = ( + "style_review_screen_missing_edit_links" + ) + STYLE_REVIEW_SCREEN_MISSING_KEY_CHOICE_EDITS = ( + "style_review_screen_missing_key_choice_edits" + ) + STYLE_PREFER_PERSON_OBJECTS = "style_prefer_person_objects" + STYLE_QUESTION_LEVEL_HELP = "style_question_level_help" + STYLE_TONE_AND_RESPECT = "style_tone_and_respect" + STYLE_PLAIN_LANGUAGE_REWRITE_OPPORTUNITY = ( + "style_plain_language_rewrite_opportunity" + ) + 
STYLE_LLM_CONFIGURATION_ERROR = "style_llm_configuration_error" + STYLE_LLM_REQUEST_FAILED = "style_llm_request_failed" URL_CONCATENATED_ERROR = "url_concatenated_error" URL_CONCATENATED_WARNING = "url_concatenated_warning" @@ -432,6 +513,30 @@ class MessageDefinition: 'case-sensitive; use "{suggested_key}"' ), ), + MessageId.MISSING_QUESTION_ID: MessageDefinition( + code="EG414", + severity=Severity.ERROR, + finding_class=FindingClass.GENERAL, + summary="Question block is missing an id", + template="question block is missing an `id`: {snippet}", + ), + MessageId.MULTIPLE_MANDATORY_BLOCKS: MessageDefinition( + code="EG415", + severity=Severity.ERROR, + finding_class=FindingClass.GENERAL, + summary="Interview has more than one mandatory block", + template="interview has more than one `mandatory: True` block: {labels}", + ), + MessageId.MISSING_METADATA_FIELDS: MessageDefinition( + code="IG416", + severity=Severity.INFO, + finding_class=FindingClass.GENERAL, + summary="Metadata block is missing common CourtFormsOnline fields", + template=( + "metadata block is missing common CourtFormsOnline publishing fields: " + "{fields}" + ), + ), MessageId.ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE: MessageDefinition( code="EA501", severity=Severity.ERROR, @@ -510,6 +615,324 @@ class MessageDefinition: summary="Link text is too generic", template="link text in {section_location} is too generic: {text}", ), + MessageId.ACCESSIBILITY_YESNO_SHORTCUT: MessageDefinition( + code="EA510", + severity=Severity.ERROR, + finding_class=FindingClass.ACCESSIBILITY, + summary="Question uses yesno shorthand", + template="screen uses `{shortcut}` question shorthand; prefer `fields` with an explicit datatype", + ), + MessageId.ACCESSIBILITY_FIELD_MISSING_LABEL: MessageDefinition( + code="EA511", + severity=Severity.ERROR, + finding_class=FindingClass.ACCESSIBILITY, + summary="Field appears to have no visible label", + template="field appears to collect user input but has no visible label: 
{field_name}", + ), + MessageId.ACCESSIBILITY_NON_DESCRIPTIVE_FIELD_LABEL: MessageDefinition( + code="WA512", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Field label may be too vague", + template="field label may be too vague for assistive technology users: {snippet}", + ), + MessageId.ACCESSIBILITY_BLANK_CHOICE_LABEL: MessageDefinition( + code="EA513", + severity=Severity.ERROR, + finding_class=FindingClass.ACCESSIBILITY, + summary="Choice label is blank", + template="choice label is blank in {location}", + ), + MessageId.ACCESSIBILITY_NON_DESCRIPTIVE_CHOICE_LABEL: MessageDefinition( + code="WA514", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Choice label may be too vague", + template="choice label in {location} may be too vague: {snippet}", + ), + MessageId.ACCESSIBILITY_DUPLICATE_FIELD_LABEL: MessageDefinition( + code="WA515", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Multiple fields share the same label", + template="multiple fields on this screen share the same label text: {labels}", + ), + MessageId.ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS: MessageDefinition( + code="WA516", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Text may rely on color or symbols alone", + template="text in {section_location} may rely on color or symbols alone to convey meaning: {snippet}", + ), + MessageId.ACCESSIBILITY_INLINE_COLOR_STYLING: MessageDefinition( + code="WA517", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Text uses inline or semantic color styling", + template="text in {section_location} uses inline or semantic color styling; verify contrast and non-color cues: {snippet}", + ), + MessageId.ACCESSIBILITY_AMBIGUOUS_LINK_DESTINATIONS: MessageDefinition( + code="WA518", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Same link text points to 
multiple destinations", + template="same link text points to multiple destinations on this screen: {snippet}", + ), + MessageId.ACCESSIBILITY_NEW_TAB_WITHOUT_WARNING: MessageDefinition( + code="WA519", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Link opens a new tab without warning text", + template="link in {section_location} opens a new tab or window without warning text: {snippet}", + ), + MessageId.ACCESSIBILITY_SVG_MISSING_ACCESSIBLE_NAME: MessageDefinition( + code="WA520", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Inline SVG is missing an accessible name", + template="inline SVG in {section_location} is missing a title or ARIA label: {snippet}", + ), + MessageId.ACCESSIBILITY_TABLE_MISSING_HEADERS: MessageDefinition( + code="EA521", + severity=Severity.ERROR, + finding_class=FindingClass.ACCESSIBILITY, + summary="Table appears to need header cells", + template="table in {section_location} appears to be data but has no `` headers: {snippet}", + ), + MessageId.ACCESSIBILITY_LAYOUT_TABLE_NEEDS_REVIEW: MessageDefinition( + code="WA522", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Table needs review for layout-only usage", + template="table in {section_location} has no headers or caption; confirm it is not layout-only: {snippet}", + ), + MessageId.ACCESSIBILITY_POSITIVE_TABINDEX: MessageDefinition( + code="EA523", + severity=Severity.ERROR, + finding_class=FindingClass.ACCESSIBILITY, + summary="Positive tabindex disrupts focus order", + template="HTML in {section_location} uses `tabindex` greater than 0: {snippet}", + ), + MessageId.ACCESSIBILITY_CLICKABLE_NON_CONTROL_HTML: MessageDefinition( + code="WA524", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Clickable non-control element may lack keyboard support", + template="`<{tag_name}>` in {section_location} uses `onclick` without clear keyboard semantics: 
{snippet}", + ), + MessageId.ACCESSIBILITY_REQUIRED_FIELD_NOT_INDICATED: MessageDefinition( + code="WA525", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Required field may not be indicated clearly", + template="required field may not clearly indicate that it is required: {snippet}", + ), + MessageId.ACCESSIBILITY_VALIDATION_WITHOUT_GUIDANCE: MessageDefinition( + code="WA526", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Validation constraints lack guidance", + template="field has validation constraints but no hint, help, or validation message: {snippet}", + ), + MessageId.ACCESSIBILITY_GENERIC_VALIDATION_MESSAGE: MessageDefinition( + code="WA527", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Validation message may be too generic", + template="validation message may be too generic to help users recover: {snippet}", + ), + MessageId.ACCESSIBILITY_AMBIGUOUS_BUTTON_TEXT: MessageDefinition( + code="WA528", + severity=Severity.WARNING, + finding_class=FindingClass.ACCESSIBILITY, + summary="Button text may be too vague", + template="button text may be too vague out of context: {snippet}", + ), + MessageId.STYLE_SUBQUESTION_H1: MessageDefinition( + code="ES701", + severity=Severity.ERROR, + finding_class=FindingClass.STYLE, + summary="Subquestion contains an H1 heading", + template="subquestion contains an H1 heading; use H2+ inside body text: {snippet}", + ), + MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES: MessageDefinition( + code="ES702", + severity=Severity.ERROR, + finding_class=FindingClass.STYLE, + summary="Choices are missing stable values", + template="{origin} includes labels without explicit stable values: {snippet}", + ), + MessageId.STYLE_REMOVE_LANGUAGE_EN: MessageDefinition( + code="ES703", + severity=Severity.ERROR, + finding_class=FindingClass.STYLE, + summary="Explicit language: en is unnecessary", + template="remove `language: en`; 
English is the default in docassemble interviews", + ), + MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE: MessageDefinition( + code="WS704", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="User-facing text appears inside a code block", + template="move user-facing text out of `code:` blocks: {snippet}", + ), + MessageId.STYLE_MISSING_SCREEN_TITLE: MessageDefinition( + code="WS710", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Screen is missing a meaningful title", + template="screen appears to have user-facing content but no meaningful `question`: {snippet}", + ), + MessageId.STYLE_PLACEHOLDER_LANGUAGE: MessageDefinition( + code="WS711", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Placeholder language detected", + template="placeholder language appears in {location}: {snippet}", + ), + MessageId.STYLE_PLAIN_LANGUAGE_REPLACEMENT: MessageDefinition( + code="IS712", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Prefer a plainer word or phrase", + template=( + "consider replacing {matched_text!r} in {location} with simpler language, " + "such as {replacement!r}" + ), + ), + MessageId.STYLE_VARIABLE_ROOT_NOT_SNAKE_CASE: MessageDefinition( + code="WS713", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Variable roots should use snake_case", + template="variable roots should use snake_case: {roots}", + ), + MessageId.STYLE_LONG_SENTENCE: MessageDefinition( + code="WS714", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Sentence is probably too long", + template="sentence in {location} exceeds 20 words: {snippet}", + ), + MessageId.STYLE_COMPOUND_QUESTION: MessageDefinition( + code="IS715", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Question may ask about more than one thing", + template="potential compound question in {location}: {snippet}", + ), + 
MessageId.STYLE_OVERLONG_QUESTION_LABEL: MessageDefinition( + code="WS716", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Question title is probably too long", + template="question text exceeds 120 characters: {snippet}", + ), + MessageId.STYLE_OVERLONG_FIELD_LABEL: MessageDefinition( + code="WS717", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Field label is probably too long", + template="field label exceeds 90 characters: {snippet}", + ), + MessageId.STYLE_TOO_MANY_FIELDS_ON_SCREEN: MessageDefinition( + code="WS718", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Screen contains too many fields", + template="screen contains {field_count} fields; consider splitting it into smaller steps", + ), + MessageId.STYLE_WALL_OF_TEXT: MessageDefinition( + code="WS719", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Subquestion may be a wall of text", + template="subquestion has a long unstructured block of text: {snippet}", + ), + MessageId.STYLE_COMPLEX_SCREEN_MISSING_HELP: MessageDefinition( + code="IS720", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Complex screen has no inline help", + template="complex screen has no help, hint, or note text: {snippet}", + ), + MessageId.STYLE_MISSING_EXIT_CRITERIA_SCREEN: MessageDefinition( + code="IS721", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Eligibility screening may need an exit screen", + template=( + "interview appears to screen for eligibility but no clear ineligible or exit screen was detected" + ), + ), + MessageId.STYLE_MISSING_CUSTOM_THEME: MessageDefinition( + code="IS722", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Root interview may be missing a custom theme", + template=( + "metadata suggests a root interview file, but no explicit custom theme or CSS dependency was detected" + ), + ), + 
MessageId.STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS: MessageDefinition( + code="WS723", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Review screen has no edit links", + template="review screen detected but no editable `Edit:` links were found", + ), + MessageId.STYLE_REVIEW_SCREEN_MISSING_KEY_CHOICE_EDITS: MessageDefinition( + code="IS724", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Review screen may not edit key decision fields", + template="review screen exists but does not appear to allow editing key decision fields: {snippet}", + ), + MessageId.STYLE_PREFER_PERSON_OBJECTS: MessageDefinition( + code="IS725", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Interview may benefit from person objects", + template="interview appears to use disconnected name or address variables; prefer ALIndividual or ALPeopleList patterns: {snippet}", + ), + MessageId.STYLE_QUESTION_LEVEL_HELP: MessageDefinition( + code="WS726", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Question-level help is not recommended", + template=( + "avoid `help:` as a question modifier; use `collapse_template()` in " + "`subquestion` or another inline pattern instead: {snippet}" + ), + ), + MessageId.STYLE_TONE_AND_RESPECT: MessageDefinition( + code="WS790", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Potential tone or respect issue", + template="{message}", + ), + MessageId.STYLE_PLAIN_LANGUAGE_REWRITE_OPPORTUNITY: MessageDefinition( + code="WS791", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Potential plain-language rewrite opportunity", + template="{message}", + ), + MessageId.STYLE_LLM_CONFIGURATION_ERROR: MessageDefinition( + code="ES792", + severity=Severity.ERROR, + finding_class=FindingClass.STYLE, + summary="Style LLM checks are misconfigured", + template="{detail}", + ), + MessageId.STYLE_LLM_REQUEST_FAILED: 
MessageDefinition( + code="ES793", + severity=Severity.ERROR, + finding_class=FindingClass.STYLE, + summary="Style LLM request failed", + template='LLM-backed style rule "{rule_id}" could not run: {detail}', + ), MessageId.URL_CONCATENATED_ERROR: MessageDefinition( code="EG601", severity=Severity.ERROR, diff --git a/src/dayamlchecker/style.py b/src/dayamlchecker/style.py new file mode 100644 index 0000000..d2b0f27 --- /dev/null +++ b/src/dayamlchecker/style.py @@ -0,0 +1,1461 @@ +from __future__ import annotations + +import ast +from dataclasses import dataclass +from functools import lru_cache +import html +import importlib.resources +import json +import os +import re +from typing import Any, Iterable, Optional + +from dayamlchecker.accessibility import ( + _absolute_line_number, + _extract_field_label, + _extract_field_variable, + _find_top_level_key_line, + _iter_fields, +) +from dayamlchecker.messages import Finding, FindingDraft, MessageId, draft, make_finding +import requests +from ruamel.yaml import YAML + +VISIBLE_TEXT_KEYS = ("question", "subquestion", "under", "help", "note", "html") +_OPENAI_BASE_URL_ENV = "OPENAI_BASE_URL" +_OPENAI_API_KEY_ENV = "OPENAI_API_KEY" +_OPENAI_MODEL_ENV = "OPENAI_MODEL" +_DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1" +_DEFAULT_OPENAI_MODEL = "gpt-4o-mini" +_MAKO_EXPR_RE = re.compile(r"\$\{.*?\}", re.DOTALL) +_MAKO_BLOCK_RE = re.compile(r"<%[\s\S]*?%>") +_MAKO_CONTROL_RE = re.compile(r"(?m)^\s*%.*$") +_MARKDOWN_IMAGE_RE = re.compile(r"!\[([^\]]*)\]\([^)]+\)") +_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\([^)]+\)") +_HTML_TAG_RE = re.compile(r"<[^>]+>") +_MARKDOWN_CODE_RE = re.compile(r"`([^`]+)`") +_FILE_TAG_RE = re.compile( + r"\[FILE\s+([^,\]]+)(?:\s*,\s*([^,\]]+))?(?:\s*,\s*([^\]]+))?\]", + re.IGNORECASE, +) +_SENTENCE_RE = re.compile(r"[^.!?]+[.!?]") +_WORD_RE = re.compile(r"\b\w+\b") +_COMPOUND_QUESTION_RE = re.compile( + r"\b(?:and|or)\s+" + r"(?:who|what|when|where|why|how|do|does|did|is|are|am|was|were|" + 
r"can|could|will|would|should|have|has|had)\b", + re.IGNORECASE, +) +_PLACEHOLDER_PATTERNS = ( + re.compile(r"\bplaceholder\b", re.IGNORECASE), + re.compile(r"\blorem ipsum\b", re.IGNORECASE), + re.compile(r"\btodo\b", re.IGNORECASE), + re.compile(r"\btbd\b", re.IGNORECASE), + re.compile(r"\bto be determined\b", re.IGNORECASE), + re.compile(r"\bcoming soon\b", re.IGNORECASE), + re.compile(r"\[insert[^\]]*\]", re.IGNORECASE), + re.compile(r"\byour text here\b", re.IGNORECASE), +) +_USER_FACING_CODE_CALLS = frozenset( + {"action_button_html", "action_menu_item", "validation_error", "word"} +) +_USER_FACING_CODE_NAME_PARTS = frozenset( + { + "body", + "button", + "caption", + "error", + "footer", + "header", + "heading", + "help", + "hint", + "instruction", + "instructions", + "intro", + "label", + "menu", + "message", + "note", + "prompt", + "question", + "subject", + "subquestion", + "title", + "warning", + } +) +_USER_FACING_CODE_EXACT_NAMES = frozenset({"interview_short_title"}) + + +@dataclass(frozen=True) +class ParsedInterviewDocument: + doc: dict[str, Any] + source_code: str + document_start_line: int + index: int + + @property + def screen_id(self) -> str: + for key in ("id", "event"): + value = _stringify(self.doc.get(key)).strip() + if value: + return value + return f"block-{self.index}" + + def line_for_key(self, key: str) -> int: + key_line = _find_top_level_key_line(self.source_code, key) + if key_line is not None: + return _absolute_line_number( + self.source_code, + self.document_start_line, + key_line, + f"{key}:", + ) + return self.document_start_line + self.doc.get("__line__", 1) - 1 + + def line_for_field(self, field: dict[str, Any]) -> int: + return ( + self.document_start_line + + field.get("__line__", self.doc.get("__line__", 1)) + - 1 + ) + + def default_line(self) -> int: + question = _stringify(self.doc.get("question")).strip() + if question: + return self.line_for_key("question") + return self.document_start_line + self.doc.get("__line__", 
1) - 1 + + +@dataclass(frozen=True) +class TextEntry: + location: str + text: str + line_number: int + screen_id: str + + +@dataclass(frozen=True) +class StyleLintOptions: + enabled: bool = False + include_llm: bool = False + openai_base_url: str | None = None + openai_api_key: str | None = None + openai_model: str | None = None + + def llm_enabled(self) -> bool: + return self.include_llm + + def resolved_openai_base_url(self) -> str: + return ( + _stringify(self.openai_base_url).strip() + or os.getenv(_OPENAI_BASE_URL_ENV, "").strip() + or _DEFAULT_OPENAI_BASE_URL + ) + + def resolved_openai_api_key(self) -> str: + return ( + _stringify(self.openai_api_key).strip() + or os.getenv(_OPENAI_API_KEY_ENV, "").strip() + ) + + def resolved_openai_model(self) -> str: + return ( + _stringify(self.openai_model).strip() + or os.getenv(_OPENAI_MODEL_ENV, "").strip() + or _DEFAULT_OPENAI_MODEL + ) + + +def _style_draft( + message_id: str, + *, + line_number: int = 1, + screen_id: str | None = None, + **context: Any, +) -> FindingDraft: + payload = dict(context) + if screen_id: + payload["screen_id"] = screen_id + return draft(message_id, line_number=line_number, **payload) + + +def find_style_findings( + *, + docs: Iterable[ParsedInterviewDocument], + input_file: str | None, + options: Optional[StyleLintOptions] = None, +) -> list[Finding]: + resolved_options = options or StyleLintOptions() + parsed_docs = list(docs) + deterministic: list[Finding] = [] + + for check in ( + _check_subquestion_h1, + _check_choices_without_stable_values, + _check_language_en_flag, + _check_hardcoded_strings_in_code, + _check_empty_screen_title, + _check_placeholder_language, + _check_plain_language_replacements, + _check_variable_conventions, + _check_long_sentences, + _check_compound_questions, + _check_overlong_labels, + _check_too_many_fields, + _check_wall_of_text, + _check_question_level_help, + _check_missing_help_on_complex_screens, + _check_exit_criteria_and_screen, + _check_theme_usage, 
+ _check_review_screen_editability, + _check_prefer_person_objects, + ): + deterministic.extend( + finding.to_finding(file_name=input_file or "") + for finding in check(parsed_docs) + ) + + if not resolved_options.llm_enabled(): + return _dedupe_findings(deterministic) + + llm_api_key = resolved_options.resolved_openai_api_key() + if not llm_api_key: + deterministic.append( + make_finding( + MessageId.STYLE_LLM_CONFIGURATION_ERROR, + file_name=input_file, + line_number=parsed_docs[0].default_line() if parsed_docs else 1, + screen_id=parsed_docs[0].screen_id if parsed_docs else "", + detail=( + "style LLM checks require an API key via " + "--openai-api-key or OPENAI_API_KEY" + ), + ) + ) + return _dedupe_findings(deterministic) + + deterministic.extend( + _run_llm_rules( + parsed_docs=parsed_docs, + input_file=input_file, + options=resolved_options, + ) + ) + return _dedupe_findings(deterministic) + + +def _check_subquestion_h1(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + subquestion = _stringify(parsed_doc.doc.get("subquestion")) + match = re.search(r"(?m)^\s*#\s+.*$", subquestion) + if not match: + continue + findings.append( + _style_draft( + MessageId.STYLE_SUBQUESTION_H1, + line_number=parsed_doc.line_for_key("subquestion"), + screen_id=parsed_doc.screen_id, + snippet=_shorten(match.group(0)), + ) + ) + return findings + + +def _check_choices_without_stable_values( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + + def has_unstable_choices(choices: Any) -> bool: + if not isinstance(choices, list): + return False + for item in choices: + if isinstance(item, str) and ": " not in item: + return True + if not isinstance(item, dict): + continue + if len(item) == 1 and "label" not in item and "value" not in item: + continue + if "label" in item and "value" not in item: + return True + return False + + for parsed_doc in docs: + for key 
in ("choices", "dropdown", "buttons"): + value = parsed_doc.doc.get(key) + if not has_unstable_choices(value): + continue + findings.append( + _style_draft( + MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES, + line_number=parsed_doc.line_for_key(key), + screen_id=parsed_doc.screen_id, + origin=key, + snippet=_shorten(value), + ) + ) + for field in _iter_fields(parsed_doc.doc): + choices = field.get("choices") + if not has_unstable_choices(choices): + continue + findings.append( + _style_draft( + MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + origin="field choices", + snippet=_shorten(choices), + ) + ) + return findings + + +def _check_language_en_flag(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + if _stringify(parsed_doc.doc.get("language")).strip().lower() != "en": + continue + findings.append( + _style_draft( + MessageId.STYLE_REMOVE_LANGUAGE_EN, + line_number=parsed_doc.line_for_key("language"), + screen_id=parsed_doc.screen_id, + ) + ) + return findings + + +def _check_hardcoded_strings_in_code( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + code = _stringify(parsed_doc.doc.get("code")) + if not code: + continue + for content in _iter_user_facing_code_strings(code): + normalized = content.strip() + if _looks_user_facing_code_string(normalized): + findings.append( + _style_draft( + MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE, + line_number=parsed_doc.line_for_key("code"), + screen_id=parsed_doc.screen_id, + snippet=_shorten(normalized), + ) + ) + break + return findings + + +def _check_empty_screen_title( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + question_text = _plain_text( + _visible_text(parsed_doc.doc.get("question")) + ).strip() + if 
def _check_placeholder_language(
    docs: list[ParsedInterviewDocument],
) -> list[FindingDraft]:
    """Flag text entries that still contain placeholder wording.

    Each entry from ``_iter_doc_texts`` is reduced with ``_plain_text`` and
    tested against ``_PLACEHOLDER_PATTERNS`` in order; only the first pattern
    that matches is reported for that entry.
    """
    drafts: list[FindingDraft] = []
    for entry in _iter_doc_texts(docs):
        plain = _plain_text(entry.text)
        searches = (pattern.search(plain) for pattern in _PLACEHOLDER_PATTERNS)
        hit = next((found for found in searches if found), None)
        if hit is None:
            continue
        drafts.append(
            _style_draft(
                MessageId.STYLE_PLACEHOLDER_LANGUAGE,
                line_number=entry.line_number,
                screen_id=entry.screen_id,
                location=entry.location,
                snippet=_shorten(hit.group(0)),
            )
        )
    return drafts
def _check_long_sentences(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]:
    """Flag user-facing text entries containing a sentence of more than 20 words.

    Sentences are whatever ``_SENTENCE_RE`` extracts from the plain-text form
    of the entry; words are counted with ``_WORD_RE``. Only the first overlong
    sentence of each entry is reported.
    """
    word_limit = 20
    drafts: list[FindingDraft] = []
    for entry in _user_facing_text_entries(docs):
        sentences = _SENTENCE_RE.findall(_plain_text(entry.text))
        overlong = next(
            (
                sentence
                for sentence in sentences
                if len(_WORD_RE.findall(sentence)) > word_limit
            ),
            None,
        )
        if overlong is None:
            continue
        drafts.append(
            _style_draft(
                MessageId.STYLE_LONG_SENTENCE,
                line_number=entry.line_number,
                screen_id=entry.screen_id,
                location=entry.location,
                snippet=_shorten(overlong),
            )
        )
    return drafts
not in plain: + continue + if "and/or" not in plain and not _COMPOUND_QUESTION_RE.search(plain): + continue + findings.append( + _style_draft( + MessageId.STYLE_COMPOUND_QUESTION, + line_number=entry.line_number, + screen_id=entry.screen_id, + location=entry.location, + snippet=_shorten(_plain_text(entry.text)), + ) + ) + return findings + + +def _check_overlong_labels(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + question = _plain_text(_stringify(parsed_doc.doc.get("question"))) + if len(question) > 120: + findings.append( + _style_draft( + MessageId.STYLE_OVERLONG_QUESTION_LABEL, + line_number=parsed_doc.line_for_key("question"), + screen_id=parsed_doc.screen_id, + snippet=_shorten(question), + ) + ) + for field in _iter_fields(parsed_doc.doc): + field_label = _extract_field_label(field) + if len(field_label) <= 90: + continue + findings.append( + _style_draft( + MessageId.STYLE_OVERLONG_FIELD_LABEL, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(field_label), + ) + ) + break + return findings + + +def _check_too_many_fields(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + field_count = len(_iter_fields(parsed_doc.doc)) + if field_count <= 6: + continue + findings.append( + _style_draft( + MessageId.STYLE_TOO_MANY_FIELDS_ON_SCREEN, + line_number=parsed_doc.default_line(), + screen_id=parsed_doc.screen_id, + field_count=field_count, + ) + ) + return findings + + +def _check_wall_of_text(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + subquestion = _stringify(parsed_doc.doc.get("subquestion")) + plain = _plain_text(subquestion) + word_count = len(_WORD_RE.findall(plain)) + has_structure = bool( + re.search(r"(?m)^\s*[-*]\s+", subquestion) + or re.search(r"(?m)^\s*#{2,6}\s+", subquestion) 
+ ) + if word_count <= 120 or has_structure: + continue + findings.append( + _style_draft( + MessageId.STYLE_WALL_OF_TEXT, + line_number=parsed_doc.line_for_key("subquestion"), + screen_id=parsed_doc.screen_id, + snippet=_shorten(plain), + ) + ) + return findings + + +def _check_question_level_help( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + help_value = parsed_doc.doc.get("help") + if help_value is None: + continue + if not any( + _stringify(parsed_doc.doc.get(key)).strip() + for key in ("question", "subquestion", "field") + ) and not parsed_doc.doc.get("fields"): + continue + help_text = _stringify(help_value).strip() + findings.append( + _style_draft( + MessageId.STYLE_QUESTION_LEVEL_HELP, + line_number=parsed_doc.line_for_key("help"), + screen_id=parsed_doc.screen_id, + snippet=_shorten(help_text or "help"), + ) + ) + return findings + + +def _check_missing_help_on_complex_screens( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + fields = _iter_fields(parsed_doc.doc) + if len(fields) < 5: + continue + has_help = bool(parsed_doc.doc.get("help")) + for field in fields: + if field.get("help") or field.get("hint") or field.get("note"): + has_help = True + break + if has_help: + continue + sample_labels = [ + _extract_field_label(field) or _extract_field_variable(field) + for field in fields + ] + findings.append( + _style_draft( + MessageId.STYLE_COMPLEX_SCREEN_MISSING_HELP, + line_number=parsed_doc.default_line(), + screen_id=parsed_doc.screen_id, + snippet=_shorten( + ", ".join(label for label in sample_labels if label) + or f"{len(fields)} fields" + ), + ) + ) + return findings + + +def _check_exit_criteria_and_screen( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + metadata = _find_metadata(docs) + screening_signal = bool(_stringify(metadata.get("can_I_use_this_form")).strip()) + 
for parsed_doc in docs: + combined = " ".join( + _stringify(parsed_doc.doc.get(key)) + for key in ("question", "subquestion", "id", "event") + ).lower() + if any( + marker in combined + for marker in ( + "can i use", + "eligible", + "qualify", + "right form", + "wrong form", + ) + ): + screening_signal = True + break + if not screening_signal: + return [] + for parsed_doc in docs: + combined = " ".join( + _stringify(parsed_doc.doc.get(key)) + for key in ("question", "subquestion", "under", "id", "event") + ).lower() + if any( + marker in combined + for marker in ( + "not eligible", + "may not be able", + "cannot help", + "can't help", + "wrong form", + "stop here", + "exit", + ) + ): + return [] + line_number = docs[0].default_line() if docs else 1 + return [ + _style_draft( + MessageId.STYLE_MISSING_EXIT_CRITERIA_SCREEN, + line_number=line_number, + screen_id=docs[0].screen_id if docs else None, + ) + ] + + +def _check_theme_usage(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + metadata_docs = [parsed_doc for parsed_doc in docs if isinstance(parsed_doc.doc.get("metadata"), dict)] + if not metadata_docs: + return [] + theme_references: set[str] = set() + for parsed_doc in docs: + include_value = parsed_doc.doc.get("include") + theme_references.update(_iter_include_values(include_value)) + css_value = _stringify(parsed_doc.doc.get("css")).strip().lower() + if css_value: + theme_references.add(css_value) + features = parsed_doc.doc.get("features") + if isinstance(features, dict): + bootstrap_theme = _stringify(features.get("bootstrap theme")).strip().lower() + if bootstrap_theme: + theme_references.add(bootstrap_theme) + if any( + marker in reference + for reference in theme_references + for marker in ("theme", "css", "bootstrap") + ): + return [] + metadata_doc = metadata_docs[0] + return [ + _style_draft( + MessageId.STYLE_MISSING_CUSTOM_THEME, + line_number=metadata_doc.line_for_key("metadata"), + screen_id=metadata_doc.screen_id, + ) + ] + + +def 
_check_review_screen_editability( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + review_docs = [parsed_doc for parsed_doc in docs if _is_review_screen(parsed_doc)] + if not review_docs: + return [] + + editable_variables: set[str] = set() + for parsed_doc in review_docs: + editable_variables.update(_review_edit_variables(parsed_doc.doc.get("review"))) + + findings: list[FindingDraft] = [] + if not editable_variables: + review_doc = review_docs[0] + findings.append( + _style_draft( + MessageId.STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS, + line_number=review_doc.default_line(), + screen_id=review_doc.screen_id, + ) + ) + return findings + + key_choice_variables = sorted( + { + field_var + for parsed_doc in docs + for field in _iter_fields(parsed_doc.doc) + if _field_looks_like_key_choice(field) + for field_var in [_extract_field_variable(field).strip()] + if field_var + } + ) + if key_choice_variables and not any( + _variable_name_matches(edit_name, key_choice) + for edit_name in editable_variables + for key_choice in key_choice_variables + ): + review_doc = review_docs[0] + findings.append( + _style_draft( + MessageId.STYLE_REVIEW_SCREEN_MISSING_KEY_CHOICE_EDITS, + line_number=review_doc.default_line(), + screen_id=review_doc.screen_id, + snippet=", ".join(key_choice_variables[:4]), + ) + ) + return findings + + +def _check_prefer_person_objects( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + references = [ + (reference, line_number, parsed_doc.screen_id) + for parsed_doc in docs + for reference, line_number in _variable_references(parsed_doc) + ] + if not references: + return [] + if any( + marker in reference + for reference, _, _ in references + for marker in (".name.", ".address.", ".birthdate", ".gender") + ): + return [] + name_parts = [ + item + for item in references + if re.search(r"(first|middle|last|full)_name$", item[0]) + ] + address_parts = [ + item + for item in references + if 
re.search(r"(address|street|unit|city|state|zip|postal_code)$", item[0]) + ] + if len(name_parts) < 2 and len(address_parts) < 3: + return [] + first_reference, line_number, screen_id = sorted( + name_parts + address_parts, + key=lambda item: (item[1], item[0]), + )[0] + return [ + _style_draft( + MessageId.STYLE_PREFER_PERSON_OBJECTS, + line_number=line_number, + screen_id=screen_id, + snippet=first_reference, + ) + ] + + +def _run_llm_rules( + *, + parsed_docs: list[ParsedInterviewDocument], + input_file: str | None, + options: StyleLintOptions, +) -> list[Finding]: + prompts = _load_llm_prompt_templates() + llm_rules = prompts.get("llm_rules") + if not isinstance(llm_rules, list): + return [] + screen_payload = _build_screen_payload(parsed_docs) + if not screen_payload: + return [] + findings: list[Finding] = [] + for rule in llm_rules: + if not isinstance(rule, dict): + continue + rule_id = _stringify(rule.get("rule_id")).strip() + system_prompt = _stringify(rule.get("system_prompt")) + user_prompt = _stringify(rule.get("user_prompt")).replace( + "{screens_json}", screen_payload + ) + raw_response, error_detail = _call_openai_chat_completion( + system_prompt=system_prompt, + user_prompt=user_prompt, + base_url=options.resolved_openai_base_url(), + api_key=options.resolved_openai_api_key(), + model=options.resolved_openai_model(), + ) + if error_detail is not None: + findings.append( + make_finding( + MessageId.STYLE_LLM_REQUEST_FAILED, + file_name=input_file, + line_number=parsed_docs[0].default_line() if parsed_docs else 1, + screen_id=parsed_docs[0].screen_id if parsed_docs else "", + rule_id=rule_id or "style-llm", + detail=error_detail, + ) + ) + continue + for item in _safe_parse_llm_json(raw_response): + finding = _build_llm_finding( + parsed_docs=parsed_docs, + input_file=input_file, + rule_id=_stringify(item.get("rule_id")).strip() or rule_id, + message=_stringify(item.get("message")).strip() + or "LLM identified a potential style issue.", + 
problematic_text=_stringify(item.get("problematic_text")).strip(), + screen_id=_stringify(item.get("screen_id")).strip(), + ) + if finding is not None: + findings.append(finding) + return findings + + +def _build_llm_finding( + *, + parsed_docs: list[ParsedInterviewDocument], + input_file: str | None, + rule_id: str, + message: str, + problematic_text: str, + screen_id: str, +) -> Finding | None: + message_id: str + if rule_id == "tone-and-respect": + message_id = MessageId.STYLE_TONE_AND_RESPECT + elif rule_id == "plain-language-rewrite-opportunities": + message_id = MessageId.STYLE_PLAIN_LANGUAGE_REWRITE_OPPORTUNITY + else: + return None + line_number = next( + ( + parsed_doc.default_line() + for parsed_doc in parsed_docs + if parsed_doc.screen_id == screen_id + ), + parsed_docs[0].default_line() if parsed_docs else 1, + ) + rendered_message = message + if problematic_text: + rendered_message = f'{message} Quote: "{_shorten(problematic_text, limit=220)}"' + return make_finding( + message_id, + file_name=input_file, + line_number=line_number, + message=rendered_message, + screen_id=screen_id, + snippet=_shorten(problematic_text, limit=220) if problematic_text else "", + ) + + +def _iter_doc_texts(docs: list[ParsedInterviewDocument]) -> list[TextEntry]: + values: list[TextEntry] = [] + for parsed_doc in docs: + for key in VISIBLE_TEXT_KEYS: + value = parsed_doc.doc.get(key) + line_number = parsed_doc.line_for_key(key) + if isinstance(value, dict): + content = _stringify(value.get("content")) + label = _stringify(value.get("label")) + if content: + values.append( + TextEntry( + location=f"{key}.content", + text=content, + line_number=line_number, + screen_id=parsed_doc.screen_id, + ) + ) + if label: + values.append( + TextEntry( + location=f"{key}.label", + text=label, + line_number=line_number, + screen_id=parsed_doc.screen_id, + ) + ) + else: + rendered = _stringify(value) + if rendered: + values.append( + TextEntry( + location=key, + text=rendered, + 
line_number=line_number, + screen_id=parsed_doc.screen_id, + ) + ) + for index, field in enumerate(_iter_fields(parsed_doc.doc)): + line_number = parsed_doc.line_for_field(field) + for field_key in ("label", "help", "hint", "note", "html"): + rendered = _stringify(field.get(field_key)) + if not rendered: + continue + values.append( + TextEntry( + location=f"fields[{index}].{field_key}", + text=rendered, + line_number=line_number, + screen_id=parsed_doc.screen_id, + ) + ) + if not field.get("label") and field: + first_key = _stringify(next(iter(field.keys()))) + if first_key: + values.append( + TextEntry( + location=f"fields[{index}].first_key", + text=first_key, + line_number=line_number, + screen_id=parsed_doc.screen_id, + ) + ) + return values + + +def _user_facing_text_entries(docs: list[ParsedInterviewDocument]) -> list[TextEntry]: + entries = _iter_doc_texts(docs) + for parsed_doc in docs: + for key in ("choices", "dropdown", "buttons"): + for label in _extract_choice_display_text(parsed_doc.doc.get(key)): + entries.append( + TextEntry( + location=key, + text=label, + line_number=parsed_doc.line_for_key(key), + screen_id=parsed_doc.screen_id, + ) + ) + for index, field in enumerate(_iter_fields(parsed_doc.doc)): + for label in _extract_choice_display_text(field.get("choices")): + entries.append( + TextEntry( + location=f"fields[{index}].choices", + text=label, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + ) + ) + return entries + + +def _question_text_entries(docs: list[ParsedInterviewDocument]) -> list[TextEntry]: + entries: list[TextEntry] = [] + for parsed_doc in docs: + for key in ("question", "subquestion"): + value = _stringify(parsed_doc.doc.get(key)) + if not value: + continue + entries.append( + TextEntry( + location=key, + text=value, + line_number=parsed_doc.line_for_key(key), + screen_id=parsed_doc.screen_id, + ) + ) + return entries + + +def _extract_choice_display_text(choices: Any) -> list[str]: + extracted: 
list[str] = [] + if isinstance(choices, list): + for choice in choices: + if isinstance(choice, str): + extracted.append(choice.split(": ", 1)[0] if ": " in choice else choice) + elif isinstance(choice, dict): + label = _stringify(choice.get("label")) + if label: + extracted.append(label) + elif len(choice) == 1: + extracted.append(_stringify(next(iter(choice.keys())))) + elif isinstance(choices, dict): + extracted.extend(_stringify(key) for key in choices.keys()) + return [item for item in extracted if item] + + +def _variable_references( + parsed_doc: ParsedInterviewDocument, +) -> list[tuple[str, int]]: + references: list[tuple[str, int]] = [] + for key in ("yesno", "noyes", "yesnomaybe", "noyesmaybe"): + value = _stringify(parsed_doc.doc.get(key)).strip() + if value and _looks_like_variable_reference(value): + references.append((value, parsed_doc.line_for_key(key))) + for field in _iter_fields(parsed_doc.doc): + value = _extract_field_variable(field) + if value and _looks_like_variable_reference(value): + references.append((value, parsed_doc.line_for_field(field))) + return references + + +def _looks_like_variable_reference(value: str) -> bool: + stripped = value.strip() + return bool( + re.fullmatch( + r"[A-Za-z_][A-Za-z0-9_]*(?:\[[^\]]+\]|\.[A-Za-z_][A-Za-z0-9_]*)*", + stripped, + ) + ) + + +def _looks_user_facing_code_string(value: str) -> bool: + if ( + " " not in value + or value.startswith("http") + or re.fullmatch(r"[A-Za-z0-9_./:-]+", value) + ): + return False + letters_only = re.sub(r"[^A-Za-z ]+", " ", value).strip() + if not letters_only: + return False + if letters_only == letters_only.lower() and not re.search(r"[.!?:]", value): + return False + return True + + +def _iter_user_facing_code_strings(code: str) -> list[str]: + contents: list[str] = [] + try: + tree = ast.parse(code) + except SyntaxError: + return contents + parents = _parent_map(tree) + docstrings = _docstring_nodes(tree) + for node in ast.walk(tree): + if ( + not isinstance(node, 
ast.Constant) + or not isinstance(node.value, str) + or node in docstrings + ): + continue + content = node.value.strip() + if not content or not _has_user_facing_code_sink(node, parents): + continue + contents.append(content) + return contents + + +def _parent_map(tree: ast.AST) -> dict[ast.AST, ast.AST]: + parents: dict[ast.AST, ast.AST] = {} + for parent in ast.walk(tree): + for child in ast.iter_child_nodes(parent): + parents[child] = parent + return parents + + +def _docstring_nodes(tree: ast.AST) -> set[ast.Constant]: + docstrings: set[ast.Constant] = set() + for node in ast.walk(tree): + body = getattr(node, "body", None) + if not isinstance(body, list) or not body: + continue + first_stmt = body[0] + if not isinstance(first_stmt, ast.Expr): + continue + value = getattr(first_stmt, "value", None) + if isinstance(value, ast.Constant) and isinstance(value.value, str): + docstrings.add(value) + return docstrings + + +def _has_user_facing_code_sink( + node: ast.Constant, parents: dict[ast.AST, ast.AST] +) -> bool: + current: ast.AST = node + while current in parents: + parent = parents[current] + if isinstance(parent, ast.keyword) and _is_user_facing_code_name( + parent.arg or "" + ): + return True + if isinstance(parent, ast.Call) and _is_user_facing_code_call(parent): + return True + if isinstance(parent, ast.Assign) and any( + _is_user_facing_assignment_target(target) for target in parent.targets + ): + return True + if isinstance(parent, ast.AnnAssign) and _is_user_facing_assignment_target( + parent.target + ): + return True + if isinstance(parent, ast.NamedExpr) and _is_user_facing_assignment_target( + parent.target + ): + return True + if isinstance(parent, ast.Dict) and _dict_value_has_user_facing_key( + parent, current + ): + return True + current = parent + return False + + +def _is_user_facing_code_call(call: ast.Call) -> bool: + func = call.func + if isinstance(func, ast.Name): + return func.id in _USER_FACING_CODE_CALLS + if isinstance(func, 
ast.Attribute): + return func.attr in _USER_FACING_CODE_CALLS + return False + + +def _is_user_facing_assignment_target(target: ast.expr) -> bool: + if isinstance(target, ast.Name): + return _is_user_facing_code_name(target.id) + if isinstance(target, ast.Attribute): + return _is_user_facing_code_name(target.attr) + return False + + +def _dict_value_has_user_facing_key(mapping: ast.Dict, node: ast.AST) -> bool: + for key, value in zip(mapping.keys, mapping.values): + if value is not node: + continue + return _is_user_facing_code_name(_dict_key_string(key)) + return False + + +def _dict_key_string(node: ast.expr | None) -> str: + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + return "" + + +def _is_user_facing_code_name(name: str) -> bool: + lowered = _stringify(name).strip().lower() + if not lowered: + return False + if lowered in _USER_FACING_CODE_EXACT_NAMES: + return True + parts = [part for part in re.split(r"[^a-z0-9]+", lowered) if part] + return any(part in _USER_FACING_CODE_NAME_PARTS for part in parts) + + +def _find_metadata(docs: list[ParsedInterviewDocument]) -> dict[str, Any]: + metadata: dict[str, Any] = {} + for parsed_doc in docs: + block = parsed_doc.doc.get("metadata") + if isinstance(block, dict): + metadata.update(block) + return metadata + + +def _iter_include_values(value: Any) -> list[str]: + if isinstance(value, str): + return [_stringify(value).strip().lower()] + if isinstance(value, list): + return [ + _stringify(item).strip().lower() + for item in value + if _stringify(item).strip() + ] + return [] + + +def _is_review_screen(parsed_doc: ParsedInterviewDocument) -> bool: + if parsed_doc.doc.get("review") is not None: + return True + combined = " ".join( + _stringify(parsed_doc.doc.get(key)) + for key in ("question", "id", "event") + ).lower() + return bool( + re.search( + r"\b(?:review|check your answers|edit your answers)\b", + combined, + ) + ) + + +def _review_edit_variables(review_value: Any) 
def _variable_name_matches(left: str, right: str) -> bool:
    """Return True when two variable references are equal or share a root.

    The root is the text before the first ``.`` or ``[``, the same rule
    ``_check_variable_conventions`` uses. This makes ``users[0].name`` match
    ``users[i].is_minor`` and ``users.revisit``; splitting on ``.`` alone
    compared ``users[0]`` with ``users[i]`` and missed them.

    Blank inputs never match, and two references with an empty root (such as
    ones starting with ``[``) match only when they are identical.
    """
    left_text = _stringify(left).strip()
    right_text = _stringify(right).strip()
    if not left_text or not right_text:
        return False
    if left_text == right_text:
        return True
    left_root = re.split(r"[.\[]", left_text, maxsplit=1)[0].strip()
    right_root = re.split(r"[.\[]", right_text, maxsplit=1)[0].strip()
    return bool(left_root) and left_root == right_root
def _shorten(text: Any, limit: int = 180) -> str:
    """Collapse whitespace in ``text`` and cap the result at ``limit`` characters.

    ``text`` is stringified, runs of whitespace become single spaces and the
    ends are stripped. A result longer than ``limit`` is cut and ends in
    ``...`` so that its total length equals ``limit``. When ``limit`` is too
    small to hold the ellipsis, the value is hard-truncated instead. The
    previous negative slice returned more than ``limit`` characters in that
    case.
    """
    value = re.sub(r"\s+", " ", _stringify(text)).strip()
    if len(value) <= limit:
        return value
    ellipsis = "..."
    if limit < len(ellipsis):
        return value[: max(limit, 0)]
    return value[: limit - len(ellipsis)] + ellipsis
list[tuple[str, str]]: + plain = _plain_text(text) + if not plain: + return [] + occupied: list[tuple[int, int]] = [] + seen_terms: set[str] = set() + matches: list[tuple[int, str, str]] = [] + for _, replacement, pattern in _compiled_plain_language_patterns(): + if len(matches) >= max_matches: + break + for found in pattern.finditer(plain): + span = (found.start(), found.end()) + overlaps = any( + not (span[1] <= used_start or span[0] >= used_end) + for used_start, used_end in occupied + ) + if overlaps: + continue + seen_key = found.group(0).strip().lower() + if seen_key in seen_terms: + continue + seen_terms.add(seen_key) + occupied.append(span) + matches.append((found.start(), found.group(0), replacement)) + break + matches.sort(key=lambda item: item[0]) + return [(match_text, replacement) for _, match_text, replacement in matches] + + +def _format_plain_language_replacement(value: str) -> str: + formatted = _stringify(value).strip() + if formatted.startswith("[") and formatted.endswith("]"): + formatted = formatted[1:-1].strip() + return formatted + + +def _build_screen_payload(parsed_docs: list[ParsedInterviewDocument]) -> str: + payload = [] + for parsed_doc in parsed_docs[:40]: + screen_text = "\n\n".join( + _visible_text(parsed_doc.doc.get(key)) + for key in ("question", "subquestion", "under", "help", "note", "html") + if _visible_text(parsed_doc.doc.get(key)).strip() + ) + screen_text = _shorten(_plain_text(screen_text), limit=800) + if not screen_text: + continue + payload.append({"screen_id": parsed_doc.screen_id, "text": screen_text}) + return json.dumps(payload, ensure_ascii=False) + + +def _call_openai_chat_completion( + *, + system_prompt: str, + user_prompt: str, + base_url: str, + api_key: str, + model: str, +) -> tuple[Any | None, str | None]: + request_url = base_url.rstrip("/") + if not request_url.endswith("/chat/completions"): + request_url = f"{request_url}/chat/completions" + try: + response = requests.post( + request_url, + headers={ + 
"Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "temperature": 0, + "response_format": {"type": "json_object"}, + }, + timeout=60, + ) + except requests.RequestException: + return None, "network request failed" + if response.status_code >= 400: + return None, f"provider returned HTTP {response.status_code}" + try: + payload = response.json() + except ValueError: + return None, "provider returned a non-JSON response" + try: + return payload["choices"][0]["message"]["content"], None + except (KeyError, IndexError, TypeError): + return None, "provider response was missing completion text" + + +def _safe_parse_llm_json(raw: Any) -> list[dict[str, Any]]: + if isinstance(raw, list): + return [item for item in raw if isinstance(item, dict)] + if isinstance(raw, dict): + if isinstance(raw.get("findings"), list): + return [item for item in raw["findings"] if isinstance(item, dict)] + return [raw] + if isinstance(raw, str): + try: + return _safe_parse_llm_json(json.loads(raw)) + except ValueError: + return [] + return [] + + +def _dedupe_findings(findings: list[Finding]) -> list[Finding]: + unique: list[Finding] = [] + seen: set[tuple[str, str, int]] = set() + for finding in findings: + key = (finding.code, finding.message, finding.line_number or 0) + if key in seen: + continue + seen.add(key) + unique.append(finding) + return unique diff --git a/src/dayamlchecker/yaml_structure.py b/src/dayamlchecker/yaml_structure.py index 42ca57c..f5e7402 100644 --- a/src/dayamlchecker/yaml_structure.py +++ b/src/dayamlchecker/yaml_structure.py @@ -1,7 +1,7 @@ # Each doc, apply this to each block import ast import argparse -from dataclasses import dataclass, field +from dataclasses import dataclass, field, replace from pathlib import Path import re import sys @@ -11,7 +11,12 @@ AccessibilityLintOptions, 
find_accessibility_findings, ) -from dayamlchecker.messages import Finding, MessageId, draft, make_finding +from dayamlchecker.messages import Finding, FindingClass, MessageId, draft, make_finding +from dayamlchecker.style import ( + ParsedInterviewDocument, + StyleLintOptions, + find_style_findings, +) from mako.template import Template as MakoTemplate # type: ignore[import-untyped] from mako.exceptions import ( # type: ignore[import-untyped] SyntaxException, @@ -50,12 +55,26 @@ @dataclass(frozen=True) class RuntimeOptions: accessibility_error_on_widgets: frozenset[str] = field(default_factory=frozenset) + style_enabled: bool = False + style_include_llm: bool = False + style_openai_base_url: str | None = None + style_openai_api_key: str | None = None + style_openai_model: str | None = None def accessibility_options(self) -> AccessibilityLintOptions: return AccessibilityLintOptions( error_on_widgets=self.accessibility_error_on_widgets ) + def style_options(self) -> StyleLintOptions: + return StyleLintOptions( + enabled=self.style_enabled or self.style_include_llm, + include_llm=self.style_include_llm, + openai_base_url=self.style_openai_base_url, + openai_api_key=self.style_openai_api_key, + openai_model=self.style_openai_model, + ) + # Global identifiers for _extract_conditional_fields_from_doc below. 
Should cover all show/hide style modifiers _IDENTIFIER_RE = re.compile(r"[A-Za-z_]\w*") @@ -1561,6 +1580,8 @@ def find_errors_from_string( ] yaml_parser = _make_yaml_parser() prior_conditional_fields: list[dict[str, Any]] = [] + parsed_docs: list[ParsedInterviewDocument] = [] + has_yaml_parse_errors = False line_number = 1 for source_code in document_match.split(full_content): lines_in_code = sum(l == "\n" for l in source_code) @@ -1587,6 +1608,7 @@ def find_errors_from_string( error=rendered_error, ) ) + has_yaml_parse_errors = True line_number += lines_in_code continue @@ -1709,11 +1731,159 @@ def find_errors_from_string( prior_conditional_fields.extend( _extract_conditional_fields_from_doc(doc, line_number) ) + parsed_docs.append( + ParsedInterviewDocument( + doc=doc, + source_code=source_code, + document_start_line=line_number, + index=len(parsed_docs), + ) + ) line_number += lines_in_code + if not has_yaml_parse_errors: + all_errors.extend( + _find_interview_level_findings(parsed_docs, input_file=input_file) + ) + style_options = runtime_options.style_options() + if style_options.enabled and not has_yaml_parse_errors: + all_errors.extend( + find_style_findings( + docs=parsed_docs, + input_file=input_file, + options=style_options, + ) + ) return all_errors +_COMMON_COURTFORMSONLINE_METADATA_FIELDS = ( + "title", + "short title", + "description", + "can_I_use_this_form", + "before_you_start", + "LIST_topics", + "jurisdiction", +) + + +def _find_interview_level_findings( + parsed_docs: list[ParsedInterviewDocument], *, input_file: str +) -> list[Finding]: + findings: list[Finding] = [] + findings.extend(_check_missing_question_ids(parsed_docs, input_file=input_file)) + findings.extend(_check_multiple_mandatory_blocks(parsed_docs, input_file=input_file)) + findings.extend(_check_metadata_fields(parsed_docs, input_file=input_file)) + return findings + + +def _check_missing_question_ids( + parsed_docs: list[ParsedInterviewDocument], *, input_file: str +) -> 
list[Finding]: + if not _looks_like_interview_file(parsed_docs): + return [] + findings: list[Finding] = [] + for parsed_doc in parsed_docs: + question = str(parsed_doc.doc.get("question") or "").strip() + if not question or str(parsed_doc.doc.get("id") or "").strip(): + continue + findings.append( + make_finding( + MessageId.MISSING_QUESTION_ID, + file_name=input_file, + line_number=parsed_doc.line_for_key("question"), + snippet=_shorten(question), + ) + ) + return findings + + +def _check_multiple_mandatory_blocks( + parsed_docs: list[ParsedInterviewDocument], *, input_file: str +) -> list[Finding]: + mandatory_docs = [ + parsed_doc + for parsed_doc in parsed_docs + if _is_truthy(parsed_doc.doc.get("mandatory")) + ] + if len(mandatory_docs) < 2: + return [] + labels = [] + for parsed_doc in mandatory_docs[:4]: + label = str(parsed_doc.doc.get("id") or parsed_doc.doc.get("question") or "").strip() + labels.append(_shorten(label or parsed_doc.screen_id)) + return [ + make_finding( + MessageId.MULTIPLE_MANDATORY_BLOCKS, + file_name=input_file, + line_number=mandatory_docs[1].line_for_key("mandatory"), + labels=", ".join(labels), + ) + ] + + +def _check_metadata_fields( + parsed_docs: list[ParsedInterviewDocument], *, input_file: str +) -> list[Finding]: + metadata_doc: Optional[ParsedInterviewDocument] = None + metadata: dict[str, Any] = {} + for parsed_doc in parsed_docs: + block = parsed_doc.doc.get("metadata") + if not isinstance(block, dict): + continue + if metadata_doc is None: + metadata_doc = parsed_doc + metadata.update(block) + if metadata_doc is None: + return [] + missing = [ + field + for field in _COMMON_COURTFORMSONLINE_METADATA_FIELDS + if not metadata.get(field) + ] + if not missing: + return [] + return [ + make_finding( + MessageId.MISSING_METADATA_FIELDS, + file_name=input_file, + line_number=metadata_doc.line_for_key("metadata"), + fields=", ".join(missing), + ) + ] + + +def _looks_like_interview_file(parsed_docs: list[ParsedInterviewDocument]) 
-> bool: + if len(parsed_docs) > 1: + return True + for parsed_doc in parsed_docs: + if parsed_doc.doc.get("metadata") is not None: + return True + if parsed_doc.doc.get("include") is not None: + return True + if parsed_doc.doc.get("mandatory") is not None: + return True + return False + + +def _is_truthy(value: Any) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return value != 0 + if isinstance(value, str): + return value.strip().lower() in {"true", "yes", "1", "on"} + return False + + +def _shorten(value: Any, limit: int = 160) -> str: + rendered = re.sub(r"\s+", " ", str(value or "")).strip() + if len(rendered) <= limit: + return rendered + return rendered[: limit - 3] + "..." + + def find_errors( input_file: str, lint_mode: str = DEFAULT_LINT_MODE, @@ -1746,6 +1916,28 @@ def find_errors( ) +def find_style_findings_from_string( + full_content: str, + *, + input_file: str | None = None, + lint_mode: str = DEFAULT_LINT_MODE, + runtime_options: Optional[RuntimeOptions] = None, +) -> list[Finding]: + resolved_options = runtime_options or RuntimeOptions() + if not resolved_options.style_enabled and not resolved_options.style_include_llm: + resolved_options = replace(resolved_options, style_enabled=True) + return [ + finding + for finding in find_errors_from_string( + full_content, + input_file=input_file, + lint_mode=lint_mode, + runtime_options=resolved_options, + ) + if finding.finding_class == FindingClass.STYLE + ] + + def _collect_yaml_files( paths: list[Path], include_default_ignores: bool = True ) -> list[Path]: @@ -1846,6 +2038,37 @@ def main(argv: Optional[list[str]] = None) -> int: "Repeat to enable multiple widgets. Default: none" ), ) + parser.add_argument( + "--style", + action="store_true", + help="Enable Assembly Line style lint checks.", + ) + parser.add_argument( + "--style-llm", + action="store_true", + help=( + "Enable LLM-backed style findings. 
This also enables --style and " + "uses an OpenAI-compatible API key from flags or environment." + ), + ) + parser.add_argument( + "--openai-base-url", + default=None, + help=( + "Base URL for an OpenAI-compatible API used by --style-llm " + "(default: OPENAI_BASE_URL environment variable or the standard OpenAI API)" + ), + ) + parser.add_argument( + "--openai-api-key", + default=None, + help="API key for --style-llm (default: OPENAI_API_KEY environment variable)", + ) + parser.add_argument( + "--openai-model", + default=None, + help="Model name for --style-llm (default: OPENAI_MODEL env var or gpt-4o-mini)", + ) parser.add_argument( "--url-check", action=argparse.BooleanOptionalAction, @@ -1922,7 +2145,12 @@ def main(argv: Optional[list[str]] = None) -> int: widget.strip().lower() for widget in args.accessibility_error_on_widgets if widget.strip() - ) + ), + style_enabled=args.style or args.style_llm, + style_include_llm=args.style_llm, + style_openai_base_url=args.openai_base_url, + style_openai_api_key=args.openai_api_key, + style_openai_model=args.openai_model, ) yaml_files = _collect_yaml_files( diff --git a/tests/test_style.py b/tests/test_style.py new file mode 100644 index 0000000..7387e8f --- /dev/null +++ b/tests/test_style.py @@ -0,0 +1,549 @@ +import io +from contextlib import redirect_stdout +from pathlib import Path +from tempfile import TemporaryDirectory + +import dayamlchecker +import dayamlchecker.style as style_module +from dayamlchecker.messages import FindingClass, MessageId, Severity +from dayamlchecker.yaml_structure import RuntimeOptions, find_errors_from_string, main + + +def test_style_checks_are_opt_in_by_default(): + yaml_text = ( + "question: |\n" + " Commence the interview.\n" + "field: user_name\n" + "language: en\n" + ) + + findings = find_errors_from_string(yaml_text, input_file="") + + assert all(finding.finding_class != FindingClass.STYLE for finding in findings) + + +def test_style_checks_report_deterministic_findings(): + 
yaml_text = ( + "question: |\n" + " Do you rent your home and do you have a written lease?\n" + "fields:\n" + ' - label: "Commence the interview."\n' + " field: FirstName\n" + "language: en\n" + ) + + findings = find_errors_from_string( + yaml_text, + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + message_ids = {finding.message_id for finding in findings} + assert MessageId.STYLE_PLAIN_LANGUAGE_REPLACEMENT in message_ids + assert MessageId.STYLE_VARIABLE_ROOT_NOT_SNAKE_CASE in message_ids + assert MessageId.STYLE_REMOVE_LANGUAGE_EN in message_ids + assert MessageId.STYLE_COMPOUND_QUESTION in message_ids + + +def test_style_choices_without_stable_values_are_errors(): + findings = find_errors_from_string( + "question: |\n" + " Choose a county.\n" + "fields:\n" + ' - label: "County"\n' + " field: county\n" + " choices:\n" + " - Suffolk\n" + " - Middlesex\n" + "---\n" + "question: |\n" + " Choose a delivery method.\n" + "field: delivery_method\n" + "choices:\n" + " - label: Mail\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + choice_findings = [ + finding + for finding in findings + if finding.message_id == MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES + ] + assert len(choice_findings) == 2 + assert all(finding.severity == Severity.ERROR for finding in choice_findings) + + +def test_style_choices_with_stable_values_are_allowed(): + findings = find_errors_from_string( + "question: |\n" + " Choose a county.\n" + "fields:\n" + ' - label: "County"\n' + " field: county\n" + " choices:\n" + " - Suffolk: suffolk\n" + " - Middlesex: middlesex\n" + "---\n" + "question: |\n" + " Choose a delivery method.\n" + "field: delivery_method\n" + "choices:\n" + " - label: Mail\n" + " value: mail\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES + for finding in findings + ) + + +def 
test_style_rule_severity_decisions_are_stable(): + findings = find_errors_from_string( + "question: |\n" + " Commence the interview.\n" + "field: delivery_method\n" + "choices:\n" + " - Mail\n" + "---\n" + 'code: |\n user_message = "Please complete the form before you continue."\n', + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + severities = { + finding.message_id: finding.severity + for finding in findings + if finding.message_id + in { + MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES, + MessageId.STYLE_PLAIN_LANGUAGE_REPLACEMENT, + MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE, + } + } + assert severities[MessageId.STYLE_CHOICES_WITHOUT_STABLE_VALUES] == Severity.ERROR + assert severities[MessageId.STYLE_PLAIN_LANGUAGE_REPLACEMENT] == Severity.INFO + assert severities[MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE] == Severity.WARNING + + +def test_style_llm_missing_api_key_reports_configuration_error(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + findings = find_errors_from_string( + "question: |\n Please do this now.\nfield: user_name\n", + input_file="", + runtime_options=RuntimeOptions(style_include_llm=True), + ) + + assert any( + finding.message_id == MessageId.STYLE_LLM_CONFIGURATION_ERROR + for finding in findings + ) + + +def test_style_checks_report_hardcoded_user_text_in_code(): + findings = find_errors_from_string( + 'code: |\n user_message = "Please complete the form before you continue."\n', + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert any( + finding.message_id == MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE + for finding in findings + ) + + +def test_style_checks_ignore_multiline_code_without_hardcoded_text(): + findings = find_errors_from_string( + "code: |\n" + " if url_args.get('recipient_email'):\n" + " recipient_email = 
url_args.get('recipient_email')\n" + " else:\n" + ' recipient_email = ""\n', + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE + for finding in findings + ) + + +def test_style_checks_ignore_lowercase_config_strings_in_code(): + findings = find_errors_from_string( + 'code: |\n config_name = "enable weaver for unauthenticated users"\n', + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE + for finding in findings + ) + + +def test_style_checks_ignore_docstrings_and_data_lists_in_code(): + findings = find_errors_from_string( + "code: |\n" + " def parse_date(value):\n" + ' """Return a valid date input string as a date, otherwise None."""\n' + " allowed_courts = ['Boston Municipal Court']\n" + " return value\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE + for finding in findings + ) + + +def test_style_checks_ignore_short_f_string_fragments_in_code(): + findings = find_errors_from_string( + "code: |\n" + " def job_description():\n" + " return f\"{job.source} {'(self-employed)' if job.is_self_employed else ('for ' + job.employer_name_address_phone())}\"\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE + for finding in findings + ) + + +def test_style_checks_flag_validation_error_messages_in_code(): + findings = find_errors_from_string( + 'code: |\n validation_error("Please choose who will mail notice of the hearing.")\n', + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert any( + finding.message_id == MessageId.STYLE_HARDCODED_USER_TEXT_IN_CODE + for finding in findings + ) + + +def 
test_style_compound_question_ignores_single_choice_question(): + findings = find_errors_from_string( + "question: |\n Do you rent or own your home?\nfield: housing_choice\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_COMPOUND_QUESTION + for finding in findings + ) + + +def test_style_compound_question_flags_multiple_prompts(): + findings = find_errors_from_string( + "question: |\n" + " Do you rent your home and do you have a written lease?\n" + "field: lease_status\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert any( + finding.message_id == MessageId.STYLE_COMPOUND_QUESTION + for finding in findings + ) + + +def test_style_checks_warn_on_question_level_help(): + findings = find_errors_from_string( + "id: intro\n" + "question: |\n" + " Tell us about your case.\n" + "help: |\n" + " You can find more details in the court paperwork.\n" + "field: case_summary\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert any( + finding.message_id == MessageId.STYLE_QUESTION_LEVEL_HELP + for finding in findings + ) + + +def test_style_checks_allow_field_level_help(): + findings = find_errors_from_string( + "id: intro\n" + "question: |\n" + " Tell us about your case.\n" + "fields:\n" + ' - label: "Case number"\n' + " field: case_number\n" + " help: |\n" + " This is on the top-right corner of your notice.\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_QUESTION_LEVEL_HELP + for finding in findings + ) + + +def test_style_variable_root_check_ignores_note_only_fields(): + findings = find_errors_from_string( + "question: |\n" + " Tell us about your fees.\n" + "fields:\n" + " - note: |\n" + " You do not need to tell the court the amount to waive.\n" + " - label: First name\n" + " field: user_name\n", + input_file="", + 
runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_VARIABLE_ROOT_NOT_SNAKE_CASE + for finding in findings + ) + + +def test_style_llm_uses_explicit_credentials(monkeypatch): + calls: list[dict[str, str]] = [] + + def fake_call_openai_chat_completion(**kwargs): + calls.append(kwargs) + return ( + { + "findings": [ + { + "rule_id": "tone-and-respect", + "message": "This sounds too directive.", + "screen_id": "block-0", + "problematic_text": "You must do this now.", + } + ] + }, + None, + ) + + monkeypatch.setattr( + style_module, + "_call_openai_chat_completion", + fake_call_openai_chat_completion, + ) + + findings = find_errors_from_string( + "question: |\n You must do this now.\nfield: user_name\n", + input_file="", + runtime_options=RuntimeOptions( + style_include_llm=True, + style_openai_base_url="https://example.test/v1", + style_openai_api_key="test-key", + style_openai_model="demo-model", + ), + ) + + assert len(calls) == 2 + assert all(call["base_url"] == "https://example.test/v1" for call in calls) + assert all(call["api_key"] == "test-key" for call in calls) + assert all(call["model"] == "demo-model" for call in calls) + assert any( + finding.message_id == MessageId.STYLE_TONE_AND_RESPECT for finding in findings + ) + + +def test_style_llm_uses_environment_credentials(monkeypatch): + calls: list[dict[str, str]] = [] + + def fake_call_openai_chat_completion(**kwargs): + calls.append(kwargs) + return ({"findings": []}, None) + + monkeypatch.setenv("OPENAI_BASE_URL", "https://env.example/v1") + monkeypatch.setenv("OPENAI_API_KEY", "env-key") + monkeypatch.setenv("OPENAI_MODEL", "env-model") + monkeypatch.setattr( + style_module, + "_call_openai_chat_completion", + fake_call_openai_chat_completion, + ) + + findings = find_errors_from_string( + "question: |\n Ready to begin?\nfield: user_name\n", + input_file="", + runtime_options=RuntimeOptions(style_include_llm=True), + ) + + assert len(calls) == 2 + 
assert all(call["base_url"] == "https://env.example/v1" for call in calls) + assert all(call["api_key"] == "env-key" for call in calls) + assert all(call["model"] == "env-model" for call in calls) + assert all( + finding.message_id != MessageId.STYLE_LLM_CONFIGURATION_ERROR + for finding in findings + ) + + +def test_module_style_helper_returns_structured_style_findings(): + findings = dayamlchecker.find_style_findings_from_string( + "id: intro\nquestion: |\n Commence the interview.\nfield: user_name\n", + input_file="", + ) + + assert findings + assert all(finding.finding_class == FindingClass.STYLE for finding in findings) + assert all(finding.context.get("screen_id") == "intro" for finding in findings) + + +def test_main_can_run_style_checks_from_cli(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "style.yml" + interview.write_text( + "question: |\n" + " Commence the interview.\n" + "field: user_name\n" + "language: en\n", + encoding="utf-8", + ) + + stdout = io.StringIO() + with redirect_stdout(stdout): + exit_code = main(["--style", "--no-url-check", str(interview)]) + + output = stdout.getvalue().lower() + assert exit_code == 1 + assert "[is712]" in output + assert "[es703]" in output + + +def test_style_checks_report_missing_theme_and_review_edits(): + yaml_text = ( + "metadata:\n" + " title: Example interview\n" + "---\n" + "id: review_screen\n" + "question: |\n" + " Review your answers\n" + "review:\n" + ' - label: "Name"\n' + "---\n" + "id: housing\n" + "question: |\n" + " Do you rent your home?\n" + "fields:\n" + ' - label: "Rent or own"\n' + " field: housing_choice\n" + " datatype: radio\n" + " choices:\n" + ' - "Rent: rent"\n' + ' - "Own: own"\n' + ) + + findings = find_errors_from_string( + yaml_text, + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + message_ids = {finding.message_id for finding in findings} + assert MessageId.STYLE_MISSING_CUSTOM_THEME in message_ids + assert 
MessageId.STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS in message_ids + + +def test_style_theme_rule_allows_explicit_theme_configuration(): + findings = find_errors_from_string( + "metadata:\n" + " title: Example interview\n" + "---\n" + "features:\n" + " bootstrap theme: example-theme.css\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_MISSING_CUSTOM_THEME + for finding in findings + ) + + +def test_style_preview_screen_without_review_block_is_not_review_screen(): + findings = find_errors_from_string( + "id: preview_document\n" + "question: |\n" + " Preview your document\n" + "subquestion: |\n" + " Review the document before you download it.\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id != MessageId.STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS + for finding in findings + ) + + +def test_style_checks_report_review_choice_gaps_and_person_object_hint(): + yaml_text = ( + "id: housing\n" + "question: |\n" + " Do you rent your home?\n" + "fields:\n" + ' - label: "Rent or own"\n' + " field: housing_choice\n" + " datatype: radio\n" + " choices:\n" + ' - "Rent: rent"\n' + ' - "Own: own"\n' + "---\n" + "id: review_screen\n" + "question: |\n" + " Review your answers\n" + "review:\n" + ' - label: "Name"\n' + ' Edit: user_name\n' + "---\n" + "id: names\n" + "question: |\n" + " Tell us about yourself\n" + "fields:\n" + ' - label: "First name"\n' + " field: user_first_name\n" + ' - label: "Last name"\n' + " field: user_last_name\n" + ' - label: "Street address"\n' + " field: user_address\n" + ' - label: "City"\n' + " field: user_city\n" + ' - label: "Zip code"\n' + " field: user_zip\n" + ) + + findings = find_errors_from_string( + yaml_text, + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + message_ids = {finding.message_id for finding in findings} + assert 
MessageId.STYLE_REVIEW_SCREEN_MISSING_KEY_CHOICE_EDITS in message_ids + assert MessageId.STYLE_PREFER_PERSON_OBJECTS in message_ids + person_object_findings = [ + finding + for finding in findings + if finding.message_id == MessageId.STYLE_PREFER_PERSON_OBJECTS + ] + assert person_object_findings[0].context["snippet"] == "user_first_name" diff --git a/tests/test_yaml_structure.py b/tests/test_yaml_structure.py index 5d0fcd7..f3530d5 100644 --- a/tests/test_yaml_structure.py +++ b/tests/test_yaml_structure.py @@ -11,6 +11,7 @@ def _has_code(errs, code: str) -> bool: class TestYAMLStructure(unittest.TestCase): def test_valid_question_no_errors(self): valid = """ +id: name_screen question: | What is your name? field: name @@ -1915,5 +1916,116 @@ def test_mandatory_null_errors(self): ) +class TestALLinterParityRules(unittest.TestCase): + def test_missing_question_id_is_reported(self): + yaml_text = """ +metadata: + title: Example interview +--- +question: | + What is your mailing address? +fields: + - Address: user_address +""" + errs = find_errors_from_string(yaml_text, input_file="") + self.assertTrue( + _has_code(errs, "EG414"), + f"Expected missing-question-id finding, got: {errs}", + ) + + def test_multiple_mandatory_blocks_are_reported(self): + yaml_text = """ +id: start +mandatory: True +question: | + Start here +field: start_now +--- +id: review +mandatory: True +question: | + Review your answers +field: review_now +""" + errs = find_errors_from_string(yaml_text, input_file="") + self.assertTrue( + _has_code(errs, "EG415"), + f"Expected multiple-mandatory-blocks finding, got: {errs}", + ) + + def test_metadata_fields_are_checked_when_metadata_is_present(self): + yaml_text = """ +metadata: + title: Example interview +""" + errs = find_errors_from_string(yaml_text, input_file="") + self.assertTrue( + _has_code(errs, "IG416"), + f"Expected metadata-fields finding, got: {errs}", + ) + metadata_findings = [err for err in errs if err.code == "IG416"] + 
self.assertTrue(metadata_findings) + self.assertIn("description", metadata_findings[0].err_str) + self.assertIn("can_I_use_this_form", metadata_findings[0].err_str) + self.assertNotIn("landing_page_url", metadata_findings[0].err_str) + + def test_accessibility_field_shortcut_and_validation_rules_are_reported(self): + yaml_text = """ +id: start +question: | + Ready to continue? +yesno: ready_to_continue +continue button label: Continue +--- +id: details +question: | + Tell us more +fields: + - field: case_notes + required: True + datatype: radio + choices: + - label: "" + value: skip + validation message: invalid +""" + errs = find_errors_from_string( + yaml_text, + input_file="", + lint_mode="accessibility", + ) + codes = {err.code for err in errs} + self.assertTrue( + {"EA510", "EA511", "EA513", "WA525", "WA527", "WA528"}.issubset(codes), + f"Expected new accessibility parity findings, got: {errs}", + ) + + def test_accessibility_html_rules_are_reported(self): + yaml_text = """ +id: content +question: | + [Policy](https://example.com/a) + Policy +subquestion: | + Read the guide + +
Header
Value
+
Open
+ Focusable +

Please click the green button to continue.

+ Warning +""" + errs = find_errors_from_string( + yaml_text, + input_file="", + lint_mode="accessibility", + ) + codes = {err.code for err in errs} + self.assertTrue( + {"WA516", "WA517", "WA518", "WA519", "WA520", "EA521", "EA523", "WA524"}.issubset(codes), + f"Expected HTML accessibility parity findings, got: {errs}", + ) + + if __name__ == "__main__": unittest.main() From 49129f92f48ef1f655f87fdefeb3067138a71f15 Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Sun, 31 May 2026 20:11:20 -0400 Subject: [PATCH 2/3] Format with black; address color rule being too restrictive --- src/dayamlchecker/accessibility.py | 58 ++++++++++++++++------------- src/dayamlchecker/messages.py | 24 +++--------- src/dayamlchecker/style.py | 13 +++++-- src/dayamlchecker/yaml_structure.py | 8 +++- tests/test_style.py | 8 ++-- tests/test_yaml_structure.py | 11 +++++- 6 files changed, 66 insertions(+), 56 deletions(-) diff --git a/src/dayamlchecker/accessibility.py b/src/dayamlchecker/accessibility.py index 5474046..a5cb4e2 100644 --- a/src/dayamlchecker/accessibility.py +++ b/src/dayamlchecker/accessibility.py @@ -206,9 +206,7 @@ def find_accessibility_findings( findings.extend( _check_inline_color_styling(section, source_code, document_start_line) ) - findings.extend( - _check_new_tab_links(section, source_code, document_start_line) - ) + findings.extend(_check_new_tab_links(section, source_code, document_start_line)) findings.extend(_check_svg_names(section, source_code, document_start_line)) findings.extend(_check_tables(section, source_code, document_start_line)) findings.extend( @@ -372,8 +370,9 @@ def _check_field_labels( ) continue normalized = _normalize_human_text(label) - if normalized in NON_DESCRIPTIVE_FIELD_LABELS or _looks_like_emoji_or_punctuation_only( - label + if ( + normalized in NON_DESCRIPTIVE_FIELD_LABELS + or _looks_like_emoji_or_punctuation_only(label) ): findings.append( draft( @@ -404,8 +403,9 @@ def _check_choice_labels( ) continue normalized = 
_normalize_human_text(label) - if normalized in NON_DESCRIPTIVE_FIELD_LABELS or _looks_like_emoji_or_punctuation_only( - label + if ( + normalized in NON_DESCRIPTIVE_FIELD_LABELS + or _looks_like_emoji_or_punctuation_only(label) ): findings.append( draft( @@ -865,22 +865,20 @@ def _check_color_only_instructions( ) if not color_reference_re.search(section.value): return [] - if re.search(r"\b(color|colou?r|highlight|highlighted|shade|shaded)\b", section.value, re.IGNORECASE): - line_number = _absolute_line_number( - source_code, - document_start_line, - section.key_line, - color_reference_re.search(section.value).group(0), # type: ignore[union-attr] + line_number = _absolute_line_number( + source_code, + document_start_line, + section.key_line, + color_reference_re.search(section.value).group(0), # type: ignore[union-attr] + ) + return [ + draft( + MessageId.ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS, + line_number=line_number, + section_location=section.location, + snippet=_short_snippet(section.value), ) - return [ - draft( - MessageId.ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS, - line_number=line_number, - section_location=section.location, - snippet=_short_snippet(section.value), - ) - ] - return [] + ] def _check_inline_color_styling( @@ -977,7 +975,9 @@ def _check_svg_names( section: TextSection, source_code: str, document_start_line: int ) -> list[FindingDraft]: findings: list[FindingDraft] = [] - for match in re.finditer(r"]*>.*?", section.value, re.IGNORECASE | re.DOTALL): + for match in re.finditer( + r"]*>.*?", section.value, re.IGNORECASE | re.DOTALL + ): snippet = match.group(0) if re.search(r"\baria-label\s*=\s*([\"']).+?\1", snippet, re.IGNORECASE): continue @@ -1198,12 +1198,16 @@ def _field_collects_user_input(field: dict[str, Any]) -> bool: return any(str(key).strip() not in FIELD_NON_LABEL_KEYS for key in field.keys()) -def _iter_choice_labels_with_lines(choice_value: Any) -> list[tuple[str, Optional[int]]]: +def _iter_choice_labels_with_lines( + 
choice_value: Any, +) -> list[tuple[str, Optional[int]]]: labels: list[tuple[str, Optional[int]]] = [] if isinstance(choice_value, dict): for key, value in choice_value.items(): if isinstance(value, dict): - labels.append((str(value.get("label") or key or ""), value.get("__line__"))) + labels.append( + (str(value.get("label") or key or ""), value.get("__line__")) + ) else: labels.append((str(key or ""), None)) return labels @@ -1221,7 +1225,9 @@ def _iter_choice_labels_with_lines(choice_value: Any) -> list[tuple[str, Optiona return labels -def _collect_validation_messages(field: dict[str, Any]) -> list[tuple[str, Optional[int]]]: +def _collect_validation_messages( + field: dict[str, Any], +) -> list[tuple[str, Optional[int]]]: messages: list[tuple[str, Optional[int]]] = [] validation_message = field.get("validation message") if isinstance(validation_message, str) and validation_message.strip(): diff --git a/src/dayamlchecker/messages.py b/src/dayamlchecker/messages.py index b886a0f..f6e24f5 100644 --- a/src/dayamlchecker/messages.py +++ b/src/dayamlchecker/messages.py @@ -91,25 +91,17 @@ class MessageId(StrEnum): "accessibility_non_descriptive_choice_label" ) ACCESSIBILITY_DUPLICATE_FIELD_LABEL = "accessibility_duplicate_field_label" - ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS = ( - "accessibility_color_only_instructions" - ) + ACCESSIBILITY_COLOR_ONLY_INSTRUCTIONS = "accessibility_color_only_instructions" ACCESSIBILITY_INLINE_COLOR_STYLING = "accessibility_inline_color_styling" ACCESSIBILITY_AMBIGUOUS_LINK_DESTINATIONS = ( "accessibility_ambiguous_link_destinations" ) - ACCESSIBILITY_NEW_TAB_WITHOUT_WARNING = ( - "accessibility_new_tab_without_warning" - ) + ACCESSIBILITY_NEW_TAB_WITHOUT_WARNING = "accessibility_new_tab_without_warning" ACCESSIBILITY_SVG_MISSING_ACCESSIBLE_NAME = ( "accessibility_svg_missing_accessible_name" ) - ACCESSIBILITY_TABLE_MISSING_HEADERS = ( - "accessibility_table_missing_headers" - ) - ACCESSIBILITY_LAYOUT_TABLE_NEEDS_REVIEW = ( - 
"accessibility_layout_table_needs_review" - ) + ACCESSIBILITY_TABLE_MISSING_HEADERS = "accessibility_table_missing_headers" + ACCESSIBILITY_LAYOUT_TABLE_NEEDS_REVIEW = "accessibility_layout_table_needs_review" ACCESSIBILITY_POSITIVE_TABINDEX = "accessibility_positive_tabindex" ACCESSIBILITY_CLICKABLE_NON_CONTROL_HTML = ( "accessibility_clickable_non_control_html" @@ -123,9 +115,7 @@ class MessageId(StrEnum): ACCESSIBILITY_GENERIC_VALIDATION_MESSAGE = ( "accessibility_generic_validation_message" ) - ACCESSIBILITY_AMBIGUOUS_BUTTON_TEXT = ( - "accessibility_ambiguous_button_text" - ) + ACCESSIBILITY_AMBIGUOUS_BUTTON_TEXT = "accessibility_ambiguous_button_text" STYLE_SUBQUESTION_H1 = "style_subquestion_h1" STYLE_CHOICES_WITHOUT_STABLE_VALUES = "style_choices_without_stable_values" @@ -144,9 +134,7 @@ class MessageId(StrEnum): STYLE_COMPLEX_SCREEN_MISSING_HELP = "style_complex_screen_missing_help" STYLE_MISSING_EXIT_CRITERIA_SCREEN = "style_missing_exit_criteria_screen" STYLE_MISSING_CUSTOM_THEME = "style_missing_custom_theme" - STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS = ( - "style_review_screen_missing_edit_links" - ) + STYLE_REVIEW_SCREEN_MISSING_EDIT_LINKS = "style_review_screen_missing_edit_links" STYLE_REVIEW_SCREEN_MISSING_KEY_CHOICE_EDITS = ( "style_review_screen_missing_key_choice_edits" ) diff --git a/src/dayamlchecker/style.py b/src/dayamlchecker/style.py index d2b0f27..ddc2701 100644 --- a/src/dayamlchecker/style.py +++ b/src/dayamlchecker/style.py @@ -672,7 +672,11 @@ def _check_exit_criteria_and_screen( def _check_theme_usage(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: - metadata_docs = [parsed_doc for parsed_doc in docs if isinstance(parsed_doc.doc.get("metadata"), dict)] + metadata_docs = [ + parsed_doc + for parsed_doc in docs + if isinstance(parsed_doc.doc.get("metadata"), dict) + ] if not metadata_docs: return [] theme_references: set[str] = set() @@ -684,7 +688,9 @@ def _check_theme_usage(docs: list[ParsedInterviewDocument]) -> 
list[FindingDraft theme_references.add(css_value) features = parsed_doc.doc.get("features") if isinstance(features, dict): - bootstrap_theme = _stringify(features.get("bootstrap theme")).strip().lower() + bootstrap_theme = ( + _stringify(features.get("bootstrap theme")).strip().lower() + ) if bootstrap_theme: theme_references.add(bootstrap_theme) if any( @@ -1198,8 +1204,7 @@ def _is_review_screen(parsed_doc: ParsedInterviewDocument) -> bool: if parsed_doc.doc.get("review") is not None: return True combined = " ".join( - _stringify(parsed_doc.doc.get(key)) - for key in ("question", "id", "event") + _stringify(parsed_doc.doc.get(key)) for key in ("question", "id", "event") ).lower() return bool( re.search( diff --git a/src/dayamlchecker/yaml_structure.py b/src/dayamlchecker/yaml_structure.py index f5e7402..9251729 100644 --- a/src/dayamlchecker/yaml_structure.py +++ b/src/dayamlchecker/yaml_structure.py @@ -1773,7 +1773,9 @@ def _find_interview_level_findings( ) -> list[Finding]: findings: list[Finding] = [] findings.extend(_check_missing_question_ids(parsed_docs, input_file=input_file)) - findings.extend(_check_multiple_mandatory_blocks(parsed_docs, input_file=input_file)) + findings.extend( + _check_multiple_mandatory_blocks(parsed_docs, input_file=input_file) + ) findings.extend(_check_metadata_fields(parsed_docs, input_file=input_file)) return findings @@ -1811,7 +1813,9 @@ def _check_multiple_mandatory_blocks( return [] labels = [] for parsed_doc in mandatory_docs[:4]: - label = str(parsed_doc.doc.get("id") or parsed_doc.doc.get("question") or "").strip() + label = str( + parsed_doc.doc.get("id") or parsed_doc.doc.get("question") or "" + ).strip() labels.append(_shorten(label or parsed_doc.screen_id)) return [ make_finding( diff --git a/tests/test_style.py b/tests/test_style.py index 7387e8f..a5c88a2 100644 --- a/tests/test_style.py +++ b/tests/test_style.py @@ -242,8 +242,7 @@ def test_style_compound_question_ignores_single_choice_question(): ) assert all( - 
finding.message_id != MessageId.STYLE_COMPOUND_QUESTION - for finding in findings + finding.message_id != MessageId.STYLE_COMPOUND_QUESTION for finding in findings ) @@ -257,8 +256,7 @@ def test_style_compound_question_flags_multiple_prompts(): ) assert any( - finding.message_id == MessageId.STYLE_COMPOUND_QUESTION - for finding in findings + finding.message_id == MessageId.STYLE_COMPOUND_QUESTION for finding in findings ) @@ -514,7 +512,7 @@ def test_style_checks_report_review_choice_gaps_and_person_object_hint(): " Review your answers\n" "review:\n" ' - label: "Name"\n' - ' Edit: user_name\n' + " Edit: user_name\n" "---\n" "id: names\n" "question: |\n" diff --git a/tests/test_yaml_structure.py b/tests/test_yaml_structure.py index f3530d5..241bd08 100644 --- a/tests/test_yaml_structure.py +++ b/tests/test_yaml_structure.py @@ -2022,7 +2022,16 @@ def test_accessibility_html_rules_are_reported(self): ) codes = {err.code for err in errs} self.assertTrue( - {"WA516", "WA517", "WA518", "WA519", "WA520", "EA521", "EA523", "WA524"}.issubset(codes), + { + "WA516", + "WA517", + "WA518", + "WA519", + "WA520", + "EA521", + "EA523", + "WA524", + }.issubset(codes), f"Expected HTML accessibility parity findings, got: {errs}", ) From fcadb69ef79ad26a3bea157032525887f760375a Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Sun, 31 May 2026 20:35:08 -0400 Subject: [PATCH 3/3] Added rules for: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - contractions in user-facing text, excluding common “I don’t know” - slash-separated alternatives, with exceptions for pronoun sets, N/A, and URL path fragments - field labels starting with enter, write, or list - title-case field labels as INFO, narrowed after sample validation to avoid page-title noise - Other appearing before the end of any choice list - language fields using dropdowns or non-ISO choice values - “preferred pronouns”, required pronoun fields, gender Other, and binary-only gender 
choices --- src/dayamlchecker/messages.py | 88 ++++++++ src/dayamlchecker/style.py | 376 +++++++++++++++++++++++++++++++++- tests/test_style.py | 95 +++++++++ 3 files changed, 557 insertions(+), 2 deletions(-) diff --git a/src/dayamlchecker/messages.py b/src/dayamlchecker/messages.py index f6e24f5..2fedc18 100644 --- a/src/dayamlchecker/messages.py +++ b/src/dayamlchecker/messages.py @@ -140,6 +140,17 @@ class MessageId(StrEnum): ) STYLE_PREFER_PERSON_OBJECTS = "style_prefer_person_objects" STYLE_QUESTION_LEVEL_HELP = "style_question_level_help" + STYLE_CONTRACTION = "style_contraction" + STYLE_SLASH_ALTERNATIVE = "style_slash_alternative" + STYLE_FIELD_LABEL_INSTRUCTION_VERB = "style_field_label_instruction_verb" + STYLE_TITLE_CASE_LABEL = "style_title_case_label" + STYLE_OTHER_CHOICE_NOT_LAST = "style_other_choice_not_last" + STYLE_LANGUAGE_DROPDOWN = "style_language_dropdown" + STYLE_LANGUAGE_CHOICE_VALUE = "style_language_choice_value" + STYLE_PREFERRED_PRONOUNS = "style_preferred_pronouns" + STYLE_GENDER_OTHER_CHOICE = "style_gender_other_choice" + STYLE_GENDER_BINARY_ONLY = "style_gender_binary_only" + STYLE_REQUIRED_PRONOUN_FIELD = "style_required_pronoun_field" STYLE_TONE_AND_RESPECT = "style_tone_and_respect" STYLE_PLAIN_LANGUAGE_REWRITE_OPPORTUNITY = ( "style_plain_language_rewrite_opportunity" @@ -893,6 +904,83 @@ class MessageDefinition: "`subquestion` or another inline pattern instead: {snippet}" ), ), + MessageId.STYLE_CONTRACTION: MessageDefinition( + code="WS727", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Avoid contractions in user-facing text", + template="write out the contraction {matched_text!r} in {location}", + ), + MessageId.STYLE_SLASH_ALTERNATIVE: MessageDefinition( + code="WS728", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Avoid slash-separated alternatives", + template="write out slash-separated alternatives in {location}: {snippet}", + ), + 
MessageId.STYLE_FIELD_LABEL_INSTRUCTION_VERB: MessageDefinition( + code="WS729", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Field label starts with an unnecessary instruction verb", + template="field label can usually omit instruction verbs like enter, write, or list: {snippet}", + ), + MessageId.STYLE_TITLE_CASE_LABEL: MessageDefinition( + code="IS730", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Prefer sentence case for headings and field labels", + template="heading or label appears to use title case; prefer sentence case: {snippet}", + ), + MessageId.STYLE_OTHER_CHOICE_NOT_LAST: MessageDefinition( + code="IS731", + severity=Severity.INFO, + finding_class=FindingClass.STYLE, + summary="Put Other at the end of choice lists", + template='choice list includes "Other" before the final option in {origin}', + ), + MessageId.STYLE_LANGUAGE_DROPDOWN: MessageDefinition( + code="WS732", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Language choices should usually use radio buttons", + template="language field appears to use a dropdown; use radio buttons for short language lists: {snippet}", + ), + MessageId.STYLE_LANGUAGE_CHOICE_VALUE: MessageDefinition( + code="WS733", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Language choices should store ISO language codes", + template="language choice should usually store a 2- or 3-letter ISO code: {snippet}", + ), + MessageId.STYLE_PREFERRED_PRONOUNS: MessageDefinition( + code="WS734", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary='Use "Pronouns", not "preferred pronouns"', + template='avoid "preferred pronouns" in {location}; use "Pronouns": {snippet}', + ), + MessageId.STYLE_GENDER_OTHER_CHOICE: MessageDefinition( + code="WS735", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary='Avoid "Other" as a gender choice', + template='gender choices should avoid 
"Other"; use an inclusive self-described option: {snippet}', + ), + MessageId.STYLE_GENDER_BINARY_ONLY: MessageDefinition( + code="WS736", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Gender choices appear to be binary only", + template="gender field appears to offer only Female and Male choices: {snippet}", + ), + MessageId.STYLE_REQUIRED_PRONOUN_FIELD: MessageDefinition( + code="WS737", + severity=Severity.WARNING, + finding_class=FindingClass.STYLE, + summary="Pronoun fields should usually be optional", + template="pronoun field appears to be required; pronouns should usually be optional: {snippet}", + ), MessageId.STYLE_TONE_AND_RESPECT: MessageDefinition( code="WS790", severity=Severity.WARNING, diff --git a/src/dayamlchecker/style.py b/src/dayamlchecker/style.py index ddc2701..287504b 100644 --- a/src/dayamlchecker/style.py +++ b/src/dayamlchecker/style.py @@ -46,6 +46,31 @@ r"can|could|will|would|should|have|has|had)\b", re.IGNORECASE, ) +_CONTRACTION_RE = re.compile( + r"\b(?:can't|won't|don't|doesn't|didn't|isn't|aren't|wasn't|weren't|" + r"haven't|hasn't|hadn't|couldn't|shouldn't|wouldn't|mustn't|" + r"I'm|you're|we're|they're|it's|that's|there's|what's|who's|" + r"I'll|you'll|we'll|they'll|I'll|I'd|you'd|we'd|they'd)\b", + re.IGNORECASE, +) +_SLASH_ALTERNATIVE_RE = re.compile(r"\b[A-Za-z]+/[A-Za-z]+(?:/[A-Za-z]+)*\b") +_ALLOWED_SLASH_ALTERNATIVES = frozenset( + { + "she/her/hers", + "she/her", + "he/him/his", + "he/him", + "they/them/theirs", + "they/them", + "ze/zir/zirs", + "ze/zir", + "n/a", + } +) +_FIELD_LABEL_INSTRUCTION_VERB_RE = re.compile( + r"^(?:please\s+)?(?:enter|write)\b|^(?:please\s+)?list\s+(?!of\b)", + re.IGNORECASE, +) _PLACEHOLDER_PATTERNS = ( re.compile(r"\bplaceholder\b", re.IGNORECASE), re.compile(r"\blorem ipsum\b", re.IGNORECASE), @@ -199,10 +224,17 @@ def find_style_findings( _check_empty_screen_title, _check_placeholder_language, _check_plain_language_replacements, + _check_contractions, + 
_check_slash_alternatives, _check_variable_conventions, _check_long_sentences, _check_compound_questions, _check_overlong_labels, + _check_field_label_instruction_verbs, + _check_title_case_labels, + _check_other_choice_position, + _check_language_fields, + _check_pronoun_and_gender_fields, _check_too_many_fields, _check_wall_of_text, _check_question_level_help, @@ -426,6 +458,54 @@ def _check_plain_language_replacements( return findings +def _check_contractions(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for entry in _user_facing_text_entries(docs): + plain = _plain_text(entry.text) + match = _CONTRACTION_RE.search(plain) + if not match: + continue + if match.group(0).lower() == "don't" and re.search( + r"\bi\s+don't\s+know\b", plain, re.IGNORECASE + ): + continue + findings.append( + _style_draft( + MessageId.STYLE_CONTRACTION, + line_number=entry.line_number, + screen_id=entry.screen_id, + location=entry.location, + matched_text=match.group(0), + ) + ) + return findings + + +def _check_slash_alternatives( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for entry in _user_facing_text_entries(docs): + plain = _plain_text(entry.text) + for match in _SLASH_ALTERNATIVE_RE.finditer(plain): + matched = match.group(0) + if matched.lower() in _ALLOWED_SLASH_ALTERNATIVES: + continue + if _looks_like_url_path_fragment(plain, match.start(), match.end()): + continue + findings.append( + _style_draft( + MessageId.STYLE_SLASH_ALTERNATIVE, + line_number=entry.line_number, + screen_id=entry.screen_id, + location=entry.location, + snippet=_shorten(matched), + ) + ) + break + return findings + + def _check_variable_conventions( docs: list[ParsedInterviewDocument], ) -> list[FindingDraft]: @@ -521,6 +601,166 @@ def _check_overlong_labels(docs: list[ParsedInterviewDocument]) -> list[FindingD return findings +def _check_field_label_instruction_verbs( + docs: 
list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + for field in _iter_fields(parsed_doc.doc): + label = _extract_field_label(field).strip() + if not label or not _FIELD_LABEL_INSTRUCTION_VERB_RE.search(label): + continue + findings.append( + _style_draft( + MessageId.STYLE_FIELD_LABEL_INSTRUCTION_VERB, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(label), + ) + ) + return findings + + +def _check_title_case_labels(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + for field in _iter_fields(parsed_doc.doc): + if _field_is_choice_style(field): + continue + label = _extract_field_label(field) + if not _looks_like_title_case_label(label): + continue + findings.append( + _style_draft( + MessageId.STYLE_TITLE_CASE_LABEL, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(label), + ) + ) + return findings + + +def _check_other_choice_position( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + for origin, choices, line_number in _iter_choice_sources(parsed_doc): + options = _choice_options(choices) + if len(options) < 2: + continue + for index, option in enumerate(options[:-1]): + if option["label"].strip().lower() != "other": + continue + findings.append( + _style_draft( + MessageId.STYLE_OTHER_CHOICE_NOT_LAST, + line_number=line_number, + screen_id=parsed_doc.screen_id, + origin=origin, + ) + ) + break + return findings + + +def _check_language_fields(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for parsed_doc in docs: + for field in _iter_fields(parsed_doc.doc): + if not _is_language_field(field): + continue + label = _extract_field_label(field) or _extract_field_variable(field) + if 
_field_uses_dropdown(field): + findings.append( + _style_draft( + MessageId.STYLE_LANGUAGE_DROPDOWN, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(label), + ) + ) + for option in _choice_options(field.get("choices")): + value = option["value"].strip() + if not value or value.lower() == "other": + continue + if re.fullmatch(r"[a-z]{2,3}", value): + continue + findings.append( + _style_draft( + MessageId.STYLE_LANGUAGE_CHOICE_VALUE, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(f'{option["label"]}: {value}'), + ) + ) + break + return findings + + +def _check_pronoun_and_gender_fields( + docs: list[ParsedInterviewDocument], +) -> list[FindingDraft]: + findings: list[FindingDraft] = [] + for entry in _user_facing_text_entries(docs): + plain = _plain_text(entry.text) + if not re.search(r"\bpreferred\s+pronouns\b", plain, re.IGNORECASE): + continue + findings.append( + _style_draft( + MessageId.STYLE_PREFERRED_PRONOUNS, + line_number=entry.line_number, + screen_id=entry.screen_id, + location=entry.location, + snippet=_shorten(plain), + ) + ) + + for parsed_doc in docs: + for field in _iter_fields(parsed_doc.doc): + label = _extract_field_label(field) + variable = _extract_field_variable(field) + combined = f"{label} {variable}".lower() + if re.search(r"\bpronouns?\b", combined) and _is_truthy( + field.get("required") + ): + findings.append( + _style_draft( + MessageId.STYLE_REQUIRED_PRONOUN_FIELD, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(label or variable), + ) + ) + if not re.search(r"\bgender\b", combined): + continue + labels = [ + option["label"].strip().lower() + for option in _choice_options(field.get("choices")) + if option["label"].strip() + ] + if "other" in labels: + findings.append( + _style_draft( + MessageId.STYLE_GENDER_OTHER_CHOICE, + line_number=parsed_doc.line_for_field(field), + 
screen_id=parsed_doc.screen_id, + snippet=_shorten(label or variable), + ) + ) + if set(labels) == {"female", "male"}: + findings.append( + _style_draft( + MessageId.STYLE_GENDER_BINARY_ONLY, + line_number=parsed_doc.line_for_field(field), + screen_id=parsed_doc.screen_id, + snippet=_shorten(label or variable), + ) + ) + return findings + + def _check_too_many_fields(docs: list[ParsedInterviewDocument]) -> list[FindingDraft]: findings: list[FindingDraft] = [] for parsed_doc in docs: @@ -1014,13 +1254,145 @@ def _extract_choice_display_text(choices: Any) -> list[str]: label = _stringify(choice.get("label")) if label: extracted.append(label) - elif len(choice) == 1: - extracted.append(_stringify(next(iter(choice.keys())))) + else: + display_items = [ + key for key in choice.keys() if _stringify(key) != "__line__" + ] + if len(display_items) == 1: + extracted.append(_stringify(display_items[0])) elif isinstance(choices, dict): extracted.extend(_stringify(key) for key in choices.keys()) return [item for item in extracted if item] +def _iter_choice_sources( + parsed_doc: ParsedInterviewDocument, +) -> list[tuple[str, Any, int]]: + sources: list[tuple[str, Any, int]] = [] + for key in ("choices", "dropdown", "buttons"): + if parsed_doc.doc.get(key) is not None: + sources.append((key, parsed_doc.doc.get(key), parsed_doc.line_for_key(key))) + for index, field in enumerate(_iter_fields(parsed_doc.doc)): + if field.get("choices") is not None: + sources.append( + ( + f"fields[{index}].choices", + field.get("choices"), + parsed_doc.line_for_field(field), + ) + ) + return sources + + +def _choice_options(choices: Any) -> list[dict[str, str]]: + options: list[dict[str, str]] = [] + if isinstance(choices, list): + for choice in choices: + if isinstance(choice, str): + label, value = _split_choice_string(choice) + options.append({"label": label, "value": value}) + elif isinstance(choice, dict): + label = _stringify(choice.get("label")).strip() + value = 
_stringify(choice.get("value")).strip() + if label or value: + options.append({"label": label, "value": value}) + else: + display_items = [ + (key, val) + for key, val in choice.items() + if _stringify(key) != "__line__" + ] + if len(display_items) != 1: + continue + key, val = display_items[0] + options.append( + { + "label": _stringify(key).strip(), + "value": _stringify(val).strip(), + } + ) + elif isinstance(choices, dict): + for key, value in choices.items(): + options.append( + {"label": _stringify(key).strip(), "value": _stringify(value).strip()} + ) + return [option for option in options if option["label"] or option["value"]] + + +def _split_choice_string(choice: str) -> tuple[str, str]: + if ": " not in choice: + return choice.strip(), "" + label, value = choice.split(": ", 1) + return label.strip(), value.strip() + + +def _looks_like_title_case_label(value: str) -> bool: + if not value or "?" in value or "${" in value: + return False + words = re.findall(r"[A-Za-z][A-Za-z'-]*", value) + candidates = [ + word + for word in words + if len(word) > 2 + and not word.isupper() + and word.lower() not in {"and", "for", "the", "with", "from"} + ] + if len(candidates) < 3: + return False + title_words = [ + word + for word in candidates + if word[:1].isupper() and word[1:] == word[1:].lower() + ] + return len(title_words) / len(candidates) >= 0.75 + + +def _is_language_field(field: dict[str, Any]) -> bool: + combined = " ".join((_extract_field_label(field), _extract_field_variable(field))) + return bool(re.search(r"\blanguage\b", combined, re.IGNORECASE)) + + +def _field_uses_dropdown(field: dict[str, Any]) -> bool: + return ( + any( + _stringify(field.get(key)).strip().lower() == "dropdown" + for key in ("datatype", "input type") + ) + or field.get("dropdown") is not None + ) + + +def _field_is_choice_style(field: dict[str, Any]) -> bool: + datatype = _stringify(field.get("datatype")).strip().lower() + return datatype in { + "checkboxes", + "checkbox", + "yesno", 
+ "noyes", + "yesnomaybe", + "noyesmaybe", + "radio", + } + + +def _looks_like_url_path_fragment(text: str, start: int, end: int) -> bool: + prefix = text[max(0, start - 20) : start] + suffix = text[end : min(len(text), end + 20)] + if re.search(r"(?:https?://|www\.)\S*$", prefix, re.IGNORECASE): + return True + if prefix.endswith(".") or "/" in prefix[-2:]: + return True + return bool(re.match(r"\.[A-Za-z0-9]", suffix)) + + +def _is_truthy(value: Any) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.strip().lower() in {"true", "yes", "y", "1"} + return bool(value) + + def _variable_references( parsed_doc: ParsedInterviewDocument, ) -> list[tuple[str, int]]: diff --git a/tests/test_style.py b/tests/test_style.py index a5c88a2..d93dfae 100644 --- a/tests/test_style.py +++ b/tests/test_style.py @@ -545,3 +545,98 @@ def test_style_checks_report_review_choice_gaps_and_person_object_hint(): if finding.message_id == MessageId.STYLE_PREFER_PERSON_OBJECTS ] assert person_object_findings[0].context["snippet"] == "user_first_name" + + +def test_style_checks_report_plain_language_punctuation_and_label_gaps(): + findings = find_errors_from_string( + "id: style_gaps\n" + "question: |\n" + " Case Filing Details\n" + "subquestion: |\n" + " Don't write plaintiff/defendant if only one applies.\n" + "fields:\n" + ' - label: "Enter your full name"\n' + " field: user_name\n" + " - label: Case Filing Details\n" + " field: contact_preference\n" + " choices:\n" + " - Other: other\n" + " - Email: email\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + message_ids = {finding.message_id for finding in findings} + assert MessageId.STYLE_CONTRACTION in message_ids + assert MessageId.STYLE_SLASH_ALTERNATIVE in message_ids + assert MessageId.STYLE_FIELD_LABEL_INSTRUCTION_VERB in message_ids + assert MessageId.STYLE_TITLE_CASE_LABEL in message_ids + assert MessageId.STYLE_OTHER_CHOICE_NOT_LAST in message_ids + + 
+def test_style_checks_allow_common_pronoun_slashes_and_i_do_not_know_choice(): + findings = find_errors_from_string( + "question: |\n" + " Choose pronouns\n" + "fields:\n" + " - Pronouns: user_pronouns\n" + " choices:\n" + " - she/her/hers: she/her/hers\n" + " - I don't know: unknown\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert all( + finding.message_id + not in {MessageId.STYLE_SLASH_ALTERNATIVE, MessageId.STYLE_CONTRACTION} + for finding in findings + ) + + +def test_style_checks_report_language_gender_and_pronoun_gaps(): + findings = find_errors_from_string( + "question: |\n" + " What are your preferred pronouns?\n" + "fields:\n" + " - Language: user_language\n" + " input type: dropdown\n" + " choices:\n" + " - English: English\n" + " - Spanish: es\n" + " - Other: other\n" + " - Gender: user_gender\n" + " choices:\n" + " - Female: female\n" + " - Male: male\n" + " - Other: other\n" + " - Pronouns: user_pronouns\n" + " required: True\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + message_ids = {finding.message_id for finding in findings} + assert MessageId.STYLE_LANGUAGE_DROPDOWN in message_ids + assert MessageId.STYLE_LANGUAGE_CHOICE_VALUE in message_ids + assert MessageId.STYLE_PREFERRED_PRONOUNS in message_ids + assert MessageId.STYLE_GENDER_OTHER_CHOICE in message_ids + assert MessageId.STYLE_REQUIRED_PRONOUN_FIELD in message_ids + + +def test_style_checks_report_binary_only_gender_choices(): + findings = find_errors_from_string( + "question: |\n" + " Gender\n" + "fields:\n" + " - Gender: user_gender\n" + " choices:\n" + " - Female: female\n" + " - Male: male\n", + input_file="", + runtime_options=RuntimeOptions(style_enabled=True), + ) + + assert any( + finding.message_id == MessageId.STYLE_GENDER_BINARY_ONLY for finding in findings + )