From a6dad210454407edac636a4ab5ac97815a404a9e Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 12:18:09 +0100 Subject: [PATCH 01/13] initial pef to md --- .github/workflows/pdf-to-markdown.yml | 69 +++ scripts/pdf_to_md/extractor.py | 292 ++++++++++ scripts/pdf_to_md/pdf_to_md.py | 806 ++++++++++++++++++++++++++ 3 files changed, 1167 insertions(+) create mode 100644 .github/workflows/pdf-to-markdown.yml create mode 100755 scripts/pdf_to_md/extractor.py create mode 100755 scripts/pdf_to_md/pdf_to_md.py diff --git a/.github/workflows/pdf-to-markdown.yml b/.github/workflows/pdf-to-markdown.yml new file mode 100644 index 0000000..1c60f02 --- /dev/null +++ b/.github/workflows/pdf-to-markdown.yml @@ -0,0 +1,69 @@ +name: pdf-to-markdown + +on: + workflow_dispatch: + inputs: + pdf_name: + description: "PDF file name inside legacy/ (example: report.pdf)" + required: true + default: "input.pdf" + type: string + +permissions: + contents: write + +jobs: + convert: + runs-on: ubuntu-latest + container: + image: python:3.12-slim + + env: + LEGACY_DIR: legacy + + steps: + - name: Check out repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install fast-mode dependencies + run: | + python -m pip install --upgrade pip + pip install pymupdf pymupdf4llm + + - name: Convert PDF to legacy/source.md + run: | + set -eux + + PDF="$LEGACY_DIR/${{ inputs.pdf_name }}" + OUT="$LEGACY_DIR/source.md" + + test -d "$LEGACY_DIR" + test -f "$PDF" + + python scripts/pdf_to_md.py "$PDF" "$OUT" + + - name: Show generated files + run: | + find "$LEGACY_DIR" -maxdepth 3 \( -name "source.md" -o -path "$LEGACY_DIR/images/*" \) | sort + + - name: Commit and push generated files + run: | + set -eux + + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + git add "$LEGACY_DIR/source.md" + if [ -d "$LEGACY_DIR/images" ]; then + git add "$LEGACY_DIR/images" + fi + + if git diff --cached --quiet; then + 
echo "No changes to commit" + exit 0 + fi + + git commit -m "Update legacy/source.md from ${{ inputs.pdf_name }}" + git push \ No newline at end of file diff --git a/scripts/pdf_to_md/extractor.py b/scripts/pdf_to_md/extractor.py new file mode 100755 index 0000000..3250f9e --- /dev/null +++ b/scripts/pdf_to_md/extractor.py @@ -0,0 +1,292 @@ +""" +PDF extraction with multiple backends: +- Fast mode: PyMuPDF with multi-strategy table detection (good for simple tables) +- Accurate mode: IBM Docling with TableFormer AI (better for complex/borderless tables) +""" + +import os +import sys +from pathlib import Path + +# Suppress PyMuPDF's "Consider using pymupdf_layout" recommendation +# This prints to stdout and pollutes --stdout output +os.environ.setdefault("PYMUPDF_SUGGEST_LAYOUT_ANALYZER", "0") + +# Version for cache invalidation - increment when extraction logic changes +# Format: major.minor.patch +# 3.1.0: Page separators now use instead of ----- +# Image extraction includes nested XObjects (full=True) +# 3.2.0: Fast mode now includes image references in markdown (write_images=True) +# Cache keys now include no_images flag to avoid contamination +# 3.3.0: Image paths in cached markdown now use relative 'images/' prefix +# (fixes broken temp directory references in cached output) +EXTRACTOR_VERSION = "3.3.0" + + +def check_docling_models(): + """Check if Docling models are downloaded.""" + try: + from huggingface_hub import scan_cache_dir + + cache_info = scan_cache_dir() + # Check for docling models in HF cache + docling_repos = [r for r in cache_info.repos if "docling" in r.repo_id.lower()] + return len(docling_repos) > 0 + except Exception: + return False + + +def extract_pdf_fast( + pdf_path: str, image_dir: str = None, show_progress: bool = False +) -> str: + """ + Fast PDF extraction using PyMuPDF with text-based table detection. 
+ + Uses 'text' table strategy which handles borderless/whitespace-based + tables better than the default 'lines_strict' for mixed document types. + + Args: + pdf_path: Path to the PDF file + image_dir: Directory to save extracted images (None = skip images) + show_progress: Whether to show progress output + + Returns: + Markdown string of the PDF content with image references if image_dir provided + """ + import pymupdf4llm + + if show_progress: + print("Extracting with PyMuPDF (fast mode)...", file=sys.stderr) + + # Use text strategy which handles borderless tables better + # than the default lines_strict + markdown = pymupdf4llm.to_markdown( + pdf_path, + show_progress=show_progress, + table_strategy="text", # Better for mixed table types + write_images=image_dir is not None, + image_path=image_dir, + ) + + # Replace pymupdf4llm's default page separator with explicit sentinel. + # This prevents false splits when documents contain literal "-----" + # (horizontal rules, ASCII tables, etc.) + markdown = markdown.replace("\n-----\n", "\n\n") + + return markdown + + +def _save_docling_images(result, output_dir: Path) -> list: + """ + Save images from a Docling conversion result to output directory. + + Images are saved in iteration order, which matches the order of + placeholders in the exported markdown. 
+ + Args: + result: Docling ConversionResult object + output_dir: Directory to save images to + + Returns: + List of saved image paths (in iteration order) + """ + output_dir.mkdir(parents=True, exist_ok=True) + image_paths = [] + + for i, (element, _level) in enumerate(result.document.iterate_items()): + if hasattr(element, "image") and element.image is not None: + img_path = output_dir / f"figure_{i:04d}.png" + element.image.pil_image.save(str(img_path)) + image_paths.append(str(img_path)) + + return image_paths + + +def extract_pdf_docling( + pdf_path: str, + output_dir: str = None, + images_scale: float = 4.0, + show_progress: bool = False, +) -> tuple: + """ + Extract PDF using Docling with accurate tables + high-res images. + + Uses IBM's TableFormer AI model for ~93.6% table extraction accuracy. + Also extracts images at configurable resolution (default 4x for crisp images). + + Args: + pdf_path: Path to the PDF file + output_dir: Directory to save extracted images (None = skip images) + images_scale: Image resolution multiplier (default: 4.0 for high-res) + show_progress: Whether to show progress output + + Returns: + tuple: (markdown: str, image_paths: list[str]) + """ + from docling.document_converter import DocumentConverter, PdfFormatOption + from docling.datamodel.base_models import InputFormat + from docling.datamodel.pipeline_options import PdfPipelineOptions, TableFormerMode + from docling_core.types.doc.base import ImageRefMode + + # Check if this is first run (models need downloading) + if not check_docling_models(): + print( + "First run: downloading Docling AI models (one-time setup, ~2-3 minutes)...", + file=sys.stderr, + ) + + if show_progress: + print( + f"Processing PDF with Docling (accurate mode, ~1 sec/page)...", + file=sys.stderr, + ) + + # Configure pipeline for accurate tables + image extraction + pipeline_options = PdfPipelineOptions( + do_table_structure=True, + generate_picture_images=output_dir is not None, + 
images_scale=images_scale, + ) + pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE + + converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + } + ) + + # Convert the document + result = converter.convert(pdf_path) + + # Check for conversion errors + if hasattr(result, "errors") and result.errors: + for error in result.errors: + print(f"WARNING: Docling conversion error: {error}", file=sys.stderr) + + # Check conversion status + from docling.datamodel.base_models import ConversionStatus + + if hasattr(result, "status") and result.status != ConversionStatus.SUCCESS: + print( + f"WARNING: Docling conversion status: {result.status.name}", + file=sys.stderr, + ) + + # Save images to output directory (order matters for placeholder replacement) + image_paths = [] + if output_dir: + image_paths = _save_docling_images(result, Path(output_dir)) + if show_progress and image_paths: + print( + f"Extracted {len(image_paths)} images at {images_scale}x resolution", + file=sys.stderr, + ) + + # Export markdown with placeholders + md = result.document.export_to_markdown(image_mode=ImageRefMode.PLACEHOLDER) + + # Replace placeholders with actual image references (order must match iteration order) + for img_path in image_paths: + md = md.replace("", f"![Figure](images/{Path(img_path).name})", 1) + + return md, image_paths + + +def extract_pdf_to_markdown( + pdf_path: str, accurate: bool = False, show_progress: bool = False +) -> str: + """ + Extract PDF to markdown with configurable accuracy/speed trade-off. + + Args: + pdf_path: Path to the PDF file + accurate: If True, use Docling AI (better for complex tables, slower). + If False, use PyMuPDF (fast, good for simple tables). 
+ show_progress: Whether to show progress output + + Returns: + Markdown string of the PDF content + """ + if accurate: + # Use Docling without image extraction + md, _ = extract_pdf_docling( + pdf_path, output_dir=None, show_progress=show_progress + ) + return md + else: + return extract_pdf_fast(pdf_path, show_progress) + + +def get_page_count(pdf_path: str) -> int: + """Get the number of pages in a PDF using pymupdf (faster than Docling for this).""" + import pymupdf + + doc = pymupdf.open(pdf_path) + count = len(doc) + doc.close() + return count + + +def extract_images(pdf_path: str, output_dir: str, show_progress: bool = False) -> list: + """ + Extract images from PDF to output directory. + + Uses pymupdf for image extraction since Docling focuses on document structure. + Deduplicates by xref to avoid extracting the same image multiple times + (e.g., icons/logos reused across pages). + + Returns: + List of extracted image paths + """ + import pymupdf + + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + doc = pymupdf.open(pdf_path) + extracted = [] + image_count = 0 + seen_xrefs = set() # Track already-extracted images by xref + + for page_num in range(len(doc)): + page = doc[page_num] + # full=True includes images nested inside form XObjects (common in + # documents exported from Word/PowerPoint) + images = page.get_images(full=True) + + for img_index, img in enumerate(images): + try: + xref = img[0] + + # Skip if we've already extracted this image + if xref in seen_xrefs: + continue + seen_xrefs.add(xref) + + pix = pymupdf.Pixmap(doc, xref) + + # Convert CMYK to RGB if necessary + if pix.n - pix.alpha > 3: + pix = pymupdf.Pixmap(pymupdf.csRGB, pix) + + image_count += 1 + img_filename = f"image_{image_count:04d}.png" + img_path = output_path / img_filename + pix.save(str(img_path)) + extracted.append(str(img_path)) + + pix = None + except Exception as e: + # Log instead of silently swallowing errors + print( + f"WARNING: 
Failed to extract image {img_index} on page {page_num + 1}: {e}", + file=sys.stderr, + ) + continue + + doc.close() + + if show_progress and extracted: + print(f"Extracted {len(extracted)} unique images", file=sys.stderr) + + return extracted \ No newline at end of file diff --git a/scripts/pdf_to_md/pdf_to_md.py b/scripts/pdf_to_md/pdf_to_md.py new file mode 100755 index 0000000..9054594 --- /dev/null +++ b/scripts/pdf_to_md/pdf_to_md.py @@ -0,0 +1,806 @@ +#!/usr/bin/env python3 +""" +PDF to Markdown Converter for LLM Context + +Extracts entire PDF content as clean, structured markdown. +Images are extracted to cache directory and copied to output location. + +Features: +- High-accuracy table extraction using IBM Docling (TableFormer AI model) +- Aggressive persistent caching (extracts once, reuses forever) +- Cache only cleared on explicit request or source file change + +Usage: + python pdf_to_md.py [output.md] + python pdf_to_md.py --docling # Accurate tables (slower) + python pdf_to_md.py --clear-cache # Re-extract + python pdf_to_md.py --clear-all-cache # Clear entire cache + +Dependencies: + uv pip install pymupdf pymupdf4llm # Fast mode + uv pip install docling docling-core # Docling mode (optional) +""" + +import argparse +import sys +import os +import re +import json +import hashlib +import shutil +import tempfile +from dataclasses import dataclass +from pathlib import Path +from datetime import datetime + + +# ============================================================================= +# DATACLASSES +# ============================================================================= + + +@dataclass +class ExtractionConfig: + """Configuration for PDF extraction.""" + + pdf_path: str + docling: bool = False + images_scale: float = 4.0 + + +@dataclass +class ExtractionResult: + """Result of PDF extraction or cache load.""" + + markdown: str + image_dir: Path | None + total_pages: int + from_cache: bool = False + + +# Suppress PyMuPDF's "Consider using 
pymupdf_layout" recommendation +os.environ.setdefault("PYMUPDF_SUGGEST_LAYOUT_ANALYZER", "0") + +# Default cache directory +DEFAULT_CACHE_DIR = Path.home() / ".cache" / "pdf-to-markdown" + + +# ============================================================================= +# CACHE MANAGER +# ============================================================================= + + +class CacheManager: + """Manages PDF extraction cache.""" + + def __init__(self, cache_dir: Path = None): + self.cache_dir = cache_dir or DEFAULT_CACHE_DIR + + def get_key(self, config: ExtractionConfig) -> str: + """Generate cache key from file content + size + mode.""" + p = Path(config.pdf_path).resolve() + stat = p.stat() + file_size = stat.st_size + + chunk_size = 65536 # 64KB + hasher = hashlib.sha256() + + with open(p, "rb") as f: + if file_size <= chunk_size * 2: + hasher.update(f.read()) + else: + hasher.update(f.read(chunk_size)) + f.seek(-chunk_size, 2) + hasher.update(f.read(chunk_size)) + + mode = f"docling_{config.images_scale}" if config.docling else "fast" + raw = f"{file_size}|{hasher.hexdigest()}|{mode}" + return hashlib.sha256(raw.encode()).hexdigest()[:16] + + def _get_dir(self, cache_key: str) -> Path: + """Get cache directory for a given cache key.""" + return self.cache_dir / cache_key + + def is_valid(self, config: ExtractionConfig) -> tuple[bool, str]: + """Check if valid cache exists for this PDF.""" + from extractor import EXTRACTOR_VERSION + + try: + cache_key = self.get_key(config) + except (FileNotFoundError, OSError): + return False, "" + + cache_dir = self._get_dir(cache_key) + metadata_file = cache_dir / "metadata.json" + output_file = cache_dir / "full_output.md" + + if not metadata_file.exists() or not output_file.exists(): + return False, cache_key + + try: + with open(metadata_file) as f: + metadata = json.load(f) + + p = Path(config.pdf_path).resolve() + stat = p.stat() + + if ( + metadata.get("source_size") != stat.st_size + or metadata.get("source_mtime") != 
stat.st_mtime + ): + return False, cache_key + + if metadata.get("extractor_version") != EXTRACTOR_VERSION: + return False, cache_key + + return True, cache_key + except (json.JSONDecodeError, KeyError, OSError): + return False, cache_key + + def load(self, cache_key: str) -> ExtractionResult | None: + """Load markdown from cache.""" + cache_dir = self._get_dir(cache_key) + + try: + full_md = (cache_dir / "full_output.md").read_text(encoding="utf-8") + with open(cache_dir / "metadata.json") as f: + metadata = json.load(f) + total_pages = metadata.get("total_pages", 0) + except (FileNotFoundError, IOError, json.JSONDecodeError, OSError) as e: + print( + f"WARNING: Cache corrupted ({e.__class__.__name__}), regenerating...", + file=sys.stderr, + ) + try: + if cache_dir.exists(): + shutil.rmtree(cache_dir) + except OSError: + pass + return None + + # Check if markdown references images + has_image_refs = bool(re.search(r"!\[[^\]]*\]\([^)]+\)", full_md)) + + # Get cached images directory + cached_image_dir = cache_dir / "images" + has_images = cached_image_dir.exists() and any(cached_image_dir.iterdir()) + + # If markdown expects images but they're missing, invalidate cache + if has_image_refs and not has_images: + print( + "WARNING: Cache missing images, regenerating...", + file=sys.stderr, + ) + try: + shutil.rmtree(cache_dir) + except OSError: + pass + return None + + image_dir = cached_image_dir if has_images else None + + return ExtractionResult( + markdown=full_md, + image_dir=image_dir, + total_pages=total_pages, + from_cache=True, + ) + + def _normalize_image_paths(self, markdown: str, source_image_dir: Path) -> str: + """Normalize image paths in markdown to use relative 'images/' prefix.""" + if not source_image_dir: + return markdown + + source_image_dir = Path(source_image_dir) + + def normalize_ref(match): + alt_text = match.group(1) + filename_raw = match.group(2) + filename = Path(filename_raw).name + if (source_image_dir / filename).exists(): + return 
f"![{alt_text}](images/{filename})" + return match.group(0) + + pattern = r"!\[([^\]]*)\]\(([^)]+)\)" + return re.sub(pattern, normalize_ref, markdown) + + def save(self, cache_key: str, result: ExtractionResult, config: ExtractionConfig): + """Save full extraction to cache using atomic writes.""" + from extractor import EXTRACTOR_VERSION + + cache_dir = self._get_dir(cache_key) + cache_dir.mkdir(parents=True, exist_ok=True) + + markdown = result.markdown + if result.image_dir: + markdown = self._normalize_image_paths(markdown, result.image_dir) + + p = Path(config.pdf_path).resolve() + stat = p.stat() + mode = f"docling_{config.images_scale}" if config.docling else "fast" + + metadata = { + "source_path": str(p), + "source_mtime": stat.st_mtime, + "source_size": stat.st_size, + "cache_key": cache_key, + "cached_at": datetime.now().isoformat(), + "total_pages": result.total_pages, + "extractor_version": EXTRACTOR_VERSION, + "mode": mode, + "images_scale": config.images_scale if config.docling else None, + } + + temp_md = None + temp_json = None + try: + with tempfile.NamedTemporaryFile( + mode="w", + dir=cache_dir, + suffix=".md.tmp", + delete=False, + encoding="utf-8", + ) as f: + f.write(markdown) + temp_md = f.name + + with tempfile.NamedTemporaryFile( + mode="w", dir=cache_dir, suffix=".json.tmp", delete=False + ) as f: + json.dump(metadata, f, indent=2) + temp_json = f.name + + os.replace(temp_md, cache_dir / "full_output.md") + temp_md = None + os.replace(temp_json, cache_dir / "metadata.json") + temp_json = None + + if result.image_dir and Path(result.image_dir).exists(): + temp_images = cache_dir / "images.tmp" + final_images = cache_dir / "images" + + if temp_images.exists(): + shutil.rmtree(temp_images) + + shutil.copytree(result.image_dir, temp_images) + + if final_images.exists(): + shutil.rmtree(final_images) + os.rename(temp_images, final_images) + + finally: + if temp_md and os.path.exists(temp_md): + os.unlink(temp_md) + if temp_json and 
os.path.exists(temp_json): + os.unlink(temp_json) + + def clear(self, pdf_path: str = None) -> bool: + """Clear cache for specific PDF (both fast and docling modes) or entire cache.""" + if pdf_path: + # Clear BOTH fast and docling caches for this PDF + cleared = False + for docling_mode in [False, True]: + try: + config = ExtractionConfig(pdf_path=pdf_path, docling=docling_mode) + cache_key = self.get_key(config) + cache_dir = self._get_dir(cache_key) + if cache_dir.exists(): + shutil.rmtree(cache_dir) + cleared = True + except (FileNotFoundError, OSError): + pass + return cleared + else: + if self.cache_dir.exists(): + shutil.rmtree(self.cache_dir) + return True + return False + + def get_stats(self) -> dict: + """Get statistics about the cache.""" + if not self.cache_dir.exists(): + return {"entries": 0, "total_size_mb": 0, "cache_dir": str(self.cache_dir)} + + entries = 0 + total_size = 0 + + for entry in self.cache_dir.iterdir(): + if entry.is_dir(): + entries += 1 + for f in entry.rglob("*"): + if f.is_file(): + total_size += f.stat().st_size + + return { + "entries": entries, + "total_size_mb": round(total_size / (1024 * 1024), 2), + "cache_dir": str(self.cache_dir), + } + + +# ============================================================================= +# IMAGE MANAGER +# ============================================================================= + + +class ImageManager: + """Manages image extraction and cleanup.""" + + def __init__(self): + self._temp_dirs: list[Path] = [] + + def create_temp_dir(self, pdf_path: str) -> Path: + """Create tracked temp directory for image extraction.""" + pdf_name = Path(pdf_path).stem + safe_name = re.sub(r"[^\w\-_]", "_", pdf_name) + temp_dir = Path(tempfile.mkdtemp(prefix=f"pdf_images_{safe_name}_")) + self._temp_dirs.append(temp_dir) + return temp_dir + + def cleanup(self): + """Clean up all tracked temp directories.""" + for temp_dir in self._temp_dirs: + if temp_dir.exists(): + shutil.rmtree(temp_dir) + 
self._temp_dirs.clear() + + def extract_references(self, markdown: str) -> set: + """Extract the set of image filenames referenced in markdown.""" + pattern = r"!\[[^\]]*\]\(([^)]+)\)" + matches = re.findall(pattern, markdown) + return {Path(m).name for m in matches} + + def get_info(self, image_dir: Path, referenced_only: set = None) -> list: + """Get information about extracted images.""" + if not image_dir or not Path(image_dir).exists(): + return [] + + image_dir = Path(image_dir) + images = [] + + for img_path in sorted(image_dir.glob("*")): + if img_path.suffix.lower() in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"): + if referenced_only is not None and img_path.name not in referenced_only: + continue + + try: + size_bytes = img_path.stat().st_size + size_kb = size_bytes / 1024 + + try: + import pymupdf + pix = pymupdf.Pixmap(str(img_path)) + dimensions = f"{pix.width}x{pix.height}" + pix = None + except Exception: + dimensions = "unknown" + + images.append({ + "filename": img_path.name, + "path": str(img_path), + "size_kb": round(size_kb, 1), + "dimensions": dimensions, + }) + except Exception: + pass + + return images + + def enhance_markdown(self, markdown: str, image_dir: Path) -> str: + """Rewrite image references to use relative paths (portable, Windows-safe).""" + if not image_dir: + return markdown + + image_dir = Path(image_dir) + + def replace_image_ref(match): + alt_text = match.group(1) + filename_raw = match.group(2) + filename = Path(filename_raw).name + full_path = image_dir / filename + + # Use relative path for portability (POSIX format for Windows compatibility) + relative_path = Path("images") / filename + + if full_path.exists(): + try: + size_kb = round(full_path.stat().st_size / 1024, 1) + try: + import pymupdf + pix = pymupdf.Pixmap(str(full_path)) + dims = f"{pix.width}x{pix.height}" + pix = None + except Exception: + dims = "?" 
+ + return f"![{alt_text}]({relative_path.as_posix()})\n\n**[Image: {filename} ({dims}, {size_kb}KB)]**" + except Exception: + return f"![{alt_text}]({relative_path.as_posix()})\n\n**[Image: {filename}]**" + + return match.group(0) + + pattern = r"!\[([^\]]*)\]\(([^)]+)\)" + return re.sub(pattern, replace_image_ref, markdown) + + def create_summary(self, images: list) -> str: + """Create a summary section listing all extracted images.""" + if not images: + return "" + + lines = [ + "", + "---", + "", + "## Extracted Images", + "", + "| # | File | Dimensions | Size |", + "|---|------|------------|------|", + ] + + for i, img in enumerate(images, 1): + lines.append( + f"| {i} | {img['filename']} | {img['dimensions']} | {img['size_kb']}KB |" + ) + + lines.append("") + return "\n".join(lines) + + def finalize_images( + self, temp_dir: Path, cache_dir: Path, output_path: Path, show_progress: bool = False + ) -> Path | None: + """Finalize image directory after extraction. + + Copies images from cache to output location (next to the markdown file). + Cleans up temp directories. + + Returns the final image directory (next to output) for reference. 
+ """ + if not temp_dir: + return None + + temp_dir = Path(temp_dir) + + # Clean up empty temp directories + if not temp_dir.exists() or not any(temp_dir.iterdir()): + if temp_dir.exists(): + shutil.rmtree(temp_dir) + if temp_dir in self._temp_dirs: + self._temp_dirs.remove(temp_dir) + return None + + # Clean up temp directory (images are saved to cache) + if temp_dir.exists(): + shutil.rmtree(temp_dir) + if temp_dir in self._temp_dirs: + self._temp_dirs.remove(temp_dir) + + # Copy images from cache to output location + if cache_dir: + cached_image_dir = cache_dir / "images" + if cached_image_dir.exists() and any(cached_image_dir.iterdir()): + return self._copy_images_to_output(cached_image_dir, output_path, show_progress) + + return None + + def _copy_images_to_output( + self, source_dir: Path, output_path: Path, show_progress: bool = False + ) -> Path | None: + """Copy images from cache to output location (next to markdown file).""" + output_path = Path(output_path) + + # Determine output images directory (sibling to markdown file) + if output_path.suffix: # It's a file path like "output.md" + output_images_dir = output_path.parent / "images" + else: # It's a directory + output_images_dir = output_path / "images" + + # Don't copy if already at output location + if output_images_dir.resolve() == Path(source_dir).resolve(): + return output_images_dir + + # Copy images to output location + output_images_dir.mkdir(parents=True, exist_ok=True) + copied_count = 0 + for img in source_dir.iterdir(): + if img.is_file(): + shutil.copy2(img, output_images_dir / img.name) + copied_count += 1 + + if show_progress and copied_count > 0: + print(f"Copied {copied_count} images to: {output_images_dir}", file=sys.stderr) + + return output_images_dir + + +# ============================================================================= +# PDF PROCESSING +# ============================================================================= + + +def check_dependencies(docling_mode: bool = 
False): + """Check if required packages are installed.""" + missing = [] + + try: + import pymupdf + except ImportError: + missing.append("pymupdf") + + if docling_mode: + try: + import docling + except ImportError: + missing.append("docling") + + try: + import docling_core + except ImportError: + missing.append("docling-core") + + install_cmd = "uv pip install pymupdf docling docling-core" + else: + try: + import pymupdf4llm + except ImportError: + missing.append("pymupdf4llm") + + install_cmd = "uv pip install pymupdf pymupdf4llm" + + if missing: + print(f"ERROR: Missing dependencies: {', '.join(missing)}", file=sys.stderr) + print(f"Install with: {install_cmd}", file=sys.stderr) + return False + + return True + + +def convert_pdf(pdf_path, image_dir, show_progress=False, docling=False, images_scale=4.0): + """Convert PDF to markdown.""" + if docling: + from extractor import extract_pdf_docling + + markdown, _image_paths = extract_pdf_docling( + pdf_path, + output_dir=image_dir, + images_scale=images_scale, + show_progress=show_progress, + ) + return markdown + else: + from extractor import extract_pdf_fast + + markdown = extract_pdf_fast( + pdf_path, + image_dir=image_dir, + show_progress=show_progress, + ) + return markdown + + +def add_metadata_header(markdown, pdf_path, total_pages, image_dir=None, cached=False): + """Add metadata header to markdown output.""" + filename = os.path.basename(pdf_path) + + header_lines = [ + "---", + f"source: {filename}", + f"total_pages: {total_pages}", + f"extracted_at: {datetime.now().isoformat()}", + ] + + if cached: + header_lines.append("from_cache: true") + + if image_dir: + # Use relative path for portability + header_lines.append("images_dir: images") + + header_lines.extend(["---", "", ""]) + + return "\n".join(header_lines) + markdown + + +# ============================================================================= +# MAIN +# ============================================================================= + + +def 
main(): + parser = argparse.ArgumentParser( + description="Convert PDF to Markdown for LLM context (with persistent caching)", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python pdf_to_md.py document.pdf # Output to document.md (cached) + python pdf_to_md.py document.pdf output.md # Custom output path + python pdf_to_md.py document.pdf --docling # Accurate tables (slower) + python pdf_to_md.py document.pdf --clear-cache # Clear cache and re-extract + python pdf_to_md.py --clear-all-cache # Clear entire cache + +Caching: + PDFs are cached in ~/.cache/pdf-to-markdown/ + Cache is keyed by file content hash + extraction mode. + Cache persists until explicitly cleared or source PDF changes. + """, + ) + + parser.add_argument("input", nargs="?", help="Input PDF file path") + parser.add_argument("output", nargs="?", help="Output markdown file path (default: .md)") + parser.add_argument( + "--docling", + "--accurate", + action="store_true", + dest="docling", + help="Use Docling AI for complex/borderless tables (slower, ~1 sec/page)", + ) + parser.add_argument("--no-progress", action="store_true", help="Disable progress indicator") + + # Cache options + parser.add_argument( + "--clear-cache", + action="store_true", + help="Clear cache for this PDF before processing", + ) + parser.add_argument( + "--clear-all-cache", + action="store_true", + help="Clear entire cache directory and exit", + ) + parser.add_argument("--cache-stats", action="store_true", help="Show cache statistics and exit") + + args = parser.parse_args() + + cache_mgr = CacheManager() + + # Handle cache management commands + if args.clear_all_cache: + if cache_mgr.clear(): + print(f"Cache cleared: {cache_mgr.cache_dir}", file=sys.stderr) + else: + print("Cache was already empty.", file=sys.stderr) + sys.exit(0) + + if args.cache_stats: + stats = cache_mgr.get_stats() + print(f"Cache directory: {stats['cache_dir']}", file=sys.stderr) + print(f"Cached PDFs: 
{stats['entries']}", file=sys.stderr) + print(f"Total size: {stats['total_size_mb']} MB", file=sys.stderr) + sys.exit(0) + + # Require input for all other operations + if not args.input: + parser.error("the following arguments are required: input") + + # Handle --clear-cache + if args.clear_cache: + if cache_mgr.clear(args.input): + print(f"Cache cleared for: {args.input}", file=sys.stderr) + else: + print(f"No cache found for: {args.input}", file=sys.stderr) + + # Validate input exists + if not os.path.exists(args.input): + print(f"ERROR: File not found: {args.input}", file=sys.stderr) + sys.exit(1) + + if not args.input.lower().endswith(".pdf"): + print(f"WARNING: File may not be a PDF: {args.input}", file=sys.stderr) + + show_progress = sys.stderr.isatty() and not args.no_progress + + # Check cache + config = ExtractionConfig(pdf_path=args.input, docling=args.docling) + valid, cache_key = cache_mgr.is_valid(config) + + result = None + image_dir = None + cache_hit = False + + if valid: + if show_progress: + mode = "docling" if args.docling else "fast" + print(f"Loading from cache ({mode} mode)...", file=sys.stderr) + + cache_result = cache_mgr.load(cache_key) + if cache_result: + result = cache_result.markdown + total_pages = cache_result.total_pages + cache_hit = True + + # Copy images from cache to output location + if cache_result.image_dir: + output_path = args.output or os.path.splitext(args.input)[0] + ".md" + img_mgr = ImageManager() + image_dir = img_mgr._copy_images_to_output( + cache_result.image_dir, output_path, show_progress + ) + + # Extract if no cache hit + if not cache_hit: + if not check_dependencies(docling_mode=args.docling): + sys.exit(1) + + from extractor import get_page_count + + total_pages = get_page_count(args.input) + + if not cache_key: + cache_key = cache_mgr.get_key(config) + + img_mgr = ImageManager() + temp_image_dir = img_mgr.create_temp_dir(args.input) + + try: + if show_progress: + if args.docling: + print( + f"Extracting 
{total_pages} pages with Docling AI (~1 sec/page)...", + file=sys.stderr, + ) + else: + print( + f"Extracting {total_pages} pages with PyMuPDF (fast mode)...", + file=sys.stderr, + ) + + result = convert_pdf( + args.input, + image_dir=temp_image_dir, + show_progress=show_progress, + docling=args.docling, + ) + except Exception as e: + img_mgr.cleanup() + print(f"ERROR: Conversion failed: {e}", file=sys.stderr) + sys.exit(1) + + # Save to cache + extraction_result = ExtractionResult( + markdown=result, + image_dir=temp_image_dir, + total_pages=total_pages, + ) + cache_mgr.save(cache_key, extraction_result, config) + if show_progress: + print(f"Cached: {cache_mgr._get_dir(cache_key)}", file=sys.stderr) + + # Finalize images + output_path = args.output or os.path.splitext(args.input)[0] + ".md" + image_dir = img_mgr.finalize_images( + temp_dir=temp_image_dir, + cache_dir=cache_mgr._get_dir(cache_key), + output_path=output_path, + show_progress=show_progress, + ) + + # Format output + output = result + img_mgr_for_output = ImageManager() # Fresh instance for output processing + + referenced_images = img_mgr_for_output.extract_references(result) if result else set() + + if image_dir: + output = img_mgr_for_output.enhance_markdown(output, image_dir) + images = img_mgr_for_output.get_info(image_dir, referenced_only=referenced_images) + if images: + output += img_mgr_for_output.create_summary(images) + + output = add_metadata_header(output, args.input, total_pages, image_dir, cached=cache_hit) + + # Write output + output_path = args.output or os.path.splitext(args.input)[0] + ".md" + with open(output_path, "w", encoding="utf-8") as f: + f.write(output) + + msg = f"Converted {total_pages} pages to: {output_path}" + if cache_hit: + msg += " (from cache)" + if image_dir: + images = img_mgr_for_output.get_info(image_dir, referenced_only=referenced_images) + if images: + msg += f" ({len(images)} images)" + print(msg, file=sys.stderr) + + +if __name__ == "__main__": + main() \ 
No newline at end of file From a6e411b0fe841f79e92f5eceea059604e4f7690c Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 13:10:37 +0100 Subject: [PATCH 02/13] update pdf to md route --- .github/workflows/pdf-to-markdown.yml | 77 ++++++++++++++------- scripts/pdf_to_md/extractor.py | 4 +- scripts/pdf_to_md/pdf_to_md.py | 96 +++++++++++++++++++++------ 3 files changed, 130 insertions(+), 47 deletions(-) diff --git a/.github/workflows/pdf-to-markdown.yml b/.github/workflows/pdf-to-markdown.yml index 1c60f02..0bbc91c 100644 --- a/.github/workflows/pdf-to-markdown.yml +++ b/.github/workflows/pdf-to-markdown.yml @@ -1,13 +1,13 @@ name: pdf-to-markdown on: + push: + branches-ignore: + - main + paths: + - legacy/*.pdf + - legacy/*.PDF workflow_dispatch: - inputs: - pdf_name: - description: "PDF file name inside legacy/ (example: report.pdf)" - required: true - default: "input.pdf" - type: string permissions: contents: write @@ -15,9 +15,6 @@ permissions: jobs: convert: runs-on: ubuntu-latest - container: - image: python:3.12-slim - env: LEGACY_DIR: legacy @@ -27,43 +24,73 @@ jobs: with: fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Check whether conversion should run + id: pdf_gate + run: | + set -euo pipefail + + if [ ! -d "$LEGACY_DIR" ]; then + echo "should_convert=false" >> "$GITHUB_OUTPUT" + echo "Conversion skipped: $LEGACY_DIR/ does not exist." + exit 0 + fi + + mapfile -t pdf_files < <(find "$LEGACY_DIR" -maxdepth 1 -type f \( -iname '*.pdf' \) | sort) + pdf_count="${#pdf_files[@]}" + + if [ "$pdf_count" -eq 0 ]; then + echo "should_convert=false" >> "$GITHUB_OUTPUT" + echo "Conversion skipped: no PDF files found in $LEGACY_DIR/." + exit 0 + fi + + if [ "$pdf_count" -gt 1 ]; then + printf 'Found %s PDF files in %s/:\n' "$pdf_count" "$LEGACY_DIR" + printf ' - %s\n' "${pdf_files[@]}" + echo "Expected exactly one PDF file in $LEGACY_DIR/." 
+ exit 1 + fi + + echo "should_convert=true" >> "$GITHUB_OUTPUT" + echo "pdf_path=${pdf_files[0]}" >> "$GITHUB_OUTPUT" + echo "Using PDF: ${pdf_files[0]}" + - name: Install fast-mode dependencies + if: steps.pdf_gate.outputs.should_convert == 'true' run: | python -m pip install --upgrade pip pip install pymupdf pymupdf4llm - name: Convert PDF to legacy/source.md + if: steps.pdf_gate.outputs.should_convert == 'true' run: | - set -eux - - PDF="$LEGACY_DIR/${{ inputs.pdf_name }}" - OUT="$LEGACY_DIR/source.md" - - test -d "$LEGACY_DIR" - test -f "$PDF" - - python scripts/pdf_to_md.py "$PDF" "$OUT" + set -euo pipefail + python scripts/pdf_to_md/pdf_to_md.py "$LEGACY_DIR" "$LEGACY_DIR/source.md" --no-progress - name: Show generated files + if: steps.pdf_gate.outputs.should_convert == 'true' run: | find "$LEGACY_DIR" -maxdepth 3 \( -name "source.md" -o -path "$LEGACY_DIR/images/*" \) | sort - name: Commit and push generated files + if: steps.pdf_gate.outputs.should_convert == 'true' run: | - set -eux + set -euo pipefail git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git add "$LEGACY_DIR/source.md" - if [ -d "$LEGACY_DIR/images" ]; then - git add "$LEGACY_DIR/images" - fi + git add -A "$LEGACY_DIR" if git diff --cached --quiet; then echo "No changes to commit" exit 0 fi - git commit -m "Update legacy/source.md from ${{ inputs.pdf_name }}" - git push \ No newline at end of file + git commit -m "Prepare migration: extract markdown from legacy PDF" + git push diff --git a/scripts/pdf_to_md/extractor.py b/scripts/pdf_to_md/extractor.py index 3250f9e..b9d5fd2 100755 --- a/scripts/pdf_to_md/extractor.py +++ b/scripts/pdf_to_md/extractor.py @@ -214,7 +214,7 @@ def extract_pdf_to_markdown( ) return md else: - return extract_pdf_fast(pdf_path, show_progress) + return extract_pdf_fast(pdf_path, show_progress=show_progress) def get_page_count(pdf_path: str) -> int: @@ -289,4 +289,4 @@ def 
extract_images(pdf_path: str, output_dir: str, show_progress: bool = False) if show_progress and extracted: print(f"Extracted {len(extracted)} unique images", file=sys.stderr) - return extracted \ No newline at end of file + return extracted diff --git a/scripts/pdf_to_md/pdf_to_md.py b/scripts/pdf_to_md/pdf_to_md.py index 9054594..4ee6aad 100755 --- a/scripts/pdf_to_md/pdf_to_md.py +++ b/scripts/pdf_to_md/pdf_to_md.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +from __future__ import annotations + """ PDF to Markdown Converter for LLM Context @@ -65,6 +67,35 @@ class ExtractionResult: DEFAULT_CACHE_DIR = Path.home() / ".cache" / "pdf-to-markdown" +def resolve_input_pdf(input_arg: str) -> Path: + """Resolve either a direct PDF path or a folder containing exactly one PDF.""" + input_path = Path(input_arg) + + if input_path.is_file(): + return input_path + + if input_path.is_dir(): + pdf_files = sorted( + [p for p in input_path.iterdir() if p.is_file() and p.suffix.lower() == ".pdf"] + ) + + if len(pdf_files) == 0: + raise FileNotFoundError( + f"No PDF files found in folder: {input_path}. Expected exactly one .pdf file." + ) + + if len(pdf_files) > 1: + names = ", ".join(p.name for p in pdf_files) + raise ValueError( + f"Multiple PDF files found in folder: {input_path}. 
" + f"Expected exactly one .pdf file, found {len(pdf_files)}: {names}" + ) + + return pdf_files[0] + + raise FileNotFoundError(f"Input path does not exist: {input_path}") + + # ============================================================================= # CACHE MANAGER # ============================================================================= @@ -459,6 +490,10 @@ def finalize_images( return None temp_dir = Path(temp_dir) + output_path = Path(output_path) + output_images_dir = ( + output_path.parent / "images" if output_path.suffix else output_path / "images" + ) # Clean up empty temp directories if not temp_dir.exists() or not any(temp_dir.iterdir()): @@ -466,6 +501,8 @@ def finalize_images( shutil.rmtree(temp_dir) if temp_dir in self._temp_dirs: self._temp_dirs.remove(temp_dir) + if output_images_dir.exists(): + shutil.rmtree(output_images_dir) return None # Clean up temp directory (images are saved to cache) @@ -480,6 +517,9 @@ def finalize_images( if cached_image_dir.exists() and any(cached_image_dir.iterdir()): return self._copy_images_to_output(cached_image_dir, output_path, show_progress) + if output_images_dir.exists(): + shutil.rmtree(output_images_dir) + return None def _copy_images_to_output( @@ -498,7 +538,10 @@ def _copy_images_to_output( if output_images_dir.resolve() == Path(source_dir).resolve(): return output_images_dir - # Copy images to output location + # Replace previously generated content so stale images do not linger. 
+ if output_images_dir.exists(): + shutil.rmtree(output_images_dir) + output_images_dir.mkdir(parents=True, exist_ok=True) copied_count = 0 for img in source_dir.iterdir(): @@ -624,8 +667,12 @@ def main(): """, ) - parser.add_argument("input", nargs="?", help="Input PDF file path") - parser.add_argument("output", nargs="?", help="Output markdown file path (default: .md)") + parser.add_argument( + "input", nargs="?", help="Input PDF file path or folder containing one PDF" + ) + parser.add_argument( + "output", nargs="?", help="Output markdown file path (default: .md)" + ) parser.add_argument( "--docling", "--accurate", @@ -673,23 +720,30 @@ def main(): # Handle --clear-cache if args.clear_cache: - if cache_mgr.clear(args.input): - print(f"Cache cleared for: {args.input}", file=sys.stderr) + try: + resolved_input = resolve_input_pdf(args.input) + except (FileNotFoundError, ValueError) as exc: + print(f"ERROR: {exc}", file=sys.stderr) + sys.exit(1) + + if cache_mgr.clear(str(resolved_input)): + print(f"Cache cleared for: {resolved_input}", file=sys.stderr) else: - print(f"No cache found for: {args.input}", file=sys.stderr) + print(f"No cache found for: {resolved_input}", file=sys.stderr) - # Validate input exists - if not os.path.exists(args.input): - print(f"ERROR: File not found: {args.input}", file=sys.stderr) + try: + input_pdf = resolve_input_pdf(args.input) + except (FileNotFoundError, ValueError) as exc: + print(f"ERROR: {exc}", file=sys.stderr) sys.exit(1) - if not args.input.lower().endswith(".pdf"): - print(f"WARNING: File may not be a PDF: {args.input}", file=sys.stderr) + if input_pdf.suffix.lower() != ".pdf": + print(f"WARNING: File may not be a PDF: {input_pdf}", file=sys.stderr) show_progress = sys.stderr.isatty() and not args.no_progress # Check cache - config = ExtractionConfig(pdf_path=args.input, docling=args.docling) + config = ExtractionConfig(pdf_path=str(input_pdf), docling=args.docling) valid, cache_key = cache_mgr.is_valid(config) result = 
None @@ -709,7 +763,7 @@ def main(): # Copy images from cache to output location if cache_result.image_dir: - output_path = args.output or os.path.splitext(args.input)[0] + ".md" + output_path = args.output or str(input_pdf.with_suffix(".md")) img_mgr = ImageManager() image_dir = img_mgr._copy_images_to_output( cache_result.image_dir, output_path, show_progress @@ -722,13 +776,13 @@ def main(): from extractor import get_page_count - total_pages = get_page_count(args.input) + total_pages = get_page_count(str(input_pdf)) if not cache_key: cache_key = cache_mgr.get_key(config) img_mgr = ImageManager() - temp_image_dir = img_mgr.create_temp_dir(args.input) + temp_image_dir = img_mgr.create_temp_dir(str(input_pdf)) try: if show_progress: @@ -744,7 +798,7 @@ def main(): ) result = convert_pdf( - args.input, + str(input_pdf), image_dir=temp_image_dir, show_progress=show_progress, docling=args.docling, @@ -765,7 +819,7 @@ def main(): print(f"Cached: {cache_mgr._get_dir(cache_key)}", file=sys.stderr) # Finalize images - output_path = args.output or os.path.splitext(args.input)[0] + ".md" + output_path = args.output or str(input_pdf.with_suffix(".md")) image_dir = img_mgr.finalize_images( temp_dir=temp_image_dir, cache_dir=cache_mgr._get_dir(cache_key), @@ -785,10 +839,12 @@ def main(): if images: output += img_mgr_for_output.create_summary(images) - output = add_metadata_header(output, args.input, total_pages, image_dir, cached=cache_hit) + output = add_metadata_header( + output, str(input_pdf), total_pages, image_dir, cached=cache_hit + ) # Write output - output_path = args.output or os.path.splitext(args.input)[0] + ".md" + output_path = args.output or str(input_pdf.with_suffix(".md")) with open(output_path, "w", encoding="utf-8") as f: f.write(output) @@ -803,4 +859,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() From e80ff061b1d7cb58167b6ba206bd1b73b37fb012 Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 14:34:29 
+0100 Subject: [PATCH 03/13] fix github action push process --- .github/workflows/pdf-to-markdown.yml | 7 ++++++- .github/workflows/prepare_migration.yml | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pdf-to-markdown.yml b/.github/workflows/pdf-to-markdown.yml index 0bbc91c..cf04edb 100644 --- a/.github/workflows/pdf-to-markdown.yml +++ b/.github/workflows/pdf-to-markdown.yml @@ -23,6 +23,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + ref: ${{ github.ref }} - name: Set up Python uses: actions/setup-python@v5 @@ -82,6 +83,8 @@ jobs: run: | set -euo pipefail + branch_name="${GITHUB_REF_NAME}" + git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" @@ -93,4 +96,6 @@ jobs: fi git commit -m "Prepare migration: extract markdown from legacy PDF" - git push + git fetch origin "$branch_name" + git rebase "origin/$branch_name" + git push origin "HEAD:$branch_name" diff --git a/.github/workflows/prepare_migration.yml b/.github/workflows/prepare_migration.yml index 54c0ae6..4b65cec 100644 --- a/.github/workflows/prepare_migration.yml +++ b/.github/workflows/prepare_migration.yml @@ -18,6 +18,9 @@ jobs: steps: - name: Check out repo uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.ref }} - name: Set up Python uses: actions/setup-python@v5 @@ -40,8 +43,18 @@ jobs: - name: Commit extracted text run: | + set -euo pipefail + + branch_name="${GITHUB_REF_NAME}" + git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" git add legacy/source.txt - git diff --cached --quiet || git commit -m "Prepare migration: extract text from legacy PDF" - git push + if git diff --cached --quiet; then + echo "No changes to commit" + exit 0 + fi + git commit -m "Prepare migration: extract text from legacy PDF" + git fetch origin "$branch_name" + git rebase "origin/$branch_name" + git 
push origin "HEAD:$branch_name" From ac3fefa9dd99ee1a7b80e445cffccd174d143cda Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 14:42:04 +0100 Subject: [PATCH 04/13] change primary source for protocol conversion --- .agents/skills/protocol-migration/SKILL.md | 19 +++++++++++-------- .github/copilot-instructions.md | 6 +++--- docs/PROMPT.md | 10 ++++++---- docs/USING_THIS_TEMPLATE.md | 4 ++-- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/.agents/skills/protocol-migration/SKILL.md b/.agents/skills/protocol-migration/SKILL.md index 55ad0a5..952b7ac 100644 --- a/.agents/skills/protocol-migration/SKILL.md +++ b/.agents/skills/protocol-migration/SKILL.md @@ -1,16 +1,17 @@ --- name: protocol-migration -description: Convert legacy/source.txt into README.md using the repository template, preserving scientific meaning and marking uncertainty with CHECK:. +description: Convert legacy/source.md into README.md using the repository template, using source.txt only as fallback and marking uncertainty with CHECK:. --- Use this skill when migrating a legacy protocol into this repository template. Goal: -Convert `legacy/source.txt` into `README.md`, using the existing `README.md` as the target template and structure. +Convert `legacy/source.md` into `README.md`, using the existing `README.md` as the target template and structure. 
Primary sources: -- `legacy/source.txt` is the main source -- also consult the PDF in `legacy/` for tables, layout-dependent content, and anything unclear +- `legacy/source.md` is the main source +- `legacy/source.txt` is a fallback source only when `legacy/source.md` looks malformed, incomplete, or unclear +- consult the PDF in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous Core rules: - Do not change protocol meaning @@ -57,19 +58,21 @@ Output requirements: - content copied verbatim but not confidently placed After drafting, verify the migration against the source: -- compare the migrated `README.md` against `legacy/source.txt` -- - compare the migrated `README.md` against the PDF in `legacy/` +- compare the migrated `README.md` against `legacy/source.md` +- compare any malformed, incomplete, or ambiguous passages against `legacy/source.txt` +- compare the migrated `README.md` against the PDF in `legacy/` for tables, figures, layout-dependent content, and any remaining ambiguity - check that all protocol steps, notes, warnings, reagent names, quantities, temperatures, timings, and conditions are still present - check that no source content has been silently omitted, merged, or reordered without justification - check any tables, layout-dependent content, or ambiguous sections against the PDF in `legacy/` - leave `CHECK:` anywhere the mapping is uncertain rather than guessing Verification checklist: -- `README.md` still matches the scientific content of `legacy/source.txt` +- `README.md` still matches the scientific content of `legacy/source.md` +- any malformed, incomplete, or ambiguous passages were cross-checked against `legacy/source.txt` - no protocol steps or warnings were omitted - no values were invented or made more precise than in the source - tables and layout-dependent content were checked against the PDF in `legacy/` - any uncertain mappings are marked with `CHECK:` - any 
meaningful normalization choices are noted in `# Migration notes` -Prefer preserving meaning over making the output prettier. \ No newline at end of file +Prefer preserving meaning over making the output prettier. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 4026df8..8a3b767 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -4,8 +4,9 @@ This repository stores laboratory protocols in Markdown in `README.md`. ## Primary rule Do not change protocol meaning. -Use `legacy/source.txt` as the primary source when rewriting `README.md`. -Also consult the PDF file in `legacy/` as the reference source for tables, layout-dependent content, and anything unclear. +Use `legacy/source.md` as the primary source when rewriting `README.md`. +Use `legacy/source.txt` only as a fallback when `legacy/source.md` looks malformed, incomplete, or unclear. +Use the PDF file in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous. ## Migration behavior When converting legacy protocol text into the repository template: @@ -68,4 +69,3 @@ When drafting a migrated protocol: - Keep ![Created with ulelab Protocol Template](https://img.shields.io/badge/created%20with-ulelab%20Protocol%20Template-blue) at the top of the file. - Delete the "Template repository: Click `Use this template` to create a new protocol repo..." note. - diff --git a/docs/PROMPT.md b/docs/PROMPT.md index 03f22a6..7e58fef 100644 --- a/docs/PROMPT.md +++ b/docs/PROMPT.md @@ -1,12 +1,13 @@ -Convert `legacy/source.txt` into `README.md`. +Convert `legacy/source.md` into `README.md`. Use the existing `README.md` as the target template and structure. Also read and follow `.github/copilot-instructions.md`. Apply those instructions even if you are not GitHub Copilot. -Use `legacy/source.txt` as the primary source. 
-Also check the PDF file in the `legacy/` folder as the reference source, especially for tables, layout-dependent content, and anything unclear. +Use `legacy/source.md` as the primary source. +Use `legacy/source.txt` only as a fallback when `legacy/source.md` looks malformed, incomplete, or unclear. +Use the PDF file in `legacy/` as the reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous after checking the generated text sources. Requirements: 1. Preserve all protocol content. @@ -22,7 +23,8 @@ Requirements: 7. Do not delete repeated warnings or notes. 8. If any text does not fit cleanly into the template, place it under `# Migration notes` or `## Unplaced content`. 9. Mark uncertainty with `CHECK:` instead of guessing. -10. After drafting, add a short summary in `# Migration notes` covering: +10. If `legacy/source.md` and `legacy/source.txt` disagree, prefer `legacy/source.md` for general structure and prose, but use the original PDF as the final tie-breaker. +11. After drafting, add a short summary in `# Migration notes` covering: - formatting normalizations performed - ambiguities and uncertainty flagged - content placed in `## Unplaced content` diff --git a/docs/USING_THIS_TEMPLATE.md b/docs/USING_THIS_TEMPLATE.md index 833102a..1044b53 100644 --- a/docs/USING_THIS_TEMPLATE.md +++ b/docs/USING_THIS_TEMPLATE.md @@ -101,11 +101,11 @@ This route can save time. It helps keep the template structure consistent, norma > **Recommended**: Also fill in the `source-metadata.yml`, even if not fully. Helps track source protocol provenance. 4. Keep exactly one PDF in the `legacy` folder, otherwise the process will fail. -5. Once you push a PDF change in the `legacy` folder to a non-`main` branch, the `Prepare migration from PDF` GitHub Action will run. This extracts the PDF text and writes `legacy/source.txt`. Check that this file was created before the next step. +5. 
Once you push a PDF change in the `legacy` folder to a non-`main` branch, the migration GitHub Actions will run. `Prepare migration from PDF` writes `legacy/source.txt`, and `pdf-to-markdown` writes `legacy/source.md`. Check that these files were created before the next step. 6. Clone the repo locally, and switch to `import-protocol` branch. If you already have a local clone, run `git pull` to get the latest changes locally. > **Note**: Alternatively, you can complete steps 6-15 in GitHub Codespaces. On GitHub.com select the branch you want to work on, click **Code**, go to **Codespaces** tab and click **Create codespace on import-protocol**. This will open VS Code in a new browser tab, with all files loaded automatically. Note that this uses GitHub-hosted compute, and free usage is limited. 7. Open the repo folder in a code editor and use GitHub Copilot or another LLM assistant. We recommend [VS Code](https://code.visualstudio.com/). -8. Use the `protocol-migration` skill (or if you prefer, paste the prompt in `docs/PROMPT.md`) to ask GitHub Copilot or another LLM to rewrite `README.md`. The model will also follow the repository instructions in [`.github/copilot-instructions.md`](.github/copilot-instructions.md). This will edit the `README.md` file in-place, using `legacy/source.txt` and the legacy PDF as sources. +8. Use the `protocol-migration` skill (or if you prefer, paste the prompt in `docs/PROMPT.md`) to ask GitHub Copilot or another LLM to rewrite `README.md`. The model will also follow the repository instructions in [`.github/copilot-instructions.md`](.github/copilot-instructions.md). This will edit the `README.md` file in-place, using `legacy/source.md` as the primary source, `legacy/source.txt` as a fallback when needed, and the legacy PDF as the final tie-breaker for tables, figures, and unclear layout-dependent content. > **Note**: Use the best model you have access to. 
We tested capability with the Copilot Free Usage plan, and it works reasonably well, but advanced models will likely work even better. **In VS Code**: From 99a9dc5537982176e10dbe6428db090e54b3549b Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 15:17:08 +0100 Subject: [PATCH 05/13] sync agent instructions --- .agents/skills/protocol-migration/SKILL.md | 109 +++++++++------ .../skills/protocol-migration/SKILL.md | 122 +++++++++++------ .github/copilot-instructions.md | 51 +++++-- docs/PROMPT.md | 125 ++++++++++++++---- 4 files changed, 295 insertions(+), 112 deletions(-) diff --git a/.agents/skills/protocol-migration/SKILL.md b/.agents/skills/protocol-migration/SKILL.md index 952b7ac..7b806de 100644 --- a/.agents/skills/protocol-migration/SKILL.md +++ b/.agents/skills/protocol-migration/SKILL.md @@ -8,55 +8,90 @@ Use this skill when migrating a legacy protocol into this repository template. Goal: Convert `legacy/source.md` into `README.md`, using the existing `README.md` as the target template and structure. -Primary sources: -- `legacy/source.md` is the main source -- `legacy/source.txt` is a fallback source only when `legacy/source.md` looks malformed, incomplete, or unclear -- consult the PDF in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous +## Primary rule +Do not change protocol meaning. +Use `legacy/source.md` as the primary source when rewriting `README.md`. +Use `legacy/source.txt` only as a fallback when `legacy/source.md` looks malformed, incomplete, or unclear. +Use the PDF file in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous. +If `legacy/source.md` and `legacy/source.txt` disagree, prefer `legacy/source.md` for general structure and prose, but use the original PDF as the final tie-breaker. 
-Core rules: -- Do not change protocol meaning -- Do not invent missing information -- Do not delete source content -- Do not silently summarize, compress, or merge steps -- Preserve exact reagent names, quantities, timings, temperatures, and conditions unless only formatting is being normalized -- Preserve step order unless the source clearly indicates otherwise -- If anything is uncertain, mark it with `CHECK:` instead of guessing +## Migration behavior +When converting legacy protocol content into the repository template: -If content does not fit cleanly: -- place it under `# Migration notes` or `## Unplaced content` +- Preserve all protocol content. +- Preserve all procedural content, warnings, notes, reagent names, quantities, timings, temperatures, and conditions, preserving their location. +- Do not change scientific meaning. +- Do not invent missing information. +- Do not invent missing values or steps. +- Do not delete any content from the source. +- Do not silently summarize, compress, or merge steps. +- Keep exact reagent names, quantities, temperatures, timings, and conditions unless only formatting is being normalized. +- Preserve exact reagent and equipment names unless only formatting is changing. +- Preserve the step order from the source unless the source clearly indicates otherwise. +- Do not delete repeated warnings or notes. +- If any text does not fit cleanly into the template, place it under `# Migration notes` or `## Unplaced content`. +- Mark uncertainty with `CHECK:` instead of guessing. 
-Allowed formatting normalization only when meaning is unchanged: -- add a space between numbers and units -- standardize temperature formatting to `37 °C` -- standardize volumes to `µL`, `mL`, `L` -- standardize concentrations to `mM`, `µM`, `nM`, `% (w/v)` -- standardize time units to `seconds`, `minutes`, `hours` -- standardize pH formatting to `pH 7.4` -- normalize bullets, headings, and markdown tables to match the template -- use tables for reaction mixes and other tabular content -- use HTML subscripts for chemical formulas where needed -- normalize note-like text to blockquote style, e.g. `> **Note**` +## Allowed formatting normalization +You may normalize formatting only when the meaning is unchanged and unambiguous: -Do not: -- infer omitted values -- replace vague wording like `overnight` or `room temperature` with precise values -- reorder steps unless clearly justified by the source -- remove repeated warnings or notes -- replace one reagent with another -- omit unmapped text +- Add a space between numbers and units. +- Standardize temperature formatting to `37 °C`. +- Standardize volume units to `µL`, `mL`, `L`, using the micro sign `µ` consistently. +- Standardize concentration units to `mM`, `µM`, `nM`, `% (w/v)`, etc., using the micro sign `µ` consistently. +- Standardize time units to full words: `seconds`, `minutes`, `hours`. +- Standardize chemical names to match the source but with consistent formatting (e.g. `Tris-HCl` instead of `Tris HCl`). +- Standardize pH formatting to `pH 7.4`. +- Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>). +- Do not use Unicode subscript characters such as `₂`. +- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc. +- Normalize bullet formatting and markdown table formatting. +- Normalize heading structure to match the repository template. 
+- For reaction mixes and anything tabular, place them inside a table as in template. +- Normalize markdown headings, bullets, and tables. +- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. +- Remove empty columns from tables. +- Synchronize `Contents` with actual headings in the protocol. -Output requirements: +## Disallowed changes +- Do not infer omitted concentrations, times, temperatures, or volumes. +- Do not infer values for missing quantities. +- Do not try to calculate or infer values that are not explicitly stated. +- Do not convert `overnight`, `RT`, `briefly`, `room temperature` or similar vague language into precise values. +- Do not replace vague language with precise values. +- Do not reorder steps unless the source clearly numbers them in that order. +- Do not remove duplicate-looking content unless it is truly identical and both copies are preserved in review notes. +- Do not rewrite scientific wording for style if that risks changing meaning. +- Do not fill in table cells with values that are missing from the source. +- Do not replace one reagent name with another. +- Do not remove repeated warnings or notes. +- Do not omit unmapped text. + +## Output requirements - edit `README.md` -- use the template headings -- keep the template badge at the top -- remove the template instruction note +- use the template headings exactly +- use the template headings in `README.md` +- keep all source content +- add `CHECK:` markers for uncertainty +- use `CHECK:` only for genuine unresolved uncertainty. 
If no uncertainty remains, do not mention `CHECK:` at all +- add `# Migration notes` +- after drafting, add a short summary in `# Migration notes` covering: + - formatting normalizations performed + - ambiguities and uncertainty flagged + - content placed in `## Unplaced content` - add `# Migration notes` including: - imported protocol metadata from `source-metadata.yml` if present + - imported protocol metadata from `source-metadata.yml` using only the non-blank lines - template metadata from `template-metadata.yml` - ambiguous mappings - - formatting normalizations performed + - normalized formatting changes - content copied verbatim but not confidently placed +- keep the template badge at the top +- keep ![Created with ulelab Protocol Template](https://img.shields.io/badge/created%20with-ulelab%20Protocol%20Template-blue) at the top of the file +- remove the template instruction note +- delete the "Template repository: Click `Use this template` to create a new protocol repo..." note +## Verification After drafting, verify the migration against the source: - compare the migrated `README.md` against `legacy/source.md` - compare any malformed, incomplete, or ambiguous passages against `legacy/source.txt` diff --git a/.claude/.claude/skills/protocol-migration/SKILL.md b/.claude/.claude/skills/protocol-migration/SKILL.md index 55ad0a5..7b806de 100644 --- a/.claude/.claude/skills/protocol-migration/SKILL.md +++ b/.claude/.claude/skills/protocol-migration/SKILL.md @@ -1,75 +1,113 @@ --- name: protocol-migration -description: Convert legacy/source.txt into README.md using the repository template, preserving scientific meaning and marking uncertainty with CHECK:. +description: Convert legacy/source.md into README.md using the repository template, using source.txt only as fallback and marking uncertainty with CHECK:. --- Use this skill when migrating a legacy protocol into this repository template. 
Goal: -Convert `legacy/source.txt` into `README.md`, using the existing `README.md` as the target template and structure. +Convert `legacy/source.md` into `README.md`, using the existing `README.md` as the target template and structure. -Primary sources: -- `legacy/source.txt` is the main source -- also consult the PDF in `legacy/` for tables, layout-dependent content, and anything unclear +## Primary rule +Do not change protocol meaning. +Use `legacy/source.md` as the primary source when rewriting `README.md`. +Use `legacy/source.txt` only as a fallback when `legacy/source.md` looks malformed, incomplete, or unclear. +Use the PDF file in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous. +If `legacy/source.md` and `legacy/source.txt` disagree, prefer `legacy/source.md` for general structure and prose, but use the original PDF as the final tie-breaker. -Core rules: -- Do not change protocol meaning -- Do not invent missing information -- Do not delete source content -- Do not silently summarize, compress, or merge steps -- Preserve exact reagent names, quantities, timings, temperatures, and conditions unless only formatting is being normalized -- Preserve step order unless the source clearly indicates otherwise -- If anything is uncertain, mark it with `CHECK:` instead of guessing +## Migration behavior +When converting legacy protocol content into the repository template: -If content does not fit cleanly: -- place it under `# Migration notes` or `## Unplaced content` +- Preserve all protocol content. +- Preserve all procedural content, warnings, notes, reagent names, quantities, timings, temperatures, and conditions, preserving their location. +- Do not change scientific meaning. +- Do not invent missing information. +- Do not invent missing values or steps. +- Do not delete any content from the source. +- Do not silently summarize, compress, or merge steps. 
+- Keep exact reagent names, quantities, temperatures, timings, and conditions unless only formatting is being normalized. +- Preserve exact reagent and equipment names unless only formatting is changing. +- Preserve the step order from the source unless the source clearly indicates otherwise. +- Do not delete repeated warnings or notes. +- If any text does not fit cleanly into the template, place it under `# Migration notes` or `## Unplaced content`. +- Mark uncertainty with `CHECK:` instead of guessing. -Allowed formatting normalization only when meaning is unchanged: -- add a space between numbers and units -- standardize temperature formatting to `37 °C` -- standardize volumes to `µL`, `mL`, `L` -- standardize concentrations to `mM`, `µM`, `nM`, `% (w/v)` -- standardize time units to `seconds`, `minutes`, `hours` -- standardize pH formatting to `pH 7.4` -- normalize bullets, headings, and markdown tables to match the template -- use tables for reaction mixes and other tabular content -- use HTML subscripts for chemical formulas where needed -- normalize note-like text to blockquote style, e.g. `> **Note**` +## Allowed formatting normalization +You may normalize formatting only when the meaning is unchanged and unambiguous: -Do not: -- infer omitted values -- replace vague wording like `overnight` or `room temperature` with precise values -- reorder steps unless clearly justified by the source -- remove repeated warnings or notes -- replace one reagent with another -- omit unmapped text +- Add a space between numbers and units. +- Standardize temperature formatting to `37 °C`. +- Standardize volume units to `µL`, `mL`, `L`, using the micro sign `µ` consistently. +- Standardize concentration units to `mM`, `µM`, `nM`, `% (w/v)`, etc., using the micro sign `µ` consistently. +- Standardize time units to full words: `seconds`, `minutes`, `hours`. +- Standardize chemical names to match the source but with consistent formatting (e.g. `Tris-HCl` instead of `Tris HCl`). 
+- Standardize pH formatting to `pH 7.4`. +- Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>). +- Do not use Unicode subscript characters such as `₂`. +- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc. +- Normalize bullet formatting and markdown table formatting. +- Normalize heading structure to match the repository template. +- For reaction mixes and anything tabular, place them inside a table as in template. +- Normalize markdown headings, bullets, and tables. +- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. +- Remove empty columns from tables. +- Synchronize `Contents` with actual headings in the protocol. -Output requirements: +## Disallowed changes +- Do not infer omitted concentrations, times, temperatures, or volumes. +- Do not infer values for missing quantities. +- Do not try to calculate or infer values that are not explicitly stated. +- Do not convert `overnight`, `RT`, `briefly`, `room temperature` or similar vague language into precise values. +- Do not replace vague language with precise values. +- Do not reorder steps unless the source clearly numbers them in that order. +- Do not remove duplicate-looking content unless it is truly identical and both copies are preserved in review notes. +- Do not rewrite scientific wording for style if that risks changing meaning. +- Do not fill in table cells with values that are missing from the source. +- Do not replace one reagent name with another. +- Do not remove repeated warnings or notes. +- Do not omit unmapped text.
+ +## Output requirements - edit `README.md` -- use the template headings -- keep the template badge at the top -- remove the template instruction note +- use the template headings exactly +- use the template headings in `README.md` +- keep all source content +- add `CHECK:` markers for uncertainty +- use `CHECK:` only for genuine unresolved uncertainty. If no uncertainty remains, do not mention `CHECK:` at all +- add `# Migration notes` +- after drafting, add a short summary in `# Migration notes` covering: + - formatting normalizations performed + - ambiguities and uncertainty flagged + - content placed in `## Unplaced content` - add `# Migration notes` including: - imported protocol metadata from `source-metadata.yml` if present + - imported protocol metadata from `source-metadata.yml` using only the non-blank lines - template metadata from `template-metadata.yml` - ambiguous mappings - - formatting normalizations performed + - normalized formatting changes - content copied verbatim but not confidently placed +- keep the template badge at the top +- keep ![Created with ulelab Protocol Template](https://img.shields.io/badge/created%20with-ulelab%20Protocol%20Template-blue) at the top of the file +- remove the template instruction note +- delete the "Template repository: Click `Use this template` to create a new protocol repo..." 
note +## Verification After drafting, verify the migration against the source: -- compare the migrated `README.md` against `legacy/source.txt` -- - compare the migrated `README.md` against the PDF in `legacy/` +- compare the migrated `README.md` against `legacy/source.md` +- compare any malformed, incomplete, or ambiguous passages against `legacy/source.txt` +- compare the migrated `README.md` against the PDF in `legacy/` for tables, figures, layout-dependent content, and any remaining ambiguity - check that all protocol steps, notes, warnings, reagent names, quantities, temperatures, timings, and conditions are still present - check that no source content has been silently omitted, merged, or reordered without justification - check any tables, layout-dependent content, or ambiguous sections against the PDF in `legacy/` - leave `CHECK:` anywhere the mapping is uncertain rather than guessing Verification checklist: -- `README.md` still matches the scientific content of `legacy/source.txt` +- `README.md` still matches the scientific content of `legacy/source.md` +- any malformed, incomplete, or ambiguous passages were cross-checked against `legacy/source.txt` - no protocol steps or warnings were omitted - no values were invented or made more precise than in the source - tables and layout-dependent content were checked against the PDF in `legacy/` - any uncertain mappings are marked with `CHECK:` - any meaningful normalization choices are noted in `# Migration notes` -Prefer preserving meaning over making the output prettier. \ No newline at end of file +Prefer preserving meaning over making the output prettier. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 8a3b767..f312e0e 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -7,16 +7,23 @@ Do not change protocol meaning. Use `legacy/source.md` as the primary source when rewriting `README.md`. 
Use `legacy/source.txt` only as a fallback when `legacy/source.md` looks malformed, incomplete, or unclear. Use the PDF file in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous. +If `legacy/source.md` and `legacy/source.txt` disagree, prefer `legacy/source.md` for general structure and prose, but use the original PDF as the final tie-breaker. ## Migration behavior -When converting legacy protocol text into the repository template: +When converting legacy protocol content into the repository template: +- Preserve all protocol content. - Preserve all procedural content, warnings, notes, reagent names, quantities, timings, temperatures, and conditions, preserving their location. +- Do not change scientific meaning. +- Do not invent missing information. - Do not invent missing values or steps. - Do not delete any content from the source. - Do not silently summarize, compress, or merge steps. +- Keep exact reagent names, quantities, temperatures, timings, and conditions unless only formatting is being normalized. - If text does not map cleanly into the template, place it under `# Migration notes` or `## Unplaced content`. - If any interpretation is uncertain, mark it with `CHECK:` rather than guessing. +- Do not delete repeated warnings or notes. +- Preserve the step order from the source unless the source clearly indicates otherwise. - Preserve exact reagent and equipment names unless only formatting is changing. ## Allowed formatting normalization @@ -26,15 +33,15 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: - Standardize volume units to `µL`, `mL`, `L`, using the micro sign `µ` consistently. - Standardize concentration units to `mM`, `µM`, `nM`, `% (w/v)`, etc., using the micro sign `µ` consistently. - Standardize time units to full words: `seconds`, `minutes`, `hours`. -- Standarize chemical names to match the source but with consistent formatting (e.g. 
`Tris-HCl` instead of `Tris HCl`). +- Standardize chemical names to match the source but with consistent formatting (e.g. `Tris-HCl` instead of `Tris HCl`). - Standardize pH formatting to `pH 7.4`. - Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>). - Do not use Unicode subscript characters such as `₂`. -- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`.Same for `ChIP-seq`, `ATAC-seq`, etc. +- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc. - Normalize bullet formatting and markdown table formatting. - Normalize heading structure to match the repository template. - For reaction mixes and anything tabular, place them inside a table as in template. -- Normalize markdown headings, bullets, and tables +- Normalize markdown headings, bullets, and tables. - "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. - Remove empty columns from tables. - Synchronize `Contents` with actual headings in the protocol. @@ -56,16 +63,42 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: ## Output requirements When drafting a migrated protocol: - Use the template headings exactly. -- Use the template headings in `README.md` +- Use the template headings in `README.md`. - Keep all source content. - Add `CHECK:` markers for uncertainty. - Use `CHECK:` only for genuine unresolved uncertainty. If no uncertainty remains, do not mention `CHECK:` at all. - Add an `# Migration notes` section listing: + - formatting normalizations performed + - ambiguities and uncertainty flagged + - content placed in `## Unplaced content` - Imported protocol metadata from `source-metadata.yml` (only the non-blank lines).
- - template metadata from `template-metadata.yml` - - ambiguous mappings - - normalized formatting changes - - content copied verbatim but not confidently placed + - Imported protocol metadata from `source-metadata.yml` if present. + - template metadata from `template-metadata.yml`. + - ambiguous mappings. + - normalized formatting changes. + - content copied verbatim but not confidently placed. - Keep ![Created with ulelab Protocol Template](https://img.shields.io/badge/created%20with-ulelab%20Protocol%20Template-blue) at the top of the file. - Delete the "Template repository: Click `Use this template` to create a new protocol repo..." note. +- Remove the template instruction note. +## Verification +After drafting, verify the migration against the source: + +- compare the migrated `README.md` against `legacy/source.md` +- compare any malformed, incomplete, or ambiguous passages against `legacy/source.txt` +- compare the migrated `README.md` against the PDF in `legacy/` for tables, figures, layout-dependent content, and any remaining ambiguity +- check that all protocol steps, notes, warnings, reagent names, quantities, temperatures, timings, and conditions are still present +- check that no source content has been silently omitted, merged, or reordered without justification +- check any tables, layout-dependent content, or ambiguous sections against the PDF in `legacy/` +- leave `CHECK:` anywhere the mapping is uncertain rather than guessing + +Verification checklist: +- `README.md` still matches the scientific content of `legacy/source.md` +- any malformed, incomplete, or ambiguous passages were cross-checked against `legacy/source.txt` +- no protocol steps or warnings were omitted +- no values were invented or made more precise than in the source +- tables and layout-dependent content were checked against the PDF in `legacy/` +- any uncertain mappings are marked with `CHECK:` +- any meaningful normalization choices are noted in `# Migration notes` + +Prefer 
preserving meaning over making the output prettier. diff --git a/docs/PROMPT.md b/docs/PROMPT.md index 7e58fef..b991a6d 100644 --- a/docs/PROMPT.md +++ b/docs/PROMPT.md @@ -5,28 +5,105 @@ Use the existing `README.md` as the target template and structure. Also read and follow `.github/copilot-instructions.md`. Apply those instructions even if you are not GitHub Copilot. -Use `legacy/source.md` as the primary source. +## Primary rule +Do not change protocol meaning. +Use `legacy/source.md` as the primary source when rewriting `README.md`. Use `legacy/source.txt` only as a fallback when `legacy/source.md` looks malformed, incomplete, or unclear. -Use the PDF file in `legacy/` as the reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous after checking the generated text sources. - -Requirements: -1. Preserve all protocol content. -2. Do not change scientific meaning. -3. Do not invent missing information. -4. Keep exact reagent names, quantities, temperatures, timings, and conditions unless only formatting is being normalized. -5. Normalize only safe formatting, such as: - - adding a space between numbers and units - - using `seconds`, `minutes`, `hours` - - using `µL`, `mL`, `L` - - using `37 °C` style temperature formatting -6. Preserve the step order from the source. -7. Do not delete repeated warnings or notes. -8. If any text does not fit cleanly into the template, place it under `# Migration notes` or `## Unplaced content`. -9. Mark uncertainty with `CHECK:` instead of guessing. -10. If `legacy/source.md` and `legacy/source.txt` disagree, prefer `legacy/source.md` for general structure and prose, but use the original PDF as the final tie-breaker. -11. After drafting, add a short summary in `# Migration notes` covering: - - formatting normalizations performed - - ambiguities and uncertainty flagged - - content placed in `## Unplaced content` - -Only edit `README.md`. 
+Use the PDF file in `legacy/` as the final reference source of truth for tables, figures, layout-dependent content, and anything still ambiguous after checking the generated text sources. +If `legacy/source.md` and `legacy/source.txt` disagree, prefer `legacy/source.md` for general structure and prose, but use the original PDF as the final tie-breaker. + +## Migration behavior +When converting legacy protocol content into the repository template: + +- Preserve all protocol content. +- Preserve all procedural content, warnings, notes, reagent names, quantities, timings, temperatures, and conditions, preserving their location. +- Do not change scientific meaning. +- Do not invent missing information. +- Do not invent missing values or steps. +- Do not delete any content from the source. +- Do not silently summarize, compress, or merge steps. +- Keep exact reagent names, quantities, temperatures, timings, and conditions unless only formatting is being normalized. +- Preserve exact reagent and equipment names unless only formatting is changing. +- Preserve the step order from the source unless the source clearly indicates otherwise. +- Do not delete repeated warnings or notes. +- If any text does not fit cleanly into the template, place it under `# Migration notes` or `## Unplaced content`. +- Mark uncertainty with `CHECK:` instead of guessing. 
+ +## Allowed formatting normalization +Normalize formatting only when the meaning is unchanged and unambiguous: + +- add a space between numbers and units +- use `seconds`, `minutes`, `hours` +- use `µL`, `mL`, `L` +- use `37 °C` style temperature formatting +- standardize concentration units to `mM`, `µM`, `nM`, `% (w/v)`, etc., using the micro sign `µ` consistently +- standardize pH formatting to `pH 7.4` +- standardize chemical names to match the source but with consistent formatting, for example `Tris-HCl` instead of `Tris HCl` +- standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O and MgCl2 to MgCl<sub>2</sub> +- do not use Unicode subscript characters such as `₂` +- standardize `RNAseq` or `RNA-Seq` to `RNA-seq`, and similarly for `ChIP-seq`, `ATAC-seq`, and related names +- normalize bullets, headings, and markdown tables to match the repository template +- use tables for reaction mixes and other tabular content +- normalize note-like text to blockquote style, for example `> **Note**` +- place note-like text immediately after the step it refers to, or at the end of the protocol if it clearly refers to the whole protocol +- remove empty columns from tables +- synchronize `Contents` with the actual headings in the protocol + +## Disallowed changes +- do not infer omitted concentrations, times, temperatures, or volumes +- do not infer values for missing quantities +- do not try to calculate or infer values that are not explicitly stated +- do not convert `overnight`, `RT`, `briefly`, `room temperature`, or similar vague language into precise values +- do not replace vague language with precise values +- do not reorder steps unless the source clearly numbers them in that order +- do not remove duplicate-looking content unless it is truly identical and both copies are preserved in review notes +- do not rewrite scientific wording for style if that risks changing meaning +- do not fill in table cells with values that are missing from the source +- do
not replace one reagent name with another +- do not remove repeated warnings or notes +- do not omit unmapped text + +## Output requirements +- Only edit `README.md`. +- Use the template headings exactly. +- Use the template headings in `README.md`. +- Keep all source content. +- Add `CHECK:` markers for uncertainty. +- Use `CHECK:` only for genuine unresolved uncertainty. If no uncertainty remains, do not mention `CHECK:` at all. +- Add a `# Migration notes` section. +- After drafting, add a short summary in `# Migration notes` covering: + - formatting normalizations performed + - ambiguities and uncertainty flagged + - content placed in `## Unplaced content` +- Include the following in `# Migration notes`: + - imported protocol metadata from `source-metadata.yml` if present + - imported protocol metadata from `source-metadata.yml` using only the non-blank lines + - template metadata from `template-metadata.yml` + - ambiguous mappings + - normalized formatting changes + - content copied verbatim but not confidently placed +- Keep ![Created with ulelab Protocol Template](https://img.shields.io/badge/created%20with-ulelab%20Protocol%20Template-blue) at the top of the file. +- Remove the template instruction note. +- Delete the "Template repository: Click `Use this template` to create a new protocol repo..." note.
+ +## Verification +After drafting, verify the migration against the source: + +- compare the migrated `README.md` against `legacy/source.md` +- compare any malformed, incomplete, or ambiguous passages against `legacy/source.txt` +- compare the migrated `README.md` against the PDF in `legacy/` for tables, figures, layout-dependent content, and any remaining ambiguity +- check that all protocol steps, notes, warnings, reagent names, quantities, temperatures, timings, and conditions are still present +- check that no source content has been silently omitted, merged, or reordered without justification +- check any tables, layout-dependent content, or ambiguous sections against the PDF in `legacy/` +- leave `CHECK:` anywhere the mapping is uncertain rather than guessing + +Verification checklist: +- `README.md` still matches the scientific content of `legacy/source.md` +- any malformed, incomplete, or ambiguous passages were cross-checked against `legacy/source.txt` +- no protocol steps or warnings were omitted +- no values were invented or made more precise than in the source +- tables and layout-dependent content were checked against the PDF in `legacy/` +- any uncertain mappings are marked with `CHECK:` +- any meaningful normalization choices are noted in `# Migration notes` + +Prefer preserving meaning over making the output prettier. 
From caec00571643839902d0ea2bee956c121ddbdab8 Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 15:32:30 +0100 Subject: [PATCH 06/13] fix pdf generation --- .github/workflows/readme_to_pdf.yml | 30 ++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/readme_to_pdf.yml b/.github/workflows/readme_to_pdf.yml index 97bacb4..c53c3de 100644 --- a/.github/workflows/readme_to_pdf.yml +++ b/.github/workflows/readme_to_pdf.yml @@ -42,14 +42,37 @@ jobs: set -euo pipefail cp README.md README._pdf.md - - name: Trim README preamble for PDF + - name: Drop leading YAML front matter for PDF run: | set -euo pipefail awk ' - found || /^# / { - found = 1 + BEGIN { in_front_matter = 0 } + NR == 1 && /^---[[:space:]]*$/ { + in_front_matter = 1 + next + } + in_front_matter && (/^---[[:space:]]*$/ || /^\.\.\.[[:space:]]*$/) { + in_front_matter = 0 + next + } + in_front_matter { next } + { print } + ' README._pdf.md > README._pdf.tmp && mv README._pdf.tmp README._pdf.md + + - name: Remove top template preamble for PDF + run: | + set -euo pipefail + awk ' + BEGIN { started = 0 } + !started { + if (/^\[!\[/) next + if (/^> Template repository:/) next + if (/^[[:space:]]*$/) next + started = 1 print + next } + { print } ' README._pdf.md > README._pdf.tmp && mv README._pdf.tmp README._pdf.md - name: Exclude migration notes from PDF @@ -107,6 +130,7 @@ jobs: pandoc README._pdf.md \ -o "${repo_name}.pdf" \ + --from=markdown-yaml_metadata_block \ --pdf-engine=xelatex \ -V colorlinks=true \ -V linkcolor=blue \ From a876657a9855081dc0c1823c3454e747384d9604 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 16 Apr 2026 14:33:10 +0000 Subject: [PATCH 07/13] Update protocol-template.pdf --- protocol-template.pdf | Bin 31652 -> 31589 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/protocol-template.pdf b/protocol-template.pdf index 
808d2c0096bb25caa23927776170faf9d345894c..d2997b4926c7d5c90cade28f0dff156c85df5231 100644 GIT binary patch delta 12216 zcmajFQ*M^fE0q*OG3a0sqsss35 z=m-Bt=j4C%JMZmo zMtTzq%ge{iezSUjQIy3Sy|B6oT}gQH_P_sWWgwLD)ot!cL<$so;<<^niEJDHNTT1G zdoUyEqxFJ~n*(Wm{U z>AZLNdhKH;mS}iFDu2RBpa#l=x5DhLSq`hCN{GiMTSgI#6KLq%A(vzd6+hU@Rg9(F zoIYob{ru5mTVo76X$o=RR!tis@dxE`mtt}(GIrN!DiKKZqD{#9NoOB?;Yc=~x&fil z_A)%ZX*i}83~C`_m=4a-0HlCC7^cZaZ?aHmbOcM|T??7;d^}EzB@Z1~)rQ9~9>ydT z^HZbx39v9Re(F4h59cB^h&{zRBO~uV52YvMOzq*R|5fG5%jA_DmWiH?v+Q@W$2C_8 ze41bIQUH2A%zH(RT&qyv9Dq{fqsM$&;|eE|-j-YgR1|LiiIg|N%2?0+y@CA5r%FuS zKE=n@*I|AJV=-{{MI>!c_hQ!><3phKlZ9iVPC2ZyY;ojrtDl}Huc5Ao%EP0KMVhjB zPYx^#Q0R@ckk89XBUzdFDH2oZUE|4JQT?vH;SapK#ssn;`;{n%+Hk68It(?6E6Tz! z$P8lXj;lmb0<6g7+=6-BU}xoFS-+E$3;wiwC61Y&%JRH*5Vs(T7ZD4{gmWFeISX@? zyGhr|zd>EYWQ*Z(PrBzLQpf$IgcK-svvw$PV=t@qLM|xYH8ofdl60ATS9;6gEX1q8 znFFpaK(jX-XO(VENV1bZv1XMCs7`(8h#&f`%Q0O}z5y z_|{>~NerG*?EdacdvCb1{W4tEIg#*$5CX_o`Ou8XGUPnKD0xqxC~bw~Gc03drDfmW zvbU_Zow4e$lQ5+ z&XkjT)J7;Zo;c$|=iiqniSJXpzLcd>qB7i<{C=eS&uK%-T|oLQah%cRd}FBs{Yb&A zX483RL`F+dU|y;C1_$@i;O7Ah1|{1fLU`_VV@o4rdd>1+r1@C0o8O?8xhl87WD#&_ zFxH&iio6WHxGtmo(%y`J|}PhMi4i>4`rW6z?YizVT>ZA{Y|BGZG&4saE|3kU#x`lX9o`4JXAoO_a*<=DL^oJ%P%uIHApz}rKksc;}j|H&~ z(dx7N?sMtOQQPkf1o*BbYbZW&?xa#EHsE?9gAaespcNVgh8IAELgJpB@a3`);T=Q& z#~0G|y?&qH{7jpD%h!(JQaJI><_z`Ao{8%k4w`crHT4|z*frQ8rKyMa=K(3CFTBY3 ztI<4`Xh3u=tN$&*4^X?%t>C#Jt;4eV3-|eV$NdEn4WW_Vf!Se8&)y~TJxZrBHL$K5 zGY;lByb^X;Oun3uef|$H*Wax#o2m_ly1@mV)tj;L9=&%mRpkUt?-=4{n^^QYeHq76 zA>GfsNuZ3?pV?u(lUwQU2*C=>J<01HxkvS-8&Zl%yl~=o}WrLMwVO-*?KL)*@5e46xM5Q48|0oo|!l7iQ*)*;_Jt6T>eJq$ZtsY zLG^6vwKdPEjFYdHI7}`1n9M=IVy+m^Av$`BagxCvcDml7jh&osM!uI32mE;&F6)^+ z8OomSqefVQi#cjp(12KxvgAJa&Hk&t2_BF(rqH5?b-L{c$XJ1Gqr{VvBw+1ed0DY8 zF|}y>)bKg6DM~bQav06Ea)0vm4!O1wMwcdSN3B|I!<5OQdzCnFsCHfxlrXi+rsGYRF87 zMI8nrL68Grb>=u^=?`5%9Q4+OgF`zaW0~9l<>GMyd71l2(}H4^?aD)rW9d7%2U{p> zH;S#8F)_oJgDRh&_IE>_KGh8R`ZCYwX|(W#(=CmK6PqvLoL&`#KzrpRSP%4rr{|g# 
zG+uhfgv2x>Ii*#(D5^}|22ZiC$i=|$XUKc$##6ZbP^IxjWyCW~7BBGFV3P!>crQEXkW)bA@F< zOvsGRa%Rhub3MEMhSyz@K}b_*CPgoVi&oFDNOz`Hr(mvsMz-_D0$I{_F0kP^!m zjp{cCnVpd@hQqpW^BQH{HxZ?pb(y9sknpekZ6#*4Cj@zqK!k|Wh3 z4*4NbHvSnAchd8k9D|_Nbue3MT5wVvr9t8)SXs%Tf#V_S6ZUgABmy@Y)c>1l^2 zx_5aS&mq=>rkX_fNI;U`Jh*tVNSd6X$StSEa2AU_9C84R59xXZ?%!J(5R5>CMK0+T zju*%wCTG6Y2k*JQq|sxK&w{x9f$1G+0#GTW_!wD(^e>EyrP{+s{j68 zQoOJ=)-il$C*d0}diRfFMoQLt_>Ll zngy~2Id+gZTRa5F61mKQPh5~(wldG76CFrK=$7(+M_yRuZQ!ZsJw1-^w#4S}^yzfC z!dB*5+PQ9WG-V;drSWY%!FZUmvgd#6&-Jsgk{y9VBBl4A|EsMaVQKxZ_-@!lvS3AL zG)jTV+)fE`*K90ok}@@UKcle?vOhe+4ZC*Kz}$ zF>e|&r#=!@tykHbn4q~O9QDK7R?0%*0`i!}KASnS6#y*ghCVtd{pU+_R&bR)-n$|G5UL=_c9X9zm z3{tH>2|}Q;?9slGYVfq0)Uf(8IQYM%doAyf^CrIRS>ly)V&do3-)XH44u0nRaQqTWr zT+)^zi&wF3c)b`<`24`nH4YiYfA7KASe&{(g|41d_?i%u^eycl(BRp51 zGnY-BWHa)9&pv;}ol9=^eZZQ2T<+p_gx`fj}A z6V`DD)$6+ZlIvf1mL-uYERou;-LVVS^$&O|@7_0h{`qZ?r$>?Vo80Q?X^Apo^;~>$ zCDRevRCcZ^vHRaG45U#$LJ}GhFOdEJR=R%TMpQ5}YS@iml%BhG)X$n$5>d^Sq3eWo z!GPEzx<%Z=p3{xG<1_!i=vXALZH6a}ABmueN>i;=DBp-WW)Yyh|3-C zoF;p@N<5>Pn8u+TwYf59&MKMqSCb3!S_yVvN$%E`sk2A+-&l)%qKIJ5uztwLA6YO# z(;N8r+_w0$X4tKv*!r;J!n$I;CJ|-%l;g-D0Px0-UzncudP>mucM33&gg_2V60wM= zAEvO%VyJ&HluMh3kAqcEtznHG2o@Ggd|Z4^T0Ct-+pPqU#xCRV{_$66%$xM6)tRTf zeS=xOSBw6KG)B@8GAB^pCggwcen;`x(?k>7Fd&e%M=j3VO>`pk44XFhXNHu+{PUXv z_?!y=m^sYJn7lAIGZ?preAxgP?7_-Yhk``p!^js`-RJKhPzi~-3bS71`0U-y-;#X*3T>$Z1P4hh z6Uc{j9VR$uNECwAE3zB3yg>IRAkTlP;GuE?xBf?Tq5lzGhpl#$(3TE7nJbovR@=n7 zps<`Imr@eyL8wC?Rf55#@Q!P#8%7vlOU_?!)hAQa0nccY_Nkx z+e3F=bgU2wu|*s_w0;|1-{iKonbHJN17soYNyS2xKR#Gxf9qe?KMGM3V=o%@se*wf z4Bl|s*kB|#?xU|mvkopOA1PlITofq+pL5BTbiM~rItVvT-E5=<`NmAzo!&f`?OUZqZ=MuY>$OO{cUV6<}chAI7_Yb zTlAkyZ0tUnLmdk>1y=1DVS|y3InVvl&H8L?9S#dCMJG62-k;?@=-;~E23Om;+a5b& zzb}}*w-JFW&u^XpF-jd?@+U`P8S%*_cVM3RIh`NTp!XHr9#HqliqqpRzHK0qD6K_t zA@S@{f}bF|M|W*!_)b>fOdHZ5m8S#M$0jVbw84IK@m+MAw_cHIxx|n4kLw2$qb7ha zCo6BA{%mi!6h?%=JPuw89#bMhf+P}GH7*@eL;}bnB5|CrX*wK7!LI&8Rs{Yrh&QfA zSZ{<-MapRxupXr40u>ZR`=3CD?rc>NB&`!1Jeo;RX$S_~v^@gBP@D?YW3(1VFNL0F 
z&~AokUozYmx@YMulbI+?UygG01`j$2lWJK&a%io_+ zL0?yTMSFOwN5^0~gHB6=qxi%^agwcD5%_SztkRX{Hqt+9JSixj6Rs?<27MJsc&Vi*n_|*&eSgEMSsV3GGh6;$6UKV9v@NCDZP?^1)Bm zzqqeca=4G&G$k8BtEHM*!KBD>i?&cC2U0vko+fA9xx5uwPlUp+Ym@+OLa%*-G40z1}w>FTB^l#{HK@k z7U1yZ-=0l6@h53KwQ9^KJeqPi{7ulHH7ZyMJ`%QfOrtH}K}+mP*q`PzPi*F8b}=Q4 zf9&SD?pwL}7|`~Ro3qv7dye<@?g3t&Cc;{>yxp@4-zQwyJzNsHwpeq}M1^?b)ot;8 zP$ad?a(NR6ya8ky9+au_ z2m5?@Q@3=!Cv2t5QWR$zDH$M+q?dH zr>;TnN4AIXt6G=A_`>AR9tP+$?Zvl#Sr?5OQy$r08mf-;r=6&}9#EMt4&uM;e6Xp2 z9L)c3ojLfQiu{D>vSvo}ou&gDwrEc>d&%gA;B&E>R9%o7yRUb6wK9*_SRV!{7;7Hq zDv6=~;q4F}TKL=k{HMVw`|9?cS!ZY5+2{$VlA^@_6uaPdC-$nk@O*Y!Xgz_(db>4z zXX&MF;Df!DGZIL=^XB_yy>)Oh!Z^410M{jC&!$S-U6-|Cw&&!ah+)SW1<+Xto7lI1 ztiLKF2R_&c>Cf2LTX?@#XV%Z_oxDX$wIU2v(u;LKj$D-HGP!It*j!~SQ5p0la&|q@ zVQqZ(dD_Rj^Sc3cPTHuME}prklUS9oq=V-%8!(eesesKA^biG6*xnNKFjuG$X5|$K zsMh9+B(eNRtgAGSAvuXJh>4kn_QHkXnO+2d8j-?p#GqV%a1jwrl8(H?$RJd`@!ujG z#`!r^_@3#%KO2B63{(jsCM|(7k-mgR zY|-66gfO+bf;W~Ky$1n*%W(3ziUes-Ar?hORaO_*qkiEI=`_zGj3Fslb0QWDh)5?E z|AlJ8hv&ktpi)*=-I_%s?aa-bad~)GUy-e(*#7O5PP}+|)As8gn&2bFSC?J1bFb1i zZERm_%s?a+&5?832iRyHxD{E2{QL8I!;xJSxxNd>Zne2-8HGSuCqgC{rpr`*1am}2 zH7sl#Tn4*qjI^tL3{Lj@=BvK4Gxtm7VC}5SWaREH8|Ja0kTu@~ZPv(boqc=3;{wcJ2qOmHmv zhzQwVHkzA5^Xr`>`Z_f=%e}T8MF-E9;x*T6CS7bU*OQ+QGd(+B-~awyOQ6BPKuXNv z0YyL?$uj$gCEFVmDm9|jjrc39Y#EvX0jAO+1xjJLgP)Nj-RFNh5j^#OgiB?ud06lg z0bfV&upmWT^e?Z@@Mg7dXHY1YOJJYD?W@)ybx|)X_u5 zt5XLT)(<@+qn1Z09y_l{3m$dM8pDtiM`RM0(APJzN|(^#7#1v5Qs3UKn^lNQfU@UV zf8WaeuoSZwd!PSxuh?&6o)b2c8cvnQa|mX-BIV?(&E28LYSk)zd@?ok^*E?hCJw@f z@R^e}p`w@)u@c{jrIJh}q>R!I5^Mjl6>lXgkj8C9h?GD_q9Ie`wttnWSO(d=Kn@r1fY!~t& zmtdb-IQ2Yo=n1+nMM<%FafRZf6r>Y{{3_X%LxO{-C!Hnoi{ekcb$8=#@Qn2=Dimz$ zC~2+jpzefJnqsgR*NF@y9TTDTG|4`Dd{@7hMFk0HX%KhA3<1 ziTa%@-g$@K^Dy?Y>(pgf5?HP*m{7h5y|ZL2j-$4J^fkEZ7>aW7vcNMO@E_x z5Nm-trM06nL&$M2Dm>H-HOG;~mHza$8s`vm53f$s&R{swX1n5asz$?yCV26w9E^(F zRU5;gF<70esrp=q7}dZI0}Wg9>$}4)?QZJM$h;LQho;mXY6}@6$t*ngMQV{l7rZ?k z;j#d}uT&aP=L^jw0@0sO^7IJBT#Lc)w8&=3+wgq7)-8os2h7JdL`~=lSQ@8+A}BE# 
zUWYpJ($=^QPG>x)a5#E5__h2FYHli4P@O$vg^RrUE-qh28{sycK+;J8k-X@%aCiV5iC?GsW~cm zbX7N0t}Qmjx(PStYYW_8vckF~>Z5m*ew+j0 zI|e!%e%xqZx-Ng609W$aT(9#ii(_2q(@)S%mkEQ9 zPEjwA7d=Nu>d)OQN-FbELW7B~kx(ZYe=rkBum^&J9Xbo6wYD(kMgU@Z)6`XahW<-i{@>)q=Z7(DVCAeMWumZ@si z#PDKewUea{faG>#BzIl!_0{bm(sRFiH=wujCuV|I6iV_GEXn?a+d@6S>5~E06)Ce8 zbmO9|Wh*CW$LOQ&6q%e(&QoSSQm#HK%j1&#@`FaXE@x&?&wef7S6L%3CE1ch!*cw+ za6=agc@6YIVvx8B&noXcPqn6VLh!T#jQSi>Y_^iE1G5Gpo?J9v^vii%y|O!}gy8k% zCfe8Z!8r|sNfDHl zs65mDynK3)E*UC&*9WQ@O8B*eT12p=P#ut%4RD@{FhM~y{1)W#IQtrpn4-vfB9pT4 z#DNAz08bLv{SAIgm#53zom^UHnOaf37pZEj+p1asC zDBqE@V!o0ef*pueUsB}F)@-~cvlsn4j66ZXKo&kfzPz~?sQT+`3a7Nv=JqH68Hs4O zm`qO|;Yvfg7EuEmZ7l27!GXr|!CHjv7O_htj@V&S(*RD~VBH%bZ{EkZkYZPuE?(V; z*$!yTj5isf;G_DZ8Xd9YMK}ck) zlo`GzzpOJYx?$uY^-Ys4VmSHI#+-x#nP&g;=<$blU0m|8u<+hd_J(Fm%Ha^nTSsrx zW5d`;FZc<5Gkly!@3U)Za|;V^?piJweTUsEzt$pn!ISD(&!fgX~~)p zL<}gEp=50;x>k*pQga+2*FL0+Sh%>Db}B>)m?z+#kG8&zBGoHmbt?7e+zd%PNKWbt zNEgm#{eN|eH#;J)UL?HNJ**v)29IUSUqXGb0hW;_AUm**`}^0)O&dD88;b{NTDw*Z z_r%*y&PPqYCZv7ZrNWU}nmpxzhY+Flyj|>ERl<4t!i_@To&9t1HuM10s|s!<4OKk@ zWx0N0;#x<3Ee$oj$svEvG~IS2=rcVao~lmxNB~wJ#LeHce$pd%+ZIt;eUx5)(SFWT zBNG9`cyu;~)Tw7y?h5A51Ey6C!HZjzDUwLVpFH1l<@z;`TQMeF@8YFE&p+zc`u0{r zP^81tK18V;>Azox11R4FPGnwZ%6`{^2NMr@8wCn)(viE>lp$wROh0iUx2c5wdVLNZ zyZ^fsluHnaIGpPI6W5HP zpXMsqqUHs7b%`Hc>!r(`Mh)WG+{V^6-8q47x5iHo7+~($-26*j#vMWnl@w@LX*XU| zAs;HSsXYl*^(YYd*L(I};S3bDA3Q{94r=}fHmyu``+p|Eq;@={BpwPLRHgKG!yu^v zG8||aFj7CrkpCHyn^Gx|rB%59m(OygW8<*Fjs4ZzCrVtTKe+$F0)Rhc$tX=~mYBD< z7&$lB_n{L-vW+s1G7J}^jpi&lNWc2$Ek)k|c zg@`sQVMBv>r$~sQtzb!QI40+*Fv9NS#A*YTO?z2*7vgRB`2+Om`1wVm;|6^%3Seq5 zEE}dp%KLdwNQk`!MtFgzPwiIuGZ2Uo$%xE&!Pt^Nqp=n6(qxcWSSi- z84`2>6ne=&B(;SRp#m^bSszI?e?5|pI8TP_jkq4e{ycRGkYvsPL6#VXK(IqUqp4wE zpu!(HE+ojliZMFEx*^wkrtp+&%pTy8h$#_>K=Uw*@Wss<=fD63iDtc=;gOCFON_$m zB@py)2OZ^m3qm~=(Wt7yl7&wLlPJv%JBOL6i|GLaA|xhz^x=#L`4I!s6R4cXYZ|kJ zlLLC;2vGdYc+8fAacqwahy6%lPehhrQnx6~0acdlR-u~WJPJ6Br$Iiu*seYzA&1b& zB7~k$#B+r=(xgBtFO&#y{vgGqoG^UQvI=AQ|#ygfA2;C*!`9fE@3 
z(BOTr3C3s?oU+HZmy%isY$42{rQ&brj|#5J+UN}{9zvi$t^KjOer)BLzrMkaT)10C zpRcxFa#1G=?=$_YRrjg(AYS(+9zTq5U$~Uj?fud3@1asG-0s}PAxo_|+y!iuDA#!( zs^MyaC`Gfg7Ca}=$bsU)9(9|u1j}-g`&Cq8claqk2#bhsE+wz>Mct{pEud0{OD+6_I@HAxJmJOQ((1A46RSN&9|pEsP3v;hIOvw#X(fNxKdNz$Q3ft;d(R zs=047f$WvHZ5sZ|HD9G}JcBfRF@Q!snZj;I^F{7xiSCOj<5y>y^U$(D4qfkc20`dj zrG*RD=h-Uuu-^7~xXnQ(3s!nJSCu)XWQz8ScP*fhF}~@#snT)gS>m$b1Yhdr8)Qb9rf1Jf^tRcU`Xr<{9Ni1QUa?#@90E`- z*^#>#2|r8&erATU2cfI25~)qumo0!=jTn6Z!bt-$FTUO__toLRdnO)R{Xc7u^(-V3 z-dc2!90JvbZ?bL79w}8s)@O(9kFC^R-g;WE$C~F}9t0(DuUSM=cw4G3)td1;qPaUQ ziqF^h=9PBagcmVaTM%&t?dv{&LCy}>PSfoMnb?Gu*N9b|_s&&zFG?yMxzcsRt&12| zTPj8;GI8*@jIBXH*$>UtR;|p0&4}CwU6HLu-(O1geB)Q7eO*Lwd+mCQ2;My?Sw;YO zs;6e(yN=IKyp3klNn;33{#K0xl$XPJ7SOOl+=xlV>CYFbhM^ByRjsN_&S|^lLJkHm zNiLJeaD<1Bjz7zzrFMtwHLcxp-4L*q#M?+GN7bcik5}73O8TWg4Pn;6>gYPHX3whrGPA)TYXr`Ippw=|dN6}5LOkQc5;6jMOxHa%wF zs?%6C9;MiL9bG@1Z*_n6L`ohM?AV8g5Ek0%EZ-_gZ74C-cFa0P9Yc!C+4x)dQdVI+ z)h`R#+eNekZEMq%ak;vQIBfLb*%smgei!Y&LI5KEQDI){WBy*>3>>OK$w#H*iKWdi@yW6k#JB7{s;!(@ zE9+{T16i^mJs`4%it?q>WlVx8x2(d|_mUJj{Ky$RDiQmMj;t=5jcE@TSA3VwyM5wRedi|?&hZr>o&Ym|b2$?8voD`PtN zXxudK@GU-SH1X;Z*00rP&mVi8C;Gi?0?Gppb^0r1ryY#l*1!K0T*B*S)Mm=$`YQ6` zsjhlU{3uF?a21oh%Cy*Fe+#HXxQnM7FI7S*ad?&H>?KTj=&3u{q2@Z6S^3?x^9+dr zy;1kYFjK^fdgQ5<<})G`LOO(`nVbNeDw20_7+(|FRejiZVXc4xtvy)lVH0B?ZEOI& zc{4ySw^*TZ8f z`SR(dewH!CYo1L>5GMWbbjC#=3R!^%MwzN2tk0y~>=YzV+!9hTTDx{*d!-!cB8hg4tlDyb~$;=3i2$qhHX+h8) z)CzL(Ir+HjEEma;o;fT(p$l^I86P<;Tx{<0Zc+b5dvRX?>`w@CJEdQA0g{%6Rjv>= z9~4Usy{g_F%mi%V8;p}rMy|Wy`Hy(Mg&;W^(TJzCXlX2qN{2IXwa#S$VppIevZBB; z>>L-YcyRPHruNvEFpG7gVWFPiAkQFe@H<2HQfK6{*xx@VykL2nAk4)603vV)D6Au_ zZV~gXA&0`LR3~8=rD=>kOYDICHZuMhyH-f% z+-Z_kt~-14i(Auj!@A;y%OwHDFfd*PqCzAW(ajbJ;*hw5?WE^h_5@%-KCu8y;od*i zztZ)JTsIlt9b1~PW~0Co0d13Xi`pRMH+7Yl1A zw5BgrV}-fE4L)4Wyzy?&Ul!y^YfIZ_xE;PKz=?v>V{;t?)l!S*V6ji94%{hfuF=1H z1lj0IcT$JM8or1pms!9!e-{a-dfZU>V7zxhjM|qD%^}L_cRoOrtH(oSww_<43$lOT zJ2&3S@7vvSlJof15Wjc*kzQc}?pN873wxz(GI<3_n5HVLjD~$v-fkUJXy?8HS^OV6tP8- 
zb&N1(jSpJw9V(|<^W1s6VsD+`Bvrb;`Szy8pnEC7zx(sor?d6c0c&1=F$QC&xbhz|`1|8bfM`S*K%>P|)MfcCq`_TqZa1=+m{8{3 z2}yBwa63uCH<1+XR)@f9+Fp}jj?dAw>rr1D-DxL3awCUIh%h@Rw~iGbXC_6V{j70N zfF_4IPmX%C#mX3I_2~2j-8C1a-0pkh+DO>Xsd-7I`3XeUTch>0s4VAGyX#Utx}AC( z8#zd!x$Gz>(+Z~*<+nRgmZ_AUD~%$r+`US}$1W%9<5O=HTP=**#aBafhN-m0>Sa@x zOQS7M^EeqF+RJKMfnm@ki=P3d+Vjxms!dxb(OWD@8h|$NM*CSA3BRNk-U=qS--Jly?HWq2&rSh^BLH!kSF!O74KW<5QK`EE#v!{e%$XY=`>K zK7)~z==D>SgN237$kfc3i-Vnmjgy_tjFXGqjMLoI%$&p2%#7VwkpKVhMbfpG)c?MM zVwf9Y9Z<#$xbuk*A%ROk&4E-de8J|BT@-hilfXmJu#JVeqgd6^&ALNf{_Nx7?0Ykx zp3LDp!8P#*vc(Kc4iL!o^HP9D zXEzO$xBbIgKY$42Ceh7SAUqVtY6n3;DZv^5eh_nmP?nS+T4o^`X5mFfLyZA}V9BC* z$zo7yqC_G2$ma#%{j;!BsDZwYBKR?4FwJ5(uA)q?VknPd#EAW1h#*)*F-%D#M02Ab zr6g2uq!&T89teMC3U`D5>p;~mP?9-TF{-pjQs9NttN>z XG#SH^e0_-EnK^jiDJdlsCE@=+h|R^X delta 12336 zcmajEQ*b6wvo$<1Cbn(c&ct|P+nyv7>xu0rwrzW2+vdc!lkYum{TJt4{TI8ctMl|V5oNho6*KPUy}2t^6!WUR}`TqHrpsd(yG!OgI%ELcMxr|&7e zlk%Ka>o?@wg`_cK-KXgzFx+h`1grQSwzC`a(g2 z(A_`2@L*OaxuuIHv(^Jzk?#x{?oX&oQ+6p@kv67V>G@23d#yr1`s+}SFYW*QE{=3=C}UVFpR&zY<}LQHZF{6-ebmXKDdg!ZFe zh2czSdXO}%Kba4#rw+eOJTQgrTbeAKlQq!hIi;oS%2G?LEm-QGTI}PKi)+g6)$p-M z;^5-QJ*9;JfMxrjKW7MWGmF-wat7k4KkD2%s%SqJwtNEjmuSCC2;O81;aBV`IJP6r zBXW|EbP~haY2g$pQ}T);SXK~^7D+|8$wyyQRYHaKQ)H+z^XU)&rTQ(hp~Uz-P!Jj< zA5Xw7t#_CZ?mZCfxA ztoNLzMJ{b0>QPLci)1}a%;~I4D&?yG!|njy`X&r)FLNqIX*&>c=#P+Fs}vzLn~bxk zc|O=sN12l=#eTi)O=vi}-;CV3DxDk7sdRfRQ7ccnWiIx6T?XnsC+H)qbQCqvxv~7| zNBbl516*hNFkh?$>*%NTL2YVBMjIf9L3UOF?mmWGoGzTo&jAv%; zbp#xD9`5Qgh>!$53;7|jq3w^Q`@icQwEX}!qi)~y_1<}-#GUTt1T(;ZP?pzK8U8HG zSYD76#eT?l1_`$e9MNsTOofJlIf$TwRREk&&8WjqOJ95J3 zi-MnT==$KFh*x*|e|_hVbz9fJe&-G&3-`3=0SGiJtTja&$i<|~@wU>yNVdKIl zk=b3qI~5+HG-62!C1lQgH>G0YZ-LJ3bO~&`*FD!HeK0lDaUl=E^i?0 zm}SD_-0>U9GIjDBto?cik5cal!3xYB$$uo#z3Rdhk$l9+w`|2EH;g|7E3@Qj&6B|k z9XJy(BZT3$LkRVhH2&*#;`2(#uvo1bTVEMSFESY!+=J2)w4NO-Wy@r}F=xOMH9|u# zIyc(T0cL354U}HU8MP(sGVo+>t@e&rV3IUU%dbtx>HWD&GX>z3V*H^&BHklJ#2Wod z?!&qn>6j4?S6L*TzapGbi6mkHrI? 
zgeBbYr2n*Lc`oCVWu=L?$*)H!PhN5x{Nj+SYlNps9aC%;pquU9Q(~({b&zAzkR)Jh zXMJ9=p)|K_{m}3}wk=FFcKSV-FoKO8`NJ@+?$vyxbRxu}@%Ej6+B@*#X53LlGo+O*R3 z&H925z?#i3&?LkiH)ij=WoHT`pDgdUdWlBJv67aE<{R9X?^qk5(oX--x5%+Jy(=P` zT5_GdnFXxDum9oBiQ%HY=#1S`otP=iJx zd)y@TEq13-2pr%UDPxw;u)pUSN#Pg!KIB=UwNB!uTVtQ*q%Uz|dPkf1W|^+nV3D&f z!yg77GrZ||TXu17*9TJnNPfn-O+!pni6e_?ie7eI4iwd#B|t-fxZr3rr2aV0{Q`5k zWW@o!!0`b&goNP#xHy{|+rxXTFKKi;Y_Ongzhn4->Bo*7BtRk{LuLHfn)jmT8+6Mq z#+@&&-6^CQFCrg|Z6Tv7i+}#!@=f&QT+aK_MwbR!Au+%01l1f{%slT9m4pe;9NkZE zZt0?vM%1lkM{B{NO^F}`&RxU`bA?|8)08vJbf5#t2whSeHi;6-H{%+ly4~sUU03*j zKl%xeofDq77dC8&Crr5)+B4i0GH;wzG90+w__6aAm~hamiy!*D|9M{WS(~zslj)$6 zi9}*~i()uEC7|WTo!hg!vH2u`3MWe~c{bb$VVLAPlqdVA=3X9mHtZ z91m_nMdF!+PUYm^cMp&ubZpGVbaG$k9ZY`t&m#WL`x z6b?Z6(|;BmNue&e>4_GjGoG=0&@OqxAd5(*q>2x>k6f2AVV3Xve;hv*w3P=axHxU9 z;GCNBYh|kV3D}d{Ph+spFZh`y(DjG0lgG#?gAhuk^|{bUjZPZUV{0tYQ7;7eezE{h zt!V#FlKYXeYPtDoJEd`ifnNhpjq5#O(II8++s~C}G+mkF*X)8Iq{rni34(dn>)@R4 zXQU9#qV%N5;#DfzU(U^_zn%h0aY#Eb@Bj0^Vtii(Yech(A^6O#)GZR&>L-8U+D*_Q7`k;EjmV(|bAr1!j0VL#)=p!$(k5QIsv8&db{w{I6*` z^0SqUwyaUT0&d3Fms8>$oZJ<7a|g5kug3oq{Wh_I=j9?{A^D#G#E3`?~t8I<BV9FZ4v*$|rWj zX099r+WBLzCE1<8_VZ%$NuaxXsYGTYx=H(tH~g(oBOP zck`$QP;Kn^PhY^oW8?7Wro2lCxw~0D$Gjl9Kg-7(d_LRs)Mx%#12vHA|L!<{!-8k3 zduDQq)j1`o-?Nwi4M!zKF?fGmxmpWETZHP9sl_mJJ)1*jD7MsfmSUP$$ehDG(Ma87 z@szlIy>Zq+B4s#n$hVyQ+D2pj^@>&>r1Ie{T%+zf08FnxBhg;W!6q^(NIHo}2?3kC zZJPHijy{<(q@~uONZA(8Xm@R0s5S?TyqHZkK)lLctxE7AMgY#6=UXDDiDL$VvLFZ) z8K8n~Fw1{yj@N-Uig-gE>z5T~U-6Ua)}JC; zAYd10ErXwxcMxbza9Bkv7A1&nfCF~mvOap{LNb1TpHYU)M)D$jeD?A5`C+i4GP>{; z_~}FK1?)^QMnK>%?Ph`}U2mhyFwX)ymi?k8RS^6EWIQ-1!d~rWTiXYX`PzhLL=m~Q zcGag_Xq?4DM8bhHPh7a>+s4X<7Ls9bq6&Ih*GWQKw@V)78+}isYc9Dxq*@gs-K!Hq zI;G)8BE^!)F((v)icL?-nlNBe&`P7T*?4*A2ng+AHgrp{P4@;w`E0ut&3w^X4ASe{ zdPg*A%H@;wf%4{o4In*KJ|O4+VJ1lTf0!Av^NIPssuG#H#2G+Bv7RKdAjgy>gqK|; zre|(Sba&l(g?pzYy0?+e+vuJ3c&}bRKSlBZSWwr+8?2ve&X;rN#EgK*#=8tjD9f}l za^9(6^q!enG5z%cfI|5d}!Hru`eGu@dVCgpC5lZ;L<5Cpq6*)0?14ZFgG^L z^W2K#veTvnNIzoKdk~JOh>l#{^G2L52Nuone9@5Keajif`J3@L*r 
zd%(e?ng*5xqr*)*{3IBPRiS!_(n9Z{&})V4V-NA8MSr8bkOZMNVIlv`$4 zL4H>|$?2tpno*&;kZ7RFir*wuEV|)4L}yeG`SV5p!tNIBVK5wkSKIfArKV|mq1Dm3 z5AByVCAsj)_oy$d(f*8_u)2-~a$-aWDWh`(4gW)LjG>)R49`uRA>o*MC{!K${5pri z_3jOi%V0dp^)SD8YsC|Bs1QTvtH;fMZKtICWfo>vaaU&lQBZt!YrKxmltCsJ`=kSy zDw}^+V?O4(PqCRq6JM2f5$&(Pm5bI!j&B#6i|@LJS~}IE{?tE1RhES|c{~h)PlPc5 zfe7TyFzW6Mv4q2$3yYpOk3rq`6<1={_v@`S(%fg`+tbz#8h741J7tudY%?-w3)5lT zYw8>i#T5D#^t|=lf^bpK3m~4Niqub|m+))7aoxmPGyqLH$ybdmWYaS;J zguQYrWd4LR#5(RiK~or76M_ios9S^sf)lZ;ZM?cpAUhH>!8D|g^9Iy3PuC40%>WV2 zLtWUK_y<61KW&ShA}<;oC5f~I(-BJ>Efm}Z^=Xo&x;I@SWtX!5LbYAFX0mzhJ)PMx zc8lHhuI3Q~0;GUm$NMv;z|!ZvCbx3(9occcXYjwU!(h??x&9AX=B8{0SP*{fdgq9Z z#44ke0`Y@IQftWVwj>vb++6I7**RLAHpO@R>M_02NCRe4FeDT!y;N-vNk82rIZpD` zb@(o(mf(WXbCMOckW}D=>3Sqa>=QNk9fUo@;(Lg<-GoO*;5T>26Ax!ANWAJQT;6c(VBbuyS9mjRojL@ zj3#xIr%-)kLtFk1sLT{C;*aNze+W!=qI(l&(=rxpjXt_|3jE`1rI+W4lAd-7rl<3U z_6m0*rOV+gi3+-L@?U?iDnuvmT5UXP`4SuT+nL9&OIKR;chsp6a@>I~9Xa&xUEk}$ zvRp2MSG5}=Y$n@ShnH+Bt(+qUOju%ye4rQeChpfdyEh2Ij%MVIc>5_Cp^N+Z+N2d@ z9#tueq<~R2X6$_r*PU7OGMk2E-UKbkh19Vji#ImWl2>d1$2`)=TWN!XcUwjaj3}B# zjx*rDT9b#vT0&_8WQC=Rw2sjNUnhZ%%r93#;W)sK7s~;+zA*FZQ=HBW(gJO9hJWUz zL*^8K4_^I2`B;R9gh>v1@yi-Pk}p$ffW{0DP~L-!xU@9@;8}1Vm6z1vmbx zioh=}>k16A2o#STr{}SvvKlU%P;-tT`T^_og#7tpbK#KtjHo`d=ly0$)3#LkhWspSa!TCxL}*A{ zxQ*fa9crupPU*jC^~MneDg_dg7DJg@U6!ORTStJ9xm@B@Q__kH$JU2PS@d?=V-im~z_) z@cTMl@CbDsgLN~NA)F{dHW_AV#5_Of3se|^SedkG7(UiU(9wvqZHl|;vV<3~JlOt^ z+XeYCLW6p7g}`~(6Y(&BD2WkxqPPV)(b?%0FCwBpn%z%iz!|jypkZZ5DB>Z( z1JRU8%tVMJNc>SqC{QFsp(a!%bTo90qvqGwT~k}0^4&IS7~LH=bF#}Sy`NUEvU%Mt zzFx1pcRsJafzJ&jQc~eVCQ}IT+tu9h?1JYNV5K)Zj0$7+c zbuP}Cgq$hM)_X$Y>%5)J3Es~W)?-Xq6LbEJ^P2SD2;n%-OsYfXI{ful2b38w6H!%C zODir8><;8P1}@w2dnc|Ty22q{(;@g6dC=8iyUEbG-*}NrvOL)miUGZb2Xo_!P;f0Z zDovz}lFbqvn^+MroDy6x!U$l}sWPEuK4?-L7>*?;amOQuD-|3$5RTETRZK-eXD*xQ z6+w+kms}24t&%Ll0!Y?k!YUuC{}oB5QANg~!iri)O_8)(rDhdQq8+W3kA+S|1euQ& z1y?Q!YO;8BR)`!_El*^KGxb@VI0R%bPF!9Ja9ytwouLAomS%w7;eSi16vPK<)qhoK zktdEV<$TLs0DD^L)g#Dw2b0=oeMS*=;2DFuNPb%}Hi50fzKZGQ+0Qc3V891$2NdLM 
z(I6#?Mkw)R?j_Rx$>Vj?Ak1e=`Du%I&SSf}=Nd-TGUwEDU^Butl9`1!ZR)_Z0?t1m z*wx(}_gEXQuuktL{2uLRPMD``oE%2AjBIH5mw zMH@Kp1l;JZE-M^elK)y|Zo1@+spzgO;%dw9E2QK!STq5ZE@DOoyzjV=9AtK1WmLB# zQOWpnVPpo}*I}w85pFRtx#jqj@0%+&crLw_{nLP3+*Fd6aWAl(eig713$7c9p2fg$CpdH zq64ja_k_}7Y8g;2X4WzZf+&lAO*|JQ><@Es`py2rT7(zH(38VT(lpht?q0gII_7OI zgZmBiDs{9BoQ2EEo?LF|eNg1mQlN$<>ITD?QMG|jPYL*Zg3b~xS;~|KC5YnYHwLor zaOi4RACR2!b2Wp>qpO`rcgIc&#|POj2UCRQZ}>0;4jBzyUgeKZyjE}ta00xQNabr5 zxH!{o>@+_&x~*4=Aq7EL0(#&j;2#ue2BAP!6zLxUkU#2sz)|pnl^38G&xQwoHS;OY zmDyNZb8wqJl*Ufku$!vVqTw$^SJxlnQdyI0{kGtYgZLa(RVr?$Tnl@Yx<^RMoTL{< z_vK%oujo7QMieI$o1Ly^rc(GuXqW1o_lbLbK=68Ej`Bx$Jj=G~h45DrId%%LT-yTL z(DSiS6zKnSq%u&c0!>>iTOrpy)s=hHDc%^V##P`gbXW2usE?o9cC9^!+<3#&Rneq+c%JIx z)>xW|i*yQ^$&L?RGnGj{D&j7on|d%zE@fZk5T*Xm$<@#pSXEilDiT z!%%sL&jo6-d&vvRDWpumCL!-Bk5Lk}93)-;R-{!3tAB)D$5bVuk{xk;Oka#DrrkW4 z7)$!>YK+V3^d16y8*$i$&jPl)ocpYYv`JpLj50anRw$7CjN6S&U}0`y5!ezRQGB5ZU)ZMVq>sB2eLo#@Z&2SGfU<`_M{Nh2e`Ei$ex3M%=2yzzW7K2tbyRT)bYk=hp_{-A=h zyF<$>NGMj>=HPC=C1A5a4pP5B2Yy&$bs3B+h~?vFdVObA(r6L9}$u~2mX7gUHB zJDZ@(%B#1kuR9y`q=t9LyvOBH8GF$$O7d||ja}=j)czYr(z0uE4sH{sPgDD_w;%eF zM9VR`1#;jCrCFH(dJWQFuLpXzLU|BD7RN5YbI8&u5|KRU0h~V=dBPd^wAXa%Idh#3 z&y=@BSk!|pbrh^5EEIQvt!#Jgo%m?BoTXfZh+`=TA@})H^6RdX{amcm7U8wKZQ)-K z^`ft-YC3X8E(>-2P7O;UH?KwMb@lOsuZ+&Y2FqYOD`yEA_~QokdgY+-Xe4hVyng3( zb-3E*4Fq=myu*xC(fJoJO|A%5B=->!F)RG>n~=}&Z1`v?T*9b3b!#Tdhc2F zx48BUfzLxoR3nT#9KQdTJb28kx4lt1?q+j_9ZVGyx@n#tf%XSb%~n6kiw#iyn>K% zrd|EHt0bJc53bqgts^H3LB&r=yJhx|68l{P2_C@Z;scR4zf>W{Q0YHe{wv1Jn$eHA zhIFFA`F^ZQT+7eT%}tT1)_{pQA^#!EW2X+%)L9&Uo9;JIcs+Lu*d}QR^mqRV=T%^M zFpws4k^6c-WVmSCR!R&ZqKGRxXrlM=|BvgTr705(z)Vik#KOKW;O}^_z~FJ;#%T!) 
zBTer3^G?yI4aK$r*#1WVraxM&|B&45r8gE+Iz&LmcpD|PN1Q>@> z*DhYe_3Wq;i2izQeI)R8^E1d{Y23Hcc2)M2K!x`6Ys&dkRaI9OZr$Y%XD|pI*u?ZV zmbO~lvpg5No65bCWWQaB;1ae+gwx# zQ@1L}MRlCN86b{72P3283gfzup5%#e=ZtTDUhXs+`Rw0QM%MwbKa1| z^3AR7Eo4z8^DY5RjQ{n20r9UBG_5LQl(f|Dp6{6&WQ;gIx)We`FVQb-918mjZKlz!Iu z^OR|6C4)f^NE4jBOzc8Ka0||gboB}M{?OUam1GXnOvx{T!FK5r6derCCV@PLM(T4p zxe0~NyOJh_JrN{QP7w>|yd?gE z5yc_=$t*mq0-HH9s+QZ_i6v9r)yeO@yktFd?1Fg6n3Fj5UT6T#ENDM$zllc-3LRz$ ztRx%zdPecI@wvU`H`Wg3&{ENt&wDv{MNQO(H7}tbox_p3eoV!wpT5D4e3&aR{jZO9 zR&rqnGT#%!i*?tD_8@N81@4dE=~u?Ihz30$6Mt8mz_V9C9XK&rrU(o;yj)#Phoj{1 z;h$&-_VAl&f(ZRWgX4Pm^V|3n zfOyWAQu*T=Z;~7RrS9aKP_yv!-ixR9zFXB}wedCY#o9yP!NpiSFp>x8e>`7fOP1=O z4{Con<zlU2&;->9BAnsM6rftlR~Rw9FpgX$a}zuRN6Z{_ED}q@Syt?4WWsU*Sxz zv^qv)$xHClM{l<{&zN*=YAjZdVqC6%*X~JZ(Bx{7vRXogFYj1@6qhfBTV7QJjW?fj zoa((Rg2oWfIo39B-amjSjNBhs%)cD|l}M*&#_G~>6V|#o@G15+#RBT84iYC6M#yt@ zmm9uD#ysXUMftNI%UMWeUFLs>J|2+0zHD4;OOx{i!fymstX;bBtu9ZQBg4#^aUTfg z=2Q!u?|GzLwA6$x7&mO-#FK4RV+LkiY1H`?>!x*%t0P;Un*9Ynv`~9?7;3#7X`Xqu z5=)`qaBtO;8g8*0iu(QZ^ji^H>#WTzZ}D`mVsCLr<_X=mxdww-=x!h3J@B&Damj06 zs9qnPsBYO3mpSwz6U=NONLXgXbjvZT_1`n0FKYsZ*hdxPj>Vkt{>5BmTxf-a>MYzO zy6e18K#84@=*|PKyw~A|iuV=eq%bp0Ovnv44Qf>5Y(JY=a|&63#D^jA`FV-riVG~8 z;!Gl*)k|13In4<+CMB$O?xG#uPJ&?mtTh#KG|=L=yHzqWqhJ3BoJPJ5y?54Fkb85x zMoT#x(GtJxp_Te4GmfU1k|lo>mC?hQ=h%f?lN(o|tJMaa(QFw$m;msE&8EWXUTVnc zj&nv%9CMIW#7N7s)p5$rz!@edQt2ABr)XXY)oxEs2xnMv5WQWxNw1QQmzW_UyEZ9A z>*`tYNSK0?m2mbf{7qbn@Jc%;=H~vT$2BuS6OC^Wm%+;vl5Q>4TdVB&0fsRMM2CN3 zf)Bus-HHQ>tyd&bbst$_5b@&e891xi9hTWkP$%O{G~QWFtzOYZw?t4j96&cy(Dj7K z%9`%>`AUZ;xvYqZwZz9G*DiAb)WIV(dMGIR+2ag;=k1bK){MaD^)Z1Mq`(%V+r+ha}(1r{~@eDpF!EMeeQ zPvI!;4l7cV{q!$A&2DHeZ(AP>`tRYCh_n6KJKa;I*%nf!M$mD3;8C z`QVnhtqwlag{$=JS^3uJzbn5StU**Q&<-qlJ!!LB7=dw6<*?hEC4kkL7-7#CW(_z+awfzuxbXZq&BzF~Of;0<6#BH`+?ybps?g;4=MqS|y^;>VjXvH!}w;)5A~$VP<>lT@&cSJ}(f zo2MoT>hG1tUz|tCLxW$X+v;5xl5wrNPBKtLaG%Ow3#k`+Koamz5()w=rqdRanM_fr z45gpxc3>sfEFA?QP4AVD5Rkq>gIPPy<@D3H6VpgaU=g9pPalvAxVQ|R2BiY~p<73Z 
zd+8N$Onr9PiS(n4XOZAiL$v?l>qDEu2Lc!|T;pfs**oA`m-;*cwZU$UI6|F~N@EQ1 zCOlzz8zIcaV?4rf1}Nyloo*2FY#@ii$W$j`n51bw0_nJpT*q_Vm`!N4E6) zuIlQ@Mmpwr`#3LPss&i4s!QK$f+PgLJKR}ngpxgP+4bY)Ei}DLO(`B^J~_h*GBd3` z6;5;JxR7o~4ox_6H%Sn~CLG5#Mu_8swQfoET7FY~PT>yqnP9}?PR^!s?0M#Tt7i>V z7>X+Y>&he^A6q)+IoburRU(6&K5>6aK`DndF<&gi@jpTB_x5Ran7Rk;W3)T7@ulf2 zOhjW+FYGOmvB$q-N`+uc*EHHF_ZUqGPjP)2aP=q$D)vi3C{it`%v1VozWuP$F0^U& z=!Ty^C}{i(>H8hF|Grj+M~)&<$=J_H)E^q@rc1~@5q0)y7dQtrKbrEDw6h$a8@XSp z)omfrSh)Q&JLqcFzou%{->z^wNVBsaVUgvr&mQ}9QyG3KfGX)Ym1ySl9B)n^XkYSz zSmM|Nq1rOGolNzGq;PS?@%s39dOl!iG`eA{ zaa%cn{FmW%eCSUxxF!{B=GwGM@styyIP+7o3iyCbVppA+lVO zH0ayF9qnghI(5(WAYG-H5YiN+LLB*7Vc;Nf0U)$jRcg_0^lZZHB(O){cRz~hoWSql z|2;juL!1i)+G*eW9AnK1?v7IvI>rFdOMQFPmCz#!JzlEoKU1znWj@yJ)&#c4^vJhB=GvGY!ASWE6XhqhCL#Sa>bKj!2Poh&WQk%&i2rCRiJm<5OvAoGoeYL5Z~^qlxy<6$^=1H@qSw3X^-){&ckC~0PXa{F#(NMNoekAQC0puk$;OJ1#9u3+%9&7M$^wpBm_=x8Rs s%;}^qc53L%3sobwyKabY3*+Czgl97%f&{&Z;91#N;VCI40Fv Date: Thu, 16 Apr 2026 16:04:38 +0100 Subject: [PATCH 08/13] rename workflows --- .github/workflows/prepare_migration.yml | 2 +- .github/workflows/readme_to_pdf.yml | 2 +- docs/USING_THIS_TEMPLATE.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/prepare_migration.yml b/.github/workflows/prepare_migration.yml index 4b65cec..71ec8d9 100644 --- a/.github/workflows/prepare_migration.yml +++ b/.github/workflows/prepare_migration.yml @@ -1,4 +1,4 @@ -name: Prepare migration from PDF +name: pdf-to-text on: workflow_dispatch: diff --git a/.github/workflows/readme_to_pdf.yml b/.github/workflows/readme_to_pdf.yml index c53c3de..964112f 100644 --- a/.github/workflows/readme_to_pdf.yml +++ b/.github/workflows/readme_to_pdf.yml @@ -1,4 +1,4 @@ -name: Build README PDF +name: README-to-pdf on: push: diff --git a/docs/USING_THIS_TEMPLATE.md b/docs/USING_THIS_TEMPLATE.md index 1044b53..0f81379 100644 --- a/docs/USING_THIS_TEMPLATE.md +++ b/docs/USING_THIS_TEMPLATE.md @@ -101,7 +101,7 @@ This route can save time. 
It helps keep the template structure consistent, norma > **Recommended**: Also fill in the `source-metadata.yml`, even if not fully. Helps track source protocol provenance. 4. Keep exactly one PDF in the `legacy` folder, otherwise the process will fail. -5. Once you push a PDF change in the `legacy` folder to a non-`main` branch, the migration GitHub Actions will run. `Prepare migration from PDF` writes `legacy/source.txt`, and `pdf-to-markdown` writes `legacy/source.md`. Check that these files were created before the next step. +5. Once you push a PDF change in the `legacy` folder to a non-`main` branch, the migration GitHub Actions will run. `pdf-to-text` writes `legacy/source.txt`, and `pdf-to-markdown` writes `legacy/source.md`. Check that these files were created before the next step. 6. Clone the repo locally, and switch to `import-protocol` branch. If you already have a local clone, run `git pull` to get the latest changes locally. > **Note**: Alternatively, you can complete steps 6-15 in GitHub Codespaces. On GitHub.com select the branch you want to work on, click **Code**, go to **Codespaces** tab and click **Create codespace on import-protocol**. This will open VS Code in a new browser tab, with all files loaded automatically. Note that this uses GitHub-hosted compute, and free usage is limited. 7. Open the repo folder in a code editor and use GitHub Copilot or another LLM assistant. We recommend [VS Code](https://code.visualstudio.com/). 
From 1f4e69a3fe652d0a1a5f4f821293aa7d22f3bc0b Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 16:07:10 +0100 Subject: [PATCH 09/13] rename workflows --- .github/workflows/{prepare_migration.yml => pdf-to-text.yml} | 0 .github/workflows/{readme_to_pdf.yml => readme-to-pdf.yml} | 0 .github/workflows/validate_protocol.yml | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{prepare_migration.yml => pdf-to-text.yml} (100%) rename .github/workflows/{readme_to_pdf.yml => readme-to-pdf.yml} (100%) diff --git a/.github/workflows/prepare_migration.yml b/.github/workflows/pdf-to-text.yml similarity index 100% rename from .github/workflows/prepare_migration.yml rename to .github/workflows/pdf-to-text.yml diff --git a/.github/workflows/readme_to_pdf.yml b/.github/workflows/readme-to-pdf.yml similarity index 100% rename from .github/workflows/readme_to_pdf.yml rename to .github/workflows/readme-to-pdf.yml diff --git a/.github/workflows/validate_protocol.yml b/.github/workflows/validate_protocol.yml index 8af38c8..bd4ea95 100644 --- a/.github/workflows/validate_protocol.yml +++ b/.github/workflows/validate_protocol.yml @@ -1,4 +1,4 @@ -name: Validate README protocol +name: validate-protocol-README on: pull_request: From 516e5ccff92f151c0176a1f2c5ff0340c7cf4b24 Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 16:07:42 +0100 Subject: [PATCH 10/13] renamed workflow --- .../workflows/{validate_protocol.yml => validate-protocol.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{validate_protocol.yml => validate-protocol.yml} (100%) diff --git a/.github/workflows/validate_protocol.yml b/.github/workflows/validate-protocol.yml similarity index 100% rename from .github/workflows/validate_protocol.yml rename to .github/workflows/validate-protocol.yml From 15759fb1accb693eacb5fe5d7e44b58cad06425d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 16 Apr 2026 15:12:41 +0000 Subject: 
[PATCH 11/13] Update protocol-template.pdf --- protocol-template.pdf | Bin 31589 -> 31699 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/protocol-template.pdf b/protocol-template.pdf index d2997b4926c7d5c90cade28f0dff156c85df5231..dc56efa4a7fca80cca2a9428d57141cf7ec0a79e 100644 GIT binary patch delta 12301 zcmai(Q*^(I{;_#H zzFM|(7ea0fq4*5(!g0#Dyl>BU+HVhQYYMV@6%<}+7`Fm4-Wf@`GURNFP>!pxU9t`u zFlPS9OUi>0W2|)bdIUH+$*U(68nOXIf^r?{#G8sL$x}6U86SD}kn=3+=dgGz-iW-$ zgKpJ;p-RTcVrIvJ7#~bv1cL@q$}M+6jHpRL9O1}SGo7U^>#w4K+9N-Hyjd{~6nfuj zmiJYj9{QMtq*xF_AWTW7K=FgEZ24GtVj#VDkg14`=Q zkoaw#nN-|p6#}+j)WojYDze$w6GY{Nw~2*aoq1LNu67*8Nm!6NWlFz0-8At(f-FQT zZT`~urXmJx@7A+o@k1JH>X|XriGDSO{M5(+6m&FHob|^r;7Bffy~tCF6N3D1>~MNQ z&eR^ZT3`94f*c;nL9v*n4(lE#2OMm9pVyhEKv|dP8Sn7SwK6H@K|gF3M$(I|ZzR3K zvBm?0G*|x*WWh8sX}KC$FY0Abrlhu4QLUznRsSmLkjK~^Q}C|!@th@DEp7H=GK#)D z^_a$-^`7}T&@iV&VqM;lhMz?m=Xd7JV{$;5zkE-Gw_ILU8sW+Wj!<-kSG5OcdDXkp z#g19!b z#4+l}?e_|JUS z>g#q=AWd`2%#Gj~SDuI~!F7|Sf^Dwz_@I?teeHr-ld)`j@d-WcQMugZW^`6x^|u1O z^(NI27>pCDyzaO@;GEFJ&sL`lp*f4EwWgb0tTKbV)Rj z>S?0&`=BH4(kp(REJdw8C}U%(9py;gta8%{xHBrHIm0)vSZt4p^{9_?0EtS*vIrKI zJJZnIP?KK0Jd|NR)#U0oq$#e#xiwX^wDGu{S9f8=sm$&CgTX%TgsC2Mba}mwT~2{d zwZi8R=5tj76!nsLsk1#aZ!J02n};0lp0jw>G_u0>b$cfe&vW*X_ojS@7!M=qgXCuf z21^_wCdZ9i;NW|U{vRi7PI@^UFMf!N}U*T#S5`*wx7bq*7pWC`E_vhwr8IJ-ER z8U2CrSRdA$vir?})bm313TpiCjRYEt8_g2*F;%^_FibMm*$QF0q;?1U+9|D5TkW|c z#7HVR6nFZEqtR`Kz9SzvM>ebBkaqKak<`=cT6y*JZ1s76koEC)$nO(P|3NH#{dh?F zPP&HY-xYi*9|zpZbqls%@e31MfimSfvXsasJs7DMhogY3a%0WG+k=7u_+N6&mvv{gE zq`cP7QX8Sq(OSIXT1=6i@>Bk`E!(}tQVNI72>J|NCmX4Ho=H)2nC7Q8j~CLkadQr5J0s7Fh`wMk|XK$%i}7lF)Y1}_o4hH4sv#QZ+wNe)lc z!ESd9ATwW+^e8QYcsC;Dl|L7*ILB8Lr%D}TwzQP6wl-VBtM9$4KW-j}Sz0Vll>q6` zU?6M6*<+_Ta6qylZyoH`sSdupPTi@vxn!rt#_$t)mW2hj!Y6;|lgt&5w>Ap1U=Ycb zv~oN8C(g5*1fQ1EtjCaq{WVOHZlbgq8PlWddN*$+4%^uLJ%0PqX4*-c=#knMej|yK zAd`TAkq2?8Aqsf1fYc`A`c0h($qpBtPiQH&LRlX_16Zm18-aqEr}i&XGlWvvES8+~ zO|$`tS$P{seaLn$c?bPa1+aD{-HIL)F?>0w^sdnNslUXxk|QSk4E*a@U3JR* 
z;u~CkQDcp=CvV)P-}}TZ7&c6%e6KtT9x_#IS=f#q#j%(tiRWRdKp00FmHSH@szerV zI9?t(33Jx+80Q;-Pq3$}YeH!GGo5?d=6y5K***;zLd;*%CGj2TT61tgQZmci83ncJ_Ufm%Me%wJ|1!u?)0mkA>NzFf$U%_aA4H-E)0} z2Dx`yhQ)k3Z@HMWD;^8)qe@HQ^Ja`i$rRK_waLrk{e$I+Cn@G$3=HP#>2hO@x@A+H zGwGI4D+?q^%W0pYvuM*?ICn}dPC^S~PXxyw!u88Ul8XwN+#M}st%6y7v2U}1w(1FU zb``i>)E6bY;;YdZcn9)D$RtB(JbAJceTPG&C0K9#z|`!{qYKLsuB(LJ-L9q69mZ*< zbFd`y4c_Vcqu5y>*R-oJ&QXl;bF{NulvVab&(OUw=K&LX(knDKkokX##gclr-EXGnlJJ*7akyu$rXj!)cH=5+7Y(o~+r z(_6lsT?`CTMh>?w+V5N3#b5+YNLS9ERjWS?+Rn5DTv|#%8{V<;qeh8pywrzD?^I`+ zE+KU;5|WeAaRv+dQ;Z2~`4EmAv#pL>3!Ty=O#QMK)VUg*fkIYjHnEVNAJ5l{9^B(P z=lB*_lpsLlC$Y4TCazIbTNfzZb#waEZ` zyK)n}5B-mDESJitQ?E;|z=~T=6%3ALMMpVv zkJHeOVH+YZl(ja+>Le&$`5GqQ2@EKQpeG@RO4lMVq(9Jp$-1pOzu1s~5fFkgW)AR*9cZsk8~Cu<)m@wI zLYXjA%g<5(3PT3KNl}^?i!7nFC||~Aw8FSGI9j~9GFAJEBVB`Sf4q*>uBtwk+f+R_ z{j~R^r_UvYiLva)1plrbHj^hW-UFHy?RArEF)o^0e;bxVcqo|1RhT99nHQ^2P=<~L zfZPg57v*Pnhl!iyn7Rxoqvk^Q_>qCF91h|R99=EzOc?Q8S_aLK;?9PgPV;Zx6qspf{!B-to!}$i?qpYcPO8VdkgVUbZGto(a+O3{7v{a z=V+0WT3I-iOqyXn@cGD=3?wz{}B&l=t9p#vBj&*0o9?-?Re2*P{@482PDxCc?yTk95%+>uDWRTKy|Biza zy!=lzc?D+!?O%4G2ASv289^5?ZxP1K&h-DS!~aC~#?~-g?1YSj|2N>}O=5t@19JY) z8PH3Vvk4MJ3cdM`T$}&i2IP#qrb=vDdn}|G0Y0baV{A<$y_KI)~J+oO4ET@LZ~(#8a4bz`_RzAtbYM;t7 zrU8;DGVbyk2auZ!KQ1`Sb=vQ&)C>?t`z{@m==t^aHVZiYC(#;|P7N{Iw zCebzRg#Fw=I)jItr2LR{CO1D(*QG;q)@~~zKui7g1)}0v^O{tPzzt;iU$o=pqJkLz zOPkA0E0Vyq6HT-(lCbGRNhftnM^;?puT;3#?M^LrNf@5wZblbNlIK>_KbSRyT|ca0 z8Qgrk62{faIfr1FEOl6NR-nGqz42l)*ti~T?$y-h!S zuz~hG2K=KgV@y&uo7kztE^#=jVRmNBro2R9V4Y?7J0XeitKN3z`PEyuRdM{u7b3z# z&gs{a@-P+SW%qLfWw7Z%hi%%}E>JT{UpN);YfpQGq6l-fn|W<7Y0B3+G&2gsZKcpP z!%h7>mIMY&DgNZTW0gaKY)}e0GBX?`h*p~ve6S?-i99xJb(;dOMjTmILre76;2zgfZ#GUZ4hT>^9x(`ZV zP*B{UX7>f63>Vmvq>Ie`zm2V4(uCcAUXIv{Pe@vIMPyh%t8%cYWcj>=_R$>(cqm#q z)?vKZch89XqF90;cWwsHv)LtP)|0q<(rxdg;*Ax`pR(T~X?xbzC=}f8>#!tqE>A;; zJ+1~ZxduL}2QBAPul7gJM;A|fUnU%nDF{WQOTgO&UVKEezoH^^%80EAqC;I;H}+`} z?=g0e^}1fduRN7L8Iy9GM^g@kAD&kWtft()81AFSH|ozTj-Lg&kEEas6WO}q-Af`O 
zY4sIjs_(Mf%ykOnsU^P5aV`%G^q7=9%}?%T$BzzX3qgbkED|9kfJovY;&{<$Dhb)Z zd|+XEKC$yG3$uv?N+vZ(2_e8s7;hAVu;u`vyol2Qty-w6{g;p!`hN~4*-qD#KvUYl zL&BMaR0blzEm{Gw#u8Mh?_yQZ1}HT&LbtL72IIkg(7YTOm4JWdX8WM zGv;MyH{3CUoEq&fm}b1|P)Uw-s7Xqw9Kfx}|8yzj#y3*;Kl3dRQx8Da{!>u*RnUTH zIJJsyoovbQ{#?mbfJsB?GnR2D>$K^TccsS1_)@jOTo$0#oS6CkTq0xa?zK362S$G3 zRA<^_rs5Ktg&%pX$vU)o@aDsZ*8@aIYoSWMnc}!onYLfaVMs5AnZM{H!O+~c0KRn+ z{9UW8Fmt?QJk}qX{>tC6ZArb#=_zeaN-ef|GHlez%u4xOa#gUFgr9Uy9BO*ltF|S3 z(o;&LUku7#{hb#)+<7O3t9-xXS0}vIO$~(oQs7U_i#_xl;6MuwEktW#MY9v6QG{5} zw*wJQ8^&p38{)ErB(p^J7mmL|vBTcyKUXKLYy!8y)#uEHem!h~&35$I(@_lNQ#>_7 zYz%wNs~ckKw2&0U1`tw;$kOc6Hju&rPKhrwjI;w-qGwnmh z3YJ``82dR#)ZyWPERq|f{ex>ozvWtkFqIw0sYK7CSGkC{qb>`Z_*LcGHzTC^ zF*Pj#AVip6$p(G{Eoiuc$yNUvkq=ibiyUsV6BA#a?@(!vr7w{c5yMf zmBczY-Hs%noYWUQrleaP7Da7M){9~eb2+({Oe0Rg&A|h$ZuPAU~g^jmxm`_*a-REAWa^9UW~T~aDzOPo8S0l!_`sI0m+nVXT%~pc{D5Y%+RM zZnehtJYFbjvSMxGG- zTNuI~ew0sVoDaw53L-ppiCmme1{8we3Bj*dxz80;R#}9Xd{ktK11n*ZT+9=kU6>)H zJcS{BN#a#P3Y2++P$hw&&QG%!f;JXboY}Pt=wyHnxEB17bfBk{7^57Z0N%prM4Smya-0bf+}C!q=yWH+feDR*99vZ-S*@{| z?>2z1zdoZXKbM~iDIDhn^?5xTx6PO*sY*~JIH8j>BKB|W!wZ~mRkt6I`c(IH|LvkT z7A27Ve>n@q3F|>dq^=8^=XB*j0-_RtiRB+(y&cat1B2|sj4H9@19iK|2R4D z#!PH+RCF~ht0Tg9auOVuSt>ec36=yw%X*DWGAc>3&By^d2sBHs&V`pYd#6bgM{mUmN6j_{F&9Fj`qKq@Hj(gS&c^6K=Pz3SBA|wsy6GVYS zObO=|wu_fY&`7o*L=qJYfFTYvwz?|6K5uB=;B))qt61*4<|HwCwHDFVVWF^*Gd5}Z z{dTf_+s->Q03x5Px{-zyoOWQz_c9a0a%r&lJ!RKyU<>cE(Mjyaf`rcu z>nM6m0-GQee01l@lK0X`6dRjh8r#+7*URtCvkM9>}-J4*hYBRecp?GZRFW>Q?5VZ$u!`13GGqe$Q16Z53FPpqSXff!tuEdt zwXWAKxc@XT{&DUNQYVi;#4N%j76t&;Q*(Z}a6FlRc%m+`{FRy8~pnvvlL;oOe#DUv^YTrVA zp=O$#SaI+J7d%Tnj6CU&5tSM1~hb#+We}lW%U4kx*2$iJlXWQnmF4c zx|366-04sj@;x7?5+SUFRvNZcN!ci+%Yh(4u1B+XjtW%=-vUxthHh<1AMUsa&J$7dB9_+m!iQ`LG)s# z(aDxNOYH$hbnY&T{ovYwG~KL~kdZlly%b(HPNcn#Gk%BA4FkkTg*Z%#-eC{uRDh|H zbxvt}`*-@$LChA7W||oqPZ~PEE{ugx1Ku={ih>jB7cnC;aaS$TKv=;fu2EJ66_r$p zw6vXaOEX)BHuN(~mU>D=6jf4ErmAjL`4u*~7OJkre)D!7mYN)Mbm5I={?X` z%nTY@!1X892NnUZcBh7VhGv>V2;|=w#XJcyV$@x?(#?u^VwkkcukeQc0@*}7K_>h} 
zkfv;PdH7ynaa&jNs$3Kd%2^4mZszcv6xd)6IoCnd2@kQ{T*_ldk-uykbd^= zWbZfM8Nm<-GDXl1*o31TF(tPwd0J^}Q&cn=+mV`R>R3ZBK`-#@d?~$!r05C!Vuo0l%>fwG7fZ~)DRXbj~~i5W*red~cK32g>#9++}|>3AFMFhCX`_d|Z=A3^^Nt(6nL23O!DnL1p; z%uI|PCw}Xl`>9XD#}9KgzMc7IUaBvJMcSDneeeS&pDCV}E%b28^JbXcEV8k{=P$g7 zl%oNU+g#?%@M01+V_sQX?hMjnp?u8*kUn*GRg*Mmp`aReltP9G4j0>8&2)SGbsvsAW2TE z5aPBouz~^B53hQi=kPkE)PoedV;usTV`yHWaU;E}?%#`H0RrzBv42)^ zzQ?sB13Pu|WZASMUKdsKnH}s;Y|r#8NscHj6wP9Q$YBs`;U7hR--(`kodrH!Gpqrk z(#B>>MFTxCOz5q-T$opCG$gMk#k`bil#)dY_GOM`O&*~3xbGD>`RZoh1y&$r*lYB! z4yYMR?RgyNH)yN1Q0vY`l${DCC2x}(z3}8}bqwS+hz5)Nl9t*7FQ-EV|(I6;hkuZ5kMor~>NIE!h-a=MW4r2d-a$d2tkH zr^p#;1u4E7rYxJd)`awc4lJI_DVVi1t!TA+xmlpm1P6G_Ho|>xomqr{FX)95UGWlV~5*O_dGn7BZ5tF(Zw9Mp6HQt_wWcrL65jw7D1GzP&CdBQVj8&7UF;OiuIhmn6u2E z0fP%qhGWDILJ`M_LXpQ#lR^NZ>+a7gNIDY85^J=tWQbAuljfWxUjfOCaycL*}-fy4p!A%B9w?NY<5t%kDD9c|0@?58)zB@YMdL ziISa!jk~3bf+^Jv`ZxQ%o#<3?55I-iTfOVYO%DUIQA-W;gd@+UqC0o;TM1YF!IA5R zg;t22(vKP_ABp+ba6g~3ujyfbuoLxt<3o96C5j+&-$@zR{=NrKkgu`Itq1v#9x zr`A3fVhW(n1mR6{(?e}{xyYO>F<4QpS?wrw9$sNfBB|G2vaJaez}pbcn<)Bn_@lc~ z2AcF@zEmYz%Y2dfFGX*~v<8Q#HUK2%@dB9nk4@)4`EMSQYHizkyRDh~7&>n=6JFuU zSVhUD9C9pUZn?Su<*@Woi3#Ige+n+J8rx3O_#3u z_A^ev0ylquL#zxv2VR_yotEiW<(S8uh7ixuyJ|WT+4$pz(U*C`=lez(tu2Y95HRFQ zXz9|N8_)LQxFHOjb#2OHrp$t7#!K=iaclW0UC#YCm~wGvv#{dA7EEfb3#_wF`#DjX z{IiU`cVgK`NQk{(Nv``xLZ*N3nP2f7pX;a(;0^iyv&FZ;U4uiV-m|rerNteRGj!kj z<`?8bcl#jMftQuGOMd%OwejJRI&kAUzu1W{A(yWECZfr%+~J)CoZcomN z?R%CyEE<~K&50NPU~+_IT|zG6MXi)kquGq5B`0cL^gi0|;}j9`xbTmJ70^_t#pz02 zCzoOy1pElmV$hST@{G*4(;ZsoNu;X4Z6}rFp41GQLME2tK}^m7o)Whz}$*#GnM7OBTN1mk6SX4PNpjyjI{o?7ZPx{6CBtbBfJ}8;$9TrLR}(N%e4+=&$-^$efW2ETJ1o$*LiXY|6t;*$S9G;AhSQUr?y(Aksg$?Dl$^YWCPr{b@QjLYhW|!rL?uXJm0mN^wlCbr6w#{x z#zUXx_b+aMeR?;9FQr9POJybF7g44z=^0&kl)}cNrsr1O{0fr>^NteRS5&GLsg~nh zKS?jlT)4|*W!&ypu?7~mj{=j>2>xjDuh*@g@O;Iw)8=u@U#>Hft&>@<3vAJDxV-oM zi3~+X_#=!&M?hfTlDkbWQhYid2{Y%@m*0LfF6PQYkOJG$SID(@vAA~Iplkh9n8H@6 zr1vORXFTiEuRV(=X(DN`D&Dq1tlIzy^O7}?_FLY7{bs@`>j-!skU9~223-yV&!6;C+W)xGypmFB~92H`;_KR=kw=3cJTpj&OtRIwK 
zZZfe=`IBXp$FFHedCK*`1M6Pj`7)61ESbfzoM5Oemd>Mi+@k zYCc4tKHSA#QwLnXyYJMJ;CWAr4`z5hK!=k*pkCEL3yN zIkb+OCTvdmL1vtDRisPZ)0fiPbS#fBVMI=yxC7aRRvBP2zii({Ito%Lq;!C2E}9Yu z{5Bg7QWxU{BaU1w6dtZnz?|wVBU(@So4KT7Kq9%;Uqg@nX0Y=2aAv(ncuA%-Y*2-j zY%sI;X((agqEfb3sH~XcUVkVUFp@utGA#b$l!lcqU^XA5OZ7b}UR+GNEMexDBk!|3 zx4#tM;rM|{7NuxJgYOek#gQyatu7?gdW*+F2-ADzqqs!x5CN8sbGgJc?F5wKVi@?y zqSILt9;c^n3y=&U@OB9T{u-H#Bd4>`V%B*`S zE%3D^w}Bc^wPvw3Ayi$d^n(F(xUHtGcH#1l!!-ayi5a{q$GXHw+Aj6%JPQ2>DNX34Asn)eh zb_HPQFK&7KLO2uQ&1NuyV7vEjiYp_wgo1p6YOuL`V0*y4Yu~y2!SERJQh5-1*q|7; z7a{@z@a<`EM?bHpvL=+J_ot?-D+A$#_5E|BkiJ&l98+y|i-97_>-*1*p{_6e!#>?s zjI4Q@H-*XNQPR5(g4oK+!hNKhmpBFI5CJeHYR<D+DeG`<)*(_oZE9n5*g?fHv&r{-wgz{y87b^?GoBV5$r3A5aonB*KIKcy)l8XJe^9aTLkRp zf+m3H%$1dqdbyb@hZLO1wA*X-=!TgeENtur_x&l`A6@&4LxLo>xZKBvKM)#;puLry zbW@4g2rZtL!QqZwpF>3e^!B}dC~Kw*TXOkCV|B#Kba=+fbg)z5Wg3^@or^|Q@EMtt z7YJwE@Fb4j%_JM!zb2ee2RA8wg$I^8^nj_fjBTfoe`Cm9UbDWvogSRs(O@}gPgRs{ zDzu%Vi1CF}u5zr1A`3T()>H38$7tP9L=S&NKq!!Yol4Hf*?0t!KgV7TQD7(jV%V4T zcJJH|-wq(zD7hI=gm{6^Y}>NBKYyK?XzXQp8y{BrL$T?iwU6JN!l#Am zdd5jkTwvyBTgwinJDkReWh$k!xE=bAM@4M2bo^fPlLBV0oijLfN?$2ojo$vb+>uxruC8q7eMv&F9sLGr1500+#OI9ZL@c5+qUig-*c|MbM@cWSU2^oxyG!T zYvBd_=ovho8=NbFhZ0!Gn6yu!ZSNaecelg!s^{F|Gf#WQ>M^fE0q*OG3a0sqsss35 z=m-Bt=j4C%JMZmo zMtTzq%ge{iezSUjQIy3Sy|B6oT}gQH_P_sWWgwLD)ot!cL<$so;<<^niEJDHNTT1G zdoUydT^HZbx39v9Re(F4h1BY{w8pNJrosp4upNG;D za;En1)c>mT2%zH(RT&qyv9Dq{fqsM$&;|eE| z-j-YgR1|LiiIg|N%2?0+y@CA5r%FuSKE=n@*I|AJV=-{{MI>!c_hQ!><3phKlZ9iV zPC2ZyY;ojr1J+N^lh;t!L*?O7#v)Bwye9{i1t|1JTFB>RrID;m{1l0)^se#buBd+3 z-tfP>#ssn;`;{n%+Hk68It(?6E6Tz!$P8lXj;lmb0<6g7+=6-BU}xoFS-+E$3;wiw zC61Y&%JRH*5Vs(T7ZD4{gmWFeISX@?yGhr|zd>EY0JFvLxF_B75vk*TQbG!px>-Av zxUrYjdLb7S@0uE{2T8h2zAL@ua2DcK;LNQqK(jX-XO(VENV1bZv1XMCs7`(8h#&f`%Q0O}z5y_|{>~NerG*?EdacdvCb1{W4tEIg#*$5CX_o z`Ou8X0vU21V3fS4Pn5R8@fnseveL5eZ`oT`+s;^ZSW5NgRdG`97irz?hvj!Q{!!y_ zTx0G-L3O}UGn&=?aEoUVYOUD(Q>Q5+&XkjT)J7;Zo;c$|=iiqniSJXpzLcd>qB7i< z{C=eS&uK%-T|oLQah%cRd}FBs{Yb&AX483R1Sq4WC@`;7e1n7gXz=p@27{7q5g|PH 
zy0N8^F}-GaFw%Ui+0AcI%UqRPV6tdw$h*7SCJWJN?$w3`RRG0%noC!ccwMi4i> z4`rW6z?YizVT>ZA)B)iD)fgYAtGl zksZ9}`jFO?{UIk}?+eZ2kD`=UYB<T-R{WoXe=G z=cvc7!44@+J-j~;NFjZJ@FL@{M)O#r0nxRr{e}K9EZhhHQZ7|dgF6gY@jE(o` zy_2abCun-d5I5VzqR;8eIF<_Oe&$UAWvu?p4(pxVN`FTPR$%UNNM85IJ*qF=kWx(Q z1!JtQyxLVQ8?3C8I-v1^MLnlcI^f0{6px{^gp1b{dNm!zc2YhddZMCgND1TSa9Pjn$x!xm zA2q@fT+C6+f(FEjlqL7UZ}wmHP4IxUF@+XAtkZ2rK*kDe8zr8UBmrv&%gc&&iK#`~ zr-sjoO;Mtelf!7Pm6IL$(RWRPrOMHUb^%^=eCfQd6G47)GXpNE_j#0i@DVCjwMrzm z*%Wt&P3TJ3n%yXHuk|sE$UiP(x-qEb1^2 z34$B|t24(TOMmDB;-I%K930va8O!7bC>M_t$jjVEnidqRY*!v~982H9J=j87yHRY# zjENb(98~%Iw7(nb^r>dh*Oz%dPosq|oNj3>oY;H`=kx+r5ZWsz!Fr${JU!Q}pz+c( zCM2dA$tkVMMNwtyHh7ACMJ@)0KSSP2H=e@fhboOPDkGj@vUq{V9@p;OtU?@4Omq>L zPW`Dk=q{aMpnP%H2JEYj`#dsHyqu53b68o0f0rEE@Cg)pg2IMk9#*Y${C{>$tIC}q zqtwhynK%Qp^td*(LV472k@6Bi7)TWnbKx;;XXeCNre^mZ8D6EaV@clZnky^=VnSwg zmNQ$Poa@>3H@xnO3__YhGbwr@T(o+IMY=PsIt6q6GqRmG7RZvebAb)V5tc=0ZdAWH z$n1=KF&x%~o7X6-PAh-q?yR9|7t9)neVg^S+fAUGbE+cdV!SBZ7hkQ!A~{km;w1fs zBUC6kG;lmbeZqe3hGd}hxLBEsaak_{l6qCm^%-TGEaFg2wR&ZnOb{0QbI!gJMXU;moCY^3 zV*0Of#*0|xi@WxHod)v<{#A+SZ%7CGub`#iT5e~|n}*D(k3?1LRrV$(Xl@Ay|Czs1 ze-EdcfKI2NPY7!|3T7wzWSM)lh@?2e2~QC-KY+>8o7@MgZ3FgB{=8xMl>_CzdZ`~s;o^IDru zxzdyX9moTfV#c45sLZZ&F~t{i;*fkT)9`ZZI&wzVcL;GH-sg*;0RWGh=f7VFRYPLw zKoTXL(yoMeucc-9j456{Fq$+lwh%4XRmhH4>iHjyOWIOo@ha90uNMOfpC9!isy!MX@%LJp)4?TzGAE_kVT|@rVZ*>Q8gy-sW=CY}iY)0Pi+2^mgbIGj_C|=*6 zVy=SVoeUrbD6SLE{NSs+WiKynv&~?JPU_fjU-6C#b&*?_pahidx@ZQ9v zP!i9oOzv?bVa8XHkMiv3xrRn&*mo&Z0|G})v)**2Mhrqm->IzG+L%Q}?|59g-zxF+ zjl&Fa-bgRqk-cB>u{*;P@ZEQOGeBK!;LTNY@U`DdXEQvZQ*T)ynk<^dC$VLq{KmGW z?j1p3%44UKY->EMW?II`J}OH^^ISS^vSEKU#O026PLsV{C7#htOyf|F+FY44XO&F* ztH}j+^WQA99jSU=?Bk1Uv==?(mQZd?3WGwjw-Y<<{qVO_Ca zlZdi>%5mfn0C?laFHBE+JtgS-I|Ud>LLdhwiC9F`4^voWG1R{p%B9W2$H6M7*04qo z1PhBLJ}y2dEuOZa?N$OvW0!Gw|M)93=1qFk>daH#zQL^Ct3{J)k$HhE|A%zEyiLge zkp7P1v8Rb9wqZaZYmZu-x0~oh=ovO`?#~P1@Ji){xNfylQDT=Ze}oU5BahI zFxZ2Yrw#>)$cK?HuDZ|PL7);6a}{R2$nn{`o4+SL!P@zcCGP{^N4Y%vELAa)H>hX*XpS4%{fUQJzoOXi=MB6R6$D3@k7 
z_nQ21hMbW>w?3)+eIUD=k0&)?8m(~2kRYr_pTG?m>U*)Qsl@-Dh*5_*D}3(0jYl;T z4^IXlMid^%mZdls;s8s6EEBZ6k*toFw-gB4QUwSOl2|5?59vBgaLy1yDJ7vEggW$5B^X=^|JXT{ zV|78tcGE_%C;41XY_-fk39`yQtfmnShrkiZ20K`^J#^$lPMO>S$O zDNPVHKo;VjR4hdKpZ4Hja9mltDT=wa44&@;8g zE;cm;)+II9DaS|mG>8@&2pDzo(7((oO57`km5?6{5W%iV5YwnhfB~%bJv7ywd%yo` zRv#Ivj4NcHW}uW(j4XN?To{_8c~;|*C6}_Olk2(neN^-2?LcmAvt?TDPYZTk_Rbl7 zyF~A9G;s>7t5Z^0qiy>Jqt*yYPtr%@1+x7g-6U;U2Mhl%r5;`{WBAi76)ZD;sSR^Ut<(jb+m z1J%bSEVZ=3esu9&bep$ck!rcbkM)o12NRZZ@3gjgupxwUJ4#lB0_>B z5?3`Y9a2OB$RZ+foUds*97n;f{zFy-{xOI*u0~jIgiuAwX&0~_q~!t?6h-@AAwzez zstA(S2@W33B&ak518&+LfnX?3h3YX{3!|4pPcvvYL$og$?hDeT_`5;+b ztFvn#+CO7TVPVJZXSh>^^9>V+?b=&X10H6Q8U`nD>OU!ChQ0_&q!NbhE1KAfgj8a| zFH9@xn%h0Cqgf|g;`n{cWzR}Lg);HHc-uQ)n&w@bv9W$gxtS@DSK({Z{Ok;;jnqF| zmY#UhT1qY={bJZCY`Z*EdK@XFG#?oed(QIrCsfeam0r;v-s;gYn9iWnlHe#lu~3|3 z>sACloG`0&rMZpt&l*n(%IAbD3#=LIx@o?ruk@-*R3>n>wo`N%j1l_*%Tfy}FdCS%a!1Lud#-%&ll3p|>y#YsBR5USM$l@hW>zpM@>{5c zpzVZd0FqnqA&gZFmSvGHO3;*+M}fZaXXy5p`;6zDBt`6y76b;|*>JCir3l$ho_l;s zQ3CNdhA2%RuX)UaEL}x1!g!t}06#gpU3z8;cCS;i$qXZ%KsE$K(64~CGUpn-3mdi< zw%@x1U7F|P;VP<#uAFXvkLlSH4eqYG{A=fLgy=PTEB_ZeJQfEqVJm8YK|0i4--zxn(q1!co2oX+QgWQj758)SOwJwA4g~^{i4A5uVi*Nn1 zE*dqaJhHzuR2}J0J5hB#pfX<^#Q)NA$EF5nPRhl`1Der%r|H0kE!vaJUNX8N_*|?e zRTrej?&}?1t<2*!)`vj~#+t{uN@A#gcsqoL7XG$B|7mc_zPf#9*4Y_%HhKc8q$u$} z#V)wriM^^WJfEEwT2G*{-fj)wS$b(3_+W44j3nN9^Zl~kIyf0&oLhW=>k_hOQ>E>$ z%i1v813Eb#xelfe$u9`ZMELMZ8rWXOAMx^i? 
zF(}s`Ttq~Zq$BSzG6+>~{I>{)asChQH{@gzD#$0qYwY%qOjH1hX^3O0n)SXURQ8Gz zv#1haw>vBdO>_#ew>lL=PNP0$mH?^=He9KmS`94-BV#cHBcuG_8d`tOSpk5{gRnTo zsnO|mQ(p0}CZ9Ov#`8Qc2iSQp`9=P`;6dDbeZI*HZzMht|K^2Dzf8(uu`?p_=gFx$rBfl$BMtX3e``UkDDCTBnetre_zl+i=1KHl=Sn~h*8ek2 zg=!$EVQBu$Wr-Jw%}1mWz~f`n9U_C*s(xB}0>3~2;gkz|;gV1!2$P`ni&5qaCnAUf z?$vSwg)zag=p!Ozf7xhm4$ZH3j_B*u)GYVfb`%{vUy9dUubFhQxm-_vKFsv&e0~4> zcP)Vi0|O~B2gDNrZ6wR=AC_!yP^i?1RyX3Wu(D-n1_YQ&hZHD<gwdP^LO9Xr!y~BbOanZlLI>VdQzNLL(U(#gWus=ohBvD90#moq5C%GP#J7nxH z4G%P8^X*TtZ*p$SPezhhtc57z-uQqpw9;;QW^zq5m z(AVRjQkggiAHru&)`W^;O2kThCzeVwk&rS?*Rfs5hg^bvYT?xL$e}0bz7!?J=EW6?lTwgQ6!NQNR}KjdqMme?$S;aN_14{u zyTLQov#3z8siUN|wu8D8QfZ38Vq7OOkaSGcnyYoIofha;Pp>RfaY z1Z;^K{2HRHnJ4OZu6XAide6hy$F5VCVM$=QvS32_BJ|FZu{e&}{?XUqs$(e1#mfTE zaKL|zs|LG=I76%j>Xg=w$_ydLy{Pa|Gt?YM7FYVy+iILc%ssq1O*@0(NSp17)2SK_ zADZCBr*beVZdYv#gT`QWuBPg9A!1Ym3mi6V$*=DYyR^HhJ0tT}s2rM7d#EjBh$OS{ z+!v`u4qfo}bcD+S_`XtUK%Fl%lL$nAKFQM~6mu;GztbX{C2zy?^;)+SUL7zW+YmLO zD`07y28y7>WOyCw$V*$}HaMN}oWkMg-Qd^qJE*y-SV496j1?~O>btmn8Eu5y06Vc8 z?H?@Nq3ODXa1;U;mAPwT4oScw{)$KB{}v5_8A!M4=X6t5Nz}YVQwfjT0M4q0X0mnCjpOgMgx%7rVCD6zeA3n6E8xf5{5# zlBkc~QTlNXgzp&WZ1{1bed)UV1w2{FXLG&Ivn*Z~Nr-3I70YYX<pgV}Et-D@(}M&4M=iXLXtVb|)553XOk zT}?khH(e$SJ~~CcKwk759jQNevnZ*|LkSHgzD7cwWc za$lACICc`U;&1#2X4H2GgwfH{WvZ;Z>VmZ}u<48FB9#Mw$gFp-V_@Xbnc1_Gj;uq+ zcz{^$!CI!OSrfyHmDNs`1{#ptjgj1Sz1LT_he*%;?%jai%Ac4CUQsB?Q?Mla6K)Ij z0H;p|SXZRXTF{M)vX-r!pdF)+wo_zsIyq07`AE6?s4S05^2-kz<+_}iK|TAmfL~>e zyp&{15)I4o_reWbDC9NJ2Z=%ADm<&a^E}m>&I!TO3NY$(NU_;U23*e?gm`k%e9|GzIW+>s;5^52_mO^zvVm82eD#8Q>(ePW4$K&j4 zJYtF>>xoRt!V?D?2#iP_z$Us30`;S;r5aA=PIC??h;f3Wtz@BU^GE6CXAEF#DLDrj zd*TWXPPdA^meVr@8DHc*ecJo&SrwMzv0j%nhty6cP3ZC2A}rlU_i zY&gJu{U)BKf8sx*utroHn-ZVK?7I(lQFPp`zev7DJY_?Zy1v=_aqlwZA9+-#+fJ`D z@2e+&T?HYLtx{(An*6fPwCIMBhtxMswus^6OB-_n3}u@A%cI91-gR-w!@|OQN7);i zF)4>bBySzPO^*#@Bfa1!_|5Qf9=*@5rOhoYyt!++VDue!uK@3DR5re66R()bTtq{s z%OG1ZV_WS|-HO7Vrd#@oy}B9Dit^VFfEk)Hj5_lAUYH9MK ziAtjnETkoCLJ%>aRECnZspwiYQcBHnfL!~KE@I*0V%n(?DPW#}dp_FwHi}fQh}Ego 
zpK~)L@gO;=FCbkwoAv+IDciiHU0+`L#6E^d^V=In#98k)Y4?fOx7p;UfW9eGoT)&-zJ^+-+M# zY4uTh`9=FVPmN3j4CB$+7*eO6S-C5iKM$BzH3TniQKm>D6@T)4&z0-fJZ{C9aJ`EI zOFjRnTkG3f2|q% zh1{kR`s?*MbnO1`Qcx~IxVj!11?W#Gd6?50^&uAvT~;f4mS*M2kW!@0Ls|_c6t>q> zw?yySi?#;S-3rUam3^h%;OCgzTp_#>kXb9cDb)1Ms>(%8TtyhmRj$l=8|FZ)kZ$)O z3gK|7^G{qehJKo>V2hd;;MFC5bgh>zcN#T_XLB1{+jQpyy4@N-Jz#*jV{`K_bs2XE zEmTsVVWr)8O@(}@#HRKnRMn$E;9u|AdxbMl)PC>~r8%hiAK0`q)h)c4y{U_9QX3v} zk_iPB`Tt2TZ43=}Q4^HX+YN)H2FP%rVZccJAVdCVQf}&{AeII?Y;a?L_4bJp7wHe~ zf3N`H4_PuwlbR*w?JY(QP-<&Rhj}#&>4KkkW;%*(C!#V2lgx2G06$3D1UPVZ%# z$Fzp;eqYFq%2}tu<lB_n{ zL-vW+s1G7N!>QF5@cZ`d`#+~o;B{7w@xEl??oF*5pnhe%N#utG$em9U{fyi+8^ z&{nXdHXM`lR2X4*a$>cC%BH<6ybJL*{QLoWbo~4x(Q$*m7X>gi7?uswBIW(OCnUt) z0wcVqPwiIuGZ2Uo$%xE&!Pt^Nqp=n6(qxca0DoRW;|xg z!8o=@hQofOuqPr*FsWM<=71_ocB@cLaUKO6#?v65U2In$k&r`ZWD!D7DB`)o8);H1 zFO&#y{vgr(lF}(*kuG4|DjPotnef_Z zA{uAt!-31pi8)K%&B^PdqI7%B^m%5#KJ(9nSKc0)Y4EaaA@#8*aTxV3QpN$ z+e=BU1GW(6&{FZY^G5|&Wo`6^6%V05t^KjOer)BLzrMkaT)10CpRcxFa#1G=?=$_Y zRX6Zddl0Yt5|1B7xG!AF>h}KV_xDgK7H)U$;*h0Q9PR=(N|fuo57lrrL6oA|Sqq*M zXyicgV2`@ZS%PIb$^9xSu{->fAB083HY(LcrsB8*E7_M73ae@0)5NEf`R zRbV$iC!64WwH{yMs^-4U1hQA&wrTh;*L;<_@eI=N#Q+-lWD2_-%@?_&CAu%Bj9kNX>rAiAItk1Jm>|wp_@o<}iOct#4ZmueGO34)M7xCK5PHGpdt8Q>; z_JukhvK8B{q>ecE)@JL5q0T4n2`-iA@!@C08eUOSbq{+G>Ou#UjqOS5c+17+e8+Q+ zE)`d`%fxZm&Pljs$6lK!sWUm3#5i!%byKC|%(KL0!3n<9%{R!5Fip>%m*{P?G4)AE zaX7jce!XJ3Y&ZmwnWe-AETP0GPvM*ZzwHh({0ECkUVqSc`TkfmF z@0oaP_5Z9r*0YdEcx%x?atKr#zR9*Rd!$qmS)U!YKekePdFyGt9&4U^c>oDY;9j$c zr0}*>U#d0ZcSLh{S`?qJ@y#pkwh1p{uC^fJ3fk9w{(_txuAQda4KlF_Ew2%)IPaaS z>|T^qI&!7!hFcdgthQ8)PGsWXaT!~KWIr@lTeUJ1HY0K$bVarreSay{^NnAT_H_}# z?X~MIB6#bjpLiS1rjy1HocygC2PiLx@hqTWg}4!uiqoGjQVl~N zw5nQFnVi#h%Y_^aUXolUkKqUp9UXs`M@#Jv*K1n4<+>qYD~Y#}PL8Te(;lz3De0H~ zG=yD$loBRT)BNJrc4p0BvVOM~-s8rvt@~4{tJOZE**biGgmjMjo(`;EvTtcF(JN~2 zRv<52k0_>q&~19mzE!8OYCKA@@jAMGIN$32?1_{-DA=(N4xCm$UJ=@TIK6c&c9(vbT$9x2;W6#^vfJ;;_+!XIqF1_+7O73IT}tM}>K*kNJa% 
zpo=)PS)E4Sduk5A?+j$`?>Q`A9ar9nQlRCEIvy*s76^EE#tvJVTu6vcDt~CTp>#`H=Y(E-hMoHfrRNRKHBWry$<)uT1NQ*a1cvomKcX2 zku8<#wykEFXNY+W#vStHz|9CTItL#}%t&>xHM47}sp(&niU+E>7Z^BHgOZO*#}iAN zU*eNxD~NB|dsJIFu~ydAHV3j~LwZ1D4He}}rOTKEQ*K#>nG4UE?DaVyRqJA}zrZBS z9a*j3l={pIpDzbW`n+|8%XMbTOsQFRAg^HS*?r&!CXy~UJCw-9-QAVx#n~0BcvM_< zIocfrbWINc2e@?QtHyW=7QGx&r+O2nd8AT>6I-nvdtJyBgcbY>Wg}uiG8f-luiU;t zrq?J1|B}_823N*(@X@$w-r-w()M(<>C9Geo&z?W_I#2X_*#wj~)akF3opvyGTmSx3 za0#!QQJX1~>#N9%r@HDb@uMgm!c|Q2D$`wfNi#VCI8`L?;4r=>va9;A@4{LE z16q5q*25;oKHAvu=FI@T++u~sZCI|~Uq^XB&Y&-XtOZ_Zkiq|}wk|0d_;pvQvJ`}P74~#NZMao@_CbN2tjduxu z3hc^-@MK%Pn@28(kLeAzcoM4q=zkQZi62cOv(5)~m4ky2s*sA-4kKnkXzc0oNb+I_ zCNm>6B3L>)rUgNJP%FsA=j7wAvs@%YdgieFKtdPfrM4$ch5Xuyb6n;=$3+nA&4s!YtN}j*%qJL`-`HMGNx8-I$#165n73R{aDA zz>nO!Qys)$_t6&mYOjkEFiO)HdzRSs+t76H;`G~O6qy*o*Od%78X}A^ ztksau(bT3_?FFTd`cXNOYzE1(i~Gul1J@E>u|bsGdi7D>m5L{4$Y+dzQj5SRhV6%+ zA{inAg!WzaV0W#M%(>Gft6X>X<`=i7<%V^|3ztg*ieX^93PgoSE~1+)5X2#I2ir-{ zx9kZ8`NRS+g?s;41LvLop35DaKBm93|BV{hD?}`+LooopzLoXmTX)fg60dnt9{hp1&-}lh&5D&u}|@Re%!(r^n_x2CAhN z&B0=yP93;Y)Lf%~_Xx7lm+qtvi8Xu?PcF0G{9PoR>TyHigYn)4F=}5rG>0gw-}wMh zt{xAS*?NAFF3A3U@7#DRzi)TTfh6bgts#Ez`Xjx<1l+H(BNz5c*<|txk}yqGRv8WZ zro7!crqIrP1+t!j-Mm%UM+mK-WoyHIX|WE;SxL8ZAxUG+$*$i-9s*!G%dYT!Ll^4t zL^wHLl&2qcQN$KS)-l4EH9lyycc`3d&2#7NioJD$lT_*Y=G&VVgYKmSz<>AWuTN*| zsRP!${>Yi_$nxAi@bcVkG`YA(y8Lr77>d5bPI>;QMy(IxxFC*Si9=grsWq^z8T(x6 z{cp%}Yg9g(nXi284P6U|yZ5KJuMCJhb>>Sh#~6&A;>v%>;O~z!0iqFM0F4$;QJ3Yn zkOp_bx!uTOVnUgBCnUvL;NW(Wf^Q-z+^r6Q)wI1P!5p8XY1gB^HoDVJe&j|Dl@MWe zPHr75KF&;vLi<_cpa4w{bDkXaW{Z_E((2La3A$@8NV(nj#sLl;7?|S*B8Yt~83ga`!3?h>u-P*2kybDz;h} zwTrKY<_uG5i`C1fE|*4Ip5}2fKD3wBv;xDROBO!^O10;q%T=4UPNKJ1k~9Eq;EndP zG7^4CV?KRDYd?3KvsiH(C_OpT83QYu z*4hugZqcXM?e{X~P_=hf`^^`+P#g)|QT~WAhJG_I`4s+kt+uIc&T03Ylt9hSL4rOR zoIC3INl+Fs$_{(VVpF4pdzIc>F|9{11Bzw`RZ-pv@P?Kv)F7J9xoI+`E4c&ij!#u$ zvt-;^_Y*>lvpvZO8#Afb>!&CO3k#c(shKes2RjEFCp()NCl|XJr@5(_Iftp48N0C{ z|Nq~Kq@UhWK#u?S`Ggmpj}GvLlAK7<4=0W}9wx$p&>Lv~TzVNL=MLBlo{=8j@j 
zM>p#Zb@{W8hqLd^e0nm6?*!MxAIKInFgZXV*Uw7<8kJ9BS;V#t_{(TJ0DcKVE8fM7 z+xs!S3qK!EI$ZO|yr11PP~P?rZ~Xuwkefs|TY>OU7^@uw0i^_M0Qf=74MJH`f@qnA zWSE5)84WcC1cD`t;w6hgsfiMWy^&r7)p{WOnJL^|?(Z_*Ytntap_l#U>ZJ Date: Thu, 16 Apr 2026 16:15:16 +0100 Subject: [PATCH 12/13] attribute python scripts --- scripts/pdf_to_md/extractor.py | 5 +++++ scripts/pdf_to_md/pdf_to_md.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/scripts/pdf_to_md/extractor.py b/scripts/pdf_to_md/extractor.py index b9d5fd2..1be2722 100755 --- a/scripts/pdf_to_md/extractor.py +++ b/scripts/pdf_to_md/extractor.py @@ -1,3 +1,8 @@ +# Adapted in part from: +# https://github.com/aliceisjustplaying/claude-skill-pdf-to-markdown +# Original upstream license: MIT +# See THIRD_PARTY_NOTICES.md for attribution details. + """ PDF extraction with multiple backends: - Fast mode: PyMuPDF with multi-strategy table detection (good for simple tables) diff --git a/scripts/pdf_to_md/pdf_to_md.py b/scripts/pdf_to_md/pdf_to_md.py index 4ee6aad..bf58069 100755 --- a/scripts/pdf_to_md/pdf_to_md.py +++ b/scripts/pdf_to_md/pdf_to_md.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 from __future__ import annotations +# Adapted in part from: +# https://github.com/aliceisjustplaying/claude-skill-pdf-to-markdown +# Original upstream license: MIT +# See THIRD_PARTY_NOTICES.md for attribution details. 
+ """ PDF to Markdown Converter for LLM Context From f170e25d88d4d2dbf0629b55675a98a4615ed320 Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Thu, 16 Apr 2026 16:21:38 +0100 Subject: [PATCH 13/13] add third party notice --- THIRD_PARTY_NOTICES.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 THIRD_PARTY_NOTICES.md diff --git a/THIRD_PARTY_NOTICES.md b/THIRD_PARTY_NOTICES.md new file mode 100644 index 0000000..acde86c --- /dev/null +++ b/THIRD_PARTY_NOTICES.md @@ -0,0 +1,22 @@ +# Third-Party Notices + +This repository is primarily licensed under GPL-3.0, but it also contains +specific files adapted from third-party code under different terms. + +## `pdf_to_md` scripts + +Files: +- `scripts/pdf_to_md/pdf_to_md.py` +- `scripts/pdf_to_md/extractor.py` + +These files include code adapted from: +- Repository: `aliceisjustplaying/claude-skill-pdf-to-markdown` +- Source: + +Upstream licensing note: +- The upstream repository README states `MIT` as the license. +- Source checked: + +Attribution note: +- This repository records the upstream source and preserves attribution for the + adapted files listed above. \ No newline at end of file