From b63d98f1a7c74e66f3def6b122efdff3328e3c97 Mon Sep 17 00:00:00 2001 From: Dmitriy Solodukha Date: Fri, 6 Mar 2026 23:09:02 +0200 Subject: [PATCH 1/2] feat: add generate-briefs and audit-articles agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit generate-briefs.mjs + generate-briefs.yml: - Keeps content queue full — runs daily at 08:00 UTC before article generation - Reads CONTENT_PLAN.md, counts ready topics - If < MIN_READY (default 5) — calls Claude Haiku to generate new briefs - Reads KEYWORD_RESEARCH.md and existing briefs to avoid duplicates - Writes brief files to docs/briefs/, updates CONTENT_PLAN.md - Supports --dry-run, --count=N flags - Sends Telegram notification when briefs added audit-articles.mjs + audit-articles.yml: - Weekly audit of all published articles — runs Sundays at 07:00 UTC - Checks: required components present, broken internal links, missing OG images, blog-data.ts / llms.txt sync, duplicate slugs, stale year references, emoji violations - Opens GitHub Issue (label: audit) with categorized findings - Updates existing issue with new run instead of creating duplicates - Closes issue automatically when re-audit passes - Sends Telegram notification with link to issues on errors - Supports --blog-glob flag for custom blog directory path docs/gitattributes.template: - Template to copy to client repo root as .gitattributes - merge=union for llms.txt and CONTENT_PLAN.md prevents conflicts when multiple article PRs are open simultaneously README.md + CLAUDE.md: - Documented all new workflows and scripts - Updated commands reference - Updated workflow table --- .github/workflows/audit-articles.yml | 183 ++++++++++++++++++ .github/workflows/generate-briefs.yml | 159 ++++++++++++++++ CLAUDE.md | 26 ++- README.md | 12 +- docs/gitattributes.template | 8 + scripts/audit-articles.mjs | 233 +++++++++++++++++++++++ scripts/generate-briefs.mjs | 255 ++++++++++++++++++++++++++ 7 files changed, 864 
insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/audit-articles.yml
 create mode 100644 .github/workflows/generate-briefs.yml
 create mode 100644 docs/gitattributes.template
 create mode 100644 scripts/audit-articles.mjs
 create mode 100644 scripts/generate-briefs.mjs

diff --git a/.github/workflows/audit-articles.yml b/.github/workflows/audit-articles.yml
new file mode 100644
index 0000000..ebc9a2a
--- /dev/null
+++ b/.github/workflows/audit-articles.yml
@@ -0,0 +1,183 @@
+name: Article Audit
+
+# Reusable workflow — weekly audit of all published blog articles.
+# Checks structural compliance, broken internal links, file sync, and style violations.
+# Opens (or updates) a GitHub Issue with labeled findings if problems are found.
+#
+# Client usage:
+#   jobs:
+#     audit:
+#       uses: cybrixcc/leadhunter-engine/.github/workflows/audit-articles.yml@master
+#       with:
+#         config_path: ./config.yml
+#         blog_glob: src/app/blog
+#       secrets: inherit
+
+on:
+  workflow_call:
+    inputs:
+      config_path:
+        description: 'Path to config.yml in the calling repo'
+        required: false
+        default: './config.yml'
+        type: string
+      blog_glob:
+        description: 'Path to blog directory containing article subdirectories'
+        required: false
+        default: 'src/app/blog'
+        type: string
+    secrets:
+      GH_PAT:
+        required: false
+      TELEGRAM_BOT_TOKEN:
+        required: false
+      TELEGRAM_CHAT_ID:
+        required: false
+
+  workflow_dispatch:
+    inputs:
+      blog_glob:
+        description: 'Path to blog directory'
+        required: false
+        default: 'src/app/blog'
+        type: string
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout calling repository
+        uses: actions/checkout@v4
+
+      - name: Checkout engine
+        uses: actions/checkout@v4
+        with:
+          repository: cybrixcc/leadhunter-engine
+          path: .engine
+          ref: master
+          token: ${{ secrets.GH_PAT || github.token }}  # fall back so an unset GH_PAT is not passed as an empty token
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Load config and set env vars
+        run: |
+          CONFIG="${{ inputs.config_path || './config.yml' }}"
+          if [ -f "$CONFIG" ]; then
+            SITE_NAME=$(grep '^site_name:' "$CONFIG" | sed 's/site_name:\s*//' | tr -d '"' | tr -d "'")
+            echo "SITE_NAME=${SITE_NAME}" >> $GITHUB_ENV
+          fi
+
+      - name: Copy engine scripts
+        run: cp -r .engine/scripts ./scripts
+
+      - name: Run article audit
+        id: audit
+        continue-on-error: true  # findings must not fail the job; later steps branch on outcome / exit_code
+        run: |
+          node scripts/audit-articles.mjs \
+            --blog-glob="${{ inputs.blog_glob || 'src/app/blog' }}" || CODE=$?
+          echo "exit_code=${CODE:-0}" >> "$GITHUB_OUTPUT"  # 0 = clean, 1 = errors, 2 = warnings only
+          exit "${CODE:-0}"
+
+      - name: Open or update GitHub Issue if issues found
+        if: always() && (steps.audit.outcome == 'failure' || steps.audit.outputs.exit_code != '0')
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+
+            let body = '## Weekly Article Audit\n\nAudit script failed to produce a report. Check the Actions log.';
+            if (fs.existsSync('audit-report.md')) {
+              body = fs.readFileSync('audit-report.md', 'utf8');
+            }
+
+            const label = 'audit';
+
+            // Ensure label exists
+            try {
+              await github.rest.issues.createLabel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                name: label,
+                color: 'e4e669',
+                description: 'Automated article audit findings',
+              });
+            } catch (err) {
+              if (err.status !== 422) throw err; // 422 = label already exists; anything else is a real failure
+            }
+
+            // Check for existing open audit issue
+            const existing = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              labels: label,
+              state: 'open',
+            });
+
+            const today = new Date().toISOString().slice(0, 10);
+
+            if (existing.data.length > 0) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: existing.data[0].number,
+                body: `### Re-audit — ${today}\n\n${body}`,
+              });
+              console.log(`Commented on existing issue #${existing.data[0].number}`);
+            } else {
+              const created = await github.rest.issues.create({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                title: `Article Audit: issues found — ${today}`,
+                body,
+                labels: [label],
+              });
+              console.log(`Created issue #${created.data.number}`);
+            }
+
+      - name: Close resolved audit issue if all clear
+        if: steps.audit.outcome == 'success'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const existing = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              labels: 'audit',
+              state: 'open',
+            });
+            for (const issue of existing.data) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issue.number,
+                body: `Audit re-run passed — all issues resolved. Closing.`,
+              });
+              await github.rest.issues.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issue.number,
+                state: 'closed',
+              });
+              console.log(`Closed resolved issue #${issue.number}`);
+            }
+
+      - name: Send Telegram notification on errors
+        if: steps.audit.outcome == 'failure' && steps.audit.outputs.exit_code != '2'  # skip warnings-only runs
+        run: |
+          if [ -z "${{ secrets.TELEGRAM_BOT_TOKEN }}" ] || [ -z "${{ secrets.TELEGRAM_CHAT_ID }}" ]; then exit 0; fi  # the secrets context is not available in step-level if:
+          LABEL="${SITE_NAME:-Blog}"
+          ISSUES_URL="https://github.com/${{ github.repository }}/issues?q=label%3Aaudit+is%3Aopen"
+          MESSAGE="${LABEL} audit found issues. Review: ${ISSUES_URL}"
+          curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
+            -d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \
+            -d text="$MESSAGE" \
+            > /dev/null
diff --git a/.github/workflows/generate-briefs.yml b/.github/workflows/generate-briefs.yml
new file mode 100644
index 0000000..8d1e34c
--- /dev/null
+++ b/.github/workflows/generate-briefs.yml
@@ -0,0 +1,159 @@
+name: Generate Briefs
+
+# Reusable workflow — keeps the content queue full by auto-generating briefs
+# when the number of "ready" topics drops below the minimum threshold.
+#
+# Designed to run daily at 08:00 UTC — one hour before generate-article.yml.
+#
+# Client usage:
+#   jobs:
+#     generate-briefs:
+#       uses: cybrixcc/leadhunter-engine/.github/workflows/generate-briefs.yml@master
+#       with:
+#         config_path: ./config.yml
+#       secrets: inherit
+
+on:
+  workflow_call:
+    inputs:
+      config_path:
+        description: 'Path to config.yml in the calling repo'
+        required: false
+        default: './config.yml'
+        type: string
+      min_ready:
+        description: 'Minimum number of ready topics to maintain (default: 5)'
+        required: false
+        default: 5
+        type: number
+      force_count:
+        description: 'Force generate this many briefs regardless of queue size (0 = auto)'
+        required: false
+        default: 0
+        type: number
+      dry_run:
+        description: 'Dry run — preview without writing files'
+        required: false
+        default: false
+        type: boolean
+    secrets:
+      ANTHROPIC_API_KEY:
+        required: true
+      GH_PAT:
+        required: false
+      TELEGRAM_BOT_TOKEN:
+        required: false
+      TELEGRAM_CHAT_ID:
+        required: false
+
+  workflow_dispatch:
+    inputs:
+      force_count:
+        description: 'Force generate this many briefs (0 = auto)'
+        required: false
+        default: 0
+        type: number
+      dry_run:
+        description: 'Dry run — preview without writing files'
+        required: false
+        default: false
+        type: boolean
+
+permissions:
+  contents: write
+
+jobs:
+  generate-briefs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout calling repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GH_PAT || secrets.GITHUB_TOKEN }}
+
+      - name: Checkout engine
+        uses: actions/checkout@v4
+        with:
+          repository: cybrixcc/leadhunter-engine
+          path: .engine
+          ref: master
+          token: ${{ secrets.GH_PAT || github.token }}  # fall back so an unset GH_PAT is not passed as an empty token
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: .engine/package.json
+
+      - name: Install engine dependencies
+        run: npm ci --prefix .engine
+
+      - name: Load config and set env vars
+        run: |
+          CONFIG="${{ inputs.config_path || './config.yml' }}"
+          if [ -f "$CONFIG" ]; then
+            SITE_NAME=$(grep '^site_name:' "$CONFIG" | sed 's/site_name:\s*//' | tr -d '"' | tr -d "'")
+            GIT_USER_NAME=$(grep '^git_user_name:' "$CONFIG" | sed 's/git_user_name:\s*//' | tr -d '"' | tr -d "'" || echo "Blog Bot")
+            GIT_USER_EMAIL=$(grep '^git_user_email:' "$CONFIG" | sed 's/git_user_email:\s*//' | tr -d '"' | tr -d "'" || echo "bot@example.com")
+            echo "SITE_NAME=${SITE_NAME}" >> $GITHUB_ENV
+            echo "GIT_USER_NAME=${GIT_USER_NAME}" >> $GITHUB_ENV
+            echo "GIT_USER_EMAIL=${GIT_USER_EMAIL}" >> $GITHUB_ENV
+          fi
+
+      - name: Setup Git
+        run: |
+          git config user.name "${GIT_USER_NAME:-Blog Bot}"
+          git config user.email "${GIT_USER_EMAIL:-bot@example.com}"
+
+      - name: Copy engine scripts
+        run: cp -r .engine/scripts ./scripts
+
+      - name: Generate briefs
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: |
+          ARGS=""
+          if [ "${{ inputs.dry_run }}" = "true" ]; then ARGS="--dry-run"; fi
+          if [ "${{ inputs.force_count }}" != "0" ] && [ -n "${{ inputs.force_count }}" ]; then
+            ARGS="$ARGS --count=${{ inputs.force_count }}"
+          fi
+          node scripts/generate-briefs.mjs $ARGS
+
+      - name: Commit and push if changed
+        if: inputs.dry_run != true
+        run: |
+          git add docs/briefs/ CONTENT_PLAN.md
+          if git diff --staged --quiet; then
+            echo "No new briefs generated — queue already full."
+          else
+            ADDED=$(git diff --staged --name-only | grep 'docs/briefs/' | wc -l | tr -d ' ')
+            git commit -m "chore: auto-generate ${ADDED} brief(s) to refill content queue"
+            git pull --rebase origin $(git branch --show-current)
+            git push origin $(git branch --show-current)
+            echo "BRIEFS_ADDED=${ADDED}" >> $GITHUB_ENV
+          fi
+
+      - name: Send Telegram notification
+        if: env.BRIEFS_ADDED != ''
+        run: |
+          if [ -z "${{ secrets.TELEGRAM_BOT_TOKEN }}" ] || [ -z "${{ secrets.TELEGRAM_CHAT_ID }}" ]; then exit 0; fi  # the secrets context is not available in step-level if:
+          MESSAGE="${SITE_NAME:-Blog}: ${BRIEFS_ADDED} new brief(s) added to content queue"
+          curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
+            -d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \
+            -d text="$MESSAGE" \
+            > /dev/null
+
+      - name: Send failure notification
+        if: failure()
+        run: |
+          if [ -n "${{ secrets.TELEGRAM_BOT_TOKEN }}" ] && [ -n "${{ secrets.TELEGRAM_CHAT_ID }}" ]; then
+            LABEL="${SITE_NAME:-Blog}"
+            MESSAGE="${LABEL}: Brief generation failed. Check Actions: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+            curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
+              -d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \
+              -d text="$MESSAGE" \
+              > /dev/null
+          fi
diff --git a/CLAUDE.md b/CLAUDE.md
index 67f4f0e..a65a775 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -13,14 +13,19 @@ Shared engine for automated SEO article generation for client blogs.
Cloned from
 ## Commands

 ```bash
-npm install # install deps (Node 22+)
-node scripts/generate-article.mjs # generate next ready topic
-node scripts/generate-article.mjs --dry-run # preview without committing
-node scripts/generate-article.mjs --topic=6 # generate specific topic
-node scripts/gsc-index-check.mjs # check/submit GSC index
-node scripts/gsc-keyword-performance.mjs # weekly keyword report
-node scripts/ai-citation-research.mjs # AI citation research
-bash scripts/geo-health-check.sh # GEO content health score
+npm install # install deps (Node 22+)
+node scripts/generate-briefs.mjs # generate briefs if queue < 5 ready
+node scripts/generate-briefs.mjs --count=3 # force generate 3 briefs
+node scripts/generate-briefs.mjs --dry-run # preview without writing files
+node scripts/generate-article.mjs # generate next ready topic
+node scripts/generate-article.mjs --dry-run # preview without committing
+node scripts/generate-article.mjs --topic=6 # generate specific topic
+node scripts/audit-articles.mjs # audit all published articles
+node scripts/audit-articles.mjs --blog-glob=src/app/blog # custom blog directory path
+node scripts/gsc-index-check.mjs # check/submit GSC index
+node scripts/gsc-keyword-performance.mjs # weekly keyword report
+node scripts/ai-citation-research.mjs # AI citation research
+bash scripts/geo-health-check.sh # GEO content health score
 ```

 ## Architecture
@@ -103,10 +108,12 @@ All workflows support `workflow_call` with `config_path` input. They:

 | Workflow | Trigger in client |
 |---------|------------------|
-| `generate-article.yml` | schedule or manual, generates articles |
+| `generate-briefs.yml` | daily 08:00 UTC, refills content queue if < 5 ready topics |
+| `generate-article.yml` | daily 09:00 UTC or manual, generates next article |
+| `audit-articles.yml` | weekly (Sundays), audits all published articles |
 | `gsc-index-check.yml` | schedule, submits unindexed pages to GSC |
 | `gsc-keyword-performance.yml` | schedule, weekly keyword report → GitHub issue |
 | `geo-health-check.yml` | schedule, GEO score → GitHub issue |
-| `ai-article-review.yml` | PR trigger, auto-reviews blog article PRs |
+| `ai-article-review.yml` | PR trigger, reviews new article PRs with auto-fixes |

 ### Required secrets (in client repo)
diff --git a/README.md b/README.md
index 6bc243c..e327ac3 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,18 @@ Each client blog is a standalone Next.js site that calls this engine via GitHub

 ## What it does

+**Brief generation**
+Keeps the content queue full automatically. Reads the keyword research doc and existing topics, generates new briefs with Claude when the queue drops below 5 ready topics. Runs daily at 08:00 UTC before article generation.
+
 **Article generation**
 Reads the content plan, researches the topic, writes a fully structured SEO article, and opens it as a Pull Request. The client reviews and merges — no writing required.

+**AI article review**
+Every Pull Request with a new article is automatically reviewed for quality, SEO structure, and brand consistency before it goes live. Auto-fixes issues and commits to the PR branch — up to 5 rounds.
+
+**Weekly article audit**
+Every Sunday, scans all published articles for broken internal links, missing required components, desync between blog-data.ts and llms.txt, stale dates, and style violations. Opens a GitHub Issue with a labeled list of findings. Closes the issue automatically when all issues are resolved.
+ **Google Search Console monitoring** Checks which pages are indexed, flags coverage issues, and tracks keyword performance over time. @@ -20,9 +29,6 @@ Scores how well the site is optimized for AI-powered search (ChatGPT, Perplexity **Citation research** Monitors whether the brand is being cited by AI models and authoritative sources. Tracks competitor mentions for comparison. -**AI article review** -Every Pull Request with a new article is automatically reviewed for quality, SEO structure, and brand consistency before it goes live. - --- ## How it connects to a client blog diff --git a/docs/gitattributes.template b/docs/gitattributes.template new file mode 100644 index 0000000..b6b5681 --- /dev/null +++ b/docs/gitattributes.template @@ -0,0 +1,8 @@ +# Copy this file to .gitattributes in the client repo root. +# +# merge=union prevents conflicts in append-only files when multiple +# article PRs are open simultaneously. Git merges both sets of lines +# instead of raising a conflict — safe because these files only grow. + +public/llms.txt merge=union +CONTENT_PLAN.md merge=union diff --git a/scripts/audit-articles.mjs b/scripts/audit-articles.mjs new file mode 100644 index 0000000..e08e697 --- /dev/null +++ b/scripts/audit-articles.mjs @@ -0,0 +1,233 @@ +/** + * audit-articles.mjs + * + * Weekly audit of all published blog articles. Checks structural compliance, + * broken internal links, sync between blog-data.ts / llms.txt and actual files, + * and common style violations. + * + * Exit codes: + * 0 — all clear + * 1 — errors found (broken links, missing required components, desync) + * 2 — warnings only (stale dates, missing OG image, minor issues) + * + * The calling workflow reads exit code and creates a GitHub Issue if > 0. 
+ *
+ * Usage:
+ *   node scripts/audit-articles.mjs
+ *   node scripts/audit-articles.mjs --blog-glob="src/app/blog"
+ */
+
+import fs from "fs";
+import path from "path";
+
+const args = process.argv.slice(2);
+const BLOG_DIR =
+  args.find((a) => a.startsWith("--blog-glob="))?.slice("--blog-glob=".length) ||
+  "src/app/blog";
+const BLOG_DATA_PATH = "src/lib/blog-data.ts";
+const LLMS_TXT_PATH = "public/llms.txt";
+
+const issues = [];
+
+function issue(severity, file, message) {
+  issues.push({ severity, file, message });
+  const prefix = severity === "ERROR" ? "[ERROR]" : "[WARN] ";
+  console.log(`${prefix} ${file}: ${message}`);
+}
+
+// --- 1. Collect article slugs ---
+
+if (!fs.existsSync(BLOG_DIR)) {
+  console.error(`Blog directory not found: ${BLOG_DIR}`);
+  process.exit(1);
+}
+
+const slugs = fs.readdirSync(BLOG_DIR).filter((f) => {
+  const p = path.join(BLOG_DIR, f);
+  return (
+    fs.statSync(p).isDirectory() &&
+    fs.existsSync(path.join(p, "page.tsx"))
+  );
+});
+
+console.log(`Auditing ${slugs.length} articles...\n`);
+
+// --- 2. blog-data.ts sync ---
+
+if (fs.existsSync(BLOG_DATA_PATH)) {
+  const blogData = fs.readFileSync(BLOG_DATA_PATH, "utf8");
+
+  // Parse declared slugs once so both checks accept any spacing and quote style.
+  const slugMatches = [
+    ...blogData.matchAll(/slug:\s*["']([^"']+)["']/g),
+  ].map((m) => m[1]);
+  const declared = new Set(slugMatches);
+  for (const slug of slugs) {
+    if (!declared.has(slug)) {
+      issue("ERROR", BLOG_DATA_PATH, `Missing entry for slug: "${slug}"`);
+    }
+  }
+
+  // Detect duplicates
+  const seen = new Set();
+  for (const s of slugMatches) {
+    if (seen.has(s)) issue("ERROR", BLOG_DATA_PATH, `Duplicate slug: "${s}"`);
+    seen.add(s);
+  }
+} else {
+  console.warn(`${BLOG_DATA_PATH} not found — skipping blog-data sync check.`);
+}
+
+// --- 3. llms.txt sync ---
+
+if (fs.existsSync(LLMS_TXT_PATH)) {
+  const llmsTxt = fs.readFileSync(LLMS_TXT_PATH, "utf8");
+  for (const slug of slugs) {
+    if (!llmsTxt.includes(`/blog/${slug}`)) {
+      issue("WARN", LLMS_TXT_PATH, `Missing entry for /blog/${slug}`);
+    }
+  }
+} else {
+  console.warn(`${LLMS_TXT_PATH} not found — skipping llms.txt sync check.`);
+}
+
+// --- 4. Per-article checks ---
+
+for (const slug of slugs) {
+  const filePath = path.join(BLOG_DIR, slug, "page.tsx");
+  const content = fs.readFileSync(filePath, "utf8");
+
+  // 4a. Required components
+  for (const component of [
+    "ArticleAuthor",
+    "FAQJsonLd",
+    "ArticleJsonLd",
+    "Header",
+    "Footer",
+  ]) {
+    if (!content.includes(component)) {
+      issue("ERROR", filePath, `Missing required component: ${component}`);
+    }
+  }
+
+  // 4b. Broken internal links (href="/blog/" pointing to non-existent article)
+  for (const match of content.matchAll(/href=["']\/blog\/([^"'/?#\s]+)\/?(?:[?#][^"']*)?["']/g)) {
+    const linkedSlug = match[1]; // slug only — a trailing slash, ?query or #fragment is not part of it
+    if (!slugs.includes(linkedSlug)) {
+      issue(
+        "ERROR",
+        filePath,
+        `Broken internal link: /blog/${linkedSlug} does not exist`
+      );
+    }
+  }
+
+  // 4c. Hardcoded absolute domain in hrefs (should use relative paths)
+  for (const match of content.matchAll(
+    /href=["'](https?:\/\/[^"']+)["']/g
+  )) {
+    const url = match[1];
+    // Flag internal-looking URLs that should be relative
+    if (
+      url.includes("/blog/") &&
+      !url.startsWith("https://schema.org") &&
+      !url.startsWith("https://twitter") &&
+      !url.startsWith("https://linkedin")
+    ) {
+      issue(
+        "WARN",
+        filePath,
+        `Hardcoded absolute URL in href — consider using relative path: ${url}`
+      );
+    }
+  }
+
+  // 4d. Missing opengraph-image.tsx
+  const ogPath = path.join(BLOG_DIR, slug, "opengraph-image.tsx");
+  if (!fs.existsSync(ogPath)) {
+    issue("WARN", filePath, `Missing opengraph-image.tsx`);
+  }
+
+  // 4e. ArticleAuthor missing date prop (the JSX tag may span lines or hold "/" in other props)
+  if (
+    content.includes("ArticleAuthor") &&
+    !content.match(/<ArticleAuthor\b[^>]*\bdate=/)
+  ) {
+    issue("WARN", filePath, `ArticleAuthor is missing the date prop`);
+  }
+
+  // 4f. Emoji in content (style violation)
+  if (/[\u{1F300}-\u{1F9FF}\u{2600}-\u{27BF}]/u.test(content)) {
+    issue("WARN", filePath, `Contains emoji — style violation`);
+  }
+
+  // 4g. Stale year: only mentions a past year, no current year
+  const currentYear = new Date().getFullYear().toString();
+  const prevYear = (parseInt(currentYear) - 1).toString();
+  if (
+    content.includes(prevYear) &&
+    !content.includes(currentYear) &&
+    !content.includes(String(parseInt(currentYear) + 1))
+  ) {
+    issue(
+      "WARN",
+      filePath,
+      `Only mentions ${prevYear} — may need year references updated to ${currentYear}`
+    );
+  }
+}
+
+// --- 5. Report ---
+
+console.log(`\n${"─".repeat(50)}`);
+console.log(`Audit complete`);
+console.log(`Articles checked : ${slugs.length}`);
+console.log(`Total issues : ${issues.length}`);
+
+const errors = issues.filter((i) => i.severity === "ERROR");
+const warnings = issues.filter((i) => i.severity === "WARN");
+console.log(` Errors : ${errors.length}`);
+console.log(` Warnings : ${warnings.length}`);
+
+// Write markdown report for GitHub Issue
+const today = new Date().toISOString().slice(0, 10);
+const reportLines = [
+  `## Weekly Article Audit — ${today}`,
+  ``,
+  `**Articles checked:** ${slugs.length} | **Errors:** ${errors.length} | **Warnings:** ${warnings.length}`,
+  ``,
+];
+
+if (errors.length > 0) {
+  reportLines.push(`### Errors (must fix)`);
+  reportLines.push(``);
+  for (const i of errors) {
+    reportLines.push(`- \`${i.file}\`: ${i.message}`);
+  }
+  reportLines.push(``);
+}
+
+if (warnings.length > 0) {
+  reportLines.push(`### Warnings (should fix)`);
+  reportLines.push(``);
+  for (const i of warnings) {
+    reportLines.push(`- \`${i.file}\`: ${i.message}`);
+  }
+  reportLines.push(``);
+}
+
+if (issues.length === 0) {
+  reportLines.push(`All checks passed. No issues found.`);
+  reportLines.push(``);
+}
+
+reportLines.push(
+  `---`,
+  `*Generated by audit-articles.mjs — runs every Sunday at 07:00 UTC*`
+);
+
+fs.writeFileSync("audit-report.md", reportLines.join("\n"));
+console.log(`\nReport written to audit-report.md`);
+
+if (errors.length > 0) process.exit(1);
+if (warnings.length > 0) process.exit(2);
diff --git a/scripts/generate-briefs.mjs b/scripts/generate-briefs.mjs
new file mode 100644
index 0000000..523d8bb
--- /dev/null
+++ b/scripts/generate-briefs.mjs
@@ -0,0 +1,255 @@
+/**
+ * generate-briefs.mjs
+ *
+ * Keeps the content queue full by auto-generating briefs when the number of
+ * "ready" topics in CONTENT_PLAN.md drops below MIN_READY.
+ *
+ * Designed to run daily (e.g. 08:00 UTC) — one hour before generate-article.mjs.
+ *
+ * Usage:
+ *   node scripts/generate-briefs.mjs # auto mode
+ *   node scripts/generate-briefs.mjs --count=3 # force generate N briefs
+ *   node scripts/generate-briefs.mjs --dry-run # preview without writing files
+ */
+
+import Anthropic from "@anthropic-ai/sdk";
+import fs from "fs";
+import path from "path";
+import { loadConfig } from "./lib/config-loader.mjs";
+
+const MIN_READY = 5;
+const BRIEFS_DIR = "docs/briefs";
+const CONTENT_PLAN_PATH = "CONTENT_PLAN.md";
+const KEYWORD_RESEARCH_PATH = "docs/KEYWORD_RESEARCH.md";
+
+const args = process.argv.slice(2);
+const DRY_RUN = args.includes("--dry-run");
+const FORCE_COUNT = parseInt(
+  args.find((a) => a.startsWith("--count="))?.split("=")[1] || "0"
+);
+
+// --- Load config ---
+const config = await loadConfig();
+const siteName = config.site_name || "Blog";
+const niche = config.niche || "content marketing";
+const productDescription =
+  config.citation_research?.product_description || siteName;
+const ctaUrl = config.cta_url || "/";
+
+// --- Read current state ---
+
+if (!fs.existsSync(CONTENT_PLAN_PATH)) {
+  console.error(`CONTENT_PLAN.md not found at ${CONTENT_PLAN_PATH}`);
+  
process.exit(1);
+}
+
+const contentPlan = fs.readFileSync(CONTENT_PLAN_PATH, "utf8");
+const keywordResearch = fs.existsSync(KEYWORD_RESEARCH_PATH)
+  ? fs.readFileSync(KEYWORD_RESEARCH_PATH, "utf8")
+  : "";
+
+// Count ready topics
+const readyCount = (contentPlan.match(/\|\s+ready\s+\|/g) || []).length;
+console.log(`Ready topics in queue: ${readyCount}`);
+
+const needed = FORCE_COUNT > 0 ? FORCE_COUNT : Math.max(0, MIN_READY - readyCount);
+
+if (needed === 0) {
+  console.log(`Queue is full (${readyCount} >= ${MIN_READY}). Nothing to do.`);
+  process.exit(0);
+}
+
+console.log(`Generating ${needed} new brief(s)...`);
+if (DRY_RUN) console.log("DRY RUN — no files will be written.");
+
+// --- Read existing content to avoid duplicates ---
+
+// Dry runs must not touch the filesystem, so a missing directory is read as empty.
+const briefsDirExists = fs.existsSync(BRIEFS_DIR);
+if (!briefsDirExists && !DRY_RUN) fs.mkdirSync(BRIEFS_DIR, { recursive: true });
+
+const briefFiles = (briefsDirExists ? fs.readdirSync(BRIEFS_DIR) : []).filter((f) =>
+  f.endsWith(".md")
+);
+const existingBriefSummaries = briefFiles.map((f) => {
+  const content = fs.readFileSync(path.join(BRIEFS_DIR, f), "utf8");
+  const titleMatch = content.match(/## Title\s*\n([^\n]+)/);
+  return titleMatch ? titleMatch[1].trim() : f;
+});
+
+const existingTitles = [
+  ...contentPlan.matchAll(/\|\s*\d+\s*\|\s*([^|]+)\|/g),
+].map((m) => m[1].trim());
+
+// --- Build prompt ---
+
+const client = new Anthropic();
+
+const prompt = `You are a content strategist for ${siteName}.
+
+Product: ${productDescription}
+
+Your task: generate ${needed} new article brief(s) for the ${siteName} blog.
+Niche: ${niche}
+
+${keywordResearch ? `## Keyword research to draw from:\n${keywordResearch}\n` : ""}
+
+## Existing article titles — do NOT duplicate these:
+${[...existingTitles, ...existingBriefSummaries].map((t) => `- ${t}`).join("\n")}
+
+## Rules:
+- Target keywords with commercial or informational intent relevant to the niche
+- Each article must be directly useful to the target audience
+- Tone: direct, expert, no fluff
+- Each brief must follow the EXACT format below — no deviations
+
+## Brief format:
+\`\`\`
+# Brief: [Article Title]
+
+## Title
+[Full article title]
+
+## Target Keywords
+- [primary keyword]
+- [secondary keyword]
+- [secondary keyword]
+
+## Search Intent
+[One sentence: who is searching and why]
+
+## Main Thesis
+[One sentence: the core argument of the article]
+
+## Key Points
+- [Point 1]
+- [Point 2]
+- [Point 3]
+- [Point 4]
+- [Point 5]
+
+## Why It Matters
+[2-3 sentences: why this topic matters to the audience]
+
+## Brand Angle
+[2-3 sentences: how ${siteName} connects to this topic. CTA URL: ${ctaUrl}]
+
+## Internal Links
+- /blog/[existing-slug] — anchor: "[anchor text]"
+
+## Sources
+- [Source 1 — real, credible, publicly known]
+- [Source 2]
+- [Source 3]
+\`\`\`
+
+Generate exactly ${needed} brief(s). Return ONLY the briefs separated by a line containing only "---".
+No intro text, no explanations, no markdown code fences around the output.`;
+
+console.log("Calling Claude API...");
+const message = await client.messages.create({
+  model: "claude-haiku-4-5-20251001",
+  max_tokens: 4000,
+  messages: [{ role: "user", content: prompt }],
+});
+
+const response = message.content[0].text.trim();
+const briefs = response
+  .split(/\n---\n/)
+  .map((b) => b.trim())
+  .filter((b) => b.length > 100);
+
+console.log(`Claude returned ${briefs.length} brief(s).`);
+
+if (briefs.length === 0) {
+  console.error("No valid briefs returned. Exiting.");
+  process.exit(1);
+}
+
+// --- Write briefs and update CONTENT_PLAN.md ---
+
+// Find next brief number. The listing gathered above is still current because
+// nothing has been written to the briefs directory yet.
+const existingNumbers = briefFiles
+  .map((f) => f.match(/^(\d+)/)?.[1] ?? "0")
+  .map((digits) => Number.parseInt(digits, 10))
+  .filter((n) => n > 0);
+let nextNumber =
+  existingNumbers.length > 0 ? Math.max(...existingNumbers) + 1 : 1;
+
+// Find next topic number
+const topicNumbers = [...contentPlan.matchAll(/\|\s*(\d+)\s*\|/g)].map((m) =>
+  Number.parseInt(m[1], 10)
+);
+let nextTopicNumber =
+  topicNumbers.length > 0 ? Math.max(...topicNumbers) + 1 : 1;
+
+const newTopicLines = [];
+
+for (const brief of briefs) {
+  const titleMatch = brief.match(/## Title\s*\n([^\n]+)/);
+  if (!titleMatch) {
+    console.warn("Could not extract title from brief — skipping.");
+    continue;
+  }
+  const title = titleMatch[1].trim();
+
+  const slug = title
+    .toLowerCase()
+    .replace(/[^a-z0-9\s-]/g, "")
+    .replace(/\s+/g, "-")
+    .replace(/-+/g, "-")
+    .replace(/^-|-$/g, "")
+    .slice(0, 60);
+
+  const fileName = `${String(nextNumber).padStart(2, "0")}-${slug}.md`;
+  const filePath = path.join(BRIEFS_DIR, fileName);
+
+  if (DRY_RUN) {
+    console.log(`[dry-run] Would write: ${filePath}`);
+    console.log(`[dry-run] Title: ${title}`);
+  } else {
+    fs.writeFileSync(filePath, brief + "\n");
+    console.log(`Written: ${filePath}`);
+  }
+
+  const paddedNum = String(nextTopicNumber).padEnd(2);
+  const paddedTitle = title.slice(0, 66).padEnd(66);
+  newTopicLines.push(
+    `| ${paddedNum} | ${paddedTitle} | ready | P1 |`
+  );
+
+  nextNumber++;
+  nextTopicNumber++;
+}
+
+// Update CONTENT_PLAN.md
+if (newTopicLines.length > 0) {
+  const currentCount = topicNumbers.length;
+  const newCount = currentCount + newTopicLines.length;
+
+  let updated = contentPlan;
+
+  updated = updated.replace(
+    /### Article Index \(\d+ topics\)/,
+    `### Article Index (${newCount} topics)`
+  );
+
+  updated = updated.replace(
+    /\n> Status values:/,
+    "\n" + newTopicLines.join("\n") + "\n\n> Status values:"
+  );
+
+  // replace() is a silent no-op when the legend line is missing — fail instead of dropping the rows.
+  if (!updated.includes(newTopicLines[0])) {
+    throw new Error(`"> Status values:" marker not found in ${CONTENT_PLAN_PATH}; new topics were not added.`);
+  }
+  if (DRY_RUN) {
+    console.log(`[dry-run] Would add ${newTopicLines.length} topic(s) to CONTENT_PLAN.md`);
+  } else {
+    fs.writeFileSync(CONTENT_PLAN_PATH, updated);
+    console.log(`Updated CONTENT_PLAN.md with ${newTopicLines.length} new topic(s).`);
+  }
+}
+
+console.log("Done.");
From 3e2ded85dc4fdce37f9a3da525ec7cebe5b35d14 Mon Sep 17 00:00:00 2001
From: Dmitriy Solodukha
Date: Fri, 6 Mar 2026 23:20:01 +0200
Subject: [PATCH 2/2] fix: orphaned table rows and silently ignored min_ready input

- Use /\n+> Status values:/ regex to collapse any blank lines before the
  legend, preventing new rows from being orphaned outside the table
- Accept --min-ready=N CLI arg in generate-briefs.mjs
- Pass inputs.min_ready from workflow to script via --min-ready flag
---
 .github/workflows/generate-briefs.yml |  4 ++++
 scripts/generate-briefs.mjs           | 12 ++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/generate-briefs.yml b/.github/workflows/generate-briefs.yml
index 8d1e34c..60c1fcc 100644
--- a/.github/workflows/generate-briefs.yml
+++ b/.github/workflows/generate-briefs.yml
@@ -120,6 +120,10 @@ jobs:
           if [ "${{ inputs.force_count }}" != "0" ] && [ -n "${{ inputs.force_count }}" ]; then
             ARGS="$ARGS --count=${{ inputs.force_count }}"
           fi
+          MIN_READY="${{ inputs.min_ready }}"
+          if [ -n "$MIN_READY" ] && [ "$MIN_READY" != "5" ]; then
+            ARGS="$ARGS --min-ready=$MIN_READY"
+          fi
           node scripts/generate-briefs.mjs $ARGS

       - name: Commit and push if changed
diff --git a/scripts/generate-briefs.mjs b/scripts/generate-briefs.mjs
index 523d8bb..b2f1b2d 100644
--- a/scripts/generate-briefs.mjs
+++ b/scripts/generate-briefs.mjs
@@ -9,6 +9,7 @@
  * Usage:
  *   node scripts/generate-briefs.mjs # auto mode
  *   node scripts/generate-briefs.mjs --count=3 # force generate N briefs
+ *   node scripts/generate-briefs.mjs --min-ready=10 # custom threshold
  *   node scripts/generate-briefs.mjs --dry-run # preview without writing files
  */

@@ 
-17,7 +18,7 @@ import fs from "fs";
 import path from "path";
 import { loadConfig } from "./lib/config-loader.mjs";

-const MIN_READY = 5;
+const DEFAULT_MIN_READY = 5;
 const BRIEFS_DIR = "docs/briefs";
 const CONTENT_PLAN_PATH = "CONTENT_PLAN.md";
 const KEYWORD_RESEARCH_PATH = "docs/KEYWORD_RESEARCH.md";
@@ -27,6 +28,10 @@ const DRY_RUN = args.includes("--dry-run");
 const FORCE_COUNT = parseInt(
   args.find((a) => a.startsWith("--count="))?.split("=")[1] || "0"
 );
+const minReadyArg = args.find((a) => a.startsWith("--min-ready="))?.split("=")[1];
+const parsedMinReady = Number.parseInt(minReadyArg ?? "", 10);
+// NaN (flag absent or non-numeric) and negatives fall back to the default, so `needed` is never NaN.
+const MIN_READY = parsedMinReady >= 0 ? parsedMinReady : DEFAULT_MIN_READY;

 // --- Load config ---
 const config = await loadConfig();
@@ -235,8 +240,11 @@ if (newTopicLines.length > 0) {
     `### Article Index (${newCount} topics)`
   );

+  // Insert new rows directly before the "> Status values:" line,
+  // stripping any blank lines between the table and the legend so rows
+  // are never orphaned from the table by a blank line.
   updated = updated.replace(
-    /\n> Status values:/,
+    /\n+> Status values:/,
     "\n" + newTopicLines.join("\n") + "\n\n> Status values:"
   );