From 21eef0016e9992b5f71e5ff4be61d8d25911a2b8 Mon Sep 17 00:00:00 2001 From: Chris Alfano Date: Sat, 27 Jun 2026 18:01:14 -0400 Subject: [PATCH 1/3] chore(plans): open import-person-avatars Co-Authored-By: Claude Opus 4.8 (1M context) --- plans/import-person-avatars.md | 67 ++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 plans/import-person-avatars.md diff --git a/plans/import-person-avatars.md b/plans/import-person-avatars.md new file mode 100644 index 0000000..f9a5a07 --- /dev/null +++ b/plans/import-person-avatars.md @@ -0,0 +1,67 @@ +--- +status: done +depends: [] +specs: [] +issues: + - 130 +pr: +--- + +# Plan: import legacy person avatars + +## Scope + +Leadership feedback (#130): legacy users show initials where their codeforphilly.org +photo used to be. The importer brought blog-post media but never person avatars, +so imported people had `avatarKey: null`. + +A spike against the live laddr API found the source: `person.PrimaryPhotoID` → +the image at `GET /media/<PrimaryPhotoID>` (confirmed 200, image/jpeg). Projects have **no** +image field in laddr, so this is person avatars only. + +What ships: + +- **json-fetcher**: `RawPersonSchema` now parses `PrimaryPhotoID`. +- **importer**: for each person with a `PrimaryPhotoID`, fetch `/media/<PrimaryPhotoID>`, + run it through the existing `processAvatar` (square original + 128px thumb), + store both as gitsheets attachments (`avatar.jpg` + `avatar-128.jpg`) and set + `avatarKey = people/<slug>/avatar.jpg` — exactly the convention the avatar + upload route uses. Reuses the proven `fetchMediaBytes` + `BlobObject.write` + + `setAttachments` machinery (same as blog media), concurrency 4. + +## Implements + +# 130. No spec change — the avatar storage contract (api/people.md, behaviors/ +storage.md attachments) already exists; this just populates it at import time. 
+ +## Approach + +`fetchAndMaterializePersonAvatars(photoIdBySlug, sourceHost, …)` mirrors +`fetchAndMaterializeBlogMedia`: parallel fetch + `processAvatar`, returning +slug → {original, thumbnail}. The transact's people loop wires the attachments +- `avatarKey` for people that have one; failed fetches/decodes are skipped with +a warning (the person still imports). `hologit` hoisted to the transact top +(shared by people + blog attachment writes). + +## Validation + +- [x] `RawPersonSchema` parses `PrimaryPhotoID`. +- [x] Importer test: a person with `PrimaryPhotoID` gets `people/<slug>/avatar.jpg` + + `avatar-128.jpg` attachments and `avatarKey` set; a person without one + gets neither. (import-laddr 37/37.) +- [x] `npm run type-check && npm run lint` clean. + +## Risks + +- Fetch volume: one image per photo-bearing person. Concurrency-capped at 4 and + failures are non-fatal, matching blog media. `--limit` bounds it for testing. +- Many photo-bearing accounts are spam — but the spam-prune (#133) removes them + downstream, so net imported avatars skew to real members. + +## Notes + +## Follow-ups + +- The first photo-bearing accounts sampled in the spike were spam; harmless + (pruned later), but a reminder that import → prune ordering matters (already + documented in spam-detection.md / cutover.md). From 4b712c3fe88357e38716cdb8e5d9f941a1a594aa Mon Sep 17 00:00:00 2001 From: Chris Alfano Date: Sat, 27 Jun 2026 18:01:14 -0400 Subject: [PATCH 2/3] feat(import): import legacy person avatars (#130) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Legacy users showed initials where their codeforphilly.org photo used to be — the importer brought blog media but never person avatars. A spike found the source: person.PrimaryPhotoID → GET /media/<PrimaryPhotoID>. (Projects have no image field in laddr, so this is avatars only.) RawPersonSchema now parses PrimaryPhotoID. 
For each person with one, the importer fetches /media/, runs it through the existing processAvatar (square original + 128px thumb), stores both as gitsheets attachments (avatar.jpg + avatar-128.jpg), and sets avatarKey — the same convention as the avatar-upload route. Reuses the blog-media machinery (fetchMediaBytes + BlobObject.write + setAttachments), concurrency 4, failures non-fatal. Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/api/scripts/import-laddr/importer.ts | 100 ++++++++++++++++-- apps/api/scripts/import-laddr/json-fetcher.ts | 2 + apps/api/tests/import-laddr.test.ts | 48 +++++++++ 3 files changed, 144 insertions(+), 6 deletions(-) diff --git a/apps/api/scripts/import-laddr/importer.ts b/apps/api/scripts/import-laddr/importer.ts index 1e72026..80b4386 100644 --- a/apps/api/scripts/import-laddr/importer.ts +++ b/apps/api/scripts/import-laddr/importer.ts @@ -58,6 +58,7 @@ import type { } from '@cfp/shared/schemas'; import { openPublicStore, type PublicStore } from '../../src/store/public.js'; +import { processAvatar } from '../../src/lib/avatar.js'; import { fetchAllPages, RawBlogPostSchema, @@ -235,6 +236,9 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise` and stored as gitsheets attachments. + const photoIdBySlug = new Map(); for await (const row of fetchAllPages( '/people', RawPersonSchema, @@ -253,6 +257,9 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise`) into + // square original + 128px thumbnail buffers, keyed by person slug. + const avatarsBySlug = await fetchAndMaterializePersonAvatars( + photoIdBySlug, + opts.sourceHost, + fetchOpts, + log, + warnings, + ); + // ------------------------------------------------------------------------- // 3. 
One atomic gitsheets transaction: // - clear() each importer-owned sheet (deletes capture for free) @@ -480,13 +497,33 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise { + if (publicRepo === null) { + throw new Error('[import-laddr] internal: publicRepo not opened'); + } + const hologit = publicRepo.hologitRepo; + log(`[import] clear + upsert tags (${tags.length})`); await tx.tags.clear(); for (const t of tags) await tx.tags.upsert(t); - log(`[import] clear + upsert people (${people.length})`); + log(`[import] clear + upsert people (${people.length}, avatars: ${avatarsBySlug.size})`); await tx.people.clear(); - for (const p of people) await tx.people.upsert(p); + for (const p of people) { + const avatar = avatarsBySlug.get(p.slug); + if (avatar) { + // Mirror POST /api/people/:slug/avatar: store original + 128 thumb + // as attachments and point avatarKey at the conventional path. + const originalBlob = await BlobObject.write(hologit, avatar.original as unknown as string); + const thumbnailBlob = await BlobObject.write(hologit, avatar.thumbnail as unknown as string); + await tx.people.setAttachments(p, { + 'avatar.jpg': originalBlob, + 'avatar-128.jpg': thumbnailBlob, + }); + await tx.people.upsert({ ...p, avatarKey: `people/${p.slug}/avatar.jpg` }); + } else { + await tx.people.upsert(p); + } + } log(`[import] clear + upsert projects (${projects.length})`); await tx.projects.clear(); @@ -540,10 +577,6 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise 0) { @@ -839,6 +872,61 @@ async function fetchMediaBytes( } } +/** + * Fetch each person's laddr photo (`/media/`) and process it + * into a square original + 128px thumbnail (the same outputs the avatar-upload + * route produces). Returns a map of person slug → buffers for the transact + * callback to wire in via setAttachments. Failed fetches/decodes are skipped + * with a warning — the person still imports, just without an avatar. 
+ * + * Concurrency 4, matching the blog-media fetcher's politeness compromise. + */ +async function fetchAndMaterializePersonAvatars( + photoIdBySlug: Map, + sourceHost: string, + fetchOpts: FetchOptions, + log: (msg: string) => void, + warnings: Warnings, +): Promise> { + const fetchImpl = fetchOpts.fetchImpl ?? fetch; + const userAgent = fetchOpts.userAgent ?? 'cfp-importer/dev'; + const entries = [...photoIdBySlug.entries()]; + const out = new Map(); + if (entries.length === 0) return out; + + log(`[import] fetching ${entries.length} person avatars`); + + const CONCURRENCY = 4; + let cursor = 0; + const workers: Promise[] = []; + for (let w = 0; w < CONCURRENCY; w++) { + workers.push( + (async () => { + while (true) { + const idx = cursor++; + if (idx >= entries.length) return; + const [slug, photoId] = entries[idx]!; + const url = `https://${sourceHost}/media/${photoId}`; + const fetched = await fetchMediaBytes(url, fetchImpl, userAgent); + if (fetched === null) { + warnings.push(`[people] avatar fetch failed: ${url} (/${slug})`); + continue; + } + try { + const processed = await processAvatar(fetched.bytes); + out.set(slug, { original: processed.original, thumbnail: processed.thumbnail }); + } catch (err) { + warnings.push(`[people] avatar decode failed for /${slug} (${url}): ${describe(err)}`); + } + } + })(), + ); + } + await Promise.all(workers); + log(`[import] processed ${out.size}/${entries.length} person avatars`); + return out; +} + /** * Fetch every distinct media asset referenced across all blog posts, * derive the final filename per asset, then rewrite each post's body to diff --git a/apps/api/scripts/import-laddr/json-fetcher.ts b/apps/api/scripts/import-laddr/json-fetcher.ts index 5515835..26f2e07 100644 --- a/apps/api/scripts/import-laddr/json-fetcher.ts +++ b/apps/api/scripts/import-laddr/json-fetcher.ts @@ -75,6 +75,8 @@ export const RawPersonSchema = z AccountLevel: z.string().nullable().optional(), Newsletter: z.union([z.boolean(), 
z.number(), z.string()]).nullable().optional(), Twitter: z.string().nullable().optional(), + /** Emergence Media ID of the person's photo; fetch at `/media/`. */ + PrimaryPhotoID: z.number().int().nullable().optional(), Created: z.number().int().nullable().optional(), Modified: z.number().int().nullable().optional(), /** Present when `?include=Tags` */ diff --git a/apps/api/tests/import-laddr.test.ts b/apps/api/tests/import-laddr.test.ts index 1b35e50..3b461e2 100644 --- a/apps/api/tests/import-laddr.test.ts +++ b/apps/api/tests/import-laddr.test.ts @@ -55,6 +55,10 @@ function makeFetch(routes: MockRoutes): typeof fetch { return new Response('Not found', { status: 404 }); } const body = queue.shift()!; + // Binary routes (e.g. /media/) queue a Buffer — serve it raw. + if (Buffer.isBuffer(body)) { + return new Response(body, { status: 200, headers: { 'content-type': 'image/png' } }); + } return new Response(JSON.stringify(body), { status: 200, headers: { 'content-type': 'application/json' }, @@ -62,6 +66,12 @@ function makeFetch(routes: MockRoutes): typeof fetch { }) as typeof fetch; } +/** Smallest valid PNG (1×1) — sharp can decode it, so processAvatar works. */ +const TINY_PNG = Buffer.from( + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', + 'base64', +); + function envelope(rows: unknown[], total: number, limit: number, offset: number) { return { success: true, @@ -1009,6 +1019,44 @@ describe('importLaddrFromJson — orchestrator', () => { } }); + it('imports a person avatar from /media/ and sets avatarKey', async () => { + const { path: repo, cleanup } = await makeRepo(); + try { + const routes = mockRoutes(); + // Give alice a photo + serve it as a tiny PNG at /media/555. 
+ const peopleResp = routes.responses.get( + '/people?format=json&include=Tags&limit=200&offset=0', + ) as Array<{ data: Array> }>; + peopleResp[0]!.data[0]!.PrimaryPhotoID = 555; + routes.responses.set('/media/555?', [TINY_PNG]); + + const report = await importLaddrFromJson({ + sourceHost: 'example.test', + dataRepo: repo, + branch: 'legacy-import', + initialParent: 'empty', + now: '2026-05-18T00:00:00.000Z', + delayMs: 0, + pageSize: 200, + fetchImpl: makeFetch(routes), + }); + expect(report.commitHash).not.toBeNull(); + + const tree = await exec('git', ['ls-tree', '-r', '--name-only', 'HEAD'], { cwd: repo }); + const paths = tree.stdout.split('\n').filter(Boolean); + // alice has a photo → original + thumbnail attachments + expect(paths).toContain('people/alice/avatar.jpg'); + expect(paths).toContain('people/alice/avatar-128.jpg'); + // bob has no photo → no avatar attachments + expect(paths).not.toContain('people/bob/avatar.jpg'); + + const aliceToml = await exec('git', ['show', 'HEAD:people/alice.toml'], { cwd: repo }); + expect(aliceToml.stdout).toContain('avatarKey = "people/alice/avatar.jpg"'); + } finally { + await cleanup(); + } + }); + it('is idempotent: re-running on identical mock data makes no new commit', async () => { const { path: repo, cleanup } = await makeRepo(); try { From a603f3d3f24f3d1924bf467f77bb361fa7376065 Mon Sep 17 00:00:00 2001 From: Chris Alfano Date: Sat, 27 Jun 2026 18:02:13 -0400 Subject: [PATCH 3/3] chore(plans): mark import-person-avatars done (PR #143) Co-Authored-By: Claude Opus 4.8 (1M context) --- plans/import-person-avatars.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plans/import-person-avatars.md b/plans/import-person-avatars.md index f9a5a07..5dbfd19 100644 --- a/plans/import-person-avatars.md +++ b/plans/import-person-avatars.md @@ -4,7 +4,7 @@ depends: [] specs: [] issues: - 130 -pr: +pr: 143 --- # Plan: import legacy person avatars @@ -32,6 +32,7 @@ What ships: ## Implements # 130. 
No spec change — the avatar storage contract (api/people.md, behaviors/ + storage.md attachments) already exists; this just populates it at import time. ## Approach @@ -39,6 +40,7 @@ storage.md attachments) already exists; this just populates it at import time. `fetchAndMaterializePersonAvatars(photoIdBySlug, sourceHost, …)` mirrors `fetchAndMaterializeBlogMedia`: parallel fetch + `processAvatar`, returning slug → {original, thumbnail}. The transact's people loop wires the attachments + - `avatarKey` for people that have one; failed fetches/decodes are skipped with a warning (the person still imports). `hologit` hoisted to the transact top (shared by people + blog attachment writes).