Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 94 additions & 6 deletions apps/api/scripts/import-laddr/importer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import type {
} from '@cfp/shared/schemas';

import { openPublicStore, type PublicStore } from '../../src/store/public.js';
import { processAvatar } from '../../src/lib/avatar.js';
import {
fetchAllPages,
RawBlogPostSchema,
Expand Down Expand Up @@ -235,6 +236,9 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise<ImportRe
log(`[import] fetching people from ${opts.sourceHost} (this is the large one)`);
const people: Person[] = [];
const tagAssignments: TagAssignment[] = [];
// slug → laddr PrimaryPhotoID, for people who have a profile photo. Their
// avatars are fetched from `/media/<id>` and stored as gitsheets attachments.
const photoIdBySlug = new Map<string, number>();
for await (const row of fetchAllPages<RawPerson>(
'/people',
RawPersonSchema,
Expand All @@ -253,6 +257,9 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise<ImportRe
if (parsed) {
people.push(parsed);
counts.people!.imported++;
if (typeof row.PrimaryPhotoID === 'number') {
photoIdBySlug.set(parsed.slug, row.PrimaryPhotoID);
}
for (const rawTag of row.Tags ?? []) {
const ta = translateTagAssignment(rawTag, row.ID, 'person', ctx);
if (ta === null) {
Expand Down Expand Up @@ -460,6 +467,16 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise<ImportRe
warnings,
);

// Fetch + process person avatars from laddr (`/media/<PrimaryPhotoID>`) into
// square original + 128px thumbnail buffers, keyed by person slug.
const avatarsBySlug = await fetchAndMaterializePersonAvatars(
photoIdBySlug,
opts.sourceHost,
fetchOpts,
log,
warnings,
);

// -------------------------------------------------------------------------
// 3. One atomic gitsheets transaction:
// - clear() each importer-owned sheet (deletes capture for free)
Expand All @@ -480,13 +497,33 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise<ImportRe
},
},
async (tx) => {
if (publicRepo === null) {
throw new Error('[import-laddr] internal: publicRepo not opened');
}
const hologit = publicRepo.hologitRepo;

log(`[import] clear + upsert tags (${tags.length})`);
await tx.tags.clear();
for (const t of tags) await tx.tags.upsert(t);

log(`[import] clear + upsert people (${people.length})`);
log(`[import] clear + upsert people (${people.length}, avatars: ${avatarsBySlug.size})`);
await tx.people.clear();
for (const p of people) await tx.people.upsert(p);
for (const p of people) {
const avatar = avatarsBySlug.get(p.slug);
if (avatar) {
// Mirror POST /api/people/:slug/avatar: store original + 128 thumb
// as attachments and point avatarKey at the conventional path.
const originalBlob = await BlobObject.write(hologit, avatar.original as unknown as string);
const thumbnailBlob = await BlobObject.write(hologit, avatar.thumbnail as unknown as string);
await tx.people.setAttachments(p, {
'avatar.jpg': originalBlob,
'avatar-128.jpg': thumbnailBlob,
});
await tx.people.upsert({ ...p, avatarKey: `people/${p.slug}/avatar.jpg` });
} else {
await tx.people.upsert(p);
}
}

log(`[import] clear + upsert projects (${projects.length})`);
await tx.projects.clear();
Expand Down Expand Up @@ -540,10 +577,6 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise<ImportRe
`[import] clear + upsert blog-posts (${blogTranslations.length}) + media attachments`,
);
await tx['blog-posts'].clear();
if (publicRepo === null) {
throw new Error('[import-laddr] internal: publicRepo not opened');
}
const hologit = publicRepo.hologitRepo;
for (const { record } of blogTranslations) {
const artifacts = mediaArtifactsBySlug.get(record.slug) ?? [];
if (artifacts.length > 0) {
Expand Down Expand Up @@ -839,6 +872,61 @@ async function fetchMediaBytes(
}
}

/**
 * Download legacy profile photos and prepare them for the import transaction.
 *
 * Every `[slug, photoId]` pair in `photoIdBySlug` is requested from
 * `https://<sourceHost>/media/<photoId>` via `fetchMediaBytes`; the downloaded
 * bytes are handed to `processAvatar`, and the `original` + `thumbnail`
 * buffers it returns (square original + 128px thumbnail, the same outputs the
 * avatar-upload route produces) are collected per slug.
 *
 * Nothing here is fatal to the import: a fetch that yields `null`, or a
 * `processAvatar` call that throws, appends a message to `warnings` and the
 * slug is simply absent from the result — the person still imports, just
 * without an avatar.
 *
 * Four workers pull from one shared queue, matching the blog-media fetcher's
 * politeness compromise.
 *
 * @param photoIdBySlug - person slug → laddr `PrimaryPhotoID`.
 * @param sourceHost - host name of the laddr site (no scheme; `https://` is prepended).
 * @param fetchOpts - optional `fetchImpl` / `userAgent` overrides; defaults are
 *   the global `fetch` and `'cfp-importer/dev'`.
 * @param log - progress logger.
 * @param warnings - collector that receives one entry per skipped avatar.
 * @returns map of person slug → processed buffers, for the transact callback
 *   to wire in via setAttachments.
 */
async function fetchAndMaterializePersonAvatars(
  photoIdBySlug: Map<string, number>,
  sourceHost: string,
  fetchOpts: FetchOptions,
  log: (msg: string) => void,
  warnings: Warnings,
): Promise<Map<string, { original: Buffer; thumbnail: Buffer }>> {
  const fetchImpl = fetchOpts.fetchImpl ?? fetch;
  const userAgent = fetchOpts.userAgent ?? 'cfp-importer/dev';
  const pending = Array.from(photoIdBySlug);
  const avatars = new Map<string, { original: Buffer; thumbnail: Buffer }>();
  if (pending.length === 0) return avatars;

  log(`[import] fetching ${pending.length} person avatars`);

  const WORKER_COUNT = 4;
  // Shared queue position. Claiming an index is a synchronous `nextIndex++`,
  // so no two workers can ever pick up the same entry.
  let nextIndex = 0;

  /** Keep claiming queue entries until the queue is exhausted. */
  const drainQueue = async (): Promise<void> => {
    for (let i = nextIndex++; i < pending.length; i = nextIndex++) {
      const [slug, photoId] = pending[i]!;
      const url = `https://${sourceHost}/media/${photoId}`;
      const fetched = await fetchMediaBytes(url, fetchImpl, userAgent);
      if (fetched === null) {
        warnings.push(`[people] avatar fetch failed: ${url} (/${slug})`);
      } else {
        try {
          const { original, thumbnail } = await processAvatar(fetched.bytes);
          avatars.set(slug, { original, thumbnail });
        } catch (err) {
          warnings.push(`[people] avatar decode failed for /${slug} (${url}): ${describe(err)}`);
        }
      }
    }
  };

  await Promise.all(Array.from({ length: WORKER_COUNT }, () => drainQueue()));
  log(`[import] processed ${avatars.size}/${pending.length} person avatars`);
  return avatars;
}

/**
* Fetch every distinct media asset referenced across all blog posts,
* derive the final filename per asset, then rewrite each post's body to
Expand Down
2 changes: 2 additions & 0 deletions apps/api/scripts/import-laddr/json-fetcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ export const RawPersonSchema = z
AccountLevel: z.string().nullable().optional(),
Newsletter: z.union([z.boolean(), z.number(), z.string()]).nullable().optional(),
Twitter: z.string().nullable().optional(),
/** Emergence Media ID of the person's photo; fetch at `/media/<id>`. */
PrimaryPhotoID: z.number().int().nullable().optional(),
Created: z.number().int().nullable().optional(),
Modified: z.number().int().nullable().optional(),
/** Present when `?include=Tags` */
Expand Down
48 changes: 48 additions & 0 deletions apps/api/tests/import-laddr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,23 @@ function makeFetch(routes: MockRoutes): typeof fetch {
return new Response('Not found', { status: 404 });
}
const body = queue.shift()!;
// Binary routes (e.g. /media/<id>) queue a Buffer — serve it raw.
if (Buffer.isBuffer(body)) {
return new Response(body, { status: 200, headers: { 'content-type': 'image/png' } });
}
return new Response(JSON.stringify(body), {
status: 200,
headers: { 'content-type': 'application/json' },
});
}) as typeof fetch;
}

/**
 * Smallest valid PNG (1×1), kept as a base64 literal and decoded into a
 * Buffer. Queued as the raw response body for a mocked `/media/<id>` route;
 * sharp can decode it, so processAvatar works.
 */
const TINY_PNG = Buffer.from(
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
'base64',
);

function envelope(rows: unknown[], total: number, limit: number, offset: number) {
return {
success: true,
Expand Down Expand Up @@ -1009,6 +1019,44 @@ describe('importLaddrFromJson — orchestrator', () => {
}
});

// End-to-end check of the avatar import path: a person whose laddr row carries
// a PrimaryPhotoID must end up with both avatar attachments in the committed
// tree and an `avatarKey` in their record; a person without one gets neither.
it('imports a person avatar from /media/<PrimaryPhotoID> and sets avatarKey', async () => {
const { path: repo, cleanup } = await makeRepo();
try {
const routes = mockRoutes();
// Give alice a photo + serve it as a tiny PNG at /media/555.
// The queued first-page people response is mutated in place, so the change
// is what the mocked fetch will serve.
// NOTE(review): assumes data[0] of that page is alice — confirm against mockRoutes.
const peopleResp = routes.responses.get(
'/people?format=json&include=Tags&limit=200&offset=0',
) as Array<{ data: Array<Record<string, unknown>> }>;
peopleResp[0]!.data[0]!.PrimaryPhotoID = 555;
// NOTE(review): the trailing `?` presumably mirrors how makeFetch builds its
// lookup key for a URL with no query string — confirm against makeFetch.
routes.responses.set('/media/555?', [TINY_PNG]);

const report = await importLaddrFromJson({
sourceHost: 'example.test',
dataRepo: repo,
branch: 'legacy-import',
initialParent: 'empty',
now: '2026-05-18T00:00:00.000Z',
delayMs: 0,
pageSize: 200,
fetchImpl: makeFetch(routes),
});
// A non-null commit hash means the import actually produced a commit.
expect(report.commitHash).not.toBeNull();

// List every file path recorded in the repo's HEAD commit.
const tree = await exec('git', ['ls-tree', '-r', '--name-only', 'HEAD'], { cwd: repo });
const paths = tree.stdout.split('\n').filter(Boolean);
// alice has a photo → original + thumbnail attachments
expect(paths).toContain('people/alice/avatar.jpg');
expect(paths).toContain('people/alice/avatar-128.jpg');
// bob has no photo → no avatar attachments
expect(paths).not.toContain('people/bob/avatar.jpg');

// The serialized person record must point avatarKey at the conventional path.
const aliceToml = await exec('git', ['show', 'HEAD:people/alice.toml'], { cwd: repo });
expect(aliceToml.stdout).toContain('avatarKey = "people/alice/avatar.jpg"');
} finally {
// Always run the repo cleanup, even when an assertion above fails.
await cleanup();
}
});

it('is idempotent: re-running on identical mock data makes no new commit', async () => {
const { path: repo, cleanup } = await makeRepo();
try {
Expand Down
69 changes: 69 additions & 0 deletions plans/import-person-avatars.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
---
status: done
depends: []
specs: []
issues:
- 130
pr: 143
---

# Plan: import legacy person avatars

## Scope

Leadership feedback (#130): legacy users show initials where their codeforphilly.org
photo used to be. The importer brought blog-post media but never person avatars,
so imported people had `avatarKey: null`.

A spike against the live laddr API found the source: `person.PrimaryPhotoID` →
the image at `GET /media/<id>` (confirmed 200, image/jpeg). Projects have **no**
image field in laddr, so this is person avatars only.

What ships:

- **json-fetcher**: `RawPersonSchema` now parses `PrimaryPhotoID`.
- **importer**: for each person with a `PrimaryPhotoID`, fetch `/media/<id>`,
run it through the existing `processAvatar` (square original + 128px thumb),
store both as gitsheets attachments (`avatar.jpg` + `avatar-128.jpg`) and set
`avatarKey = people/<slug>/avatar.jpg` — exactly the convention the avatar
upload route uses. Reuses the proven `fetchMediaBytes` + `BlobObject.write` +
`setAttachments` machinery (same as blog media), concurrency 4.

## Implements

Issue #130. No spec change — the avatar storage contract (api/people.md,
behaviors/storage.md attachments) already exists; this just populates it at
import time.

## Approach

`fetchAndMaterializePersonAvatars(photoIdBySlug, sourceHost, …)` mirrors
`fetchAndMaterializeBlogMedia`: parallel fetch + `processAvatar`, returning
slug → {original, thumbnail}. The transact's people loop wires the attachments
and `avatarKey` for people that have one; failed fetches/decodes are skipped
with a warning (the person still imports). `hologit` is hoisted to the top of
the transact callback (shared by people + blog attachment writes).

## Validation

- [x] `RawPersonSchema` parses `PrimaryPhotoID`.
- [x] Importer test: a person with `PrimaryPhotoID` gets `people/<slug>/avatar.jpg`
+ `avatar-128.jpg` attachments and `avatarKey` set; a person without one
gets neither. (import-laddr 37/37.)
- [x] `npm run type-check && npm run lint` clean.

## Risks

- Fetch volume: one image per photo-bearing person. Concurrency-capped at 4 and
failures are non-fatal, matching blog media. `--limit` bounds it for testing.
- Many photo-bearing accounts are spam — but the spam-prune (#133) removes them
downstream, so net imported avatars skew to real members.

## Notes

## Follow-ups

- The first photo-bearing accounts sampled in the spike were spam; harmless
(pruned later), but a reminder that import → prune ordering matters (already
documented in spam-detection.md / cutover.md).