diff --git a/.gitignore b/.gitignore
index 6cf9326..2a29aaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,11 @@
 
 # Testing artefacts
 .temp-profile
+tests/.env
+tests/.env.local
+tests/.compare-summary.txt
+__pycache__/
+*.pyc
 
 # logs
 geckodriver.log
diff --git a/js/lib.js b/js/lib.js
index e38430e..518a6fa 100644
--- a/js/lib.js
+++ b/js/lib.js
@@ -57,6 +57,16 @@ class MissingMappedField {
     toString() {
         return `${this.value}`;
     }
+
+    // Mirror 4CAT's API serialization so JSON.stringify produces the same
+    // tagged form on both sides: 4CAT's /api/dataset/<key>/items/ endpoint,
+    // when called with `missing_fields=keep`, emits missing values as
+    // `{ __missing: true, value: <fallback> }`. Matching that shape here
+    // lets the map_item comparator deep-equal both sides without special
+    // handling.
+    toJSON() {
+        return { __missing: true, value: this.value };
+    }
 }
 
 /**
diff --git a/modules/9gag.js b/modules/9gag.js
index a2d8bc5..213e798 100644
--- a/modules/9gag.js
+++ b/modules/9gag.js
@@ -40,4 +40,70 @@ export function capture(response, source_platform_url, source_url) {
     }
 
     return data["data"]["posts"];
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/ninegag/search_9gag.py)
+export function map_item(post) {
+    // Convert Unix timestamp (seconds) to Date object
+    const postTimestampSec = post.creationTs;
+    const postTimestamp = new Date(postTimestampSec * 1000);
+
+    // Select the highest‑resolution image that is not a video
+    const images = Object.values(post.images ?? {});
+    const imageCandidates = images.filter(v => !('hasAudio' in v));
+    imageCandidates.sort((a, b) => (b.width * b.height) - (a.width * a.height));
+    const image = imageCandidates[0] ?? {};
+
+    // Select the highest‑resolution video (if any) and pick the best URL format
+    const videoCandidates = images.filter(v => ('hasAudio' in v));
+    videoCandidates.sort((a, b) => (b.width * b.height) - (a.width * a.height));
+    let videoUrl = "";
+    if (videoCandidates.length) {
+        const vid = videoCandidates[0];
+        if (vid.av1Url) videoUrl = vid.av1Url;
+        else if (vid.h265Url) videoUrl = vid.h265Url;
+        else if (vid.vp9Url) videoUrl = vid.vp9Url;
+        else if (vid.vp8Url) videoUrl = vid.vp8Url;
+    }
+
+    // Handle anonymous posts – they appear as the user "9GAGGER"
+    if (!post.creator) {
+        post.creator = {
+            username: "9GAGGER",
+            fullName: "",
+            emojiStatus: "",
+            isVerifiedAccount: ""
+        };
+    }
+
+    return new MappedItem({
+        collected_from_url: normalize_url_encoding(post.__import_meta?.source_platform_url ?? ""),
+        id: post.id,
+        url: post.url,
+        subject: post.title,
+        body: post.description,
+        timestamp: formatUtcTimestamp(postTimestampSec),
+        author: post.creator?.username ?? "",
+        author_name: post.creator?.fullName ?? "",
+        author_status: post.creator?.emojiStatus ?? "",
+        author_verified: post.creator?.isVerifiedAccount ? "yes" : "no",
+        type: post.type,
+        image_url: image.url ?? "",
+        video_url: videoUrl,
+        is_nsfw: post.nsfw === 0 ? "no" : "yes",
+        is_promoted: post.promoted === 0 ? "no" : "yes",
+        is_vote_masked: post.isVoteMasked === 0 ? "no" : "yes",
+        is_anonymous: !post.isAnonymous ? "no" : "yes",
+        source_domain: post.sourceDomain,
+        source_url: post.sourceUrl,
+        upvotes: post.upVoteCount,
+        downvotes: post.downVoteCount,
+        score: (post.upVoteCount ?? 0) - (post.downVoteCount ?? 0),
+        comments: post.commentsCount,
+        tags: (post.tags ?? []).map(t => t.key).join(","),
+        tags_annotated: (post.annotationTags ?? []).join(","),
+        unix_timestamp: postTimestampSec
+    });
+}
+// === end auto-generated ===
diff --git a/modules/_loader.js b/modules/_loader.js
index afae2d7..ceb0080 100644
--- a/modules/_loader.js
+++ b/modules/_loader.js
@@ -1,3 +1,8 @@
+// Load-order dependency: `wrap_for_map_item` (used below) is a free global
+// defined in js/lib.js, which manifest.json loads as a plain background
+// script before this module. There is no import for it here on purpose —
+// MV2 background scripts share one global scope. If lib.js stops being
+// loaded first, the mapper wrapper below will ReferenceError.
 async function load() {
     const imported_modules = [
         await import("./tiktok.js"),
diff --git a/modules/douyin.js b/modules/douyin.js
index ef811d9..bb33e3a 100644
--- a/modules/douyin.js
+++ b/modules/douyin.js
@@ -339,4 +339,268 @@ export function capture(response, source_platform_url, source_url) {
     } else {
         // console.log("Detected expected object(s) by no usable items found")
     }
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/douyin/search_douyin.py)
+function getChineseNumber(num) {
+    if (typeof num === "number") {
+        return num;
+    }
+    if (typeof num !== "string") {
+        return 0;
+    }
+    if (num.includes("万")) {
+        const cleaned = num.replace(/[^0-9.]/g, "");
+        return parseFloat(cleaned) * 10000;
+    }
+    const cleaned = num.replace(/[^0-9.]/g, "");
+    return cleaned ? parseInt(cleaned, 10) : 0;
+}
+
+export function map_item(item) {
+    // Helper to safely access nested properties
+    const get = (obj, path, def) => {
+        return path.reduce((o, p) => (o && o[p] != null ? o[p] : undefined), obj) ?? def;
+    };
+
+    const metadata = item["__import_meta"] ?? {};
+    let subject = "Post";
+    let stream_data = {};
+    let post_timestamp;
+    let video_url = "";
+    let video_thumbnail = "";
+    let video_description = "";
+    let duration = "Unknown";
+    let prevent_download = null;
+    let stats = {};
+    let author = {};
+    let video_tags = "";
+    let aweme_id_key, group_id_key, text_extra_key, hashtag_key, mention_key, author_id_key;
+    let mix_info_key, mix_id_key, mix_name_key;
+    let author_sec_key, avatar_thumb_key, url_list_key, is_fake_key;
+
+    if (item["ZS_collected_from_embed"]) {
+        // Embedded HTML format
+        if (item["cellRoom"] && item["cellRoom"] !== "$undefined") {
+            stream_data = item["cellRoom"]["rawdata"] ?? {};
+        }
+        if (Object.keys(stream_data).length) {
+            // Stream embedded
+            subject = "Stream";
+            const createtime = stream_data["createtime"] ?? (item["requestTime"] ? item["requestTime"] / 1000 : undefined);
+            post_timestamp = new Date((createtime ?? 0) * 1000);
+            video_url = stream_data["stream_url"]?.["flv_pull_url"]?.["FULL_HD1"] ?? "";
+            video_thumbnail = stream_data["video"]?.["cover"] ?? null;
+            video_description = stream_data["title"] ?? "";
+            duration = "Unknown";
+            stats = stream_data["stats"] ?? {};
+            author = stream_data["owner"] ?? {};
+            author_sec_key = "sec_uid";
+            avatar_thumb_key = "avatar_thumb";
+            url_list_key = "url_list";
+            is_fake_key = "is_ad_fake";
+        } else {
+            // Regular post embedded
+            post_timestamp = new Date(item["createTime"] * 1000);
+            const videos_list = item["video"]?.["bitRateList"];
+            if (videos_list) {
+                const videos = [...videos_list].sort((a, b) => (b["bitRate"] ?? 0) - (a["bitRate"] ?? 0));
+                video_url = "https" + (videos[0]["playApi"] ?? "");
+            } else {
+                video_url = "";
+            }
+            video_thumbnail = item["video"]?.["cover"] ?? null;
+            video_description = item["desc"] ?? "";
+            duration = item["duration"] ?? item["video"]?.["duration"] ?? "Unknown";
+            prevent_download = item["download"]?.["prevent"] ? "yes" : "no";
+            stats = item["stats"] ?? {};
+            author = item["authorInfo"] ?? {};
+            author_sec_key = "secUid";
+            avatar_thumb_key = "avatarThumb";
+            url_list_key = "urlList";
+            is_fake_key = "isAdFake";
+        }
+        // Embedded keys (same for both branches)
+        aweme_id_key = "awemeId";
+        group_id_key = "groupId";
+        text_extra_key = "textExtra";
+        hashtag_key = "hashtagName";
+        mention_key = "secUid";
+        author_id_key = "authorUserId";
+        mix_info_key = "mixInfo";
+        mix_id_key = "mixId";
+        mix_name_key = "mixName";
+        // Stats (may be MissingMappedField)
+        const collect_count = stats["collectCount"] ?? new MissingMappedField("Unknown");
+        const comment_count = stats["commentCount"] ?? new MissingMappedField("Unknown");
+        const digg_count = stats["diggCount"] ?? new MissingMappedField("Unknown");
+        const download_count = stats["downloadCount"] ?? new MissingMappedField("Unknown");
+        const forward_count = stats["forwardCount"] ?? new MissingMappedField("Unknown");
+        const play_count = stats["playCount"] ?? new MissingMappedField("Unknown");
+        const share_count = stats["shareCount"] ?? new MissingMappedField("Unknown");
+        // Video tags (guess)
+        video_tags = (item["videoTag"] ?? []).filter(t => t["tagName"]).map(t => t["tagName"]).join(",");
+        const mix_current_episode = (item[mix_info_key] ?? {})["currentEpisode"] ?? "N/A";
+        // Build result later – keep intermediate values in closure variables
+        var __embed_collect_count = collect_count;
+        var __embed_comment_count = comment_count;
+        var __embed_digg_count = digg_count;
+        var __embed_download_count = download_count;
+        var __embed_forward_count = forward_count;
+        var __embed_play_count = play_count;
+        var __embed_share_count = share_count;
+        var __embed_mix_current_episode = mix_current_episode;
+    } else {
+        // Non‑embedded JSON format
+        stream_data = item["rawdata"] ?? item["cell_room"]?.["rawdata"];
+        if (stream_data) {
+            // Stream (may be a JSON string)
+            if (typeof stream_data === "string") {
+                try { stream_data = JSON.parse(stream_data); } catch (e) { /* ignore */ }
+            }
+            subject = "Stream";
+            const create_time = stream_data["create_time"] ?? item["create_time"] ?? (metadata["timestamp_collected"] ? metadata["timestamp_collected"] / 1000 : undefined);
+            post_timestamp = new Date((create_time ?? 0) * 1000);
+            video_url = stream_data["stream_url"]?.["flv_pull_url"]?.["FULL_HD1"] ?? "";
+            video_thumbnail = stream_data["video"]?.["cover"] ?? null;
+            video_description = stream_data["title"] ?? "";
+            duration = "Unknown";
+            author = stream_data["owner"] ?? {};
+            video_tags = stream_data["video_feed_tag"] ?? "";
+            stats = stream_data["stats"] ?? {};
+        } else {
+            // Regular post
+            post_timestamp = new Date(item["create_time"] * 1000);
+            const videos_list = item["video"]?.["bit_rate"];
+            if (!videos_list) {
+                video_url = "";
+                video_thumbnail = "";
+            } else {
+                const videos = [...videos_list].sort((a, b) => (b["bit_rate"] ?? 0) - (a["bit_rate"] ?? 0));
+                video_url = videos[0]["play_addr"]?.["url_list"]?.[0] ?? "";
+                video_thumbnail = item["video"]?.["cover"]?.["url_list"]?.[0] ?? "";
+            }
+            video_description = item["desc"] ?? "";
+            duration = item["duration"] ?? item["video"]?.["duration"] ?? "Unknown";
+            author = item["author"] ?? {};
+            stats = item["statistics"] ?? {};
+        }
+        prevent_download = ("prevent_download" in item) ? (item["prevent_download"] ? "yes" : "no") : null;
+        // Keys for non‑embedded format
+        aweme_id_key = "aweme_id";
+        group_id_key = "group_id";
+        text_extra_key = "text_extra";
+        hashtag_key = "hashtag_name";
+        mention_key = "sec_uid";
+        author_id_key = "author_user_id";
+        mix_info_key = "mix_info";
+        mix_id_key = "mix_id";
+        mix_name_key = "mix_name";
+        author_sec_key = "sec_uid";
+        avatar_thumb_key = "avatar_thumb";
+        url_list_key = "url_list";
+        is_fake_key = "is_ad_fake";
+        // Stats (may be MissingMappedField)
+        const collect_count = stats ? (stats["collect_count"] ?? null) : new MissingMappedField("Unknown");
+        const comment_count = stats ? (stats["comment_count"] ?? null) : new MissingMappedField("Unknown");
+        const digg_count = stats ? (stats["digg_count"] ?? null) : new MissingMappedField("Unknown");
+        const download_count = stats ? (stats["download_count"] ?? null) : new MissingMappedField("Unknown");
+        const forward_count = stats ? (stats["forward_count"] ?? null) : new MissingMappedField("Unknown");
+        const play_count = stats ? (stats["play_count"] ?? null) : new MissingMappedField("Unknown");
+        const share_count = stats ? (stats["share_count"] ?? null) : new MissingMappedField("Unknown");
+        // Video tags list
+        video_tags = (item["video_tag"] ?? []).filter(t => t["tag_name"]).map(t => t["tag_name"]).join(",");
+        const mix_current_episode = item[mix_info_key] ? (item[mix_info_key]["statis"]?.["current_episode"] ?? "N/A") : "N/A";
+        var __embed_collect_count = collect_count;
+        var __embed_comment_count = comment_count;
+        var __embed_digg_count = digg_count;
+        var __embed_download_count = download_count;
+        var __embed_forward_count = forward_count;
+        var __embed_play_count = play_count;
+        var __embed_share_count = share_count;
+        var __embed_mix_current_episode = mix_current_episode;
+    }
+
+    // Stream stats (common)
+    const count_total_streams_viewers = stats["total_user"] ?? "N/A";
+    const count_current_stream_viewers = ("user_count_str" in stats) ? getChineseNumber(stats["user_count_str"]) : "N/A";
+
+    // Displayed flag for mix items
+    let displayed = true;
+    if (item["ZS_collected_from_mix"] && !item["ZS_first_mix_vid"]) {
+        displayed = false;
+    }
+
+    // Image URLs
+    const image_urls = [];
+    if (Array.isArray(item["images"])) {
+        for (const img of item["images"]) {
+            if (Array.isArray(img["url_list"])) {
+                image_urls.push(img["url_list"][0]);
+            } else if (Array.isArray(img["urlList"])) {
+                image_urls.push(img["urlList"][0]);
+            }
+        }
+    }
+
+    // Music fields
+    const music_obj = item["music"];
+    const music_author = (music_obj && music_obj !== "$undefined") ? (music_obj["author"] ?? "") : "";
+    const music_title = (music_obj && music_obj !== "$undefined") ? (music_obj["title"] ?? "") : "";
+    const music_url = (music_obj && music_obj !== "$undefined") ? (music_obj["play_url"]?.["uri"] ?? "") : "";
+
+    // Collection / Mix handling
+    let mix_current_episode = __embed_mix_current_episode;
+    if (mix_current_episode === "$undefined") mix_current_episode = "N/A";
+    const collection_id_raw = item[mix_info_key]?.[mix_id_key] ?? "N/A";
+    const collection_id = collection_id_raw === "$undefined" ? "N/A" : collection_id_raw;
+    const collection_name_raw = item[mix_info_key]?.[mix_name_key] ?? "N/A";
+    const collection_name = collection_name_raw === "$undefined" ? "N/A" : collection_name_raw;
+    const part_of_collection = (item[mix_info_key] && (mix_id_key in item[mix_info_key]) && collection_id !== "N/A") ? "yes" : "no";
+
+    // Build the mapped item
+    return new MappedItem({
+        "collected_from_url": normalize_url_encoding(metadata["source_platform_url"] ?? ""),
+        "id": item[aweme_id_key],
+        "thread_id": item[group_id_key],
+        "subject": subject,
+        "body": video_description,
+        "timestamp": formatUtcTimestamp(Math.floor(post_timestamp.getTime() / 1000)),
+        "post_url": subject === "Post" ? `https://www.douyin.com/video/${item[aweme_id_key]}` : `https://live.douyin.com/${author["web_rid"]}`,
+        "region": item["region"] ?? "",
+        "hashtags": (item[text_extra_key] ?? []).filter(t => t[hashtag_key]).map(t => t[hashtag_key]).join(","),
+        "mentions": (item[text_extra_key] ?? []).filter(t => t[mention_key]).map(t => `https://www.douyin.com/user/${t[mention_key]}`).join(","),
+        "video_tags": video_tags,
+        "prevent_download": prevent_download,
+        "video_url": video_url,
+        "video_thumbnail": video_thumbnail,
+        "video_duration": duration,
+        "image_urls": image_urls.join(","),
+        "music_author": music_author,
+        "music_title": music_title,
+        "music_url": music_url,
+        "collect_count": __embed_collect_count,
+        "comment_count": __embed_comment_count,
+        "digg_count": __embed_digg_count,
+        "download_count": __embed_download_count,
+        "forward_count": __embed_forward_count,
+        "play_count": __embed_play_count,
+        "share_count": __embed_share_count,
+        "count_total_streams_viewers": count_total_streams_viewers,
+        "count_current_stream_viewers": count_current_stream_viewers,
+        "author_user_id": item[author_id_key] ?? (author["uid"] ?? author["id"]),
+        "author_nickname": author["nickname"] ?? "",
+        "author_profile_url": `https://www.douyin.com/user/${author[author_sec_key]}`,
+        "author_thumbnail_url": author[avatar_thumb_key]?.[url_list_key]?.[0] ?? "",
+        "author_region": author["region"] ?? null,
+        "author_is_ad_fake": author[is_fake_key] ?? null,
+        "part_of_collection": part_of_collection,
+        "4CAT_first_video_displayed": displayed ? "yes" : "no",
+        "collection_id": collection_id,
+        "collection_name": collection_name,
+        "place_in_collection": mix_current_episode,
+        "unix_timestamp": Math.floor(post_timestamp.getTime() / 1000)
+    });
+}
+// === end auto-generated ===
diff --git a/modules/gab.js b/modules/gab.js
index a5eab6d..9e8b4e2 100644
--- a/modules/gab.js
+++ b/modules/gab.js
@@ -72,4 +72,112 @@ export function capture(response, source_platform_url, source_url) {
         }
     }
     return items;
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/gab/search_gab.py)
+// Map one captured Gab post to flat MappedItem fields; short keys ('i', 'rc', 'ca', ...) are tried before long ones.
+export function map_item(item) {
+    const unknownData = []; // notes about media entries that could not be mapped
+    const postId = item['i'] ?? item['id'];
+    const metadata = item['__import_meta'] ?? {};
+
+    let collectedAt;
+    if (metadata['timestamp_collected'] != null) {
+        const ts = metadata['timestamp_collected'] / 1000; // presumably milliseconds -> seconds; confirm
+        collectedAt = formatUtcTimestamp(ts);
+    } else {
+        collectedAt = new MissingMappedField('Unknown');
+    }
+    // Reactions are either a plain number or an object of counts, which is summed
+    const reactions = item['rc'] ?? item['reactions_counts'];
+    let reactionCount;
+    if (typeof reactions === 'number') {
+        reactionCount = reactions;
+    } else {
+        reactionCount = Object.values(reactions ?? {}).reduce((sum, val) => sum + (val ?? 0), 0);
+    }
+
+    const group = item['g'] ?? item['group'] ?? null;
+    const author = item['author_info'] ?? item['account'] ?? null;
+    const mentions = item['m'] ?? item['mentions'] ?? [];
+    const tags = item['tg'] ?? item['tags'] ?? [];
+    const card = item['card'] ?? item['link'] ?? null;
+    const mediaItems = item['image_info'] ?? item['media_attachments'] ?? [];
+
+    const imageUrls = [];
+    const videoUrls = [];
+    // Sort attachments into image/video URL lists; anything else is noted in unknownData
+    for (const media of mediaItems) {
+        const type = media['t'] ?? media['type'];
+        if (type === 'image') {
+            const url = media['u'] ?? media['url'];
+            if (url == null) {
+                unknownData.push(`Media missing URL: ${url}`); // url is null/undefined on this path
+            } else {
+                imageUrls.push(url);
+            }
+        } else if (type === 'video') {
+            const url = media['smp4'] ?? media['source_mp4'];
+            if (url == null) {
+                unknownData.push(`Media missing URL: ${url}`); // url is null/undefined on this path
+            } else {
+                videoUrls.push(url);
+            }
+        } else {
+            unknownData.push(`Unknown media type: ${JSON.stringify(media)}`);
+        }
+    }
+
+    const createdAtRaw = item['ca'] ?? item['created_at'];
+    const postDate = new Date(createdAtRaw);
+    const postTimeStr = formatUtcTimestamp(postDate.getTime() / 1000);
+
+    const mappedItem = {
+        collected_at: collectedAt,
+        collected_from_url: normalize_url_encoding(metadata['source_platform_url'] ?? ''),
+        id: postId,
+        created_at: postTimeStr,
+        body: item['c'] ?? item['content'],
+        url: item['ul'] ?? item['url'],
+        reaction_count: reactionCount,
+        favourites_count: item['fbc'] ?? item['favourites_count'] ?? null,
+        replies_count: item['rc'] ?? item['replies_count'], // NOTE(review): 'rc' is also read as the reactions key above; confirm
+        reblogs_count: item['rbc'] ?? item['reblogs_count'],
+        mentions: mentions.map(m => m['username']).join(','),
+        tags: tags.map(t => t['name']).join(','),
+
+        group_id: group ? group['id'] ?? null : null,
+        group_title: group ? group['title'] ?? null : null,
+        group_description: group ? group['description'] ?? null : null,
+        group_member_count: group ? group['member_count'] ?? null : null,
+        group_is_private: group ? group['is_private'] ?? null : null,
+        group_url: group ? group['url'] ?? null : null,
+        group_created_at: group ? group['created_at'] ?? null : null,
+
+        account_id: author ? (author['i'] ?? author['id']) : null,
+        account_username: author ? (author['un'] ?? author['username']) : null,
+        account_account: author ? (author['ac'] ?? author['acct']) : null,
+        account_display_name: author ? (author['dn'] ?? author['display_name']) : null,
+        account_note: author ? (author['nt'] ?? author['note']) : null,
+
+        link_id: card ? card['id'] ?? null : null,
+        link_url: card ? card['url'] ?? null : null,
+        link_title: card ? card['title'] ?? null : null,
+        link_description: card ? card['description'] ?? null : null,
+        link_type: card ? card['type'] ?? null : null,
+        link_image: card ? card['image'] ?? null : null,
+
+        image_urls: imageUrls.join(','),
+        video_urls: videoUrls.join(','),
+
+        thread_id: item['i'] ?? item['conversation_id'], // NOTE(review): 'i' is also the post id key; confirm
+        timestamp: postTimeStr
+    };
+    // Collected notes, if any, are passed as `message` in the second MappedItem argument
+    if (unknownData.length) {
+        return new MappedItem(mappedItem, {message: unknownData.join('')}); // notes are joined with no separator
+    }
+    return new MappedItem(mappedItem);
+}
+// === end auto-generated ===
diff --git a/modules/imgur.js b/modules/imgur.js
index 9cc662b..3d37892 100644
--- a/modules/imgur.js
+++ b/modules/imgur.js
@@ -30,4 +30,44 @@ export function capture(response, source_platform_url, source_url) {
     }
 
     return data["posts"];
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/imgur/search_imgur.py)
+export function map_item(item) {
+    // Parse created_at timestamp (ISO 8601) to Unix seconds
+    const createdAt = item.created_at;
+    if (!createdAt) {
+        throw new MapItemException('Missing created_at field');
+    }
+    const unix_timestamp = Math.floor(Date.parse(createdAt) / 1000);
+    const timestamp = formatUtcTimestamp(unix_timestamp);
+
+    const collected_from_url = normalize_url_encoding(item.__import_meta?.source_platform_url ?? "");
+
+    return new MappedItem({
+        collected_from_url,
+        id: item.id,
+        subject: item.title,
+        body: item.description,
+        timestamp,
+        author: item.account_id,
+        type: item.cover?.type,
+        media_url: item.cover?.url,
+        post_url: item.url,
+        album_media: item.image_count,
+        is_ad: item.is_ad ? "yes" : "no",
+        is_album: item.is_album ? "yes" : "no",
+        is_mature: item.is_mature ? "yes" : "no",
+        is_viral: item.in_most_viral ? "yes" : "no",
+        views: item.view_count,
+        upvotes: item.upvote_count,
+        downvotes: item.downvote_count,
+        score: item.point_count,
+        comments: item.comment_count,
+        favourites: item.favorite_count,
+        virality_score: item.virality,
+        unix_timestamp,
+    });
+}
+// === end auto-generated ===
diff --git a/modules/instagram.js b/modules/instagram.js
index f14e6ef..b621b96 100644
--- a/modules/instagram.js
+++ b/modules/instagram.js
@@ -500,4 +500,396 @@ function extractEmbeddedInstagramJSON(response) {
     }
 
     return datas;
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/instagram/search_instagram.py)
+const MEDIA_TYPE_PHOTO = 1; // numeric media_type that parseItemlistItem maps to "photo"
+const MEDIA_TYPE_VIDEO = 2; // numeric media_type that parseItemlistItem maps to "video"
+const MEDIA_TYPE_CAROUSEL = 8; // media_type whose item count is taken from carousel_media
+// Captures the text after '#', stopping at whitespace or any of the listed punctuation characters.
+const HASHTAG_REGEX = /#([^\s!@#$%ˆ&*()_+{}:"|<>?\[\];'\,./`~'‘’]+)/g;
+
+function extractHashtags(caption) {
+    if (caption instanceof MissingMappedField) {
+        return "";
+    }
+    const matches = [...caption.matchAll(HASHTAG_REGEX)];
+    return matches.map(m => m[1]).join(",");
+}
+
+function parsePolarisItem(node) {
+    const partial_item = node._zs_partial ?? false;
+    const collected_at = new MissingMappedField(0);
+    const unix_at = new MissingMappedField(0);
+    let caption;
+    if (!('caption' in node)) {
+        caption = new MissingMappedField("");
+    } else if (!node.caption) {
+        caption = "";
+    } else {
+        caption = node.caption.text;
+    }
+
+    const user = node.user;
+    const owner = node.owner;
+    if (user && owner) {
+        if (owner.id === user.id) {
+            // prefer user
+        } else if (user.username !== owner.username) {
+            throw new MapItemException(`Unable to parse item: different user and owner`);
+        }
+    }
+    const is_verified = ("is_verified" in user && user.is_verified != null) ? user.is_verified : new MissingMappedField(false);
+
+    const typeMap = {"XIGPolarisPhotoMedia": "photo", "XIGPolarisVideoMedia": "video"};
+    const media_type = typeMap[node.__typename] ?? "unknown";
+    const num_media = node.__typename !== "XIGPolarisCarouselMedia" ? 1 : (node.carousel_media?.length ?? 0);
+
+    const display_urls = node.display_uri ?? new MissingMappedField("");
+    const missing_media = null;
+    let media_urls;
+    if ("video_versions" in node) {
+        media_urls = node.video_versions[0]?.url ?? new MissingMappedField("");
+    } else {
+        media_urls = new MissingMappedField("");
+    }
+
+    return {
+        "collected_from_url": normalize_url_encoding(node.__import_meta?.source_platform_url),
+        "collected_from_view": node._zs_instagram_view ?? "",
+        "partial_item": partial_item,
+        "id": node.code,
+        "timestamp": collected_at,
+        "thread_id": node.code,
+        "parent_id": node.code,
+        "url": "https://www.instagram.com/p/" + node.code,
+        "body": caption,
+
+        "author_id": user?.id ?? owner?.id ?? new MissingMappedField(""),
+        "author": user?.username ?? owner?.username ?? new MissingMappedField(""),
+        "author_fullname": user?.full_name ?? owner?.full_name ?? new MissingMappedField(""),
+        "verified": is_verified,
+        "author_avatar_url": user?.profile_pic_url ?? owner?.profile_pic_url ?? new MissingMappedField(""),
+
+        "coauthors": new MissingMappedField(""),
+        "coauthor_fullnames": new MissingMappedField(""),
+        "coauthor_ids": new MissingMappedField(""),
+
+        "media_type": media_type,
+        "num_media": num_media,
+        "image_urls": display_urls,
+        "media_urls": media_urls,
+
+        "hashtags": extractHashtags(caption),
+        "usertags": new MissingMappedField(""),
+        "play_count": node.play_count ?? new MissingMappedField(0),
+
+        "likes_hidden": new MissingMappedField(""),
+        "num_likes": new MissingMappedField(0),
+        "num_comments": new MissingMappedField(0),
+
+        "location_name": new MissingMappedField(""),
+        "location_id": new MissingMappedField(""),
+        "location_latlong": new MissingMappedField(""),
+        "location_city": new MissingMappedField(""),
+
+        "unix_timestamp": unix_at,
+        "missing_media": missing_media
+    };
+}
+
+// Map a Graph-style node (Graph* __typename) to the flat field dictionary; throws MapItemException on user/owner mismatch.
+function parseGraphItem(node) {
+    let caption;
+    try {
+        caption = node.edge_media_to_caption.edges[0].node.text;
+    } catch (e) {
+        caption = new MissingMappedField(""); // any missing link in the caption path lands here
+    }
+    // A GraphSidecar counts its child edges; every other type counts as one media item
+    const num_media = node.__typename !== "GraphSidecar" ? 1 : (node.edge_sidecar_to_children?.edges?.length ?? 0);
+    let media_node;
+    if (node.__typename === "GraphSidecar") {
+        media_node = node.edge_sidecar_to_children.edges[0].node;
+    } else {
+        media_node = node;
+    }
+
+    let media_url;
+    if (media_node.__typename === "GraphVideo") {
+        media_url = media_node.video_url ?? "";
+    } else if (media_node.__typename === "GraphImage") {
+        const resources = media_node.display_resources ?? media_node.thumbnail_resources;
+        if (resources && resources.length) {
+            media_url = resources[resources.length - 1].src;
+        } else {
+            media_url = media_node.display_url ?? "";
+        }
+    } else {
+        media_url = media_node.display_url ?? "";
+    }
+    // NOTE(review): typeMap has no "GraphImage" entry, so a single image maps to "unknown"; confirm intended
+    const typeMap = {"GraphSidecar": "photo", "GraphVideo": "video"};
+    let media_type;
+    if (node.__typename !== "GraphSidecar") {
+        media_type = typeMap[node.__typename] ?? "unknown";
+    } else {
+        const childTypes = new Set(node.edge_sidecar_to_children.edges.map(e => e.node.__typename));
+        if (childTypes.size > 1) {
+            media_type = "mixed";
+        } else {
+            const single = childTypes.values().next().value;
+            media_type = typeMap[single] ?? "unknown";
+        }
+    }
+
+    const location = {name: "", latlong: "", city: "", location_id: ""};
+    if (node.location) {
+        location.name = node.location.name ?? "";
+        location.location_id = node.location.pk ?? "";
+        location.latlong = node.location.lat != null ? `${node.location.lat},${node.location.lng}` : "";
+        location.city = node.location.city ?? null;
+    }
+
+    const no_likes = Boolean(node.like_and_view_counts_disabled);
+    const user = node.user;
+    const owner = node.owner;
+    if (user && owner) {
+        if (owner.id === user.id) {
+            // prefer user
+        } else if (user.username !== owner.username) {
+            throw new MapItemException(`Unable to parse item: different user and owner`);
+        }
+    }
+    // view_count takes precedence over play_count; neither present means a missing field
+    let play_count;
+    if (node.view_count != null) {
+        play_count = node.view_count;
+    } else if (node.play_count != null) {
+        play_count = node.play_count;
+    } else {
+        play_count = new MissingMappedField(0);
+    }
+
+    let usertags = "";
+    if (node.edge_media_to_tagged_user && Array.isArray(node.edge_media_to_tagged_user.edges)) {
+        usertags = node.edge_media_to_tagged_user.edges.map(e => e.node.user.username).join(",");
+    }
+
+    return {
+        "id": node.shortcode,
+        "post_source_domain": node.__import_meta?.source_platform_url,
+        "collected_from_view": node._zs_instagram_view ?? new MissingMappedField(""),
+        "partial_item": node._zs_partial ?? new MissingMappedField(""),
+        "timestamp": formatUtcTimestamp(node.taken_at_timestamp),
+        "thread_id": node.shortcode,
+        "parent_id": node.shortcode,
+        "url": "https://www.instagram.com/p/" + node.shortcode,
+        "body": caption,
+
+        "author": user?.username ?? owner?.username ?? new MissingMappedField(""),
+        "author_fullname": user?.full_name ?? owner?.full_name ?? new MissingMappedField(""),
+        "is_verified": Boolean(user?.is_verified),
+        "author_avatar_url": user?.profile_pic_url ?? owner?.profile_pic_url ?? new MissingMappedField(""),
+        "coauthors": new MissingMappedField(""),
+        "coauthor_fullnames": new MissingMappedField(""),
+        "coauthor_ids": new MissingMappedField(""),
+
+        "media_type": media_type,
+        "num_media": num_media,
+        "image_urls": node.display_url ?? "",
+        "media_urls": media_url,
+
+        "hashtags": extractHashtags(caption),
+        "usertags": usertags,
+        "play_count": play_count,
+        "likes_hidden": no_likes ? "yes" : "no",
+        "num_likes": no_likes ? new MissingMappedField(0) : (node.edge_media_preview_like?.count ?? new MissingMappedField(0)),
+        "num_comments": node.edge_media_preview_comment?.count ?? 0,
+
+        "location_name": location.name,
+        "location_id": location.location_id,
+        "location_latlong": location.latlong,
+        "location_city": location.city,
+
+        "unix_timestamp": node.taken_at_timestamp,
+        "missing_media": null
+    };
+}
+
+function parseItemlistItem(node) {
+    // Maps an item in Instagram's "item list" format (keyed by `code`, typed by
+    // `media_type`) to a flat record. Throws MapItemException when a non-partial
+    // video item lacks its URLs or when `user` and `owner` are different accounts.
+    const partial_item = node._zs_partial ?? false;
+    const num_media = node.media_type !== MEDIA_TYPE_CAROUSEL ? 1 : (node.carousel_media?.length ?? 0);
+    let caption;
+    if (!('caption' in node)) {
+        caption = new MissingMappedField("");
+    } else if (!node.caption) {
+        caption = "";
+    } else {
+        caption = node.caption.text;
+    }
+
+    const display_urls = [];
+    const media_urls = [];
+    let missing_media = null;
+    const typeMap = { [MEDIA_TYPE_PHOTO]: "photo", [MEDIA_TYPE_VIDEO]: "video" };
+    const mediaTypesSet = new Set();
+    // A carousel without `carousel_media` is treated as empty (matching num_media above).
+    const media_nodes = node.media_type === MEDIA_TYPE_CAROUSEL ? (node.carousel_media ?? []) : [node];
+    for (const media_node of media_nodes) {
+        if (media_node.media_type === MEDIA_TYPE_VIDEO) {
+            if (media_node.image_versions2) {
+                display_urls.push(media_node.image_versions2.candidates[0].url);
+            } else if (media_node.video_versions) {
+                display_urls.push(media_node.video_versions[0].url);
+            } else if (!partial_item) {
+                throw new MapItemException("Instagram item format change");
+            }
+            if (media_node.video_versions) {
+                media_urls.push(media_node.video_versions[0].url);
+            } else if (!partial_item) {
+                throw new MapItemException("Instagram item format change");
+            }
+        } else if (media_node.media_type === MEDIA_TYPE_PHOTO && media_node.image_versions2) {
+            const media_url = media_node.image_versions2.candidates[0].url;
+            display_urls.push(media_url);
+            media_urls.push(media_url);
+        } else {
+            missing_media = new MissingMappedField("");
+        }
+        mediaTypesSet.add(typeMap[media_node.media_type] ?? "unknown");
+    }
+
+    // An empty media list has no type to report; use "unknown" rather than undefined.
+    const media_type = mediaTypesSet.size > 1 ? "mixed" : (mediaTypesSet.values().next().value ?? "unknown");
+
+    let num_comments;
+    if ("comment_count" in node) {
+        num_comments = node.comment_count;
+    } else if (Array.isArray(node.comments)) {
+        num_comments = node.comments.length;
+    } else {
+        num_comments = -1;
+    }
+
+    const location = {name: "", latlong: "", city: "", location_id: ""};
+    if (node.location) {
+        location.name = node.location.name ?? "";
+        location.location_id = node.location.pk ?? "";
+        location.latlong = node.location.lat != null ? `${node.location.lat},${node.location.lng}` : "";
+        location.city = node.location.city ?? null;
+    }
+    // `user` and `owner` are both optional; when both exist they must be the same account.
+    const user = node.user;
+    const owner = node.owner;
+    if (user && owner) {
+        if (owner.id === user.id) {
+            // prefer user
+        } else if (user.username !== owner.username) {
+            throw new MapItemException(`Unable to parse item: different user and owner`);
+        }
+    }
+
+    const coauthorsArr = [];
+    const coauthorFullnamesArr = [];
+    const coauthorIdsArr = [];
+    if (Array.isArray(node.coauthor_producers)) {
+        for (const cp of node.coauthor_producers) {
+            coauthorsArr.push(cp.username ?? new MissingMappedField(""));
+            coauthorFullnamesArr.push(cp.full_name ?? new MissingMappedField(""));
+            coauthorIdsArr.push(cp.id);
+        }
+    }
+    const coauthors = coauthorsArr.map(v => String(v)).join(",");
+    const coauthor_fullnames = coauthorFullnamesArr.map(v => String(v)).join(",");
+    const coauthor_ids = coauthorIdsArr.join(",");
+
+    const no_likes = Boolean(node.like_and_view_counts_disabled);
+    let play_count;
+    if (node.view_count != null) {
+        play_count = node.view_count;
+    } else if (node.play_count != null) {
+        play_count = node.play_count;
+    } else {
+        play_count = new MissingMappedField(0);
+    }
+
+    let usertags = "";
+    if (node.usertags) {
+        usertags = node.usertags.in?.map(u => u.user.username).join(",") ?? "";
+    }
+
+    let collected_at;
+    let unix_at;
+    if (partial_item) {
+        collected_at = new MissingMappedField(0);
+        unix_at = new MissingMappedField(0);
+    } else {
+        collected_at = formatUtcTimestamp(node.taken_at);
+        unix_at = node.taken_at;
+    }
+
+    return {
+        "collected_from_url": normalize_url_encoding(node.__import_meta?.source_platform_url),
+        "collected_from_view": node._zs_instagram_view ?? "",
+        "partial_item": node._zs_partial ?? "",
+        "id": node.code,
+        "timestamp": collected_at,
+        "thread_id": node.code,
+        "parent_id": node.code,
+        "url": "https://www.instagram.com/p/" + node.code,
+        "body": caption,
+        // optional chaining: either of user/owner may be absent without crashing the mapping
+        "author_id": user?.id ?? owner?.id ?? new MissingMappedField(""),
+        "author": user?.username ?? owner?.username ?? new MissingMappedField(""),
+        "author_fullname": user?.full_name ?? owner?.full_name ?? new MissingMappedField(""),
+        "verified": Boolean(user?.is_verified),
+        "author_avatar_url": user?.profile_pic_url ?? owner?.profile_pic_url ?? new MissingMappedField(""),
+        "coauthors": coauthors,
+        "coauthor_fullnames": coauthor_fullnames,
+        "coauthor_ids": coauthor_ids,
+
+        "media_type": media_type,
+        "num_media": num_media,
+        "image_urls": display_urls.join(","),
+        "media_urls": media_urls.join(","),
+
+        "hashtags": extractHashtags(caption),
+        "usertags": usertags,
+        "play_count": play_count,
+        "likes_hidden": no_likes ? "yes" : "no",
+        "num_likes": no_likes ? new MissingMappedField(0) : (node.like_count ?? new MissingMappedField(0)),
+        "num_comments": num_comments,
+
+        "location_name": location.name,
+        "location_id": location.location_id,
+        "location_latlong": location.latlong,
+        "location_city": location.city,
+
+        "unix_timestamp": unix_at,
+        "missing_media": missing_media
+    };
+}
+
+export function map_item(item) {
+    // Entry point: rejects ads, then picks the parser matching the item's `__typename`.
+    const link = item.link ?? "";
+    const isAd = item.product_type === "ad" || Boolean(link && link.startsWith("https://www.facebook.com/ads/ig_redirect"));
+    if (isAd) {
+        throw new MapItemException("appears to be Instagram ad, check raw data to confirm and ensure Zeeschuimer is up to date.");
+    }
+    // a missing or non-string typename falls through to the item-list parser
+    const typename = typeof item.__typename === "string" ? item.__typename : null;
+    let parse = parseItemlistItem;
+    if (typename !== null && typename.toLowerCase().includes("polaris")) {
+        parse = parsePolarisItem;
+    } else if (typename !== null && typename !== "XDTMediaDict") {
+        parse = parseGraphItem;
+    }
+    return new MappedItem(parse(item));
+}
+// === end auto-generated ===
diff --git a/modules/linkedin.js b/modules/linkedin.js
index f9b3e7a..75ed5bc 100644
--- a/modules/linkedin.js
+++ b/modules/linkedin.js
@@ -167,4 +167,230 @@ function recursively_enrich(object, mapped_objects) {
     }
 
     return object;
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/linkedin/search_linkedin.py)
+function getAuthor(post) {
+    // Collects the author's handle, name, description, pronouns, avatar and company flag from `post.actor`.
+    const actor = post.actor;
+    const target = actor.navigationContext.actionTarget;
+    const widestFirst = (a, b) => b.width - a.width;
+    const author = {
+        username: target.split("linkedin.com/").pop().split("?")[0],
+        name: actor.name.text,
+        description: actor.description?.text ?? "",
+        pronouns: "",
+        avatar_url: "",
+        is_company: "no",
+        url: target.split("?")[0]
+    };
+    // First name attribute: carries either a mini profile (person) or a mini company.
+    const attr0 = actor.name?.attributes?.[0];
+    const profile = attr0?.["*miniProfile"];
+    const company = attr0?.["*miniCompany"];
+    if (profile) {
+        if (profile.picture) {
+            const [largest] = [...profile.picture.artifacts].sort(widestFirst);
+            author.avatar_url = profile.picture.rootUrl + largest.fileIdentifyingUrlPathSegment;
+        }
+        if (profile.customPronoun) {
+            author.pronouns = profile.customPronoun;
+        } else if (profile.standardizedPronoun) {
+            author.pronouns = profile.standardizedPronoun.toLowerCase();
+        }
+    } else if (company) {
+        const [largest] = [...company.logo.artifacts].sort(widestFirst);
+        author.is_company = "yes";
+        author.avatar_url = company.logo.rootUrl + largest.fileIdentifyingUrlPathSegment;
+    }
+    // `attributesV2` pronouns, when present, override the ones found above (kept verbatim).
+    const pronoun = actor.name?.attributesV2?.[0]?.detailData?.["*profileFullName"]?.pronoun;
+    if (pronoun?.customPronoun) {
+        author.pronouns = pronoun.customPronoun;
+    } else if (pronoun?.standardizedPronoun) {
+        author.pronouns = pronoun.standardizedPronoun;
+    }
+    // A non-entity profile picture, when present, replaces any avatar chosen earlier.
+    const vector = actor.image?.attributes?.[0]?.detailData?.nonEntityProfilePicture?.vectorImage;
+    if (vector) {
+        author.avatar_url = vector.rootUrl + vector.artifacts[0].fileIdentifyingUrlPathSegment;
+    }
+    return author;
+}
+
+function parseTimeAgo(time_ago) {
+    // Converts a relative age label (e.g. "2w • Edited") to seconds; an unknown unit or no digits yields 0.
+    const part = time_ago.split("•")[0];
+    const numbers = part.replace(/[^0-9]/g, "");
+    const letters = part.replace(/[0-9]/g, "").trim();
+    // A Map (not a plain object) so a label like "constructor" cannot match an inherited property.
+    const periodLengths = new Map([
+        ["s", 1],
+        ["m", 60],
+        ["h", 3600],
+        ["d", 86400],
+        ["w", 7 * 86400],
+        ["mo", 30.4375 * 86400],
+        ["mnd", 30.4375 * 86400],
+        ["yr", 365.25 * 86400],
+        ["j", 365.25 * 86400]
+    ]);
+    const num = numbers.length ? parseInt(numbers, 10) : 0;
+    const factor = periodLengths.get(letters) ?? 0;
+    return factor * num;
+}
+
+export function map_item(item) {
+    // Maps a captured LinkedIn feed update to a flat record wrapped in MappedItem.
+    // Items without an `actor` yield an empty object. The post time is estimated
+    // as the collection time minus the relative age label shown with the post.
+    if (!item.actor) {
+        return {};
+    }
+    let time_collected;
+    if (item.__import_meta) {
+        time_collected = Math.floor(item.__import_meta.timestamp_collected / 1000);
+    } else {
+        time_collected = Math.floor(Date.now() / 1000);
+    }
+    const time_ago = item.actor.subDescription?.text ?? "";
+    const timestamp = Math.floor(time_collected - parseTimeAgo(time_ago));
+
+    // images
+    const images = [];
+    if (item.content && item.content.images) {
+        for (const image of item.content.images) {
+            const image_data = image.attributes[0].vectorImage;
+            const artifacts = image_data.artifacts.slice().sort((a, b) => b.width - a.width);
+            const url = image_data.rootUrl + artifacts[0].fileIdentifyingUrlPathSegment;
+            images.push(url);
+        }
+    }
+    if (images.length === 0 && item.content && item.content.articleComponent && item.content.articleComponent.largeImage) {
+        const largeImg = item.content.articleComponent.largeImage;
+        const attr0 = largeImg.attributes[0];
+        const image = attr0.detailData?.vectorImage;
+        if (!image && attr0.imageUrl) {
+            images.push(attr0.imageUrl.url);
+        } else if (image && image.artifacts) {
+            images.push(image.rootUrl + image.artifacts[0].fileIdentifyingUrlPathSegment);
+        }
+    }
+
+    // video thumbnail: the metadata sits on the content itself or on a video component
+    const video_host = item.content && "*videoPlayMetadata" in item.content
+        ? item.content
+        : (item.content?.linkedInVideoComponent || item.content?.externalVideoComponent);
+    // metadata or thumbnail may be absent; the thumbnail URL then stays empty
+    const thumb_content = video_host?.["*videoPlayMetadata"]?.thumbnail;
+    const video_thumb_url = thumb_content
+        ? thumb_content.rootUrl + thumb_content.artifacts[0].fileIdentifyingUrlPathSegment
+        : "";
+
+    const author = getAuthor(item);
+
+    const meta_urn = (item.updateMetadata?.urn) ?? item.preDashEntityUrn;
+    const urn = "urn:li:activity:" + meta_urn.split("urn:li:activity:")[1].split(",")[0].split(")")[0];
+    const item_id = urn.split(":").pop();
+
+    // hashtags
+    let hashtags = [];
+    if (item.commentary && item.commentary.text && item.commentary.text.attributes) {
+        hashtags = item.commentary.text.attributes
+            .filter(tag => tag.type === "HASHTAG")
+            .map(tag => tag.trackingUrn.split(":").pop());
+    } else if (item.commentary && item.commentary.text && item.commentary.text.attributesV2) {
+        hashtags = item.commentary.text.attributesV2
+            .filter(tag => tag.detailData && tag.detailData["*hashtag"])
+            .map(tag => tag.detailData["*hashtag"].trackingUrn.split(":").pop());
+    }
+
+    // mentions
+    const author_mentions = [];
+    const author_name_mentions = [];
+    if (item.commentary && item.commentary.text && item.commentary.text.attributes) {
+        for (const mention of item.commentary.text.attributes) {
+            if (mention.type === "PROFILE_MENTION") {
+                const mini = mention["*miniProfile"];
+                author_mentions.push(mini.publicIdentifier);
+                author_name_mentions.push([mini.firstName ?? "", mini.lastName ?? ""].join(" ").trim());
+            } else if (mention.type === "COMPANY_NAME") {
+                const mini = mention["*miniCompany"];
+                author_mentions.push(mini.universalName);
+                author_name_mentions.push(mini.name ?? "");
+            }
+        }
+    }
+
+    // metrics
+    let metrics = {};
+    if (item["*socialDetail"] && "*totalSocialActivityCounts" in item["*socialDetail"]) {
+        const counts = item["*socialDetail"]["*totalSocialActivityCounts"];
+        metrics = {
+            comments: counts.numComments,
+            shares: counts.numShares,
+            reactions: counts.numLikes,
+            reaction_like: 0,
+            reaction_empathy: 0,
+            reaction_praise: 0,
+            reaction_entertainment: 0,
+            reaction_appreciation: 0,
+            reaction_interest: 0
+        };
+        if (Array.isArray(counts.reactionTypeCounts)) {
+            for (const rc of counts.reactionTypeCounts) {
+                const key = "reaction_" + rc.reactionType.toLowerCase();
+                metrics[key] = rc.count;
+            }
+        }
+    } else {
+        // `*socialDetail` itself may be missing on this path, so every level is read optionally
+        const sd = item["*socialDetail"];
+        metrics = {
+            comments: sd?.comments?.paging?.total ?? 0,
+            shares: sd?.totalShares ?? 0,
+            reactions: sd?.likes?.paging?.total ?? 0
+        };
+    }
+
+    // link url
+    let link_url = "";
+    if (item.content && item.content.navigationContext) {
+        link_url = item.content.navigationContext.actionTarget ?? "";
+    } else if (item.content && item.content.articleComponent && item.content.articleComponent.navigationContext) {
+        link_url = item.content.articleComponent.navigationContext.actionTarget ?? "";
+    }
+
+    // build result object
+    const result = {
+        collected_from_url: normalize_url_encoding(item.__import_meta?.source_platform_url ?? ""),
+        id: item_id,
+        thread_id: item_id,
+        body: item.commentary?.text?.text ?? "",
+        timestamp: formatUtcTimestamp(timestamp),
+        timestamp_collected: formatUtcTimestamp(time_collected),
+        timestamp_ago: time_ago.split("•")[0].trim(),
+        is_promoted: /\d/.test(time_ago) ? "no" : "yes",
+        // author fields (author_ prefix, drop trailing _username)
+        ...Object.fromEntries(Object.entries(author).map(([k, v]) => {
+            let field = "author_" + k;
+            field = field.replace("_username", "");
+            return [field, v];
+        })),
+        author_mentions: author_mentions.join(","),
+        author_name_mentions: author_name_mentions.join(","),
+        hashtags: hashtags.join(","),
+        image_urls: images.join(","),
+        video_thumb_url: video_thumb_url,
+        post_url: "https://www.linkedin.com/feed/update/" + urn,
+        link_url: link_url,
+        ...metrics,
+        inclusion_context: item.header?.text?.text ?? "",
+        unix_timestamp: timestamp,
+        unix_timestamp_collected: time_collected
+    };
+
+    return new MappedItem(result);
+}
+// === end auto-generated ===
diff --git a/modules/package.json b/modules/package.json
new file mode 100644
index 0000000..3dbc1ca
--- /dev/null
+++ b/modules/package.json
@@ -0,0 +1,3 @@
+{
+  "type": "module"
+}
diff --git a/modules/pinterest.js b/modules/pinterest.js
index 5f9abcc..a67a0fe 100644
--- a/modules/pinterest.js
+++ b/modules/pinterest.js
@@ -91,4 +91,95 @@ export function capture(response, source_platform_url, source_url) {
     }
 
     return pins;
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/pinterest/search_pinterest.py)
+export function map_item(item) {
+    // Maps a captured Pinterest pin to a MappedItem. Items tagged with
+    // `_zs-origin: 'html'` use the HTML mapper; all others use the JSON mapper.
+    function map_item_from_json(post) {
+        // Maps a pin whose data came as JSON (snake_case and camelCase keys are both accepted)
+        // Parse timestamp; a missing or unparseable value is flagged as missing
+        const timestampStr = post['created_at'] ?? post['createdAt'];
+        const date = timestampStr ? new Date(timestampStr) : null;
+        let unix_timestamp = new MissingMappedField("");
+        let str_timestamp = new MissingMappedField("");
+        if (date !== null && !Number.isNaN(date.getTime())) {
+            unix_timestamp = Math.floor(date.getTime() / 1000);
+            str_timestamp = formatUtcTimestamp(unix_timestamp);
+        }
+
+        let post_id = post['entityId'] ?? post['id'];
+
+        let image_url;
+        if (post['imageSpec_orig']) {
+            image_url = post['imageSpec_orig']['url'];
+        } else if (post['images']?.orig?.url) {
+            image_url = post['images']['orig']['url'];
+        } else {
+            image_url = post['images']?.url;
+        }
+
+        return new MappedItem({
+            collected_from_url: normalize_url_encoding(post['__import_meta']?.source_platform_url ?? ""),
+            id: post_id,
+            thread_id: post_id,
+            author: post['pinner']?.username,
+            author_fullname: post['pinner']?.fullName ?? post['pinner']?.full_name ?? "",
+            author_original: post['nativeCreator'] ? post['nativeCreator'].username : post['pinner']?.username,
+            body: (post['description'] ?? "").trim(),
+            subject: (post['title'] ?? "").trim(),
+            ai_description: post['auto_alt_text'] ?? "",
+            pinner_original: post['originPinner'] ? post['originPinner'].fullName : "",
+            pinner_via: post['viaPinner'] ? post['viaPinner'].fullName : "",
+            board: post['board']?.name,
+            board_pins: post['board']?.pinCount ?? post['board']?.pin_count ?? null,
+            board_url: post['board']?.url ? `https://www.pinterest.com${post['board'].url}` : null,
+            timestamp: str_timestamp,
+            idea_tags: post['pinJoin'] ? (post['pinJoin']['visualAnnotation'] ?? []).join(",") : "",
+            url: `https://www.pinterest.com/pin/${post_id}`,
+            is_video: (post['isVideo'] ?? post['videos']) ? "yes" : "no",
+            image_url: image_url,
+            dominant_colour: post['dominantColor'] ?? post['dominant_color'] ?? null,
+            unix_timestamp: unix_timestamp
+        });
+    }
+
+    function map_item_from_html(post) {
+        // Maps a pin scraped from page HTML; fields this format does not fill in are flagged as missing.
+        // Pin IDs can exceed Number.MAX_SAFE_INTEGER, where parseInt silently rounds the value;
+        // such IDs (and non-numeric ones) are kept as the original value so `id` still matches `url`.
+        const numeric_id = parseInt(post['id'], 10);
+        const post_id = Number.isSafeInteger(numeric_id) ? numeric_id : post['id'];
+        return new MappedItem({
+            collected_from_url: normalize_url_encoding(post['__import_meta']?.source_platform_url ?? ""),
+            id: post_id,
+            thread_id: post_id,
+            author: new MissingMappedField(""),
+            author_fullname: new MissingMappedField(""),
+            author_original: new MissingMappedField(""),
+            body: (post['body'] ?? "").trim(),
+            subject: (post['title'] ?? "").trim(),
+            ai_description: new MissingMappedField(""),
+            pinner_original: new MissingMappedField(""),
+            pinner_via: new MissingMappedField(""),
+            board: new MissingMappedField(""),
+            board_pins: new MissingMappedField(""),
+            board_url: new MissingMappedField(""),
+            timestamp: new MissingMappedField(""),
+            idea_tags: (post['tags'] ?? []).join(","),
+            url: `https://www.pinterest.com/pin/${post['id']}`,
+            is_video: new MissingMappedField(""),
+            image_url: post['image'],
+            dominant_colour: new MissingMappedField(""),
+            unix_timestamp: new MissingMappedField("")
+        });
+    }
+
+    if (item['_zs-origin'] === 'html') {
+        return map_item_from_html(item);
+    }
+    return map_item_from_json(item);
+}
+// === end auto-generated ===
diff --git a/modules/rednote-comments.js b/modules/rednote-comments.js
index 47f9d79..46911a3 100644
--- a/modules/rednote-comments.js
+++ b/modules/rednote-comments.js
@@ -52,4 +52,36 @@ export function capture(response, source_platform_url, source_url) {
 
     // no posts, no data
     return [];
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/xiaohongshu_comments/search_rednote_comments.py)
+export function map_item(item) {
+    // Maps one captured RedNote (Xiaohongshu) comment to a flat record.
+    const {note_id, user_info} = item;
+    // create_time is treated as milliseconds; derive whole seconds and the formatted string
+    const unix_timestamp = Math.floor(Number(item.create_time) / 1000);
+    const timestamp = formatUtcTimestamp(unix_timestamp);
+
+    // import metadata is optional, so the source URL defaults to ""
+    const source_url = item.__import_meta?.source_platform_url ?? "";
+    const collected_from_url = normalize_url_encoding(source_url);
+
+    // any falsy ip_location (absent or empty) is flagged as missing
+    const ip_location = item.ip_location || new MissingMappedField("");
+    return new MappedItem({
+        collected_from_url,
+        id: item.id,
+        thread_id: note_id,
+        url: `https://www.xiaohongshu.com/explore/${note_id}`,
+        body: item.content ?? "",
+        timestamp,
+        author: user_info?.nickname ?? "",
+        author_avatar_url: user_info?.image ?? "",
+        ip_location,
+        likes: item.like_count,
+        replies: item.sub_comment_count,
+        unix_timestamp,
+    });
+}
+// === end auto-generated ===
diff --git a/modules/rednote.js b/modules/rednote.js
index 7471c92..e42d04f 100644
--- a/modules/rednote.js
+++ b/modules/rednote.js
@@ -103,4 +103,136 @@ export function capture(response, source_platform_url, source_url) {
 
     // no posts, no data
     return [];
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/xiaohongshu/search_rednote.py)
+function map_item_from_json_api_explore(post) {
+    // Maps a post taken from an API response to a MappedItem. Video posts carry
+    // their fields on the post itself; other posts nest them under `note_card`.
+    // Optional fields that are absent are flagged with MissingMappedField.
+    const item = post.type !== 'video' ? post.note_card : post;
+    const item_id = post.id ?? post.note_id;
+
+    // Images handling
+    let images;
+    if (item.image_list) {
+        images = [];
+        for (const image of item.image_list) {
+            if (image.url_default) {
+                images.push(image.url_default);
+            } else if (image.info_list && image.info_list.length) {
+                // prefer the 'WB_DFT' scene, else the first rendition listed
+                const preferred = image.info_list.find(info => info.image_scene === 'WB_DFT');
+                images.push((preferred ?? image.info_list[0]).url);
+            }
+        }
+    } else if (item.cover) {
+        images = [item.cover.url_default];
+    } else {
+        images = new MissingMappedField("");
+    }
+
+    const xsec_bit = post.xsec_token ? `?xsec_token=${post.xsec_token}` : "";
+    // only the first h264 stream is reported
+    const video_url = item.video?.media ? item.video.media.stream.h264[0].master_url : new MissingMappedField("");
+    const author = item.user.nickname ?? item.user.nick_name;
+    // `time` is divided by 1000 to get seconds; a falsy value counts as missing
+    const timestamp = item.time ?? null;
+    const timestampStr = timestamp ? formatUtcTimestamp(timestamp / 1000) : new MissingMappedField("");
+    // hashtags: text following '#', up to the next whitespace or listed punctuation
+    const hashtags = item.desc ? [...item.desc.matchAll(/#([^\s!@#$%^&*()_+{}:"|<>?\[\];',.\/`~]+)/g)].map(m => m[1]).join(",") : new MissingMappedField("");
+    const body = item.desc ?? new MissingMappedField("");
+    const image_urls = Array.isArray(images) ? images.join(",") : images;
+    const likes = item.interact_info?.liked_count ?? null;
+    const unix_ts = timestamp ? Math.floor(timestamp / 1000) : new MissingMappedField("");
+
+    return new MappedItem({
+        collected_from_url: normalize_url_encoding(post.__import_meta?.source_platform_url ?? ""),
+        id: item_id,
+        thread_id: item_id,
+        // the ID may come from `note_id`, so the link uses the resolved ID; reading
+        // `post.id` directly produced ".../explore/undefined" for such posts
+        url: `https://www.xiaohongshu.com/explore/${item_id}${xsec_bit}`,
+        title: item.display_title ?? "",
+        body: body,
+        hashtags: hashtags,
+        timestamp: timestampStr,
+        author: author,
+        author_avatar_url: item.user.avatar,
+        image_urls: image_urls,
+        video_url: video_url,
+        likes: likes,
+        unix_timestamp: unix_ts,
+    });
+}
+
+function map_item_from_json_embedded(item) {
+    // Maps a post whose data sits under `item.note` to a MappedItem.
+    // Optional fields that are absent are flagged with MissingMappedField.
+    const note = item.note;
+    const image = note.imageList?.[0]?.urlDefault ?? new MissingMappedField("");
+    // without a token no query string is added (it used to read "?xsec_token=undefined")
+    const xsec_bit = note.xsecToken ? `?xsec_token=${note.xsecToken}` : "";
+    const timestamp = note.time ?? null;
+    const timestampStr = timestamp ? formatUtcTimestamp(timestamp / 1000) : new MissingMappedField("");
+    const hashtags = note.desc ? [...note.desc.matchAll(/#([^\s!@#$%^&*()_+{}:"|<>?\[\];',.\/`~]+)/g)].map(m => m[1]).join(",") : new MissingMappedField("");
+    const body = note.desc ?? new MissingMappedField("");
+    const author = note.user.nickname ?? note.user.nick_name;
+    const likes = note.interactInfo?.likedCount ?? note.interact_info?.liked_count ??
+                  note.likes ?? new MissingMappedField("");
+    const unix_ts = timestamp ? Math.floor(timestamp / 1000) : new MissingMappedField("");
+    return new MappedItem({
+        collected_from_url: normalize_url_encoding(item.__import_meta?.source_platform_url ?? ""),
+        id: item.id,
+        thread_id: item.id,
+        url: `https://www.xiaohongshu.com/explore/${item.id}${xsec_bit}`,
+        title: note.title ?? "",
+        body: body,
+        hashtags: hashtags,
+        timestamp: timestampStr,
+        author: author,
+        author_avatar_url: note.user.avatar,
+        image_url: image,
+        video_url: new MissingMappedField(""),
+        likes: likes,
+        unix_timestamp: unix_ts,
+    });
+}
+
+function map_item_from_html(item) {
+    // Maps a post with `_zs-origin: 'html'`; body, hashtags, timestamps and video are flagged as missing.
+    const missing = () => new MissingMappedField("");
+    const {id, title, likes} = item;
+    return new MappedItem({
+        collected_from_url: normalize_url_encoding(item.__import_meta?.source_platform_url ?? ""),
+        id, thread_id: id,
+        url: `https://www.xiaohongshu.com${item.url}`,
+        title,
+        body: missing(), hashtags: missing(), timestamp: missing(),
+        author: item.author_name,
+        author_avatar_url: item.author_avatar_url,
+        image_url: item.thumbnail_url,
+        video_url: missing(),
+        likes,
+        unix_timestamp: missing(),
+    });
+}
+
+export function map_item(post) {
+    // Entry point: picks the mapper matching how the post was captured.
+    const fromHtml = post['_zs-origin'] === 'html';
+    const hasContent = Boolean(post.note_card || post.user || fromHtml || post.note);
+    if (!hasContent) {
+        // tile stubs are minimal thumbnail entries with no content to map
+        const source = post.__import_meta?.source_url ?? "";
+        throw new MapItemException(`Xiaohongshu tile stub without post content (source: ${source || 'unknown'})`);
+    }
+    if (fromHtml) {
+        return map_item_from_html(post);
+    }
+    // posts carrying a `note` object use the embedded mapper; the rest use the API mapper
+    const mapper = post.note ? map_item_from_json_embedded : map_item_from_json_api_explore;
+    return mapper(post);
+}
+// === end auto-generated ===
diff --git a/modules/threads.js b/modules/threads.js
index 98ebfa5..e906fa5 100644
--- a/modules/threads.js
+++ b/modules/threads.js
@@ -69,4 +69,85 @@ export function capture(response, source_platform_url, source_url) {
             return item;
         }
     })]
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/threads/search_threads.py)
+export function map_item(item) {
+    const post = item;
+    const timestampStr = post.taken_at != null ? formatUtcTimestamp(post.taken_at) : "";
+    let imageUrls = [];
+    let videoUrls = [];
+
+    if (post.carousel_media && post.carousel_media.length) {
+        for (const c of post.carousel_media) {
+            if (c.image_versions2 && c.image_versions2.candidates && c.image_versions2.candidates.length) {
+                const url = c.image_versions2.candidates[0].url;
+                if (url) imageUrls.push(url);
+            }
+            if (c.video_versions && c.video_versions.length) {
+                const vurl = c.video_versions[0].url;
+                if (vurl) videoUrls.push(vurl);
+            }
+        }
+    } else {
+        if (post.image_versions2 && post.image_versions2.candidates && post.image_versions2.candidates.length) {
+            const url = post.image_versions2.candidates[0].url;
+            if (url) imageUrls.push(url);
+        }
+        if (post.video_versions && post.video_versions.length) {
+            const vurl = post.video_versions[0].url;
+            if (vurl) videoUrls.push(vurl);
+        }
+    }
+
+    const audioUrl = post.audio && post.audio.audio_src ? post.audio.audio_src : "";
+
+    let linkedUrl = "";
+    let linkThumbnail = "";
+    const linkPreview = post.text_post_app_info && post.text_post_app_info.link_preview_attachment;
+    if (linkPreview) {
+        linkedUrl = linkPreview.url || "";
+        try {
+            const parsed = new URL(linkedUrl);
+            const uParam = parsed.searchParams.getAll('u');
+            if (uParam.length) {
+                linkedUrl = uParam[0];
+                linkThumbnail = linkPreview.image_url ?? "";
+            } else {
+                linkThumbnail = linkedUrl;
+            }
+        } catch (e) {
+            linkThumbnail = linkedUrl;
+        }
+    }
+
+    const hashtags = post.caption && post.caption.text
+        ? [...post.caption.text.matchAll(/#([^\s!@#$%ˆ&*()_+{}:"|<>?\[\];',./`~']+)/g)].map(m => m[1]).join(',')
+        : "";
+
+    return new MappedItem({
+        collected_from_url: normalize_url_encoding(post.__import_meta?.source_platform_url ?? ""),
+        id: post.code,
+        thread_id: post.code,
+        url: `https://www.threads.com/@${post.user?.username ?? ""}/post/${post.code}`,
+        body: post.caption?.text ?? "",
+        timestamp: timestampStr,
+        author: post.user?.username ?? "",
+        author_is_verified: post.user?.is_verified ? "yes" : "no",
+        author_avatar: post.user?.profile_pic_url ?? null,
+        image_url: imageUrls.join(","),
+        video_url: videoUrls.join(","),
+        audio_url: audioUrl,
+        link_url: linkedUrl,
+        link_thumbnail_url: linkThumbnail ?? "",
+        is_paid_partnership: post.is_paid_partnership ? "yes" : "no",
+        likes: post.like_count,
+        reposts: post.text_post_app_info?.repost_count ?? 0,
+        replies: post.text_post_app_info?.direct_reply_count ?? 0,
+        quotes: post.text_post_app_info?.quote_count ?? 0,
+        hashtags: hashtags,
+        unix_timestamp: post.taken_at != null ? Math.floor(post.taken_at) : null
+    });
+}
+// === end auto-generated ===
diff --git a/modules/tiktok-comments.js b/modules/tiktok-comments.js
index 97b68b8..e41446a 100644
--- a/modules/tiktok-comments.js
+++ b/modules/tiktok-comments.js
@@ -29,4 +29,36 @@ export function capture(response, source_platform_url, source_url) {
     }
 
     return [];
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/tiktok_comments/search_tiktok_comments.py)
+export function map_item(item) {
+    const timestamp = formatUtcTimestamp(item.create_time);
+    const thread_id = item.reply_id === "0" ? item.aweme_id : item.reply_id;
+    const avatar_url = item.user?.avatar_thumb?.url_list?.[0] ?? null;
+    const collected_from_url = normalize_url_encoding(item.__import_meta?.source_platform_url ?? "");
+    const post_url = item.share_info?.url?.split(".html")[0] ?? null;
+    return new MappedItem({
+        collected_from_url: collected_from_url,
+        id: item.cid,
+        thread_id: thread_id,
+        author: item.user?.unique_id ?? null,
+        author_full: item.user?.nickname ?? null,
+        author_avatar_url: avatar_url,
+        body: item.text ?? null,
+        timestamp: timestamp,
+        unix_timestamp: item.create_time,
+        likes: item.digg_count,
+        replies: item.reply_comment_total ?? 0,
+        post_id: item.aweme_id,
+        post_url: post_url,
+        post_body: item.share_info?.title ?? null,
+        comment_url: item.share_info?.url ?? null,
+        is_liked_by_post_author: !!item.author_pin ? "yes" : "no",
+        is_sticky: !!item.stick_position ? "yes" : "no",
+        is_comment_on_comment: item.reply_id === "0" ? "no" : "yes",
+        language_guess: item.comment_language ?? null
+    });
+}
+// === end auto-generated ===
diff --git a/modules/tiktok.js b/modules/tiktok.js
index 55e6fbf..586c9e1 100644
--- a/modules/tiktok.js
+++ b/modules/tiktok.js
@@ -103,4 +103,98 @@ export function capture(response, source_platform_url, source_url) {
     } else {
         return [];
     }
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/tiktok/search_tiktok.py)
+export function map_item(post) {
+    // Zeeschuimer metadata
+    const metadata = post["__import_meta"] ?? {};
+
+    const challenges = (post["challenges"] ?? []).map(ch => ch.title);
+
+    const hashtags = (post["textExtra"] ?? []).filter(extra => "hashtagName" in extra && extra.hashtagName).map(extra => extra.hashtagName);
+
+    const labels = Array.isArray(post["diversificationLabels"]) ? post["diversificationLabels"].join(",") : "";
+
+    let user_nickname = "";
+    let user_fullname = "";
+    let user_thumbnail = "";
+    if (post["author"] && typeof post["author"] === "object") {
+        // from intercepted API response
+        user_nickname = post["author"]["uniqueId"] ?? "";
+        user_fullname = post["author"]["nickname"] ?? "";
+        user_thumbnail = post["author"]["avatarThumb"] ?? "";
+    } else if (post["author"]) {
+        // from embedded JSON object
+        user_nickname = post["author"] ?? "";
+        user_fullname = post["nickname"] ?? "";
+        user_thumbnail = "";
+    }
+
+    // Determine the best thumbnail URL that hasn't expired yet
+    const thumbnail_options = [];
+    if (post["video"]?.shareCover) {
+        const shareCover = post["video"]["shareCover"];
+        if (Array.isArray(shareCover) && shareCover.length) {
+            thumbnail_options.push(shareCover[shareCover.length - 1]);
+        } else if (typeof shareCover === "string") {
+            thumbnail_options.push(shareCover);
+        }
+    }
+    if (post["video"]?.cover) {
+        thumbnail_options.push(post["video"]["cover"]);
+    }
+    const now = Math.floor(Date.now() / 1000);
+    const validThumbnails = thumbnail_options.filter(url => {
+        if (!url) return false;
+        try {
+            const expiresStr = new URL(url).searchParams.get("x-expires");
+            const expires = expiresStr ? parseInt(expiresStr, 10) : now;
+            return expires >= now;
+        } catch (e) {
+            return false;
+        }
+    });
+    const thumbnail_url = validThumbnails.length ? validThumbnails[validThumbnails.length - 1] : "";
+
+    return new MappedItem({
+        "collected_from_url": metadata["source_platform_url"] ? normalize_url_encoding(metadata["source_platform_url"]) : "",
+        "id": post["id"],
+        "thread_id": post["id"],
+        "author": user_nickname,
+        "author_full": user_fullname,
+        "author_followers": post["authorStats"]?.followerCount ?? "",
+        "author_likes": post["authorStats"]?.diggCount ?? "",
+        "author_videos": post["authorStats"]?.videoCount ?? "",
+        "author_avatar": user_thumbnail,
+        "body": post["desc"],
+        "stickers": (post["stickersOnItem"] ?? []).map(s => s.stickerText.join(" ")).join("\n"),
+        "timestamp": formatUtcTimestamp(parseInt(post["createTime"], 10)),
+        "unix_timestamp": parseInt(post["createTime"], 10),
+        "is_duet": (post["duetInfo"]?.duetFromId && post["duetInfo"]["duetFromId"] !== "0") ? "yes" : "no",
+        "is_ad": post["isAd"] ? "yes" : "no",
+        "is_paid_partnership": post["adAuthorization"] ? "yes" : "no",
+        "is_sensitive": post["maskType"] === 3 ? "yes" : "no",
+        "is_photosensitive": post["maskType"] === 4 ? "yes" : "no",
+        "music_name": post["music"]?.title ?? "",
+        "music_id": post["music"]?.id ?? "",
+        "music_url": post["music"]?.playUrl ?? "",
+        "music_thumbnail": post["music"]?.coverLarge ?? "",
+        "music_author": post["music"]?.authorName ?? "",
+        "video_url": post["video"]?.downloadAddr ?? "",
+        "tiktok_url": `https://www.tiktok.com/@${user_nickname}/video/${post["id"]}`,
+        "thumbnail_url": thumbnail_url,
+        "likes": post["stats"]?.diggCount,
+        "comments": post["stats"]?.commentCount,
+        "shares": post["stats"]?.shareCount,
+        "plays": post["stats"]?.playCount,
+        "hashtags": hashtags.join(","),
+        "challenges": challenges.join(","),
+        "diversification_labels": labels,
+        "location_created": post["locationCreated"] ?? "",
+        "effects": (post["effectStickers"] ?? []).map(e => e.name).join(","),
+        "warning": (post["warnInfo"] ?? []).map(w => w.text).join(",")
+    });
+}
+// === end auto-generated ===
diff --git a/modules/truth.js b/modules/truth.js
index fe626cf..ec6bdb1 100644
--- a/modules/truth.js
+++ b/modules/truth.js
@@ -35,4 +35,95 @@ export function capture(response, source_platform_url, source_url) {
     }
 
     return items;
-}
\ No newline at end of file
+}
+
+// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
+// (regenerated from datasources/truth/search_truth.py)
+export function map_item(item) {
+    const errors = [];
+    const postTime = new Date(item["created_at"]);
+    const images = [];
+    const videos = [];
+    const videoThumbs = [];
+
+    if (item.media_attachments) {
+        for (const media of item.media_attachments) {
+            const mtype = media.type;
+            if (mtype === "image") {
+                images.push(media.url);
+            } else if (mtype === "video") {
+                videos.push(media.url);
+                videoThumbs.push(media.preview_url);
+            } else if (mtype === "tv") {
+                // Truth social TV channels – only a thumbnail is provided
+                videoThumbs.push(media.url);
+                // preview_url is a smaller thumb (ignored)
+            } else {
+                errors.push(`New media type: ${mtype}`);
+            }
+        }
+    }
+
+    const group = item.group ? item.group : {};
+
+    let thread_id;
+    if (item.quote_id != null) {
+        thread_id = item.quote_id;
+    } else if (item.in_reply_to != null) {
+        let reply_to = item.in_reply_to;
+        while (reply_to) {
+            if (reply_to.in_reply_to != null) {
+                reply_to = reply_to.in_reply_to;
+            } else {
+                thread_id = reply_to.id;
+                break;
+            }
+        }
+    } else {
+        thread_id = item.id;
+    }
+
+    const mentions = (item.mentions ?? []).map(m => m.username);
+    const hashtags = (item.tags ?? []).map(t => t.name);
+
+    // Format timestamp as "YYYY-MM-DD HH:MM:SS" in UTC
+    const pad = n => String(n).padStart(2, "0");
+    const timestamp = `${postTime.getUTCFullYear()}-${pad(postTime.getUTCMonth() + 1)}-${pad(postTime.getUTCDate())} ${pad(postTime.getUTCHours())}:${pad(postTime.getUTCMinutes())}:${pad(postTime.getUTCSeconds())}`;
+
+    const mapped_item = {
+        collected_from_url: normalize_url_encoding(item.__import_meta?.source_platform_url ?? ""),
+        id: item.id,
+        created_at: item.created_at,
+        body: item.content,
+        url: item.url ?? null,
+        reblogs_count: item.reblogs_count ?? 0,
+        replies_count: item.replies_count ?? 0,
+
+        account_id: item.account.id,
+        account_username: item.account.username,
+        account_display_name: item.account.display_name,
+        account_avatar: item.account.avatar,
+        account_verified: item.account.verified,
+        account_followers: item.account.followers_count,
+        account_following: item.account.following_count,
+
+        mentions: mentions.join(","),
+        hashtags: hashtags.join(","),
+
+        images: images.join(","),
+        video_thumbs: videoThumbs.join(","),
+        video_urls: videos.join(","),
+
+        group_id: group.id ?? null,
+        group_display_name: group.display_name ?? null,
+        group_avatar: group.avatar ?? null,
+        group_note: group.note ?? null,
+        group_members_count: group.members_count ?? 0,
+
+        thread_id: thread_id,
+        timestamp: timestamp
+    };
+
+    return new MappedItem(mapped_item, errors.join("; "));
+}
+// === end auto-generated ===
diff --git a/tests/.env.example b/tests/.env.example
new file mode 100644
index 0000000..137a52b
--- /dev/null
+++ b/tests/.env.example
@@ -0,0 +1,23 @@
+# 4CAT API config for the map_item comparator (`npm run test:compare`).
+# Copy this file to .env in this directory and fill in real values.
+# .env is gitignored; .env.example is the committed template.
+
+# Base URL of the 4CAT instance to hit. No trailing slash. Default ports:
+#   :80   for nginx (production)
+#   :4000 for the Flask dev server
+FOURCAT_URL=http://localhost
+
+# API key for that 4CAT instance. Get one from the 4CAT UI; tied to your
+# user. 4CAT accepts the raw key as the Authorization header value (no
+# `Bearer ` prefix).
+FOURCAT_API_KEY=your-api-key-here
+
+# Comma-separated list of dataset keys (the 32-char ids from 4CAT dataset
+# URLs) to compare. The comparator pulls inputs from /download/<key> and
+# expected outputs from
+# /api/dataset/<key>/items/?annotations=no&missing_fields=keep&stream=true
+# for each. Datasource is read from each dataset's metadata.
+#
+# `npm run test:compare -- <key>` narrows a single run to one key; the key
+# must still be listed here.
+FOURCAT_DATASETS=key1,key2,key3
diff --git a/tests/README.md b/tests/README.md
index f1188e2..f203b60 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,31 +1,42 @@
 ## Tests for Zeeschuimer
 
-This folder contains **testing** code for Zeeschuimer.
+This folder contains testing code for Zeeschuimer. There are four suites,
+each with a different purpose and a different runtime environment:
 
-### Integration Tests (Selenium)
+| Suite                            | Tests                                                     | Environment        | When it runs                    | Needs                                  |
+|----------------------------------|-----------------------------------------------------------|--------------------|---------------------------------|----------------------------------------|
+| Selenium integration             | Page captures real items from each supported platform     | Real Firefox       | Reviewer-supervised, manual     | Firefox profile, sometimes a human     |
+| Duplicate-behavior unit (Jest)   | DB merge / keep / update semantics in isolation           | jsdom + fake-IDB   | `npm test` (every push)         | None                                   |
+| Module load smoke (Jest, Tier 1) | Each `modules/*.js` parses and imports cleanly            | jsdom              | `npm test` (every push)         | None                                   |
+| `map_item` comparator (Jest, Tier 2) | JS `map_item` output matches 4CAT's Python mapping per item | jsdom + cross-fetch | `npm run test:compare` (on demand) | Live 4CAT, API key, dataset key(s) |
 
-The Python + Selenium tests visit pages on supported platforms
-and see how many items are captured. If the amount of items captured is 
-unexpectedly low or high, this is flagged and may indicate that Zeeschuimer no
-longer properly captures data from the platform.
+Hermetic suites (no external dependencies) live in `npm test`. Anything that
+requires a real browser, a 4CAT server, or a human in the loop is opt-in.
 
-These tests are **supervised** i.e. they require monitoring by a human and 
+### Integration tests (Selenium)
+
+The Python + Selenium tests visit pages on supported platforms and see how
+many items are captured. If the number of items captured is unexpectedly
+low or high, this is flagged and may indicate that Zeeschuimer no longer
+properly captures data from the platform.
+
+These tests are **supervised** — they require monitoring by a human and
 cannot run fully autonomously, since some platforms (TikTok in particular)
 occasionally show CAPTCHAs that need to be completed for a test to run
 successfully. This is also why Selenium does not run a headless Firefox.
 
-The amount of items returned per page is somewhat variable for most platforms,
-so if the number is slightly lower or higher than expected this is not 
-necessarily a problem (but worth checking).
+The number of items returned per page is somewhat variable for most
+platforms, so if the number is slightly lower or higher than expected this
+is not necessarily a problem (but worth checking).
 
-Additionally, most platforms require logging in before (full) access to the UI
-is available. The testing script borrows a Firefox profile directory from 
-elsewhere on the system to do this. It will try to find one automatically but
-you can also pass one with the `--profiledir` argument. The idea is that you
-log in to the various sites (Instagram, etc) in your 'normal' Firefox, and the
-tests then borrow that login to interface with the website.
+Most platforms require logging in before (full) access to the UI is
+available. The testing script borrows a Firefox profile directory from
+elsewhere on the system to do this. It will try to find one automatically
+but you can also pass one with the `--profiledir` argument. Log in to the
+various sites (Instagram, etc) in your 'normal' Firefox, and the tests then
+borrow that login.
 
-Run `test.py` to run tests. Required non-standard libraries are in 
+Run `test.py` to run tests. Required non-standard libraries are in
 `requirements.txt`.
 
 Tests are defined in `tests.json` with the following structure:
@@ -35,49 +46,152 @@ Tests are defined in `tests.json` with the following structure:
   "platform id as in zeeschuimer (e.g. 'tiktok.com')": {
     "test case (e.g. 'Home feed')": {
       "url": {
-        "expected": 0,  # amount of items expected to be captured on this page
-        "more-after-scroll": false,  # whether scrolling is supposed to load more items (currently unsupported)
-        "wait": 10  # wait time before checking number of items (optional, default 5)
-      } # more URLS can be added per test case
+        "expected": 0,
+        "more-after-scroll": false,
+        "wait": 10
+      }
     }
   }
 }
 ```
 
-### Unit Tests (Jest)
-
-The JavaScript unit tests verify duplicate-handling logic in isolation using 
-a mocked Dexie database. These tests ensure that when the duplicate behavior 
-setting is changed, the correct existing record is selected for updates.
+### Jest suites
 
 **Prerequisites**
-- Node.js (v18 or later) and npm must be installed
+- Node.js (v18 or later) and npm
+- `cd tests && npm install`
+
+**Recommended: develop the tests inside Docker.** On Windows the global
+permission model can make `npm install` / `npm test` awkward to run from
+an arbitrary shell, and an agentic assistant working in auto-mode will
+hit deny-rules before it can do a `cross-fetch`-style dependency spike.
+Any minimal `node:20`-or-newer image with this repo mounted in is
+enough — install what you need, run `npm install`, run `npm test` and
+`npm run test:compare`. The host's `tests/.env` is picked up via the
+mount, and `FOURCAT_URL` can point at a 4CAT reachable from the
+container (`host.docker.internal` on Windows/Mac, the host IP on
+Linux).
+
+#### Duplicate-behavior unit tests
+
+Verify duplicate-handling logic in isolation using a mocked Dexie database.
+Ensures that when the duplicate behavior setting is changed, the correct
+existing record is selected for updates.
+
+Coverage:
+- Schema upgrade backfills `last_updated` from `timestamp_collected`
+- Compound index correctly selects most recent item by `last_updated`
+- Forward-looking behavior: "keep" → "update" targets newest record
+- Forward-looking behavior: "update" → "keep" creates new records
+- Merge: shallow merge preserves fields from both records
+- Skip: no modifications occur when duplicate found
+- Platform isolation: same `item_id` on different platforms are independent
+- Tie-breaker: when `last_updated` is equal, prefer higher `id`
+
+#### Module load smoke (Tier 1)
+
+For every file under `modules/*.js`, `tests/map_item.test.js` asserts the
+module parses and imports without throwing. Modules with a `map_item`
+export and modules without one both pass this tier — the goal is purely to
+catch a generator that emits a syntax error or an import-time throw.
+
+No data is run through `map_item` here; that work belongs in the
+comparator.
+
+#### `map_item` comparator (Tier 2)
+
+For every 4CAT dataset key listed in `FOURCAT_DATASETS`,
+`tests/map_item_compare.test.js`:
+
+1. sends a HEAD to the items endpoint and reads the datasource id from its
+   `X-4CAT-Dataset-Datasource` response header (no metadata-endpoint call)
+2. translates that id to a Zeeschuimer module name via
+   `zeeschuimer-to-4cat.json` (used in reverse)
+3. fetches `/download/<key>` (NDJSON inputs, already wrapped via
+   `wrap_for_map_item` by Zeeschuimer pre-upload) and
+   `/api/dataset/<key>/items/?annotations=no&missing_fields=keep&stream=true`
+   (expected outputs from 4CAT's Python `map_item`, as NDJSON — `stream=true`
+   avoids the JSON form's `limit=100` pagination)
+4. pairs items by `id` (or by index with a warning if `id` is missing on
+   either side), runs each input through the local `map_item`, and
+   field-by-field diffs against the expected output (4CAT's API-only
+   aggregate `missing_fields` key is excluded; per-field `{__missing:true}`
+   markers are still compared)
 
-**Setup**
+The comparator does **not** exercise `wrap_for_map_item` itself — Zeeschuimer
+applies it pre-storage and `/download/<key>` returns post-wrap items. This
+is an accepted gap; see `docs/map-item-test-plan.md`.
 
-1. Install Node.js dependencies:
-   ```bash
-   cd tests
-   npm install
-   ```
+**Configuration:** copy `tests/.env.example` to `tests/.env` and set:
+- `FOURCAT_URL` — base URL of the 4CAT instance (no trailing slash)
+- `FOURCAT_API_KEY` — raw API key (no `Bearer ` prefix)
+- `FOURCAT_DATASETS` — comma-separated list of dataset keys
 
-**Running tests**
+The comparator hard-errors at startup if any of these are missing.
+
+**Optional knob:** by default the comparator halts a dataset at its first
+failing item (reporting the rest as one skipped "halted" placeholder). To
+compare *every* item, pass `--all`:
 
 ```bash
-npm test
+npm run test:compare -- <dataset_key> --all
 ```
 
-For watch mode during development:
+`FAIL_FAST=0` (or `FAIL_FAST=false`) does the same, but prefer `--all`: an
+inline `FAIL_FAST=0 npm run …` does not reliably reach node when npm/node is
+the Windows binary run through WSL interop, and isn't env syntax in cmd.exe.
+A CLI flag crosses every shell.
+
+### Running
+
 ```bash
+# everything that's hermetic — duplicate-behavior unit + module load smoke
+npm test
+
+# watch mode for the same
 npm run test:watch
+
+# the comparator — every dataset key in FOURCAT_DATASETS
+npm run test:compare
+
+# the comparator narrowed to one dataset key (must still appear in
+# FOURCAT_DATASETS — protects against typos)
+npm run test:compare -- <dataset_key>
+
+# compare every item instead of halting at the first failure
+npm run test:compare -- <dataset_key> --all
 ```
 
-**Test coverage**
-- Schema upgrade backfills `last_updated` from `timestamp_collected`
-- Compound index correctly selects most recent item by `last_updated`
-- Forward-looking behavior: switching from "keep" to "update" targets newest record
-- Forward-looking behavior: switching from "update" to "keep" creates new records
-- Merge behavior: shallow merge preserves fields from both records
-- Skip behavior: no modifications occur when duplicate found
-- Platform isolation: same `item_id` on different platforms are independent
-- Tie-breaker: when `last_updated` is equal, prefer higher `id`
+### Where does a new test go?
+
+- **Pure data transformation, no live external state, runs anywhere.**
+  Duplicate-behavior unit suite (DB logic) or the Tier 1 smoke
+  (`map_item` static checks).
+- **Field-by-field correctness against 4CAT's Python `map_item`.** Tier 2
+  comparator. Add a dataset to `FOURCAT_DATASETS` that covers the case;
+  the comparator will pick it up.
+- **End-to-end user flow in the extension.** Selenium.
+
+### Why the environments differ
+
+The two Jest tiers run in **jsdom** rather than node env. The reasoning:
+
+- `map_item` bodies are pure data transformation, but four of them
+  (`gab`, `pinterest`, `rednote`, `truth`) call `strip_tags`, which
+  invokes `new DOMParser()`. jsdom provides a spec-compliant native
+  `DOMParser`; node env doesn't.
+- jsdom doesn't ship `fetch`. The standard workaround
+  (`undici`) crashes inside jsdom because it pokes at
+  `clearImmediate` / `markResourceTiming` / fast-now timers that jsdom
+  shadows. `cross-fetch` wraps `node-fetch` v2 internally and doesn't
+  hit those Node internals, so it works in jsdom — the comparator
+  imports `cross-fetch/polyfill` to assign `globalThis.fetch`.
+
+The tradeoff is parity with production. Neither jsdom's `DOMParser` nor the
+`cross-fetch`/`node-fetch` stack behaves byte-for-byte like Firefox's Gecko,
+which is what runs in production. Whitespace handling around `<br>` and
+block elements is the usual suspect. If the comparator emits false-
+positive diffs on text fields for the four `strip_tags` modules, the
+right fix is to normalise whitespace in the comparator's `deep_equal`
+rather than chase parser parity. The Selenium tier sits above and
+provides the real-Gecko fidelity check.
diff --git a/tests/_module-info.js b/tests/_module-info.js
new file mode 100644
index 0000000..e6866a3
--- /dev/null
+++ b/tests/_module-info.js
@@ -0,0 +1,59 @@
+/**
+ * Shared helper for the map_item test drivers.
+ *
+ * Pre-validates a module by:
+ *   1. Running `node --check` on its file (syntax check; avoids the
+ *      worker-killing experimental-ESM crash when a syntax error reaches
+ *      the dynamic importer).
+ *   2. Dynamically importing it and checking for a `map_item` export.
+ *
+ * Results are cached per module name in module-level Maps, so repeated calls
+ * within one loaded copy of this helper don't pay the spawnSync cost twice.
+ *
+ * Returns one of four states the test driver can branch on:
+ *   { state: 'ok',           map_item: <fn> }
+ *   { state: 'no_map_item' }
+ *   { state: 'syntax_error', error: <string> }
+ *   { state: 'import_error', error: <Error> }
+ */
+
+import { spawnSync } from 'node:child_process';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const MODULES_ROOT = join(__dirname, '..', 'modules');
+
+const syntax_cache = new Map();
+const inspect_cache = new Map();
+
+function check_module_syntax(module_name) {
+    if (syntax_cache.has(module_name)) return syntax_cache.get(module_name);
+    const module_path = join(MODULES_ROOT, `${module_name}.js`);
+    const result = spawnSync(process.execPath, ['--check', module_path], { encoding: 'utf8' });
+    const out = result.status === 0
+        ? null
+        : (result.stderr || result.stdout || `exit code ${result.status}`).trim();
+    syntax_cache.set(module_name, out);
+    return out;
+}
+
+export async function inspect_module(module_name) {
+    if (inspect_cache.has(module_name)) return inspect_cache.get(module_name);
+    const syntax_error = check_module_syntax(module_name);
+    let result;
+    if (syntax_error) {
+        result = { state: 'syntax_error', error: syntax_error };
+    } else {
+        try {
+            const mod = await import(`../modules/${module_name}.js`);
+            result = typeof mod.map_item === 'function'
+                ? { state: 'ok', map_item: mod.map_item }
+                : { state: 'no_map_item' };
+        } catch (e) {
+            result = { state: 'import_error', error: e };
+        }
+    }
+    inspect_cache.set(module_name, result);
+    return result;
+}
diff --git a/tests/duplicate-behavior.test.js b/tests/duplicate-behavior.test.js
index 031f663..9f0662b 100644
--- a/tests/duplicate-behavior.test.js
+++ b/tests/duplicate-behavior.test.js
@@ -5,8 +5,9 @@
  * update or merge behaviors to duplicates across navigation boundaries.
  */
 
+import 'fake-indexeddb/auto';
+
 let Dexie;
-require('fake-indexeddb/auto');
 
 // Mock browser extension APIs
 global.browser = {
diff --git a/tests/jest.compare.config.cjs b/tests/jest.compare.config.cjs
new file mode 100644
index 0000000..070e2ff
--- /dev/null
+++ b/tests/jest.compare.config.cjs
@@ -0,0 +1,20 @@
+// Tier 2 — live comparator against a 4CAT instance.
+//
+// Runs only `map_item_compare.test.js`. Requires FOURCAT_URL,
+// FOURCAT_API_KEY, and FOURCAT_DATASETS to be set in tests/.env. Hard-errors
+// rather than silently skipping if env is missing.
+//
+// Env is jsdom so that the four modules using `strip_tags` (gab, pinterest,
+// rednote, truth) have a native DOMParser. The comparator uses cross-fetch
+// to provide a jsdom-friendly fetch (jsdom doesn't ship fetch and undici
+// crashes inside jsdom).
+module.exports = {
+  testEnvironment: 'jsdom', // gives the strip_tags modules a DOMParser (see header comment)
+  testMatch: ['**/map_item_compare.test.js'], // the comparator test file only
+  testPathIgnorePatterns: ['/node_modules/'],
+  transform: {},
+  moduleFileExtensions: ['js', 'json'],
+  setupFiles: ['<rootDir>/setup-globals.cjs'],
+  testTimeout: 30000, // Jest's testTimeout is expressed in milliseconds
+  verbose: true
+};
diff --git a/tests/jest.config.cjs b/tests/jest.config.cjs
new file mode 100644
index 0000000..239abbc
--- /dev/null
+++ b/tests/jest.config.cjs
@@ -0,0 +1,12 @@
+// Default Jest config — Tier 1 only (duplicate-behavior + load-only smoke).
+// The comparator is excluded; invoke it via `npm run test:compare`.
+module.exports = {
+  testEnvironment: 'jsdom',
+  testMatch: ['**/*.test.js'],
+  testPathIgnorePatterns: ['/node_modules/', 'map_item_compare\\.test\\.js$'], // second pattern keeps the comparator out of this run
+  transform: {},
+  moduleFileExtensions: ['js', 'json'],
+  collectCoverageFrom: ['*.test.js'],
+  setupFiles: ['<rootDir>/setup-globals.cjs'],
+  verbose: true
+};
diff --git a/tests/jest.config.js b/tests/jest.config.js
deleted file mode 100644
index 7dd5b02..0000000
--- a/tests/jest.config.js
+++ /dev/null
@@ -1,8 +0,0 @@
-module.exports = {
-  testEnvironment: 'jsdom',
-  testMatch: ['**/*.test.js'],
-  transform: {},
-  moduleFileExtensions: ['js', 'json'],
-  collectCoverageFrom: ['duplicate-behavior.test.js'],
-  verbose: true
-};
diff --git a/tests/map_item.test.js b/tests/map_item.test.js
new file mode 100644
index 0000000..774c083
--- /dev/null
+++ b/tests/map_item.test.js
@@ -0,0 +1,49 @@
+/**
+ * Load-only smoke for every module under `modules/*.js`.
+ *
+ * For each module file, runs `inspect_module()` and asserts the module:
+ *   - parses (no SyntaxError)
+ *   - imports without throwing
+ *   - either exports a `map_item` function, or doesn't (both are fine here)
+ *
+ * No data is fed through `map_item`. That work belongs in the comparator
+ * (Tier 2 — `npm run test:compare`), where real items pulled from a 4CAT
+ * dataset provide both the input and the expected output.
+ *
+ * Catches: parse errors, import-time throws, broken top-level statements.
+ * Does NOT catch: anything that requires running `map_item` on real input.
+ */
+
+import { readdirSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { inspect_module } from './_module-info.js';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const MODULES_ROOT = join(__dirname, '..', 'modules');
+
+const module_files = readdirSync(MODULES_ROOT)
+    .filter(f => f.endsWith('.js') && !f.startsWith('_'));
+
+const module_info = {};
+for (const file of module_files) {
+    const name = file.replace(/\.js$/, '');
+    module_info[name] = await inspect_module(name);
+}
+
+describe('module load smoke', () => {
+    for (const file of module_files) {
+        const name = file.replace(/\.js$/, '');
+        test(`modules/${file} loads cleanly`, () => {
+            const info = module_info[name];
+            if (info.state === 'syntax_error') {
+                throw new Error(`syntax error in modules/${file}:\n${info.error}`);
+            }
+            if (info.state === 'import_error') {
+                throw new Error(`import failed for modules/${file}: ${info.error.message}`);
+            }
+            // 'ok' or 'no_map_item' — both acceptable at this tier.
+            expect(['ok', 'no_map_item']).toContain(info.state);
+        });
+    }
+});
diff --git a/tests/map_item_compare.test.js b/tests/map_item_compare.test.js
new file mode 100644
index 0000000..43c5283
--- /dev/null
+++ b/tests/map_item_compare.test.js
@@ -0,0 +1,590 @@
+/**
+ * Compare JS map_item output against 4CAT's Python map_item via dataset keys.
+ *
+ * For each 4CAT dataset key in FOURCAT_DATASETS, this test:
+ *   1. HEADs the items endpoint to read the datasource id from the
+ *      `X-4CAT-Dataset-*` response headers (no metadata-endpoint dependency)
+ *   2. translates that id back to a Zeeschuimer module name via
+ *      zeeschuimer-to-4cat.json (used in reverse)
+ *   3. inspects the local module (must export map_item)
+ *   4. fetches in parallel, both as NDJSON:
+ *        /download/<key>                       -> INPUTS (post-wrap)
+ *        /api/dataset/<key>/items/?annotations=no&missing_fields=keep&stream=true
+ *                                              -> mapped EXPECTED OUTPUTS
+ *   5. runs each input through the local map_item, then pairs by the
+ *      resulting MAPPED `id` — which can differ from the raw input id (e.g.
+ *      instagram maps to the post shortcode, not the numeric pk) — and
+ *      deep-equals each mapped result against the corresponding expected
+ *      output.
+ *
+ * The items endpoint is fetched with `stream=true` (NDJSON): its JSON-array
+ * form paginates at `limit=100`, silently dropping rows on larger datasets.
+ * `annotations=no` drops processor-added fields; `missing_fields=keep` keeps
+ * unmapped fields as `{ __missing: true, value: <fallback> }` markers (matching
+ * the JS side) and additionally adds a comma-joined `missing_fields` summary key.
+ * That summary is API-only — the JS map_item never emits it — so it is
+ * excluded from the diff (see API_ONLY_FIELDS); the per-field markers it
+ * summarizes are still compared.
+ *
+ * Items from /download/<key> already have `wrap_for_map_item` applied by
+ * Zeeschuimer pre-upload, so they're fed to map_item directly without
+ * re-wrapping. The trade-off is that this comparator does not exercise
+ * `wrap_for_map_item` itself — see docs/map-item-test-plan.md for the
+ * accepted-gap rationale.
+ *
+ * Environment notes (fetch + DOMParser):
+ *   - jsdom env so `strip_tags` (used by gab/pinterest/rednote/truth) has
+ *     a native DOMParser.
+ *   - jsdom doesn't ship `fetch`. Spiked three candidates on 2026-06-03
+ *     under node:20-alpine:
+ *       * `undici`     — crashes at import in jsdom (pokes at
+ *                        clearImmediate/markResourceTiming/fast-now
+ *                        timers that jsdom shadows).
+ *       * `node-fetch` v3 — imports clean but `res.text()` throws
+ *                        `ReferenceError: TextDecoder is not defined`
+ *                        (jsdom doesn't expose TextDecoder as a global).
+ *       * `cross-fetch/polyfill` — clean import + working round-trip.
+ *     So this file imports `cross-fetch/polyfill`, which assigns
+ *     `globalThis.fetch` when undefined.
+ *
+ * Invocation:
+ *   npm run test:compare                 # runs every key in FOURCAT_DATASETS
+ *   npm run test:compare -- <key>        # narrows to one key (must be in
+ *                                        #   FOURCAT_DATASETS to avoid typos)
+ *
+ * Hard-errors at registration time if FOURCAT_URL, FOURCAT_API_KEY, or
+ * FOURCAT_DATASETS is missing — by Tier 2 contract these are required.
+ */
+
+import 'cross-fetch/polyfill';
+import 'dotenv/config';
+import { readFileSync, existsSync, writeFileSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { inspect_module } from './_module-info.js';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+// The end-of-run roll-up is written here, then printed by run-compare.mjs
+// AFTER jest exits — jest buffers in-test stdout and hoists it above the
+// result tree, so writing it from here directly would never land last. Keep
+// in sync with the same constant in run-compare.mjs.
+const SUMMARY_PATH = join(__dirname, '.compare-summary.txt');
+
+const FOURCAT_URL = process.env.FOURCAT_URL?.replace(/\/$/, '');
+const FOURCAT_API_KEY = process.env.FOURCAT_API_KEY;
+
+// Hard-fail if env is missing — Tier 2 contract.
+function require_env(name, value, placeholder_values = []) {
+    if (!value || placeholder_values.includes(value)) {
+        throw new Error(
+            `${name} is not configured. Set it in tests/.env (see tests/.env.example).`
+        );
+    }
+    return value;
+}
+require_env('FOURCAT_URL', FOURCAT_URL);
+require_env('FOURCAT_API_KEY', FOURCAT_API_KEY, ['your-api-key-here']);
+
+const FOURCAT_DATASETS = require_env(
+    'FOURCAT_DATASETS',
+    process.env.FOURCAT_DATASETS,
+    ['key1,key2,key3'],
+)
+    .split(',')
+    .map(k => k.trim())
+    .filter(k => k.length > 0);
+
+if (FOURCAT_DATASETS.length === 0) {
+    throw new Error('FOURCAT_DATASETS parsed as empty. Set a comma-separated list of dataset keys in tests/.env.');
+}
+
+// Optional narrowing to a single dataset key. The `npm run test:compare --
+// <key>` form is handled by run-compare.mjs, which sets COMPARE_DATASET; jest
+// itself would mis-read a bare key as a test-path-pattern filter and silently
+// run nothing. A narrowed key must still be declared in FOURCAT_DATASETS —
+// erroring on an unlisted key catches typos and keeps the dataset list the
+// single source of truth.
+const COMPARE_DATASET = process.env.COMPARE_DATASET?.trim() || undefined;
+if (COMPARE_DATASET && !FOURCAT_DATASETS.includes(COMPARE_DATASET)) {
+    throw new Error(
+        `COMPARE_DATASET=${COMPARE_DATASET} is not listed in FOURCAT_DATASETS. ` +
+        `Add it to tests/.env before narrowing the run to it.`
+    );
+}
+
+const DATASET_KEYS_TO_RUN = COMPARE_DATASET ? [COMPARE_DATASET] : FOURCAT_DATASETS;
+
+// 4CAT datasource id -> Zeeschuimer module name. The on-disk map is
+// authored in the natural direction (zeeschuimer -> 4cat); flip here.
+const ID_MAP_PATH = join(__dirname, 'zeeschuimer-to-4cat.json');
+const ZEESCHUIMER_TO_4CAT = existsSync(ID_MAP_PATH)
+    ? JSON.parse(readFileSync(ID_MAP_PATH, 'utf8'))
+    : {};
+const FOURCAT_TO_ZEESCHUIMER = Object.fromEntries(
+    Object.entries(ZEESCHUIMER_TO_4CAT)
+        .filter(([k]) => !k.startsWith('_'))
+        .map(([z, f]) => [f, z])
+);
+
+// When true (default), comparison of a dataset stops at its first failing
+// item; the remaining items are reported as a single skipped "halted"
+// placeholder rather than one failure each. Disable it with the `--all`
+// launcher flag (preferred — works in every shell) or FAIL_FAST=0. Trim
+// because `set FAIL_FAST=0 && ...` in cmd.exe includes the trailing space;
+// treat both '0' and 'false' (case-insensitive) as off.
+const FAIL_FAST_RAW = (process.env.FAIL_FAST ?? '').trim().toLowerCase();
+const FAIL_FAST = FAIL_FAST_RAW !== '0' && FAIL_FAST_RAW !== 'false';
+
+function auth_headers(extra = {}) {
+    return {
+        // 4CAT accepts the raw key without a `Bearer ` prefix.
+        'Authorization': FOURCAT_API_KEY,
+        ...extra,
+    };
+}
+
+async function fetch_headers(url) {
+    const res = await fetch(url, { method: 'HEAD', headers: auth_headers() });
+    if (!res.ok) throw new Error(`HTTP ${res.status} from HEAD ${url}`);
+    return res.headers;
+}
+
+async function fetch_ndjson(url) { // GET `url` (authenticated) and parse the body as NDJSON: one JSON value per non-blank line
+    const res = await fetch(url, { headers: auth_headers() });
+    const text = await res.text(); // read the body before the status check so an error can include it
+    if (!res.ok) throw new Error(`HTTP ${res.status} from ${url}: ${text}`);
+    return text
+        .split('\n')
+        .flatMap((line, i) => { // i indexes physical lines, so a parse error points at the real line of the response
+            if (line.trim().length === 0) return []; // blank lines contribute no record
+            try { return [JSON.parse(line)]; } // wrap so a record that is itself an array is not flattened
+            catch (e) { throw new Error(`bad NDJSON at line ${i + 1} of ${url}: ${e.message}`); }
+        });
+}
+
+function normalize(value) { // JSON round-trip: applies toJSON() hooks and drops anything JSON cannot represent
+    return JSON.parse(JSON.stringify(value) ?? 'null'); // stringify yields undefined for undefined/functions; parsing that would throw
+}
+
+function looks_like_url(v) {
+    return typeof v === 'string' && /^https?:\/\//i.test(v);
+}
+
+// Percent-decode for encoding-insensitive URL comparison. Decode each maximal
+// %XX run on its own so a malformed sequence doesn't throw and abort the rest.
+function decode_url_loose(s) {
+    return s.replace(/(?:%[0-9A-Fa-f]{2})+/g, run => {
+        try { return decodeURIComponent(run); } catch { return run; }
+    });
+}
+
+function deep_equal(a, b) {
+    if (a === b) return true;
+    if (a === null || b === null) return a === b;
+    if (typeof a !== typeof b) return false;
+    if (typeof a !== 'object') {
+        // Treat encoding-equivalent URLs as equal. The comparator targets bad
+        // data, not cosmetic percent-encoding differences: `=` vs `%3D` in a
+        // query value (and the like) resolve to the same URL, so 4CAT emitting
+        // one form while the JS normalizer emits the other is not a defect.
+        // Applied at the leaf so it covers URLs nested in arrays/objects too.
+        // Tradeoff: this also collapses `%2F` vs `/`, which can be semantically
+        // distinct — accepted, as a genuinely different URL still differs once
+        // decoded.
+        if (looks_like_url(a) && looks_like_url(b)) {
+            return decode_url_loose(a) === decode_url_loose(b);
+        }
+        return false;
+    }
+    if (Array.isArray(a) !== Array.isArray(b)) return false;
+    if (Array.isArray(a)) {
+        if (a.length !== b.length) return false;
+        return a.every((v, i) => deep_equal(v, b[i]));
+    }
+    const a_keys = Object.keys(a);
+    const b_keys = Object.keys(b);
+    if (a_keys.length !== b_keys.length) return false;
+    return a_keys.every(k => k in b && deep_equal(a[k], b[k]));
+}
+
+function diff_objects(js_obj, py_obj) {
+    const diffs = [];
+    const keys = new Set([...Object.keys(js_obj ?? {}), ...Object.keys(py_obj ?? {})]);
+    for (const key of keys) {
+        const in_js = js_obj && key in js_obj;
+        const in_py = py_obj && key in py_obj;
+        if (!in_js) {
+            diffs.push({ key, kind: 'only_python', python: py_obj[key] });
+        } else if (!in_py) {
+            diffs.push({ key, kind: 'only_js', js: js_obj[key] });
+        } else if (!deep_equal(js_obj[key], py_obj[key])) {
+            diffs.push({ key, kind: 'mismatch', js: js_obj[key], python: py_obj[key] });
+        }
+    }
+    return diffs;
+}
+
+function format_diffs(diffs) {
+    return diffs.map(d => {
+        if (d.kind === 'only_js') {
+            return `  + only in JS:     ${d.key} = ${JSON.stringify(d.js)}`;
+        }
+        if (d.kind === 'only_python') {
+            return `  - only in Python: ${d.key} = ${JSON.stringify(d.python)}`;
+        }
+        return `  ~ ${d.key}\n      JS:     ${JSON.stringify(d.js)}\n      Python: ${JSON.stringify(d.python)}`;
+    }).join('\n');
+}
+
+function format_error_with_location(err) {
+    if (!err) return String(err);
+    const message = err.message || String(err);
+    const stack = err.stack || '';
+    const module_frames = stack.split('\n')
+        .filter(l => l.includes('/modules/') || l.includes('\\modules\\'))
+        .slice(0, 3)
+        .map(l => l.trim());
+    return module_frames.length
+        ? `${message}\n  ${module_frames.join('\n  ')}`
+        : message;
+}
+
+// Map each input through the local map_item, then pair the mapped result
+// against the expected output by `id`. Pairing MUST key on the mapped id:
+// some modules emit an `id` that differs from the raw input id — instagram,
+// for instance, maps to the post shortcode (`node.code`), not the numeric pk
+// — so pairing raw input ids against the API's already-mapped ids would match
+// nothing. Falls back to index pairing (with a logged warning) if either side
+// lacks a usable id. A throw inside map_item is captured per-item and surfaced
+// later as that item's failure.
+function map_and_pair(inputs, outputs, map_item, dataset_key) {
+    // Map every input up front so pairing can key on the mapped id.
+    const mapped = inputs.map(input => {
+        try {
+            return { input, js_result: map_item(input), error: null };
+        } catch (e) {
+            return {
+                input,
+                js_result: null,
+                error: new Error(`JS map_item threw: ${format_error_with_location(e)}`),
+            };
+        }
+    });
+
+    const probe_mapped = mapped.find(m => m.js_result)?.js_result;
+    const probe_out = outputs[0];
+    const has_id_mapped = probe_mapped && 'id' in probe_mapped && probe_mapped.id != null;
+    const has_id_out = probe_out && 'id' in probe_out && probe_out.id != null;
+
+    if (!has_id_mapped || !has_id_out) {
+        // eslint-disable-next-line no-console
+        console.warn(
+            `[compare] ${dataset_key}: no usable 'id' on ${!has_id_mapped ? 'map_item output' : '/items'} ` +
+            `side — falling back to index pairing for this dataset.`
+        );
+        const n = Math.min(mapped.length, outputs.length);
+        return {
+            mode: 'index',
+            pairs: Array.from({ length: n }, (_, i) => ({
+                input: mapped[i].input,
+                js_result: mapped[i].js_result,
+                error: mapped[i].error,
+                expected: outputs[i],
+                id: i,
+            })),
+            input_count: inputs.length,
+            output_count: outputs.length,
+            unmatched_inputs: [],
+            unmatched_outputs: [],
+        };
+    }
+
+    // An id is NOT guaranteed unique: some datasources re-emit the same post
+    // across paginated/scroll responses (e.g. imgur gallery returns a post on
+    // every page it appears on), so a key can legitimately recur with a
+    // different `collected_from_url` per capture. Bucket outputs into a FIFO
+    // queue per id rather than a single slot — then the k-th input occurrence
+    // of an id pairs with the k-th output occurrence. Both endpoints stream the
+    // dataset in the same stored order, so occurrences line up. (A plain
+    // last-wins Map would cross-match occurrence #0 against the surviving
+    // occurrence #N, fabricating field diffs and bogus unmatched ids.)
+    const by_id_out = new Map();
+    for (const item of outputs) {
+        const k = String(item.id);
+        if (!by_id_out.has(k)) by_id_out.set(k, []);
+        by_id_out.get(k).push(item);
+    }
+
+    const pairs = [];
+    const unmatched_inputs = [];
+    for (const m of mapped) {
+        // A throw produces no mapped id to pair on. Surface it as its own
+        // failing item (labelled with the raw input id) rather than burying it
+        // in the unmatched-id list — otherwise an id-transforming module hides
+        // the actual map_item error behind a generic "unmatched input" report.
+        if (m.error) {
+            const label = m.input && m.input.id != null ? String(m.input.id) : '(no id)';
+            pairs.push({ input: m.input, js_result: null, error: m.error, expected: null, id: label });
+            continue;
+        }
+        // Key on the mapped id; a successful map whose id matches no remaining
+        // output occurrence is a genuine pairing miss and goes to unmatched_inputs.
+        const lookup_id = m.js_result && m.js_result.id != null ? String(m.js_result.id) : null;
+        const queue = lookup_id != null ? by_id_out.get(lookup_id) : undefined;
+        const expected = queue && queue.length ? queue.shift() : undefined;
+        if (expected) {
+            pairs.push({ input: m.input, js_result: m.js_result, error: null, expected, id: lookup_id });
+        } else {
+            unmatched_inputs.push(lookup_id ?? '(no id)'); // a null entry would render as '' when the list is joined for the report
+        }
+    }
+    // Any output occurrences left in the queues had no matching input.
+    const unmatched_outputs = [];
+    for (const [id, queue] of by_id_out) {
+        for (let i = 0; i < queue.length; i++) unmatched_outputs.push(id);
+    }
+    return {
+        mode: 'id',
+        pairs,
+        input_count: inputs.length,
+        output_count: outputs.length,
+        unmatched_inputs,
+        unmatched_outputs,
+    };
+}
+
+// Recover the datasource id from a dataset's response headers. 4CAT exposes it
+// directly as `X-4CAT-Dataset-Datasource`. Older responses may only carry
+// `X-4CAT-Dataset-Type` (the datasource id with a `-search`/`-import` suffix),
+// so fall back to stripping that — anchored to end-of-string because
+// datasource ids can themselves contain hyphens (e.g. `xiaohongshu-comments`).
+// The result is translated to a Zeeschuimer module via FOURCAT_TO_ZEESCHUIMER.
+function datasource_id_from_headers(headers) {
+    const datasource = headers.get('x-4cat-dataset-datasource');
+    if (datasource) return datasource.trim();
+    const type = headers.get('x-4cat-dataset-type');
+    if (type) return type.trim().replace(/-(search|import)$/, '');
+    return null;
+}
+
+// Fields 4CAT's API attaches to every mapped item that the JS map_item never
+// produces, so they would otherwise diff as spurious "only_python" entries.
+// `missing_fields` is a comma-joined summary of which fields came back as
+// MissingMappedField — redundant with the per-field `{__missing:true}`
+// markers, which ARE compared.
+const API_ONLY_FIELDS = new Set(['missing_fields']);
+
+function strip_api_fields(obj) {
+    if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return obj;
+    const out = {};
+    for (const k of Object.keys(obj)) {
+        if (!API_ONLY_FIELDS.has(k)) out[k] = obj[k];
+    }
+    return out;
+}
+
+// Diff each paired (already-mapped) JS result against 4CAT's expected output.
+// map_item already ran during pairing (so pairing could key on the mapped
+// id); here we only diff, or report an input whose map_item threw. With
+// FAIL_FAST on (default), stop at the first failing item and record how many
+// were left unchecked — so one bad item yields a single failure plus one
+// skipped "halted" placeholder, not N failures.
+function compare_pairs(pairs) {
+    const results = [];
+    let halted_count = 0;
+    for (let i = 0; i < pairs.length; i++) {
+        const { id, js_result, error, expected } = pairs[i];
+        let message = null;
+        if (error) {
+            message = error.message;
+        } else {
+            const diffs = diff_objects(
+                strip_api_fields(normalize(js_result)),
+                strip_api_fields(normalize(expected)),
+            );
+            if (diffs.length > 0) {
+                message = `${diffs.length} field(s) differ between JS and 4CAT:\n${format_diffs(diffs)}`;
+            }
+        }
+        results.push({ id, ok: message === null, message });
+        if (message !== null && FAIL_FAST) {
+            halted_count = pairs.length - (i + 1);
+            break;
+        }
+    }
+    return { results, halted_count };
+}
+
+// Pre-pass: for each dataset, resolve the datasource (HEAD), fetch items, and
+// run the comparison up front, so tests register with knowable counts and a
+// deterministic pass/fail per item. Fetch/setup failures become a single
+// "setup" failure inside that dataset's describe.
+const dataset_state = {};
+for (const key of DATASET_KEYS_TO_RUN) {
+    try {
+        // The same items URL serves double duty: a HEAD reveals the datasource
+        // (via X-4CAT-Dataset-* headers) with no body; the GET pulls the mapped
+        // rows. `stream=true` avoids the JSON form's limit=100 pagination, which
+        // would silently drop rows (and break id-pairing) on larger datasets.
+        const items_url = `${FOURCAT_URL}/api/dataset/${key}/items/?annotations=no&missing_fields=keep&stream=true`;
+        const headers = await fetch_headers(items_url);
+        const datasource_id = datasource_id_from_headers(headers);
+        if (!datasource_id) {
+            throw new Error(
+                `no datasource id in response headers for ${key} ` +
+                `(looked for X-4CAT-Dataset-Datasource / X-4CAT-Dataset-Type)`
+            );
+        }
+        const module_name = FOURCAT_TO_ZEESCHUIMER[datasource_id] ?? datasource_id;
+        const module_state = await inspect_module(module_name);
+
+        if (module_state.state === 'ok') {
+            const [inputs, outputs] = await Promise.all([
+                fetch_ndjson(`${FOURCAT_URL}/download/${key}`),
+                fetch_ndjson(items_url),
+            ]);
+            const pairing = map_and_pair(inputs, outputs, module_state.map_item, key);
+            const comparison = compare_pairs(pairing.pairs);
+            dataset_state[key] = { datasource_id, module_name, module_state, pairing, comparison };
+        } else {
+            dataset_state[key] = { datasource_id, module_name, module_state };
+        }
+    } catch (e) {
+        dataset_state[key] = { error: e };
+    }
+}
+
+for (const dataset_key of DATASET_KEYS_TO_RUN) {
+    const info = dataset_state[dataset_key];
+
+    if (info.error) {
+        describe(`map_item compare: dataset ${dataset_key}`, () => {
+            test('setup', () => { throw info.error; });
+        });
+        continue;
+    }
+
+    const { datasource_id, module_name, module_state, pairing, comparison } = info;
+    const label = `${dataset_key} (datasource: ${datasource_id}, module: ${module_name})`;
+
+    if (module_state.state === 'no_map_item') {
+        describe(`map_item compare: ${label}`, () => {
+            test.skip(`modules/${module_name}.js has no map_item — nothing to compare`, () => {});
+        });
+        continue;
+    }
+    if (module_state.state === 'syntax_error' || module_state.state === 'import_error') {
+        const msg = module_state.state === 'syntax_error'
+            ? `syntax error:\n${module_state.error}`
+            : `import failed: ${module_state.error.message}`;
+        describe(`map_item compare: ${label}`, () => {
+            test('module loads', () => { throw new Error(msg); });
+        });
+        continue;
+    }
+
+    describe(`map_item compare: ${label}`, () => {
+        test('pairing', () => {
+            const messages = [];
+            if (pairing.input_count !== pairing.output_count) {
+                messages.push(
+                    `input count ${pairing.input_count} != output count ${pairing.output_count}`
+                );
+            }
+            if (pairing.unmatched_inputs.length) {
+                const shown = pairing.unmatched_inputs.slice(0, 5).join(', ');
+                const extra = pairing.unmatched_inputs.length > 5
+                    ? ` (+${pairing.unmatched_inputs.length - 5} more)`
+                    : '';
+                messages.push(`unmatched input ids: ${shown}${extra}`);
+            }
+            if (pairing.unmatched_outputs.length) {
+                const shown = pairing.unmatched_outputs.slice(0, 5).join(', ');
+                const extra = pairing.unmatched_outputs.length > 5
+                    ? ` (+${pairing.unmatched_outputs.length - 5} more)`
+                    : '';
+                messages.push(`unmatched output ids: ${shown}${extra}`);
+            }
+            if (pairing.mode === 'index') {
+                messages.push(`paired by index (no usable 'id' field) — diffs may be misaligned`);
+            }
+            if (messages.length) throw new Error(messages.join('\n'));
+        });
+
+        comparison.results.forEach(({ id, ok, message }, i) => {
+            test(`item ${i} (id=${id})`, () => {
+                if (!ok) throw new Error(message);
+            });
+        });
+
+        if (comparison.halted_count > 0) {
+            test.skip(
+                `halted after first failure — ${comparison.halted_count} later item(s) not compared ` +
+                `(pass --all, or set FAIL_FAST=0, to compare every item)`,
+                () => {},
+            );
+        }
+    });
+}
+
+// Reduce a dataset's pre-computed state to a single verdict + one-line detail.
+// Mirrors the assertions above exactly so the summary never disagrees with the
+// per-test results: PASS only when pairing is clean AND every compared item
+// matched; a FAIL_FAST halt leaves items unchecked, so it cannot be a PASS.
+function summarize_dataset(key, info) {
+    if (info.error) {
+        return { key, status: 'FAIL', datasource: '?', module: '?', detail: `setup error: ${info.error.message}` };
+    }
+    const { datasource_id, module_name, module_state, pairing, comparison } = info;
+    if (module_state.state === 'no_map_item') {
+        return { key, status: 'SKIP', datasource: datasource_id, module: module_name, detail: 'no map_item — nothing to compare' };
+    }
+    if (module_state.state === 'syntax_error' || module_state.state === 'import_error') {
+        return { key, status: 'FAIL', datasource: datasource_id, module: module_name, detail: `module ${module_state.state.replace('_', ' ')}` };
+    }
+
+    const pairing_problems = [];
+    if (pairing.input_count !== pairing.output_count) {
+        pairing_problems.push(`count ${pairing.input_count}!=${pairing.output_count}`);
+    }
+    if (pairing.unmatched_inputs.length) pairing_problems.push(`${pairing.unmatched_inputs.length} unmatched input(s)`);
+    if (pairing.unmatched_outputs.length) pairing_problems.push(`${pairing.unmatched_outputs.length} unmatched output(s)`);
+    if (pairing.mode === 'index') pairing_problems.push(`paired by index`);
+
+    const compared = comparison.results.length;
+    const failed_items = comparison.results.filter(r => !r.ok).length;
+    const total = pairing.pairs.length;
+
+    if (pairing_problems.length || failed_items) {
+        const parts = [];
+        if (pairing_problems.length) parts.push(`pairing: ${pairing_problems.join(', ')}`);
+        if (failed_items) {
+            const halted = comparison.halted_count > 0 ? `, halted (+${comparison.halted_count} unchecked)` : '';
+            parts.push(`${failed_items}/${compared} item(s) differ${halted}`);
+        }
+        return { key, status: 'FAIL', datasource: datasource_id, module: module_name, detail: parts.join('; ') };
+    }
+    return { key, status: 'PASS', datasource: datasource_id, module: module_name, detail: `${total}/${total} items match` };
+}
+
+// Build the per-datasource roll-up once the whole file has run and stash it
+// for run-compare.mjs to print as the genuine final output (see SUMMARY_PATH).
+afterAll(() => {
+    const rows = DATASET_KEYS_TO_RUN.map(key => summarize_dataset(key, dataset_state[key]));
+    const w_status = 4; // PASS/FAIL/SKIP
+    const w_module = Math.max(6, ...rows.map(r => r.module.length));
+
+    const lines = ['', '=== map_item compare summary ==='];
+    for (const r of rows) {
+        const mark = r.status === 'PASS' ? '✓' : r.status === 'SKIP' ? '○' : '✗';
+        lines.push(
+            `  ${mark} ${r.status.padEnd(w_status)}  ${r.module.padEnd(w_module)}  ${r.key}  — ${r.detail}`
+        );
+    }
+    const passed = rows.filter(r => r.status === 'PASS').length;
+    const failed = rows.filter(r => r.status === 'FAIL').length;
+    const skipped = rows.filter(r => r.status === 'SKIP').length;
+    lines.push(`${rows.length} datasource(s): ${passed} passed, ${failed} failed, ${skipped} skipped`);
+    writeFileSync(SUMMARY_PATH, lines.join('\n') + '\n');
+});
diff --git a/tests/package-lock.json b/tests/package-lock.json
index cc8f457..ada8011 100644
--- a/tests/package-lock.json
+++ b/tests/package-lock.json
@@ -8,7 +8,9 @@
       "name": "zeeschuimer-db-tests",
       "version": "1.0.0",
       "devDependencies": {
+        "cross-fetch": "^4.0.0",
         "dexie": "^3.2.4",
+        "dotenv": "^16.4.5",
         "fake-indexeddb": "^5.0.1",
         "jest": "^29.7.0",
         "jest-environment-jsdom": "^29.7.0"
@@ -1597,6 +1599,16 @@
         "node": "^14.15.0 || ^16.10.0 || >=18.0.0"
       }
     },
+    "node_modules/cross-fetch": {
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.1.0.tgz",
+      "integrity": "sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "node-fetch": "^2.7.0"
+      }
+    },
     "node_modules/cross-spawn": {
       "version": "7.0.6",
       "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
@@ -1758,6 +1770,19 @@
         "node": ">=12"
       }
     },
+    "node_modules/dotenv": {
+      "version": "16.6.1",
+      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz",
+      "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==",
+      "dev": true,
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://dotenvx.com"
+      }
+    },
     "node_modules/dunder-proto": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -3466,6 +3491,52 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/node-fetch": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
+      "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "whatwg-url": "^5.0.0"
+      },
+      "engines": {
+        "node": "4.x || >=6.0.0"
+      },
+      "peerDependencies": {
+        "encoding": "^0.1.0"
+      },
+      "peerDependenciesMeta": {
+        "encoding": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/node-fetch/node_modules/tr46": {
+      "version": "0.0.3",
+      "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
+      "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/node-fetch/node_modules/webidl-conversions": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
+      "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
+      "dev": true,
+      "license": "BSD-2-Clause"
+    },
+    "node_modules/node-fetch/node_modules/whatwg-url": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
+      "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "tr46": "~0.0.3",
+        "webidl-conversions": "^3.0.0"
+      }
+    },
     "node_modules/node-int64": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz",
diff --git a/tests/package.json b/tests/package.json
index dc3654c..763321c 100644
--- a/tests/package.json
+++ b/tests/package.json
@@ -1,13 +1,17 @@
 {
   "name": "zeeschuimer-db-tests",
   "version": "1.0.0",
-  "description": "Unit tests for Zeeschuimer duplicate handling logic",
+  "description": "Unit tests for Zeeschuimer duplicate handling logic and map_item generator output",
+  "type": "module",
   "scripts": {
-    "test": "jest",
-    "test:watch": "jest --watch"
+    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --config jest.config.cjs",
+    "test:watch": "node --experimental-vm-modules node_modules/jest/bin/jest.js --config jest.config.cjs --watch",
+    "test:compare": "node run-compare.mjs"
   },
   "devDependencies": {
+    "cross-fetch": "^4.0.0",
     "dexie": "^3.2.4",
+    "dotenv": "^16.4.5",
     "fake-indexeddb": "^5.0.1",
     "jest": "^29.7.0",
     "jest-environment-jsdom": "^29.7.0"
diff --git a/tests/run-compare.mjs b/tests/run-compare.mjs
new file mode 100644
index 0000000..bc7e88f
--- /dev/null
+++ b/tests/run-compare.mjs
@@ -0,0 +1,71 @@
+/**
+ * Launcher for the Tier 2 map_item comparator (`npm run test:compare`).
+ *
+ *   npm run test:compare              -> compares every key in FOURCAT_DATASETS
+ *   npm run test:compare -- <key>     -> narrows the run to a single key
+ *   npm run test:compare -- <key> --all   -> compare every item (no fail-fast)
+ *   npm run test:compare -- <key> -t "id=123"   -> key + forwarded jest flags
+ *
+ * Why this exists instead of invoking jest directly: jest treats any bare
+ * positional argument as a test-path-pattern filter. A 4CAT dataset key
+ * (`5daeba72a2dfbb5ed8c855f824a61570`) matches no test file path, so
+ * `jest <key>` silently discovers zero tests and exits "green" having run
+ * nothing. This launcher intercepts the first non-flag argument, hands it to
+ * the comparator through the COMPARE_DATASET env var, and forwards only the
+ * remaining flags to jest — so the key never reaches jest's argv.
+ */
+
+import { spawn } from 'node:child_process';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
+import { readFileSync, rmSync } from 'node:fs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const args = process.argv.slice(2);
+
+// The comparator writes its roll-up here (jest buffers in-test stdout and
+// hoists it above the result tree, so we print it from this launcher after
+// jest exits to make it the genuine last output). Keep in sync with the same
+// constant in map_item_compare.test.js.
+const SUMMARY_PATH = join(__dirname, '.compare-summary.txt');
+// Drop any stale summary up front so a crashed run can't print the prior one.
+rmSync(SUMMARY_PATH, { force: true });
+
+// First non-flag arg (if any) is the dataset key to narrow to; it must precede any flag value (e.g. the `-t` pattern), which would otherwise be taken as the key.
+const dataset_key = args.find(a => !a.startsWith('-'));
+const flags = args.filter(a => a !== dataset_key);
+
+// `--all` (alias `--no-fail-fast`) compares every item instead of halting at
+// the first failure. It's offered as a flag, not only via the FAIL_FAST env
+// var, because `FAIL_FAST=0 npm run ...` does not reliably reach node when
+// npm/node is the Windows binary invoked through WSL interop, and isn't env
+// syntax at all in cmd.exe. A CLI flag crosses every shell; the env var still
+// works where it propagates.
+const disable_fail_fast = flags.includes('--all') || flags.includes('--no-fail-fast');
+const jest_flags = flags.filter(f => f !== '--all' && f !== '--no-fail-fast');
+
+const env = { ...process.env };
+if (dataset_key) env.COMPARE_DATASET = dataset_key;
+if (disable_fail_fast) env.FAIL_FAST = '0';
+
+const jest_bin = join(__dirname, 'node_modules', 'jest', 'bin', 'jest.js');
+const child = spawn(
+    process.execPath,
+    ['--experimental-vm-modules', jest_bin, '--config', 'jest.compare.config.cjs', ...jest_flags],
+    { stdio: 'inherit', cwd: __dirname, env },
+);
+
+child.on('exit', code => {
+    // Print the roll-up after jest's own tally so it's the last thing on screen.
+    try {
+        process.stdout.write(readFileSync(SUMMARY_PATH, 'utf8'));
+        rmSync(SUMMARY_PATH, { force: true });
+    } catch {
+        // No summary file (e.g. setup threw before afterAll) — nothing to print.
+    }
+    process.exit(code ?? 1);
+});
+child.on('error', err => {
+    console.error(`failed to launch jest: ${err.message}`);
+    process.exit(1);
+});
diff --git a/tests/setup-globals.cjs b/tests/setup-globals.cjs
new file mode 100644
index 0000000..b55e659
--- /dev/null
+++ b/tests/setup-globals.cjs
@@ -0,0 +1,45 @@
+/**
+ * Make js/lib.js's helpers available as globals inside the Jest test
+ * environment, mirroring how the browser sees them after the manifest
+ * loads lib.js as a plain script.
+ *
+ * map_item bodies reference these as free identifiers (MappedItem,
+ * MissingMappedField, strip_tags, normalize_url_encoding, ...). Without
+ * this shim they'd hit ReferenceError as soon as a test invokes map_item.
+ *
+ * Names are auto-discovered from lib.js by regex-matching unindented
+ * `function name(...)` and `class Name ...` declarations. A helper added to
+ * lib.js in one of those forms (not `const`/`async function`) is picked up automatically.
+ */
+
+const fs = require('node:fs');
+const path = require('node:path');
+
+const lib_source = fs.readFileSync(
+    path.join(__dirname, '..', 'js', 'lib.js'),
+    'utf8',
+);
+
+// Match `function name(` and `class Name {` / `class Name extends` at
+// column 0 of a line. lib.js is a classic script with all top-level
+// declarations unindented; requiring column 0 keeps nested helpers (like
+// the `_traverse_data` IIFE inside `traverse_data`) from being exposed.
+const NAME_PATTERN = /^(?:function|class)\s+([A-Za-z_$][A-Za-z0-9_$]*)\b/gm;
+const EXPOSED_NAMES = Array.from(
+    lib_source.matchAll(NAME_PATTERN),
+    m => m[1],
+);
+
+if (EXPOSED_NAMES.length === 0) {
+    throw new Error(
+        'setup-globals.cjs: no top-level function/class declarations found in js/lib.js — ' +
+        'auto-discovery regex may be broken. Tests will ReferenceError if not fixed.'
+    );
+}
+
+const factory = new Function(`
+${lib_source}
+return { ${EXPOSED_NAMES.join(', ')} };
+`);
+
+Object.assign(globalThis, factory());
diff --git a/tests/translation-errors.md b/tests/translation-errors.md
new file mode 100644
index 0000000..fcc160d
--- /dev/null
+++ b/tests/translation-errors.md
@@ -0,0 +1,430 @@
+# Auto-generator translation errors
+
+Patterns of incorrect Python → JavaScript translation observed in
+auto-generated `modules/*.js` files. Each entry has a search pattern so
+this doc doubles as a checklist when reviewing a new auto-generator PR.
+
+When an entry is fixed at the generator level (no longer appears in
+fresh output), set its **Status** to `fixed in generator` and keep the
+entry around — useful history when something regresses.
+
+## How to use
+
+- Found a new pattern? Add an entry below following the template.
+- Reviewing a generator PR? `grep` each `Search pattern` against the
+  changed module files. Anything that hits is worth a manual look.
+- Iterating on the generator prompt? The "Why" lines are the
+  feedback to add — they describe the exact Python-vs-JS semantic
+  difference the LLM keeps missing.
+
+## Template
+
+```
+### <short-name>
+
+**Status:** open | fixed in generator | accepted
+
+**Why it happens:** <one-line description of the Python-vs-JS difference>
+
+**Wrong JS:**
+```js
+<the broken pattern>
+```
+
+**Correct JS:**
+```js
+<what it should look like>
+```
+
+**Example:** `modules/<file>.js:<line>`
+
+**Search pattern:** `<grep-able regex>`
+```
+
+---
+
+## Observed patterns
+
+### `in` operator on strings
+
+**Status:** open
+
+**Why it happens:** In Python, `"x" in some_string` is a substring check.
+In JavaScript, the `in` operator only works on **objects** and checks for
+property/key existence; using it with a string on the right-hand side
+throws `TypeError: cannot use 'in' operator to search for "x" in <string>`.
+
+**Wrong JS:**
+```js
+const is_polaris = '__typename' in item && 'polaris' in item.__typename.toLowerCase();
+```
+
+**Correct JS:**
+```js
+const is_polaris = '__typename' in item && item.__typename.toLowerCase().includes('polaris');
+```
+
+**Example:** `modules/instagram.js:513`
+
+**Search pattern:** `'[^']+' in [a-zA-Z_$][\w$]*\.` — quoted string followed
+by `in` followed by a method call. Quick rough check: `grep -E "' in [a-zA-Z]" modules/*.js`
+
+**Watch out for partial fixes:** seen as `'polaris' in (item.__typename ?? '').toLowerCase()`
+— adding `?? ''` guards against `undefined` but the `in` operator itself
+still throws on the resulting *string*. The fix is `.includes()`, not just
+defaulting the operand.
+
+---
+
+### Python f-string syntax left in single-quoted JS strings
+
+**Status:** open
+
+**Why it happens:** Python `f"... {var} ..."` interpolates. JS uses
+template literals (backticks) with `${var}`. The auto-generator leaves the
+`{var}` notation in a regular single- or double-quoted JS string, which is
+just literal text — no interpolation happens.
+
+**Wrong JS:**
+```js
+throw new MapItemException('Unable to parse item: different user {user.id} and owner {owner.id}');
+```
+
+**Correct JS:**
+```js
+throw new MapItemException(`Unable to parse item: different user ${user.id} and owner ${owner.id}`);
+```
+
+**Example:** `modules/instagram.js:754`
+
+**Search pattern:** `'[^']*\{[a-zA-Z_$][\w$.]*\}[^']*'` or `"[^"]*\{[a-zA-Z_$][\w$.]*\}[^"]*"`
+— a non-template-literal string containing `{identifier}` or `{identifier.path}`.
+Quick check: `grep -nE "['\"][^'\"]*\{[a-zA-Z_][a-zA-Z0-9_.]*\}[^'\"]*['\"]" modules/*.js`
+
+---
+
+### `?? {}` default that defeats subsequent truthy checks
+
+**Status:** open
+
+**Why it happens:** When porting Python's `node.get('user') or {}` (which is
+intended to make subsequent property access safe), the generator emits
+`node.user ?? {}`. That's a reasonable translation on its own, **but** an
+empty dict is falsy in Python while an empty object is truthy in JS, so a
+following `if (user && owner) { ... }` guard never short-circuits. The check
+ends up reading "if user and owner *objects* exist" when the intent was "if
+user and owner data exist." Subsequent property accesses then compare real
+ids/usernames against `undefined` on the missing side, often throwing.
+
+**Wrong JS:**
+```js
+const user  = node.user  ?? {};
+const owner = node.owner ?? {};
+if (user && owner) {
+    if (user.id === owner.id) { /* … */ }
+    else if (user.username !== owner.username) {
+        throw new MapItemException('different user and owner');
+    }
+}
+```
+
+**Correct JS** (depending on intent — pick one):
+```js
+// (a) drop the defaults so truthy guard means "both present"
+const user  = node.user;
+const owner = node.owner;
+if (user && owner) { /* compare */ }
+```
+```js
+// (b) check for actual content, not just object identity
+const user  = node.user  ?? {};
+const owner = node.owner ?? {};
+if (Object.keys(user).length && Object.keys(owner).length) { /* compare */ }
+```
+
+**Example:** `modules/instagram.js:748-756`
+
+**Search pattern:** `\?\?\s*\{\s*\}` — any `?? {}` occurrence is worth a
+review of subsequent guards. Quick check: `grep -nE "\?\?\s*\{\s*\}" modules/*.js`
+
+---
+
+### Bare relative path as a statement (junk auto-imports section)
+
+**Status:** open
+
+**Why it happens:** The generator emits an "auto-generated imports" marker
+block at the top of the module but writes the import target as a bare
+relative path on its own line (`../js/lib.js`) instead of a real `import`
+statement. No JS statement can begin with `..`, so the file fails to parse — syntax error.
+
+**Wrong JS:**
+```js
+// === auto-generated imports for map_item — DO NOT EDIT BY HAND ===
+../js/lib.js
+// === end auto-generated imports ===
+```
+
+**Correct JS** (one of):
+```js
+// === auto-generated imports — DO NOT EDIT BY HAND ===
+// Provided as globals by js/lib.js (loaded via manifest.json):
+//   MappedItem, MissingMappedField, MapItemException, traverse_data,
+//   strip_tags, normalize_url_encoding, formatUtcTimestamp
+// === end auto-generated imports ===
+```
+
+Or, if a real import is intended, an ESM import with named bindings:
+```js
+import { MappedItem, MissingMappedField } from '../js/lib.js';
+```
+
+**Example:** seen historically in `modules/tiktok.js:2`
+
+**Search pattern:** `^\.\./` at the start of a line in module files.
+Quick check: `grep -nE "^\.\." modules/*.js`
+
+---
+
+### Key-existence check (`'X' in obj`) used where Python intended value-truthiness (`obj.get('X')`)
+
+**Status:** open
+
+**Why it happens:** Python's `if node.get('usertags'):` is a *truthy check on
+the value* — returns False if the key is missing **or** if the value is
+`None`/empty/falsy. The generator translates this to `if ('usertags' in
+node)`, which in JS is a *key-existence check* — it is `true` even when
+the value is `null`. Subsequent property accesses on the null value then
+throw `Cannot read properties of null`.
+
+**Wrong JS:**
+```js
+const usertags = 'usertags' in node ? node.usertags.in.map(...).join(',') : '';
+// node.usertags can be null → .in.map blows up
+```
+
+**Correct JS:**
+```js
+const usertags = node.usertags ? node.usertags.in.map(...).join(',') : '';
+```
+
+**Example:** `modules/instagram.js:777`
+
+**Search pattern:** `'[^']+' in [a-zA-Z_$][\w$]*\s*\?` — quoted-string `in`
+identifier followed by `?` (ternary). Quick check:
+`grep -nE "'[^']+' in [a-zA-Z_]+ \?" modules/*.js`
+
+---
+
+### Datetime serialization format mismatch
+
+**Status:** open
+
+**Why it happens:** Python's `datetime.utcfromtimestamp(t).strftime('%Y-%m-%d %H:%M:%S')`
+produces `"2026-05-13 21:27:31"` — space-separated, no timezone marker. JS's
+`new Date(t * 1000).toISOString()` produces `"2026-05-13T21:27:31.000Z"` — T
+separator, milliseconds, Z. The generator emits the JS `.toISOString()` form
+instead of using the existing `formatUtcTimestamp` helper from lib.js that
+mimics Python's output exactly.
+
+**Wrong JS:**
+```js
+collected_at = new Date(node.taken_at * 1000).toISOString();
+```
+
+**Correct JS:**
+```js
+collected_at = formatUtcTimestamp(node.taken_at);
+// formatUtcTimestamp is defined in js/lib.js as:
+//   new Date(unixSeconds * 1000).toISOString().replace('T', ' ').slice(0, 19)
+```
+
+**Example:** `modules/instagram.js:782`
+
+**Search pattern:** `new Date\([^)]+\)\.toISOString\(\)` — any use of
+`.toISOString()`. The helper should be used instead. Quick check:
+`grep -nE "\.toISOString\(\)" modules/*.js`
+
+---
+
+### `re.findall` capture groups vs JS `.match` with /g flag
+
+**Status:** open
+
+**Why it happens:** Python's `re.findall(r'#(\w+)', s)` returns the **capture
+group contents**: `['lotr', 'woodart']`. JS's `s.match(/#(\w+)/g)` (with the
+global flag) returns the **full matches**: `['#lotr', '#woodart']` — capture
+groups are ignored. The generator translates the regex literally without
+adjusting for this semantic difference, so the resulting strings keep
+prefixes/wrappers that Python would have stripped.
+
+**Wrong JS:**
+```js
+hashtags: caption.match(/#([^\s!@#$%^&*()_+{}:"|<>?;',./`~]+)/g)?.join(',')
+// produces "#lotr,#woodart"
+```
+
+**Correct JS:**
+```js
+// Option A: strip the literal prefix from each full match
+hashtags: caption.match(/#([^\s...]+)/g)?.map(h => h.slice(1)).join(',') ?? ''
+// Option B: use matchAll to get capture groups properly
+hashtags: [...caption.matchAll(/#([^\s...]+)/g)].map(m => m[1]).join(',')
+```
+
+**Example:** `modules/instagram.js:812` (also 766, 870 — three copies)
+
+**Search pattern:** `\.match\(/[^/]*\([^/]*\)[^/]*/g\)` — any `.match()` with
+a global-flag regex containing a capture group (misses regexes containing a
+literal `/`, like the one above). Quick check: `grep -nE "\.match\(/.*\(.*\).*\/g\)" modules/*.js`
+
+---
+
+### `undefined` field values get dropped from JSON, but Python's `None` becomes `null`
+
+**Status:** open
+
+**Why it happens:** When `JSON.stringify` encounters an object property whose
+value is `undefined`, it **omits the key entirely** from the output. Python's
+`json.dumps` serializes `None` as `null`, keeping the key. The generator
+writes assignments like `location.city = node.location.city` where the
+right-hand side can be `undefined`, producing missing keys in JS output
+that show up as `only in Python: <field> = null` diffs against 4CAT.
+
+**Wrong JS:**
+```js
+location.city = node.location.city;  // undefined if .city missing
+// JSON.stringify({location_city: undefined}) → "{}" (key omitted)
+
+body: caption,  // null if no caption — Python returns "" here, not null
+```
+
+**Correct JS:**
+```js
+// Whichever fallback Python uses for that specific field:
+location.city = node.location.city ?? null;   // some fields → null
+body: caption ?? '',                          // other fields → ""
+```
+
+**Example:** `modules/instagram.js:745, 853` (`null` flavor),
+559, 648, 798 (`""` flavor for `body`)
+
+**Note:** Python's choice of `None` vs `""` is per-field — there's no
+universal rule. When the comparator reports `~ X  JS: null  Python: ""` use
+`?? ''`. When it reports `- only in Python: X = null` use `?? null`. The
+distinction matters because the JS output should match Python's choice
+exactly for that field.
+
+**Search pattern:** harder to grep automatically — any property assignment
+where the RHS could be `undefined`/`null` and the resulting field is
+expected to appear in the mapped output. Look at "only in Python: X = null"
+and "~ X  JS: null  Python: \"\"" diffs in the comparator output to find
+specific cases.
+
+---
+
+### Object-reference inequality used as type check
+
+**Status:** open
+
+**Why it happens:** The generator emits `caption !== new MissingMappedField('')`
+to mean "caption is not a missing-marker", but `new MissingMappedField('')`
+creates a fresh object every time, and `!==` on objects compares references.
+The expression is **always true**, so the conditional never takes the
+"missing" branch. Likely originates from Python idioms like `caption != ""`
+or `caption is not None`, mistranslated through the MissingMappedField
+abstraction.
+
+**Wrong JS:**
+```js
+hashtags: caption !== new MissingMappedField('') ? caption.match(...) : '',
+// !== between two different object references is always true
+```
+
+**Correct JS:**
+```js
+// If the intent was "if caption has content", just truthy-check it:
+hashtags: caption ? caption.match(...) : '',
+// If the intent was "if caption is not a MissingMappedField instance":
+hashtags: !(caption instanceof MissingMappedField) ? caption.match(...) : '',
+```
+
+**Example:** `modules/instagram.js:812` (and two other copies)
+
+**Search pattern:** `!== new [A-Z]` or `=== new [A-Z]` — any equality
+comparison with a freshly-constructed object. Quick check:
+`grep -nE "(!==|===) new [A-Z]" modules/*.js`
+
+---
+
+### `.method()` chain on potentially-null result
+
+**Status:** open
+
+**Why it happens:** In Python, calling a method on `None` raises
+`AttributeError`, which 4CAT sometimes catches. In JS, calling a method on
+`null`/`undefined` throws `TypeError: Cannot read properties of null
+(reading '<method>')`. The generator emits the same dotted chain without
+optional-chaining (`?.`) protection.
+
+**Wrong JS:**
+```js
+hashtags: caption !== new MissingMappedField('')
+    ? caption.match(/#([^\s!@#$%^&*()_+{}:"|<>?;',./`~]+)/g)?.join(',')
+    : '',
+```
+(here `caption` is allowed to be `null`, so `caption.match(...)` blows up
+on null caption)
+
+**Correct JS:**
+```js
+hashtags: caption
+    ? caption.match(/#([^\s!@#$%^&*()_+{}:"|<>?;',./`~]+)/g)?.join(',') ?? ''
+    : '',
+```
+
+**Example:** `modules/instagram.js:809`
+
+**Search pattern:** harder to grep — needs reading. Worth manual review of
+any field that uses `caption.match`, `something.split`, `something.join`
+without `?.` on a value that could be null/undefined.
+
+---
+
+## Generator prompt feedback (running list)
+
+Concrete things to fold into the generator's prompt over time:
+
+1. **Python `x in y` where `y` is a string** → use `y.includes(x)` in JS,
+   never `x in y`.
+2. **Python f-strings** → use JS template literals (backticks) with
+   `${...}` syntax. Never leave `{...}` in single- or double-quoted strings.
+3. **`?? {}` after a `.get(...) or {}` translation** → only use this if the
+   following code does property-access. If the following code does a
+   truthy guard (`if (x && y)`), drop the default and use just `node.user`.
+4. **Method chains on possibly-null values** → use `?.` (optional
+   chaining) instead of `.` whenever the receiver could be null/undefined.
+5. **The auto-imports header block** → emit either real `import { ... }`
+   statements with valid relative paths, or a comment-only header.
+   Never emit bare paths as JS statements.
+6. **Python `node.get('X')` truthy check** → in JS, use `node.X` (or
+   `node.X != null`), not `'X' in node`. The `in` operator checks key
+   existence, which is `true` even for explicit-null values.
+7. **Datetime serialization** → use the `formatUtcTimestamp` helper from
+   lib.js (which mimics Python's `strftime('%Y-%m-%d %H:%M:%S')` format),
+   not `new Date(...).toISOString()` (which has a different output shape:
+   T separator, milliseconds, Z suffix).
+8. **`re.findall` with capture groups** → in JS, `.match(/.../g)` returns
+   full matches, NOT capture groups. To get capture-group behavior, use
+   either `[...s.matchAll(/.../g)].map(m => m[1])` or post-process the
+   full matches with `.map(...)` to strip the literal parts.
+9. **Object-reference equality (`!== new X(...)`)** → never. Creating an
+   object with `new` produces a fresh reference; `===`/`!==` compares
+   identity. Use `instanceof X` for type checks, or compare values
+   directly. The MissingMappedField "is this missing?" check should be
+   `caption instanceof MissingMappedField` or just truthy-check the value.
+10. **Python `None` → JSON `null` vs JS `undefined` → omitted** — when a
+    field's value could be missing and Python returns `null` for it,
+    JS must explicitly assign `null` (not leave the value as `undefined`).
+    `JSON.stringify` drops `undefined` keys silently. Use `value ?? null`
+    when the field is expected to appear in the mapped output.
diff --git a/tests/zeeschuimer-to-4cat.json b/tests/zeeschuimer-to-4cat.json
new file mode 100644
index 0000000..f7de942
--- /dev/null
+++ b/tests/zeeschuimer-to-4cat.json
@@ -0,0 +1,7 @@
+{
+  "_comment": "Maps Zeeschuimer module filenames (without .js) to 4CAT datasource ids when they differ. Default behavior is identity — only include entries where the two diverge. Discovered via http://localhost/api/datasources/.",
+  "9gag": "ninegag",
+  "truth": "truthsocial",
+  "rednote": "xiaohongshu",
+  "rednote-comments": "xiaohongshu-comments"
+}