From 3e02b1a08dee892a377a20f04eb005db73e044f1 Mon Sep 17 00:00:00 2001 From: 128na Date: Mon, 22 Jun 2026 23:12:21 +0900 Subject: [PATCH 1/3] Close remaining known-risks gaps (A2, D4, and SPOF test coverage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two items needed a behavior decision and got one: - A2: FetchHtml now calls Http::get()->throw(), treating any non-2xx response the same as a connection failure — neither writes to RawPage nor gets cached. Previously only connection exceptions were caught; a 404/500 response body would silently get written/cached. - D4: extracted Page update and Pak sync into UpdateOrCreatePageWithPaks, wrapping both in DB::transaction() so a SyncPak failure can no longer leave a Page with new title/text but stale pak associations visible to search. All three site Handlers now use this combined action. B3 (Notion re-sync throttling) stays unimplemented per decision — recorded in the ledger only, revisit if API quota becomes a problem. The rest is closing out every remaining SPOF row (single test) to OK (2+ tests) per the ledger's own correction conditions: A1/A3, B1/B2, C1/C2, D1/D3. Notably C1 had no direct web-route (non-JSON) test, and B2's null-URL delete guard and delete-side fault isolation had no test at all despite being implemented in the prior PR. known-risks.md updated to 🟢 OK across the board except B3 (still 🔴 Missing, by decision) and D2 (N/A, no draft/publish concept exists). Co-Authored-By: Claude Sonnet 4.6 --- app/Actions/Extract/Japan/Handler.php | 13 +- app/Actions/Extract/Portal/Handler.php | 11 +- app/Actions/Extract/Twitrans/Handler.php | 13 +- .../Extract/UpdateOrCreatePageWithPaks.php | 38 ++++++ app/Actions/Scrape/FetchHtml.php | 3 +- docs/known-risks.md | 20 +-- .../Extract/Japan/HandlerIsolationTest.php | 4 +- .../Extract/UpdateOrCreatePageTest.php | 14 +++ .../UpdateOrCreatePageWithPaksTest.php | 47 +++++++ .../Feature/Actions/Scrape/FetchHtmlTest.php | 19 +++ .../Scrape/Japan/HandlerFailureTest.php | 24 ++++ .../Scrape/Japan/HandlerIsolationTest.php | 50 ++++++++ .../Actions/SearchPage/FeedActionTest.php | 30 +++++ .../Actions/SyncNotion/SyncActionTest.php | 119 ++++++++++++++++++ tests/Feature/Http/ApiErrorResponseTest.php | 23 ++++ .../Http/Resources/PageResourceTest.php | 19 +++ tests/Feature/Http/WebErrorResponseTest.php | 34 +++++ 17 files changed, 445 insertions(+), 36 deletions(-) create mode 100644 app/Actions/Extract/UpdateOrCreatePageWithPaks.php create mode 100644 tests/Feature/Actions/Extract/UpdateOrCreatePageWithPaksTest.php create mode 100644 tests/Feature/Actions/Scrape/Japan/HandlerIsolationTest.php create mode 100644 tests/Feature/Actions/SearchPage/FeedActionTest.php create mode 100644 tests/Feature/Http/WebErrorResponseTest.php diff --git a/app/Actions/Extract/Japan/Handler.php b/app/Actions/Extract/Japan/Handler.php index 2d0ce0c..010b39d 100644 --- a/app/Actions/Extract/Japan/Handler.php +++ b/app/Actions/Extract/Japan/Handler.php @@ -7,8 +7,7 @@ use App\Actions\Extract\ChunkRawPages; use App\Actions\Extract\HandlerInterface; use App\Actions\Extract\MarkExtractFailed; -use App\Actions\Extract\SyncPak; -use App\Actions\Extract\UpdateOrCreatePage; +use App\Actions\Extract\UpdateOrCreatePageWithPaks; use App\Enums\SiteName; use App\Models\Page; use App\Models\RawPage; @@ -22,8 +21,7 @@ public function __construct( private ChunkRawPages $chunkRawPages, private ExtractLastModified $extractLastModified, private ExtractContents $extractContents, - private UpdateOrCreatePage $updateOrCreatePage, - private SyncPak $syncPak, + private UpdateOrCreatePageWithPaks $updateOrCreatePageWithPaks, private MarkExtractFailed $markExtractFailed, ) {} @@ -42,14 +40,13 @@ public function __invoke(LoggerInterface $logger): void // pageがあって更新有り if (! $rawPage->page || $this->needUpdate($rawPage->page, $lastModiefied)) { $contents = ($this->extractContents)($rawPage); - $page = ($this->updateOrCreatePage)( + ($this->updateOrCreatePageWithPaks)( $rawPage, $contents['title'], $contents['text'], - $lastModiefied + $lastModiefied, + $contents['paks'] ); - - ($this->syncPak)($page, $contents['paks']); } $this->markExtractFailed->clear($rawPage); diff --git a/app/Actions/Extract/Portal/Handler.php b/app/Actions/Extract/Portal/Handler.php index e7a1dd4..8d57ef4 100644 --- a/app/Actions/Extract/Portal/Handler.php +++ b/app/Actions/Extract/Portal/Handler.php @@ -7,8 +7,7 @@ use App\Actions\Extract\ChunkRawPages; use App\Actions\Extract\HandlerInterface; use App\Actions\Extract\MarkExtractFailed; -use App\Actions\Extract\SyncPak; -use App\Actions\Extract\UpdateOrCreatePage; +use App\Actions\Extract\UpdateOrCreatePageWithPaks; use App\Enums\SiteName; use App\Models\Page; use App\Models\RawPage; @@ -22,8 +21,7 @@ public function __construct( private ChunkRawPages $chunkRawPages, private FindArticle $findArticle, private ExtractContents $extractContents, - private UpdateOrCreatePage $updateOrCreatePage, - private SyncPak $syncPak, + private UpdateOrCreatePageWithPaks $updateOrCreatePageWithPaks, private MarkExtractFailed $markExtractFailed, ) {} @@ -44,14 +42,13 @@ public function __invoke(LoggerInterface $logger): void // pageがあって更新有り if (! $rawPage->page || $this->needUpdate($rawPage->page, $lastModiefied)) { $contents = ($this->extractContents)($article); - $page = ($this->updateOrCreatePage)( + ($this->updateOrCreatePageWithPaks)( $rawPage, $contents['title'], $contents['text'], $lastModiefied, + $contents['paks'] ); - - ($this->syncPak)($page, $contents['paks']); } $this->markExtractFailed->clear($rawPage); diff --git a/app/Actions/Extract/Twitrans/Handler.php b/app/Actions/Extract/Twitrans/Handler.php index 2ee7ebc..b9eda98 100644 --- a/app/Actions/Extract/Twitrans/Handler.php +++ b/app/Actions/Extract/Twitrans/Handler.php @@ -7,8 +7,7 @@ use App\Actions\Extract\ChunkRawPages; use App\Actions\Extract\HandlerInterface; use App\Actions\Extract\MarkExtractFailed; -use App\Actions\Extract\SyncPak; -use App\Actions\Extract\UpdateOrCreatePage; +use App\Actions\Extract\UpdateOrCreatePageWithPaks; use App\Enums\SiteName; use App\Models\Page; use App\Models\RawPage; @@ -22,8 +21,7 @@ public function __construct( private ChunkRawPages $chunkRawPages, private ExtractLastModified $extractLastModified, private ExtractContents $extractContents, - private UpdateOrCreatePage $updateOrCreatePage, - private SyncPak $syncPak, + private UpdateOrCreatePageWithPaks $updateOrCreatePageWithPaks, private MarkExtractFailed $markExtractFailed, ) {} @@ -42,14 +40,13 @@ public function __invoke(LoggerInterface $logger): void // pageがあって更新有り if (! $rawPage->page || $this->needUpdate($rawPage->page, $lastModiefied)) { $contents = ($this->extractContents)($rawPage); - $page = ($this->updateOrCreatePage)( + ($this->updateOrCreatePageWithPaks)( $rawPage, $contents['title'], $contents['text'], - $lastModiefied + $lastModiefied, + $contents['paks'] ); - - ($this->syncPak)($page, $contents['paks']); } $this->markExtractFailed->clear($rawPage); diff --git a/app/Actions/Extract/UpdateOrCreatePageWithPaks.php b/app/Actions/Extract/UpdateOrCreatePageWithPaks.php new file mode 100644 index 0000000..68e3e66 --- /dev/null +++ b/app/Actions/Extract/UpdateOrCreatePageWithPaks.php @@ -0,0 +1,38 @@ + $paks + */ + public function __invoke(RawPage $rawPage, string $title, string $text, CarbonImmutable $lastModified, array $paks): Page + { + return DB::transaction(function () use ($rawPage, $title, $text, $lastModified, $paks): Page { + $page = ($this->updateOrCreatePage)($rawPage, $title, $text, $lastModified); + ($this->syncPak)($page, $paks); + + return $page; + }); + } +} diff --git a/app/Actions/Scrape/FetchHtml.php b/app/Actions/Scrape/FetchHtml.php index 8f254a8..0a812fa 100644 --- a/app/Actions/Scrape/FetchHtml.php +++ b/app/Actions/Scrape/FetchHtml.php @@ -25,7 +25,8 @@ public function __invoke(string $url, Encoding $fromEncoding): Crawler /** @var string */ $html = Cache::get($key); } else { - $result = retry($this->retryTimes, fn () => Http::get($url), $this->sleepMilliseconds); + // 非2xx も失敗として扱い、RawPage にエラーページの内容を書き込まないようにする。 + $result = retry($this->retryTimes, fn () => Http::get($url)->throw(), $this->sleepMilliseconds); $html = $fromEncoding === Encoding::UTF_8 ? $result->body() : mb_convert_encoding((string) $result->body(), Encoding::UTF_8->value, $fromEncoding->value); diff --git a/docs/known-risks.md b/docs/known-risks.md index 6561680..2ed5f8d 100644 --- a/docs/known-risks.md +++ b/docs/known-risks.md @@ -12,9 +12,9 @@ | ID | 保護すべき挙動 / Expected Outcome | 制御 | テスト | Status | 是正条件 | 記録日 | |----|----------------------------------|------|--------|--------|----------|--------| -| A1 | 1 サイト/1URL の失敗で他サイト・他 URL の処理が止まらない | Preventive(ハンドラ毎の try/catch) | 1(`Extract/Japan/HandlerIsolationTest`) | 🟡 SPOF | テストを 2 本以上に(scrape 側 / 他サイトにも展開) | 2026-06-22 | -| A2 | HTTP 失敗時に RawPage を空/部分 HTML で上書きしない | Preventive(fetch 例外時は upsert に到達しない) | 1(`Scrape/Japan/HandlerFailureTest`) | 🟡 SPOF | 残課題: 非2xx でも body を書き込む経路がある。`->throw()` 等で非2xx も失敗扱いにするか検討 | 2026-06-22 | -| A3 | 同一 RawPage に extract を再実行しても Page 重複が出ない(冪等) | Preventive(`pages_url_unique` + HasOne) | 1(`Extract/UpdateOrCreatePageTest`) | 🟡 SPOF | テストを 2 本以上に | 2026-06-22 | +| A1 | 1 サイト/1URL の失敗で他サイト・他 URL の処理が止まらない | Preventive(ハンドラ毎の try/catch) | 2(`Extract/Japan/HandlerIsolationTest` + `Scrape/Japan/HandlerIsolationTest`) | 🟢 OK | — | 2026-06-22 | +| A2 | HTTP 失敗時に RawPage を空/部分 HTML で上書きしない | Preventive(`FetchHtml` で `Http::get()->throw()`。非2xx も例外として扱い `retry()` 経由で失敗、upsert に到達しない) | 3(`FetchHtmlTest::test_throws_on_non_2xx...` + `Scrape/Japan/HandlerFailureTest`×2) | 🟢 OK | — | 2026-06-22 | +| A3 | 同一 RawPage に extract を再実行しても Page 重複が出ない(冪等) | Preventive(`pages_url_unique` + HasOne) | 2(`Extract/UpdateOrCreatePageTest`) | 🟢 OK | — | 2026-06-22 | | A4 | 抽出失敗時にスクレイプ済データ(RawPage)を破壊しない | Preventive(`MarkExtractFailed`:削除せず `extract_failed_at` で隔離、成功時にクリア。`extract_failed_at` に index 追加済み) | 4(`MarkExtractFailedTest`×3 + `HandlerIsolationTest`) | 🟢 OK | — | 2026-06-22 | | A5 | 同一 URL の重複行を作らない | Preventive(DB 一意制約) | 2(`Models/UrlUniqueConstraintTest`) | 🟢 OK | — | 2026-06-22 | @@ -22,8 +22,8 @@ | ID | 保護すべき挙動 / Expected Outcome | 制御 | テスト | Status | 是正条件 | 記録日 | |----|----------------------------------|------|--------|--------|----------|--------| -| B1 | 再実行で Notion ページを重複作成しない | Preventive(URL 突合で create/update 分岐) | 1(Strong) | 🟡 SPOF | テストを 2 本以上に増やす | 2026-06-22 | -| B2 | 1 件の Notion API エラーでバッチ全体が止まらない | Preventive(delete/add 両方とも項目単位 try/catch + 継続 + 失敗件数を error ログ。`addNewNotionPages` は `keyNotionPagesByUrl()` で URL 解析失敗も個別隔離。URL が無い Notion ページは削除対象から除外) | 1(`SyncActionTest::test_continues_syncing_when_one_item_fails`) | 🟡 SPOF | テストを 2 本以上に(delete 側の失敗継続・null URL ガードの直接テストを追加) | 2026-06-22 | +| B1 | 再実行で Notion ページを重複作成しない | Preventive(URL 突合で create/update 分岐) | 2(`test_sync_action_creates_updates_and_deletes_pages` + `test_does_not_create_duplicate_when_page_already_exists_in_notion`) | 🟢 OK | — | 2026-06-22 | +| B2 | 1 件の Notion API エラーでバッチ全体が止まらない | Preventive(delete/add 両方とも項目単位 try/catch + 継続 + 失敗件数を error ログ。`addNewNotionPages` は `keyNotionPagesByUrl()` で URL 解析失敗も個別隔離。URL が無い Notion ページは削除対象から除外) | 3(`test_continues_syncing_when_one_item_fails` + `test_continues_deleting_when_one_delete_fails` + `test_does_not_delete_notion_page_without_url`) | 🟢 OK | — | 2026-06-22 | | B3 | 未変更項目を毎回 re-PUSH せず API クォータを浪費しない | None(`synced_at`/状態フラグ無し) | 0 | 🔴 Missing | **記録のみ**。API 呼び出し回数が問題化したら `synced_at` を導入 | 2026-06-22 | | B4 | Notion シークレットを例外/ログ/Discord に流出させない | Preventive(`SecretScrubber` で送出前に伏字化。C3 と一体) | 7(C3 のテストと共有) | 🟢 OK | — | 2026-06-22 | @@ -31,8 +31,8 @@ | ID | 保護すべき挙動 / Expected Outcome | 制御 | テスト | Status | 是正条件 | 記録日 | |----|----------------------------------|------|--------|--------|----------|--------| -| C1 | デバッグページが config/スタックトレースを露出しない | Preventive(`.env.example`/`.ci`/`.deploy` すべて `APP_DEBUG=false`、コード既定も false) | 1(C2 の `ApiErrorResponseTest` で間接担保) | 🟡 SPOF | Web 側デバッグページ向けの直接テストは未追加 | 2026-06-22 | -| C2 | API 例外が生のトレースを返さない | Preventive(`APP_DEBUG=false` で汎用 JSON エラー) | 1(`Http/ApiErrorResponseTest`) | 🟡 SPOF | テストを 2 本以上に | 2026-06-22 | +| C1 | デバッグページが config/スタックトレースを露出しない | Preventive(`.env.example`/`.ci`/`.deploy` すべて `APP_DEBUG=false`、コード既定も false) | 2(`Http/ApiErrorResponseTest` + `Http/WebErrorResponseTest`) | 🟢 OK | — | 2026-06-22 | +| C2 | API 例外が生のトレースを返さない | Preventive(`APP_DEBUG=false` で汎用 JSON エラー) | 2(`Http/ApiErrorResponseTest`×2) | 🟢 OK | — | 2026-06-22 | | C3 | 例外レポート(log/Discord)に機密値を混入させない | Preventive(`SecretScrubber`(伏字化対象は5文字以上の値のみ。"root"等の短い開発用パスワードでの誤爆を回避)+ `RedactSecretsProcessor`/tap + `ConvertDiscord`(親クラスの `addMessageStacktrace` を no-op 化し、未伏字化の生トレースでの上書きも防止) で送出前に伏字化) | 7(`SecretScrubberTest`×4 + `RedactSecretsProcessorTest` + `ConvertDiscordTest`×2) | 🟢 OK | — | 2026-06-22 | | C4 | 実シークレットを git に混入させない | Preventive(`.gitignore` で `.env` 除外、committed な `.env.*` は空値) | N/A | 🟢 OK | — | 2026-06-22 | @@ -40,10 +40,10 @@ | ID | 保護すべき挙動 / Expected Outcome | 制御 | テスト | Status | 是正条件 | 記録日 | |----|----------------------------------|------|--------|--------|----------|--------| -| D1 | raw_pages(生 HTML)を公開経路に露出しない | Preventive(検索/Feed で rawPage を eager-load しない、Page に html 列無し) | 1(`PageResourceTest::test_search_does_not_eager_load_raw_page_relation`) | 🟡 SPOF | Feed 経路の assert も追加 | 2026-06-22 | +| D1 | raw_pages(生 HTML)を公開経路に露出しない | Preventive(検索/Feed で rawPage を eager-load しない、Page に html 列無し) | 2(`PageResourceTest::test_search_does_not_eager_load_raw_page_relation` + `SearchPage/FeedActionTest`) | 🟢 OK | — | 2026-06-22 | | D2 | 非公開コンテンツを検索/Feed に出さない | N/A(Page に公開状態の概念が無い) | — | — | 対象外(将来 status 列を足す場合は再評価) | 2026-06-22 | -| D3 | API/Feed 応答に内部フィールドを含めない | Preventive(PageResource 許可リスト) | 1(`PageResourceTest::test_resource_exposes_only_whitelisted_fields`) | 🟡 SPOF | テストを 2 本以上に | 2026-06-22 | -| D4 | 半分書かれた Page 行を検索に見せない(原子性) | None(`UpdateOrCreatePage` に transaction 無し) | 0 | 🟡 Structural Weakness | (低優先・データ品質)将来 transaction か status 列で是正 | 2026-06-22 | +| D3 | API/Feed 応答に内部フィールドを含めない | Preventive(PageResource 許可リスト + `Page::toFeedItem()`) | 2(`PageResourceTest::test_resource_exposes_only_whitelisted_fields` + `test_feed_item_does_not_expose_internal_fields_or_raw_html`) | 🟢 OK | — | 2026-06-22 | +| D4 | 半分書かれた Page 行を検索に見せない(原子性) | Preventive(`UpdateOrCreatePageWithPaks` が Page 更新 + Pak 同期を `DB::transaction()` で原子化。3 Handler すべてこれを使用) | 2(`Extract/UpdateOrCreatePageWithPaksTest`) | 🟢 OK | — | 2026-06-22 |