From c2572746475405c8d12db1d59bbb7c46dd2eaa98 Mon Sep 17 00:00:00 2001 From: leo1987820 <290468635+leo1987820@users.noreply.github.com> Date: Fri, 26 Jun 2026 14:41:08 +0800 Subject: [PATCH] Add DNS history seen metadata --- dooked/include/cli_preprocessor.hpp | 8 + dooked/include/utils/io_utils.hpp | 15 ++ dooked/include/utils/probe_result.hpp | 3 + dooked/source/cli_preprocessor.cpp | 219 ++++++++++++++++++++++++-- dooked/source/main.cpp | 8 + dooked/source/utils/io_utils.cpp | 5 +- 6 files changed, 245 insertions(+), 13 deletions(-) diff --git a/dooked/include/cli_preprocessor.hpp b/dooked/include/cli_preprocessor.hpp index 43fa1ba..0b54836 100644 --- a/dooked/include/cli_preprocessor.hpp +++ b/dooked/include/cli_preprocessor.hpp @@ -2,6 +2,8 @@ #include "dns/dns_resolver.hpp" #include "utils/io_utils.hpp" +#include +#include #include // maximum sockets to open regardless of the number of threads @@ -24,7 +26,10 @@ struct cli_args_t { int post_http_request{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; + std::string last_seen_date{}; bool include_date{false}; + bool first_seen{false}; }; struct runtime_args_t { @@ -36,6 +41,9 @@ struct runtime_args_t { http_process_e http_request_time_{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; + std::optional last_seen_cutoff{}; + bool first_seen{false}; }; void run_program(cli_args_t const &cli_args); diff --git a/dooked/include/utils/io_utils.hpp b/dooked/include/utils/io_utils.hpp index 829b09e..b42d615 100644 --- a/dooked/include/utils/io_utils.hpp +++ b/dooked/include/utils/io_utils.hpp @@ -26,9 +26,12 @@ void trim(std::string &); struct json_data_t { std::string domain_name{}; std::string rdata{}; + std::string first_seen{}; + std::string last_seen{}; int ttl{}; int http_code{}; int content_length{}; + int seen_count{}; dns_record_type_e type{}; static json_data_t serialize(std::string const &d, int const len, @@ -40,6 +43,18 @@ struct json_data_t { dns_str_to_record_type(json_object["type"].get()); data.rdata = json_object["info"].get(); data.ttl = json_object["ttl"].get(); + if (auto iter = json_object.find("first-seen"); + iter != json_object.end() && iter->second.is_string()) { + data.first_seen = iter->second.get(); + } + if (auto iter = json_object.find("last-seen"); + iter != json_object.end() && iter->second.is_string()) { + data.last_seen = iter->second.get(); + } + if (auto iter = json_object.find("seen"); + iter != json_object.end() && iter->second.is_number_integer()) { + data.seen_count = iter->second.get(); + } data.content_length = len; data.http_code = http_code; return data; diff --git a/dooked/include/utils/probe_result.hpp b/dooked/include/utils/probe_result.hpp index 07211c6..924a234 100644 --- a/dooked/include/utils/probe_result.hpp +++ b/dooked/include/utils/probe_result.hpp @@ -12,6 +12,9 @@ struct probe_result_t { std::string rdata{}; dns_record_type_e type{}; // RR TYPE (2 octets) std::uint32_t ttl{}; // time to live(4 octets) + std::string first_seen{}; + std::string last_seen{}; + int seen_count{}; friend bool operator==(probe_result_t const &a, probe_result_t const &b) { return case_insensitive_compare(a.rdata, b.rdata) && (a.type == b.type); diff --git a/dooked/source/cli_preprocessor.cpp b/dooked/source/cli_preprocessor.cpp index c08d7fb..9e193f5 100644 --- a/dooked/source/cli_preprocessor.cpp +++ b/dooked/source/cli_preprocessor.cpp @@ -4,10 +4,14 @@ #include "utils/exceptions.hpp" #include "utils/random_utils.hpp" #include "utils/string_utils.hpp" +#include #include #include +#include +#include #include #include +#include // defined (and assigned to) in main.cpp extern bool silent; @@ -18,6 +22,182 @@ namespace dooked { namespace net = boost::asio; using namespace fmt::v7::literals; +std::string history_date_string(std::time_t const timestamp = std::time(nullptr)) { + std::string output{}; + if (!timet_to_string(output, timestamp, "%m/%d/%Y")) { + return {}; + } + return output; +} + +std::optional parse_history_date(std::string const &date_string) { + if (date_string.empty()) { + return std::nullopt; + } + std::tm tm{}; + std::istringstream input{date_string}; + input >> std::get_time(&tm, "%m/%d/%Y"); + if (input.fail()) { + return std::nullopt; + } + tm.tm_hour = 23; + tm.tm_min = 59; + tm.tm_sec = 59; + return std::mktime(&tm); +} + +std::optional days_before_now(int const days) { + if (days < 0) { + return std::nullopt; + } + return std::time(nullptr) - (static_cast(days) * 24 * 60 * 60); +} + +bool same_dns_record(json_data_t const &previous, probe_result_t const ¤t) { + return previous.type == current.type && + case_insensitive_compare(previous.rdata, current.rdata); +} + +bool same_dns_record(json_data_t const &previous, + std::string const &domain_name, + probe_result_t const ¤t) { + return case_insensitive_compare(previous.domain_name, domain_name) && + same_dns_record(previous, current); +} + +std::string first_seen_or_today(json_data_t const &record, + std::string const &today) { + return record.first_seen.empty() ? today : record.first_seen; +} + +std::string last_seen_or_today(json_data_t const &record, + std::string const &today) { + return record.last_seen.empty() ? today : record.last_seen; +} + +bool should_report_last_seen(json_data_t const &previous_record, + runtime_args_t const &rt_args) { + if (rt_args.last_seen_days < 0 && !rt_args.last_seen_cutoff) { + return false; + } + if (previous_record.last_seen.empty()) { + return true; + } + + auto const last_seen = parse_history_date(previous_record.last_seen); + if (!last_seen) { + return true; + } + + if (rt_args.last_seen_cutoff && *last_seen <= *rt_args.last_seen_cutoff) { + return true; + } + if (auto const days_cutoff = days_before_now(rt_args.last_seen_days); + days_cutoff && *last_seen <= *days_cutoff) { + return true; + } + return false; +} + +void report_history_changes( + std::vector::const_iterator previous_begin, + std::vector::const_iterator previous_end, + std::string const &domain_name, + std::vector const ¤t_records, + runtime_args_t const &rt_args) { + if (rt_args.first_seen) { + auto const today = history_date_string(); + for (auto const ¤t_record : current_records) { + auto const previous_iter = + std::find_if(previous_begin, previous_end, [&](auto const &previous) { + return same_dns_record(previous, current_record); + }); + if (previous_iter == previous_end) { + spdlog::info("[FIRST-SEEN][{}][{}] `{}` first seen {}", domain_name, + dns_record_type_to_str(current_record.type), + current_record.rdata, today); + } + } + } + + for (auto iter = previous_begin; iter != previous_end; ++iter) { + auto const current_iter = + std::find_if(current_records.cbegin(), current_records.cend(), + [&](auto const ¤t) { + return same_dns_record(*iter, current); + }); + if (current_iter == current_records.cend() && + should_report_last_seen(*iter, rt_args)) { + spdlog::info("[LAST-SEEN][{}][{}] `{}` last seen {}", domain_name, + dns_record_type_to_str(iter->type), iter->rdata, + iter->last_seen.empty() ? "unknown" : iter->last_seen); + } + } +} + +void apply_history_metadata(std::optional> const &previous, + map_container_t &result_map, + std::string const &today) { + auto ¤t_data = result_map.result(); + for (auto &[domain_name, domain_result] : current_data) { + for (auto &record : domain_result.dns_result_list_) { + if (previous) { + auto const previous_iter = + std::find_if(previous->cbegin(), previous->cend(), + [&](auto const &previous_record) { + return same_dns_record(previous_record, domain_name, + record); + }); + if (previous_iter != previous->cend()) { + record.first_seen = first_seen_or_today(*previous_iter, today); + record.last_seen = today; + record.seen_count = + previous_iter->seen_count > 0 ? previous_iter->seen_count + 1 : 2; + continue; + } + } + + record.first_seen = today; + record.last_seen = today; + record.seen_count = 1; + } + } + + if (!previous) { + return; + } + + for (auto const &previous_record : *previous) { + auto const current_domain_iter = current_data.find(previous_record.domain_name); + auto const found_current = + current_domain_iter != current_data.end() && + std::find_if(current_domain_iter->second.dns_result_list_.cbegin(), + current_domain_iter->second.dns_result_list_.cend(), + [&](auto const ¤t_record) { + return same_dns_record(previous_record, current_record); + }) != current_domain_iter->second.dns_result_list_.cend(); + + if (found_current) { + continue; + } + + if (current_domain_iter == current_data.end()) { + result_map.insert(previous_record.domain_name, + previous_record.content_length, + previous_record.http_code); + } + probe_result_t retained_record{}; + retained_record.rdata = previous_record.rdata; + retained_record.type = previous_record.type; + retained_record.ttl = static_cast(previous_record.ttl); + retained_record.first_seen = first_seen_or_today(previous_record, today); + retained_record.last_seen = last_seen_or_today(previous_record, today); + retained_record.seen_count = + previous_record.seen_count > 0 ? previous_record.seen_count : 1; + result_map.append(previous_record.domain_name, retained_record); + } +} + void compare_http_result(int const base_cl, json_data_t const &prev_http_result, http_response_t const ¤t_result) { auto const current_req_cl = current_result.content_length_; @@ -54,7 +234,7 @@ void compare_http_result(int const base_cl, json_data_t const &prev_http_result, std::vector::const_iterator iter, std::vector::const_iterator end_iter, http_dns_response_t const ¤t_domain_info, - int const base_content_length, + runtime_args_t const &rt_args, jd_domain_comparator_t const &domain_comparator) { auto const last_elem_iter = @@ -63,6 +243,8 @@ void compare_http_result(int const base_cl, json_data_t const &prev_http_result, (std::size_t)std::distance(iter, last_elem_iter); auto const ¤t_domain_info_list = current_domain_info.dns_result_list_; auto const current_total_elem = current_domain_info_list.size(); + report_history_changes(iter, last_elem_iter, iter->domain_name, + current_domain_info_list, rt_args); // something is missing if (current_total_elem < previous_total_elem) { @@ -133,14 +315,14 @@ void compare_http_result(int const base_cl, json_data_t const &prev_http_result, } } } - compare_http_result(base_content_length, *iter, + compare_http_result(rt_args.content_length, *iter, current_domain_info.http_result_); return last_elem_iter; } void compare_results(std::vector const &previous_result, map_container_t const ¤t_result, - int const content_length) { + runtime_args_t const &rt_args) { if (!silent) { spdlog::info("Trying to compare old with new result"); } @@ -160,12 +342,15 @@ void compare_results(std::vector const &previous_result, if (current_find_iter == current_data_map.end()) { spdlog::error("{} not found in new result", iter->domain_name); // find the next domain name following this current domain - iter = std::upper_bound(iter, end_iter, *iter, domain_comparator); + auto const next_iter = + std::upper_bound(iter, end_iter, *iter, domain_comparator); + report_history_changes(iter, next_iter, iter->domain_name, {}, rt_args); + iter = next_iter; continue; } auto const ¤t_domain_info = current_find_iter->second; auto next_iter = compare_dns_result(iter, end_iter, current_domain_info, - content_length, domain_comparator); + rt_args, domain_comparator); iter = next_iter; } } @@ -350,11 +535,6 @@ void start_name_checking(runtime_args_t &&rt_args) { } thread_pool->join(); } - if (!silent) { - spdlog::info("Writing JSON output"); - } - write_json_result(result_map, rt_args); - // compare old with new result -- only if we had previous record if (rt_args.previous_data) { auto &previous_data = *rt_args.previous_data; @@ -373,9 +553,15 @@ void start_name_checking(runtime_args_t &&rt_args) { return std::tie(a.type, a.rdata) < std::tie(b.type, b.rdata); }); } - return compare_results(*rt_args.previous_data, result_map, - rt_args.content_length); + compare_results(*rt_args.previous_data, result_map, rt_args); + } + + apply_history_metadata(rt_args.previous_data, result_map, + history_date_string()); + if (!silent) { + spdlog::info("Writing JSON output"); } + write_json_result(result_map, rt_args); } void run_program(cli_args_t const &cli_args) { @@ -477,6 +663,15 @@ void run_program(cli_args_t const &cli_args) { static_cast(cli_args.post_http_request); rt_args.thread_count = cli_args.thread_count; rt_args.content_length = cli_args.content_length; + rt_args.first_seen = cli_args.first_seen; + rt_args.last_seen_days = cli_args.last_seen_days; + if (!cli_args.last_seen_date.empty()) { + auto const parsed_date = parse_history_date(cli_args.last_seen_date); + if (!parsed_date) { + return spdlog::error("invalid --lsd date: expected MM/DD/YYYY"); + } + rt_args.last_seen_cutoff = parsed_date; + } return start_name_checking(std::move(rt_args)); } diff --git a/dooked/source/main.cpp b/dooked/source/main.cpp index cf29460..c73c07c 100644 --- a/dooked/source/main.cpp +++ b/dooked/source/main.cpp @@ -36,6 +36,14 @@ int main(int argc, char **argv) { "show content lengths that changed more than --content-length"); app.add_flag("-d,--include-date", cli_args.include_date, "append present datetime(-ddMMyyyy_hhmmss) in output name"); + app.add_flag("--fs", cli_args.first_seen, + "show DNS records that are being seen for the first time"); + app.add_option("--ls", cli_args.last_seen_days, + "show DNS records missing from this run after N days since " + "they were last seen"); + app.add_option("--lsd", cli_args.last_seen_date, + "show DNS records missing from this run and last seen on or " + "before the provided US date (MM/DD/YYYY)"); app.add_flag( "--defer", cli_args.post_http_request, "defers http request until after all DNS requests have been completed"); diff --git a/dooked/source/utils/io_utils.cpp b/dooked/source/utils/io_utils.cpp index a1bd5d3..afd48da 100644 --- a/dooked/source/utils/io_utils.cpp +++ b/dooked/source/utils/io_utils.cpp @@ -5,7 +5,10 @@ namespace dooked { void to_json(json &j, probe_result_t const &record) { j = json{{"ttl", record.ttl}, {"type", dns_record_type_to_str(record.type)}, - {"info", record.rdata}}; + {"info", record.rdata}, + {"first-seen", record.first_seen}, + {"last-seen", record.last_seen}, + {"seen", record.seen_count}}; } bool is_text_file(std::string const &file_extension) {