diff --git a/tools/domain-second-pass/industry_rules.yaml b/tools/domain-second-pass/industry_rules.yaml new file mode 100644 index 0000000..52ccd84 --- /dev/null +++ b/tools/domain-second-pass/industry_rules.yaml @@ -0,0 +1,456 @@ +industries: + - id: agriculture_food_upstream + name: 农林牧渔与食品上游 + definition: 种植、畜牧、林业、渔业、农业投入品、农机服务和精准农业等上游生产与服务活动。 + priority: 12 + domain_patterns: [] + keywords: [agriculture, farming, farm, crop, grain, seed, forestry, fishery, aquaculture, livestock, dairy, fertilizer, agricultural machinery, precision agriculture, 农业, 农场, 种植, 粮食, 种子, 林业, 渔业, 水产, 畜牧, 养殖, 奶业, 饲料, 农资, 植保, 农机, 精准农业] + negative_keywords: [restaurant, food delivery, 餐厅, 外卖] + + - id: mining_oil_gas + name: 矿产与油气上游 + definition: 煤炭、石油天然气、金属矿、非金属矿、关键矿产以及油服矿服。 + priority: 16 + domain_patterns: [] + keywords: [mining, coal, oil and gas, petroleum, natural gas, upstream energy, drilling, oilfield, mineral, metals mining, lithium mine, rare earth, 煤炭, 煤矿, 石油, 天然气, 油气, 油服, 钻井, 采矿, 矿业, 金属矿, 非金属矿, 关键矿产, 锂矿, 稀土] + negative_keywords: [cloud mining, crypto mining, blockchain, 云挖矿, 区块链] + + - id: energy_power_generation + name: 电力与能源生产 + definition: 火电、水电、核电、风电、光伏、生物质、地热、储能、虚拟电厂和氢能。 + priority: 16 + domain_patterns: [] + keywords: [power generation, renewable energy, solar power, photovoltaic, wind power, hydropower, nuclear power, thermal power, biomass energy, geothermal, energy storage, battery storage, virtual power plant, hydrogen energy, 电力, 发电, 火电, 水电, 核电, 风电, 光伏, 太阳能, 可再生能源, 生物质, 地热, 储能, 虚拟电厂, 氢能, 电解制氢] + negative_keywords: [power adapter, mobile power, charger, 充电宝, 充电器] + + - id: utilities_network + name: 公用事业网络 + definition: 电网、配售电、燃气输配、供水、再生水、供热、蒸汽和污水处理等公共网络服务。 + priority: 18 + domain_patterns: [] + keywords: [utility, utilities, power grid, electricity grid, water supply, gas utility, wastewater, sewage, district heating, steam supply, 电网, 配售电, 供电, 水务, 供水, 再生水, 燃气, 天然气输配, 供热, 蒸汽, 污水处理, 公用事业] + negative_keywords: [] + + - id: environment_circular_economy + name: 环境与循环经济 + definition: 固废、危废、医废、资源回收、环境监测、污染治理、土壤修复、碳管理和节能服务。 + priority: 17 + domain_patterns: [] + keywords: [environmental, recycling, waste management, hazardous waste, medical waste, remediation, pollution control, environmental monitoring, carbon management, CCUS, sustainability, circular economy, ESG, 环保, 环境治理, 固废, 危废, 医废, 垃圾处理, 回收, 资源回收, 土壤修复, 环境监测, 污染治理, 碳管理, 碳捕集, 节能服务, 循环经济] + negative_keywords: [] + + - id: materials_chemicals + name: 基础材料与化工 + definition: 钢铁、有色、合金、石化、基础化学品、新材料、复合材料、电池材料和电子材料。 + priority: 12 + domain_patterns: [] + keywords: [steel, nonferrous, alloy, chemical, chemicals, petrochemical, materials, composite materials, advanced materials, battery materials, electronic materials, resin, polymer, 钢铁, 有色金属, 合金, 化工, 化学品, 石化, 新材料, 复合材料, 电池材料, 电子材料, 树脂, 高分子] + negative_keywords: [beauty, cosmetics, skincare, 美妆, 护肤] + + - id: food_beverage_manufacturing + name: 食品饮料与农副加工 + definition: 食品加工、饮料、酒类、乳制品、冷链食品和营养保健食品等生产制造。 + priority: 13 + domain_patterns: [] + keywords: [food manufacturing, food processing, beverage, drinks, winery, brewery, dairy products, frozen food, snack, bakery, nutrition food, supplement, 食品加工, 食品制造, 饮料, 酒类, 乳制品, 冷链食品, 冷冻食品, 零食, 烘焙, 营养食品, 保健食品] + negative_keywords: [restaurant, reservation, food delivery, menu, 餐厅, 菜单, 外卖, 预订] + + - id: textile_apparel_consumer_goods_manufacturing + name: 纺织服装与消费品制造 + definition: 纺织面料、服装鞋帽、箱包、家纺、家居用品、美妆个护和日化等消费品制造。 + priority: 11 + domain_patterns: [] + keywords: [textile, fabric, yarn, apparel manufacturing, garment factory, footwear manufacturing, luggage, home textile, personal care manufacturing, cosmetics manufacturing, daily chemical, 纺织, 面料, 纱线, 服装制造, 鞋帽, 箱包, 家纺, 家居用品制造, 美妆制造, 个护, 日化] + negative_keywords: [online store, shopping, ecommerce, 购物, 电商, 商城] + + - id: industrial_equipment_automation + name: 装备与通用机械 + definition: 工程机械、重型装备、机床、自动化设备、物流包装设备、专用设备、仪器仪表和检测设备。 + priority: 14 + domain_patterns: [] + keywords: [machinery, industrial equipment, heavy equipment, construction machinery, machine tool, CNC, automation equipment, packaging equipment, instrumentation, testing equipment, 工程机械, 重型装备, 机械设备, 机床, 数控, 自动化设备, 包装设备, 专用设备, 仪器仪表, 检测设备] + negative_keywords: [robotics, semiconductor, 半导体, 机器人] + + - id: robotics_industrial_automation + name: 机器人与工业自动化 + definition: 工业机器人、协作机器人、移动机器人、机器视觉、控制系统、伺服系统、系统集成和智能工厂。 + priority: 20 + domain_patterns: [] + keywords: [industrial robot, collaborative robot, cobot, mobile robot, AMR, AGV, machine vision, servo, motion control, smart factory, factory automation, 机器人, 工业机器人, 协作机器人, 移动机器人, 机器视觉, 伺服, 运动控制, 系统集成, 智能工厂, 工业自动化] + negative_keywords: [game, toy, 游戏, 玩具] + + - id: aerospace_ship_rail_equipment + name: 航空航天船舶轨交装备 + definition: 航空器、发动机、卫星、航天器、地面设备、船舶、海工装备和轨道交通装备。 + priority: 18 + domain_patterns: [] + keywords: [aerospace, aircraft, aviation, aircraft engine, satellite, spacecraft, ground equipment, shipbuilding, marine engineering, rail transit equipment, rolling stock, 航空, 航天, 航空器, 飞机, 发动机, 卫星, 航天器, 船舶, 海工装备, 轨道交通, 轨交装备] + negative_keywords: [travel booking, airline ticket, 机票, 旅游] + + - id: semiconductor_design + name: 半导体设计 + definition: CPU、GPU、AI 加速器、SoC、模拟芯片、电源芯片、MCU、通信射频、车规芯片、存储、FPGA 等芯片设计。 + priority: 22 + domain_patterns: [] + keywords: [semiconductor design, chip design, fabless, CPU, GPU, AI accelerator, SoC, MCU, analog chip, power management IC, RF chip, automotive chip, memory chip, FPGA, 半导体设计, 芯片设计, 芯片, 处理器, AI加速器, 模拟芯片, 电源芯片, 射频芯片, 车规芯片, 存储芯片] + negative_keywords: [potato chips, chocolate chip, 薯片] + + - id: semiconductor_manufacturing_packaging + name: 半导体制造与封测 + definition: 晶圆制造、Foundry、IDM、存储制造、封装测试、先进封装和第三代半导体制造。 + priority: 23 + domain_patterns: [] + keywords: [semiconductor manufacturing, wafer fab, foundry, IDM, wafer manufacturing, packaging and testing, advanced packaging, compound semiconductor, silicon carbide, gallium nitride, 晶圆制造, 晶圆厂, 半导体制造, 代工厂, 封装测试, 先进封装, 化合物半导体, 第三代半导体, 碳化硅, 氮化镓] + negative_keywords: [] + + - id: semiconductor_equipment_materials + name: 半导体设备与材料 + definition: 前道设备、量测测试、后道设备、硅片、靶材、光刻胶、特气、封装材料、基板和载板。 + priority: 23 + domain_patterns: [] + keywords: [semiconductor equipment, lithography, etching equipment, deposition equipment, metrology, wafer inspection, photoresist, silicon wafer, target material, specialty gas, packaging substrate, 半导体设备, 光刻, 刻蚀设备, 薄膜沉积, 量测, 测试设备, 硅片, 靶材, 光刻胶, 特气, 封装材料, 基板, 载板] + negative_keywords: [] + + - id: electronic_components_modules + name: 电子元器件与模组 + definition: PCB、连接器、被动元件、显示光电器件、传感器、MEMS、电源电池、BMS、通信模组、物联网模组和摄像模组。 + priority: 17 + domain_patterns: [] + keywords: [electronic components, PCB, connector, passive components, capacitor, resistor, display module, optical device, sensor, MEMS, power module, battery management system, communication module, camera module, 电子元器件, PCB, 连接器, 被动元件, 电容, 电阻, 显示模组, 光电器件, 传感器, MEMS, 电源模块, BMS, 通信模组, 物联网模组, 摄像模组] + negative_keywords: [] + + - id: smart_hardware_terminal + name: 智能终端与专用硬件 + definition: 手机、PC、服务器、边缘设备、可穿戴、AR/VR/XR 终端、智能家居、消费电子、医疗工业和车载电子。 + priority: 15 + domain_patterns: [apple.com, samsung.com, mi.com] + keywords: [smart hardware, smartphone, personal computer, server hardware, edge device, wearable device, virtual reality headset, augmented reality headset, smart home, consumer electronics, vehicle electronics, medical electronics, 智能硬件, 手机, 电脑, 服务器硬件, 边缘设备, 可穿戴, AR/VR, XR终端, 智能家居, 消费电子, 车载电子, 医疗电子, 工业电子] + negative_keywords: [software, SaaS, 软件] + + - id: cloud_datacenter_cdn + name: 云与数据中心 + definition: IaaS、PaaS、容器、Kubernetes、Serverless、IDC、主机托管、云主机、CDN、DNS 和全球加速。 + priority: 23 + domain_patterns: [aws.amazon.com, azure.microsoft.com, cloud.google.com, aliyun.com, cloud.tencent.com, cloudflare.com, cloudflare.net, digitalocean.com, linode.com, akamai.com, fastly.com] + keywords: [cloud computing, cloud infrastructure, cloud hosting, cloud server, IaaS, PaaS, Kubernetes, K8s, container platform, serverless, data center, colocation, dedicated server, CDN, DNS, global acceleration, 云计算, 云服务, 云主机, 云服务器, 容器平台, 数据中心, IDC, 机柜托管, 主机托管, CDN, 域名解析, 全球加速] + negative_keywords: [cloud gaming, 云游戏, weather, 天气] + + - id: edge_distributed_infra + name: 边缘计算与分布式基础设施 + definition: MEC、边缘云、边缘 AI 推理、边缘管理、工业边缘、车路边缘、分布式算力和边缘存储。 + priority: 22 + domain_patterns: [] + keywords: [edge computing, edge cloud, MEC, multi-access edge, edge AI, edge inference, edge management, industrial edge, roadside edge, distributed computing, edge storage, 边缘计算, 边缘云, MEC, 边缘AI, 边缘推理, 边缘管理, 工业边缘, 车路边缘, 分布式算力, 边缘存储] + negative_keywords: [edge browser, Microsoft Edge, 浏览器] + + - id: telecommunications_networks + name: 通信网络与运营 + definition: 固网宽带、骨干网、移动通信、5G/6G、专网、卫星通信、低轨互联网、物联网连接和 eSIM。 + priority: 20 + domain_patterns: [att.com, verizon.com, t-mobile.com, china-mobile.com, chinatelecom.com.cn, chinaunicom.com] + keywords: [telecom, telecommunications, broadband, fiber network, backbone network, mobile network, 5G, 6G, private network, satellite communication, low earth orbit internet, IoT connectivity, eSIM, carrier, ISP, 通信, 电信, 宽带, 光纤, 骨干网, 移动通信, 5G, 6G, 专网, 卫星通信, 低轨互联网, 物联网连接, eSIM, 运营商] + negative_keywords: [social network, 社交网络] + + - id: cybersecurity_trust + name: 网络安全与信任基础设施 + definition: 安全产品、安全服务、密码、PKI、零信任、可信执行、数据安全、隐私工程和合规。 + priority: 24 + domain_patterns: [paloaltonetworks.com, crowdstrike.com, okta.com, zscaler.com, fortinet.com, kaspersky.com] + keywords: [cybersecurity, information security, network security, endpoint security, zero trust, PKI, encryption, cryptography, trusted execution, data security, privacy engineering, security compliance, SOC, SIEM, 网络安全, 信息安全, 安全产品, 安全服务, 零信任, 密码, 加密, PKI, 可信执行, 数据安全, 隐私工程, 安全合规, 态势感知] + negative_keywords: [security camera, home security, 保安, 安防摄像头] + + - id: blockchain_web3_infra + name: 区块链与 Web3 基础设施 + definition: 公链、联盟链、分布式账本、节点服务、开发框架、钱包基础设施、智能合约、预言机、跨链、链上安全和合规模块。 + priority: 22 + domain_patterns: [ethereum.org, polygon.technology, chain.link, alchemy.com, infura.io] + keywords: [blockchain infrastructure, distributed ledger, public blockchain, consortium blockchain, web3 infrastructure, node service, smart contract, oracle network, cross-chain, on-chain security, wallet infrastructure, 区块链基础设施, 分布式账本, 公链, 联盟链, Web3基础设施, 节点服务, 智能合约, 预言机, 跨链, 链上安全, 钱包基础设施] + negative_keywords: [crypto exchange, trading, casino, 投注, 交易所] + + - id: enterprise_software_devtools + name: 企业软件与开发工具 + definition: ERP、CRM、HR、协同办公、数据库、中间件、操作系统、开发者工具、测试、低代码、开源商业化、API 集成和 RPA。 + priority: 21 + domain_patterns: [github.com, gitlab.com, npmjs.com, pypi.org, docker.com, stackoverflow.com, salesforce.com, atlassian.com, jetbrains.com, vercel.com] + keywords: [enterprise software, SaaS, ERP, CRM, HR software, collaboration software, database, middleware, operating system, developer tools, API platform, SDK, source code, repository, low-code, test automation, RPA, open source, 企业软件, 协同办公, 数据库, 中间件, 操作系统, 开发者工具, API平台, SDK, 源代码, 代码仓库, 低代码, 测试自动化, 开源商业化, RPA] + negative_keywords: [bank, insurance, casino, adult, 银行, 保险, 博彩, 成人] + + - id: data_ai_foundation + name: 数据智能与 AI 基础层 + definition: 数据采集、ETL、湖仓、数据治理、MLOps、训练平台、向量数据库、基础模型、多模态模型、Agent 平台、AI 安全评测和治理。 + priority: 23 + domain_patterns: [openai.com, anthropic.com, huggingface.co, databricks.com, snowflake.com] + keywords: [data governance, data warehouse, data lakehouse, ETL, MLOps, machine learning platform, vector database, foundation model, large language model, LLM, multimodal model, agent platform, artificial intelligence, AI safety, model evaluation, 数据治理, 数据仓库, 数据湖仓, MLOps, 机器学习平台, 向量数据库, 基础模型, 大模型, 多模态模型, Agent平台, 人工智能, AI安全, 模型评测] + negative_keywords: [hospital, banking, insurance, 医院, 银行, 保险] + + - id: industrial_software_iot + name: 行业软件、工业软件与物联网平台 + definition: CAD、CAE、CAM、PLM、EDA、工业互联网、MES、SCADA、医疗金融政务行业软件、数字孪生、仿真、空间计算、IoT 平台和智慧场景平台。 + priority: 22 + domain_patterns: [] + keywords: [industrial software, CAD, CAE, CAM, PLM, EDA, industrial internet, MES, SCADA, digital twin, simulation platform, spatial computing, IoT platform, smart city platform, smart campus, smart building, 工业软件, 工业互联网, 数字孪生, 仿真平台, 空间计算, 物联网平台, 智慧城市, 智慧园区, 智慧楼宇, MES, SCADA, CAD, CAE, CAM, PLM, EDA] + negative_keywords: [ecommerce, shopping, 购物, 电商] + + - id: vertical_ai_applications + name: 智能应用与垂直 AI + definition: 智能客服、营销 AI、办公 AI、智能驾驶软件、车路协同算法、视觉语音、机器人智能和行业垂直 AI 应用。 + priority: 22 + domain_patterns: [] + keywords: [AI application, AI assistant, intelligent customer service, conversational AI, marketing AI, office AI, autonomous driving software, vehicle-road collaboration, computer vision, speech recognition, robot intelligence, vertical AI, 智能客服, 对话式AI, 营销AI, 办公AI, 智能驾驶软件, 车路协同算法, 计算机视觉, 语音识别, 机器人智能, 垂直AI] + negative_keywords: [chip, semiconductor, 医疗器械, 芯片, 半导体] + + - id: immersive_metaverse_xr + name: 元宇宙与沉浸式应用 + definition: AR、VR、XR、沉浸式内容、虚拟人、虚拟空间、数字孪生展示、空间计算应用和互动体验。 + priority: 18 + domain_patterns: [] + keywords: [metaverse, virtual reality, augmented reality, mixed reality, XR application, immersive experience, virtual human, virtual space, digital avatar, spatial computing application, 元宇宙, 虚拟现实, 增强现实, 混合现实, XR应用, 沉浸式体验, 虚拟人, 虚拟空间, 数字人, 空间计算应用] + negative_keywords: [game console, 游戏主机] + + - id: automotive_parts + name: 汽车整车与零部件 + definition: 汽车整车、新能源汽车、燃油车、零部件、车身底盘、动力系统、充电补能、经销售后和车辆服务。 + priority: 16 + domain_patterns: [tesla.com, toyota.com, volkswagen.com, byd.com] + keywords: [automotive, automobile, vehicle, car manufacturer, electric vehicle, EV, auto parts, powertrain, chassis, dealership, vehicle service, charging station, 汽车, 整车, 新能源汽车, 电动车, 燃油车, 零部件, 车身, 底盘, 动力系统, 经销商, 4S店, 充电桩, 补能] + negative_keywords: [rental, ride hailing, logistics, 租车, 出行平台, 物流] + + - id: autonomous_mobility + name: 自动驾驶与出行科技 + definition: 自动驾驶系统、ADAS、车路协同、Robotaxi、智能座舱、车联网、共享出行和出行平台。 + priority: 22 + domain_patterns: [waymo.com, cruise.com] + keywords: [autonomous driving, self-driving, ADAS, robotaxi, mobility technology, vehicle-road collaboration, connected vehicle, intelligent cockpit, ride hailing, shared mobility, 自动驾驶, 无人驾驶, ADAS, Robotaxi, 出行科技, 车路协同, 车联网, 智能座舱, 网约车, 共享出行] + negative_keywords: [game, simulator, 游戏] + + - id: transportation_logistics + name: 交通运输与物流供应链 + definition: 航空、铁路、公路、水运、港口、仓储、快递、货运、冷链、供应链管理和跨境物流。 + priority: 16 + domain_patterns: [fedex.com, ups.com, dhl.com, maersk.com] + keywords: [transportation, logistics, shipping, freight, cargo, warehouse, warehousing, courier, delivery, supply chain, cold chain, port, airline cargo, rail freight, cross-border logistics, 交通运输, 物流, 仓储, 快递, 货运, 供应链, 冷链, 港口, 航空货运, 铁路货运, 跨境物流, 配送] + negative_keywords: [food delivery, 外卖] + + - id: construction_infrastructure + name: 建筑工程与基础设施 + definition: 建筑工程、施工承包、装修装饰、工程设计、市政工程、基础设施投资建设、建材和建筑部品。 + priority: 13 + domain_patterns: [] + keywords: [construction, contractor, infrastructure, civil engineering, architecture, building materials, cement, concrete, renovation, municipal engineering, EPC, 建筑, 施工, 工程承包, 基建, 基础设施, 工程设计, 建材, 水泥, 混凝土, 装修, 装饰, 市政工程] + negative_keywords: [software architecture, cloud infrastructure, 软件架构, 云基础设施] + + - id: real_estate_operations + name: 房地产开发与运营 + definition: 房地产开发、销售、租赁、物业管理、商业地产、产业园区、公寓、住房服务和不动产平台。 + priority: 14 + domain_patterns: [zillow.com, realtor.com, fang.com, lianjia.com] + keywords: [real estate, property, apartment, housing, realtor, property management, commercial real estate, rental housing, mortgage, office leasing, 房地产, 房产, 楼盘, 住宅, 公寓, 物业, 商业地产, 产业园区, 租赁住房, 租房, 买房, 按揭] + negative_keywords: [intellectual property, 知识产权] + + - id: wholesale_distribution + name: 批发与分销 + definition: 批发贸易、品牌代理、B2B 分销、工业品流通、采购寻源、供应商服务和渠道流通。 + priority: 10 + domain_patterns: [alibaba.com, globalsources.com] + keywords: [wholesale, distributor, distribution, supplier, sourcing, procurement, B2B marketplace, trade platform, industrial supplies, 批发, 分销, 经销, 代理商, 供应商, 采购, 寻源, B2B平台, 工业品, 货源] + negative_keywords: [retail, consumer, 零售, 消费者] + + - id: retail_ecommerce_marketplace + name: 零售、电商与交易平台 + definition: 面向消费者的零售、商城、电商平台、交易市场、到家零售、购物车、下单、支付结算和商品销售。 + priority: 15 + domain_patterns: [amazon.com, ebay.com, shopify.com, taobao.com, tmall.com, jd.com, walmart.com, etsy.com] + keywords: [retail, ecommerce, e-commerce, online store, shopping, marketplace, shopping cart, checkout, product catalog, buy now, order online, 零售, 电商, 商城, 网上商城, 购物, 交易平台, 商品目录, 加入购物车, 下单, 立即购买, 到家零售] + negative_keywords: [developer, API, SDK, documentation, 开发者, 接口, 文档] + + - id: consumer_brands_home_fashion + name: 消费品牌、家居与时尚 + definition: 服装鞋履、箱包珠宝、美妆护肤、个护、家具、家装、家电、园艺、宠物和家庭消费品牌。 + priority: 12 + domain_patterns: [nike.com, adidas.com, sephora.com, ikea.com, homedepot.com] + keywords: [consumer brand, fashion, apparel, clothing, footwear, beauty, cosmetics, skincare, jewelry, furniture, home decor, home appliance, garden, gardening, pet supplies, 时尚, 服装, 鞋履, 箱包, 珠宝, 美妆, 护肤, 个护, 家具, 家装, 家电, 家居, 园艺, 宠物用品, 消费品牌] + negative_keywords: [manufacturing, factory, 制造, 工厂] + + - id: banking_payment + name: 银行与支付 + definition: 银行、银行卡、账户、贷款、信用、支付网关、电子钱包、收单、清结算、跨境支付和开放银行。 + priority: 23 + domain_patterns: [paypal.com, visa.com, mastercard.com, stripe.com, unionpayintl.com, alipay.com] + keywords: [bank, banking, payment, payments, payment gateway, wallet, credit card, loan, lending, remittance, cross-border payment, open banking, merchant acquiring, 银行, 支付, 支付网关, 钱包, 信用卡, 贷款, 信贷, 汇款, 跨境支付, 开放银行, 收单, 清结算] + negative_keywords: [shopping cart, retail, ecommerce, 购物车, 零售, 电商] + + - id: securities_asset_management + name: 证券资管与市场基础设施 + definition: 证券、基金、资管、券商、交易所、行情数据、清算托管、财富管理、投顾和资本市场服务。 + priority: 22 + domain_patterns: [nasdaq.com, nyse.com, sec.gov] + keywords: [securities, brokerage, stock exchange, asset management, wealth management, fund management, mutual fund, ETF, trading platform, market data, clearing house, custodian, investment advisory, 证券, 券商, 股票交易所, 资管, 财富管理, 基金, ETF, 交易平台, 行情数据, 清算, 托管, 投顾, 资本市场] + negative_keywords: [game trading, ecommerce, 游戏交易, 电商] + + - id: insurance_insurtech + name: 保险与保险科技 + definition: 寿险、财险、健康险、车险、再保险、经纪代理、保单、理赔、风险保障和保险科技。 + priority: 24 + domain_patterns: [] + keywords: [insurance, insurer, insurtech, policy, claim, claims, coverage, life insurance, health insurance, property insurance, auto insurance, reinsurance, insurance broker, 保险, 保险科技, 保单, 理赔, 保障, 寿险, 财险, 健康险, 车险, 再保险, 保险经纪, 保险代理] + negative_keywords: [] + + - id: fintech_digital_assets + name: 金融科技与数字资产应用 + definition: 金融科技、风控科技、监管科技、数字银行、DeFi、加密资产交易、数字资产托管、代币化应用和合规金融创新。 + priority: 23 + domain_patterns: [coinbase.com, binance.com, kraken.com, chainalysis.com] + keywords: [fintech, digital banking, regtech, risk technology, DeFi, crypto exchange, cryptocurrency exchange, digital asset, tokenization, digital asset custody, blockchain payment, 金融科技, 数字银行, 监管科技, 风控科技, DeFi, 加密资产交易, 数字资产, 数字资产托管, 代币化, 区块链支付] + negative_keywords: [blockchain infrastructure, smart contract developer, 区块链基础设施] + + - id: healthcare_services + name: 医疗服务 + definition: 医院、诊所、专科医疗、基层医疗、互联网医疗、远程医疗、医院管理和医疗服务平台。 + priority: 22 + domain_patterns: [] + keywords: [healthcare, hospital, clinic, doctor, physician, patient care, medical service, telemedicine, telehealth, online medical consultation, hospital management, 医疗服务, 医院, 诊所, 专科医疗, 基层医疗, 医生, 患者, 互联网医疗, 远程医疗, 问诊, 医院管理] + negative_keywords: [pharmaceutical, medical device, 制药, 医疗器械] + + - id: medical_devices_digital_health + name: 医疗器械与数字医疗 + definition: 影像诊断、IVD、手术监护、植介入、数字疗法、医疗可穿戴、家庭医疗、医疗信息化、EHR 和临床 AI。 + priority: 24 + domain_patterns: [] + keywords: [medical device, medical devices, diagnostic imaging, IVD, in vitro diagnostics, surgical device, patient monitor, implant, interventional device, digital therapeutics, medical wearable, home medical device, EHR, clinical AI, 医疗器械, 影像诊断, 体外诊断, IVD, 手术器械, 监护设备, 植介入, 数字疗法, 医疗可穿戴, 家庭医疗, 医疗信息化, 电子病历, 临床AI] + negative_keywords: [hospital appointment, 挂号] + + - id: pharma_biotech + name: 制药与生物科技 + definition: 化学药、中成药、仿制药、生物药、抗体、疫苗、细胞基因治疗、CRO、CDMO、临床服务、合成生物和科研试剂。 + priority: 24 + domain_patterns: [] + keywords: [pharmaceutical, pharma, biotech, biotechnology, drug discovery, medicine, generic drug, biologics, antibody, vaccine, cell therapy, gene therapy, CRO, CDMO, clinical trial, synthetic biology, reagent, 制药, 医药, 生物科技, 药物研发, 化学药, 中成药, 仿制药, 生物药, 抗体, 疫苗, 细胞治疗, 基因治疗, 临床试验, 合成生物, 科研试剂] + negative_keywords: [hospital, clinic, 医院, 诊所] + + - id: life_science_health_management + name: 生命科学服务与健康管理 + definition: 基因测序、精准医学、医药流通、医药电商、健康管理、慢病管理、健康数据服务和科研支持。 + priority: 20 + domain_patterns: [] + keywords: [life science services, genomics, genetic testing, precision medicine, medical distribution, pharmaceutical ecommerce, health management, chronic disease management, health data service, research support, 生命科学服务, 基因测序, 基因检测, 精准医学, 医药流通, 医药电商, 健康管理, 慢病管理, 健康数据服务, 科研支持] + negative_keywords: [fitness, gym, 健身] + + - id: education_research + name: 教育培训与科研服务 + definition: K12、高教、职教、企业培训、考试测评、教育科技、学习平台、留学语言、科研院所和实验室服务。 + priority: 18 + domain_patterns: ["*.edu", coursera.org, edx.org, khanacademy.org, udemy.com] + keywords: [education, school, university, college, course, learning platform, training, vocational education, exam preparation, edtech, research institute, laboratory service, 教育, 学校, 大学, 学院, 课程, 学习平台, 培训, 职业教育, 考试测评, 教育科技, 留学, 语言培训, 科研院所, 实验室服务] + negative_keywords: [market research, user research, 市场研究] + + - id: professional_services + name: 专业服务 + definition: 法律、会计、审计、税务、管理咨询、人力资源、猎头、BPO、共享服务、企业服务外包、设计创意和品牌咨询。 + priority: 13 + domain_patterns: [] + keywords: [professional services, law firm, legal services, accounting, audit, tax advisory, management consulting, human resources, recruitment, headhunting, BPO, shared services, outsourcing, design agency, brand consulting, 专业服务, 律所, 法律服务, 会计, 审计, 税务, 管理咨询, 人力资源, 招聘, 猎头, 企业服务外包, 共享服务, 设计创意, 品牌咨询] + negative_keywords: [software, platform, SaaS, 软件] + + - id: advertising_marketing_data + name: 广告营销与数据服务 + definition: 广告代理、媒介投放、MarTech、营销自动化、市场研究、商业情报、数据分析、公关、会展和活动运营。 + priority: 18 + domain_patterns: [googleadservices.com, doubleclick.net, optimizely.com, hubspot.com] + keywords: [advertising, marketing, ad agency, media buying, campaign management, conversion optimization, SEO, SEM, MarTech, marketing automation, market research, business intelligence, data analytics, public relations, exhibition, event operation, 广告, 营销, 广告代理, 媒介投放, 投放, 转化优化, SEO, SEM, MarTech, 营销自动化, 市场研究, 商业情报, 数据分析, 公关, 会展, 活动运营] + negative_keywords: [news, media publishing, 新闻, 出版] + + - id: media_publishing_info + name: 新闻出版与信息服务 + definition: 新闻媒体、出版社、搜索门户、资讯平台、专业信息数据库、终端、知识内容和自媒体平台。 + priority: 16 + domain_patterns: [medium.com, wordpress.com, nytimes.com, bbc.com, wikipedia.org] + keywords: [news, media, publishing, publisher, magazine, article, blog, information service, search portal, professional database, knowledge platform, self media, newsletter, 新闻, 媒体, 出版, 出版社, 杂志, 文章, 博客, 信息服务, 搜索门户, 资讯平台, 专业数据库, 知识内容, 自媒体] + negative_keywords: [social network, streaming, game, 社交网络, 流媒体, 游戏] + + - id: social_audio_video_content + name: 音视频与社交内容 + definition: 长视频、短视频、直播、音乐、播客、有声内容、社交网络、社区平台、MCN 和内容服务。 + priority: 18 + domain_patterns: [youtube.com, netflix.com, spotify.com, tiktok.com, twitch.tv, bilibili.com, weibo.com, reddit.com] + keywords: [video platform, streaming, short video, live streaming, music streaming, podcast, audio content, social network, community platform, creator platform, MCN, content service, 视频平台, 流媒体, 短视频, 直播, 音乐, 播客, 有声内容, 社交网络, 社区平台, 创作者平台, MCN, 内容服务] + negative_keywords: [online course, education, 在线课程, 教育] + + - id: gaming_interactive_entertainment + name: 游戏与互动娱乐 + definition: 游戏研发、发行、游戏平台、渠道、云游戏、电竞、游戏社区和沉浸式互动娱乐。 + priority: 24 + domain_patterns: [steampowered.com, epicgames.com, battle.net, roblox.com] + keywords: [game, gaming, video game, mobile game, game publisher, game platform, cloud gaming, esports, gameplay, player community, interactive entertainment, 游戏, 手游, 端游, 主机游戏, 游戏研发, 游戏发行, 游戏平台, 云游戏, 电竞, 玩家社区, 互动娱乐] + negative_keywords: [gambling, casino, 博彩, 赌场] + + - id: arts_sports_events + name: 影视演艺与体育 + definition: 影视制作发行、演出票务、场馆、体育赛事、俱乐部、经纪、体育科技和健身平台。 + priority: 15 + domain_patterns: [ticketmaster.com, imdb.com, fifa.com, nba.com] + keywords: [film production, movie, television production, entertainment, performing arts, concert, ticketing, venue, sports event, sports club, sports agency, fitness platform, 影视, 电影, 电视制作, 娱乐, 演艺, 演出, 音乐会, 票务, 场馆, 体育赛事, 体育俱乐部, 体育经纪, 健身平台] + negative_keywords: [gambling, sports betting, 博彩, 体育博彩] + + - id: regulated_entertainment_gambling + name: 博彩彩票与受监管娱乐 + definition: 博彩、赌博、赌场、体育投注、彩票、赛马平台、扑克和其他高监管数字娱乐。 + priority: 30 + domain_patterns: [] + keywords: [gambling, casino, betting, sportsbook, sports betting, lottery, poker, horse racing betting, wagering, 博彩, 赌博, 赌场, 投注, 体育博彩, 彩票, 扑克, 赛马投注, 博彩平台] + negative_keywords: [game publisher, video game, 游戏研发] + + - id: adult_content + name: 成人内容与成人社区 + definition: 成人内容、限制级娱乐、成人社区、成人直播、约会及其他年龄限制内容。 + priority: 30 + domain_patterns: [] + keywords: [adult content, adult entertainment, porn, pornography, xxx, nude, escort, adult live, dating site, age-restricted content, 成人内容, 成人娱乐, 色情, 限制级, 裸聊, 成人直播, 约会网站, 年龄限制] + negative_keywords: [adult education, 成人教育] + + - id: travel_hospitality_foodservice + name: 旅游、住宿与餐饮 + definition: OTA、旅行社、景区、目的地、商旅、签证、旅游交通、酒店、民宿、服务式公寓、餐饮连锁、外卖厨房、团餐、咖啡茶饮和夜经济。 + priority: 16 + domain_patterns: [booking.com, tripadvisor.com, airbnb.com, expedia.com, agoda.com] + keywords: [travel, tourism, OTA, travel agency, destination, scenic area, hotel, accommodation, homestay, serviced apartment, restaurant, food delivery, catering, central kitchen, coffee shop, tea drink, bar, reservation, 旅游, 旅行社, 景区, 目的地, 商旅, 签证, 酒店, 住宿, 民宿, 服务式公寓, 餐饮, 餐厅, 外卖, 团餐, 中央厨房, 咖啡, 茶饮, 酒吧, 预订] + negative_keywords: [food manufacturing, 食品加工] + + - id: local_life_services + name: 本地生活与家庭个人服务 + definition: 家政、维修、搬家、美容美发、洗衣洗护、婚庆摄影、宠物服务、社区便民、家装家居服务、健身休闲、养老托育、生活照护、二手交易和回收服务。 + priority: 13 + domain_patterns: [] + keywords: [local services, home service, repair service, cleaning service, moving service, salon, beauty salon, laundry service, wedding service, photography service, pet service, community service, home improvement service, fitness, spa, elderly care, childcare, second-hand marketplace, 本地生活, 家政, 维修, 清洁, 搬家, 美容美发, 洗衣, 洗护, 婚庆, 摄影, 宠物服务, 社区服务, 便民平台, 家装服务, 健身, SPA, 养老, 托育, 生活照护, 二手交易, 回收服务] + negative_keywords: [restaurant, hotel, 餐厅, 酒店] + + - id: government_public_governance + name: 政府与公共治理 + definition: 党政机关、事业单位、公安司法、应急、政务平台、公共数据运营、财政税务、征管、社保医保、就业、城管、市政、公共文化和公共体育设施。 + priority: 28 + domain_patterns: ["*.gov", "*.gov.cn", europa.eu] + keywords: [government, public governance, public service, ministry, regulator, municipality, public data, public security, judiciary, emergency management, tax bureau, social security, public institution, 政府, 政务, 公共治理, 公共服务, 党政机关, 事业单位, 公安, 司法, 应急, 政务平台, 公共数据, 财政, 税务, 征管, 社保, 医保, 就业服务, 城管, 市政, 公共文化] + negative_keywords: [government software vendor, 政务软件供应商] + + - id: nonprofit_social_organizations + name: 社会组织与公益 + definition: 基金会、协会、商会、NGO、NPO、慈善组织、社区组织、社会服务机构、国际组织和志愿服务。 + priority: 18 + domain_patterns: [redcross.org, unicef.org, wwf.org, gatesfoundation.org] + keywords: [nonprofit, non-profit, NGO, NPO, charity, foundation, association, chamber of commerce, donation, volunteer, social service organization, international organization, 公益, 非营利, NGO, NPO, 慈善, 基金会, 协会, 商会, 捐赠, 志愿者, 社会服务机构, 国际组织] + negative_keywords: [open source foundation, software foundation, 开源基金会] + + - id: holding_mixed_group + name: 控股投资与混合业态 + definition: 综合控股、产业集团、投资平台、孵化器、SPV、壳主体、项目公司、多元化集团和难以单一归类的平台生态型企业。 + priority: 5 + domain_patterns: [] + keywords: [holding company, investment platform, industrial group, conglomerate, incubator, accelerator, SPV, project company, diversified group, ecosystem platform, 控股集团, 综合控股, 产业集团, 投资平台, 孵化器, 加速器, 壳主体, 项目公司, 多元化集团, 平台生态] + negative_keywords: [] + + - id: unknown + name: 其他/未知 + definition: 信息不足、个人站点、空壳站、测试站、工具导航聚合页、临时活动页、落地页或无法可靠判断的网站。 + priority: -100 + domain_patterns: [] + keywords: [] + negative_keywords: []