From 3773f8cff7113b4882aaacda59697286bee5fbe9 Mon Sep 17 00:00:00 2001 From: Simonov Denis Date: Fri, 22 May 2026 12:08:02 +0300 Subject: [PATCH 1/3] Implementation of Hypothetical-Set Aggregate Functions --- src/dsql/AggNodes.cpp | 525 +++++++++++++++++++++++++++- src/dsql/AggNodes.h | 78 +++++ src/dsql/Nodes.h | 5 + src/dsql/parse.y | 25 ++ src/include/firebird/impl/msg/jrd.h | 1 + src/include/gen/Firebird.pas | 1 + 6 files changed, 630 insertions(+), 5 deletions(-) diff --git a/src/dsql/AggNodes.cpp b/src/dsql/AggNodes.cpp index 0dbea8d1cca..caca4d9d981 100644 --- a/src/dsql/AggNodes.cpp +++ b/src/dsql/AggNodes.cpp @@ -85,11 +85,14 @@ DmlNode* AggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR count = csb->csb_blr_reader.getByte(); - NodeRefsHolder holder(pool); - node->getChildren(holder, false); + if (!node->isVariadicArgs()) + { + NodeRefsHolder holder(pool); + node->getChildren(holder, false); - if (count != holder.refs.getCount()) - PAR_error(csb, Arg::Gds(isc_funmismat) << name); + if (count != holder.refs.getCount()) + PAR_error(csb, Arg::Gds(isc_funmismat) << name); + } node->parseArgs(tdbb, csb, count); @@ -1285,7 +1288,6 @@ string PercentileAggNode::internalPrint(NodePrinter& printer) const return "PercentileAggNode"; } - void PercentileAggNode::aggInit(thread_db* tdbb, Request* request) const { AggNode::aggInit(tdbb, request); @@ -1496,6 +1498,519 @@ AggNode* PercentileAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ return node; } +//-------------------- + + +static AggNode::RegisterFactory1 rankAggInfo( + "RANK_AGG", RankAggNode::TYPE_RANK); +static AggNode::RegisterFactory1 denseRankAggInfo( + "DENSE_RANK_AGG", RankAggNode::TYPE_DENSE_RANK); +static AggNode::RegisterFactory1 percentRankAggInfo( + "PERCENT_RANK_AGG", RankAggNode::TYPE_PERCENT_RANK); +static AggNode::RegisterFactory1 cumeDistAggInfo( + "CUME_DIST_AGG", RankAggNode::TYPE_CUME_DIST); + +AggNode::RegisterFactory1& 
getRankAggInfo(RankAggNode::RankType type) +{ + switch (type) + { + case RankAggNode::TYPE_RANK: + return rankAggInfo; + + case RankAggNode::TYPE_DENSE_RANK: + return denseRankAggInfo; + + case RankAggNode::TYPE_PERCENT_RANK: + return percentRankAggInfo; + + case RankAggNode::TYPE_CUME_DIST: + default: + return cumeDistAggInfo; + } +} + +const char* getRankAggName(RankAggNode::RankType type) +{ + switch (type) + { + case RankAggNode::TYPE_RANK: + return "RANK"; + + case RankAggNode::TYPE_DENSE_RANK: + return "DENSE_RANK"; + + case RankAggNode::TYPE_PERCENT_RANK: + return "PERCENT_RANK"; + + case RankAggNode::TYPE_CUME_DIST: + default: + return "CUME_DIST"; + } +} + +RankAggNode::RankAggNode(MemoryPool& pool, RankType aType, + ValueListNode* aArgList, ValueListNode* aOrderClause) + : AggNode(pool, + getRankAggInfo(aType), + false, false, nullptr), + type(aType), + valueListArg(aArgList), + dsqlOrderClause(aOrderClause) +{ + +} + +void RankAggNode::parseArgs(thread_db* tdbb, CompilerScratch* csb, unsigned count) +{ + valueListArg = PAR_args(tdbb, csb, count, count); + + if (csb->csb_blr_reader.peekByte() == blr_within_group_order) + { + csb->csb_blr_reader.getByte(); // skip blr_within_group_order + if (const auto count = csb->csb_blr_reader.getByte()) + sort = PAR_sort_internal(tdbb, csb, true, count); + } +} + +bool RankAggNode::dsqlMatch(DsqlCompilerScratch* dsqlScratch, const ExprNode* other, bool ignoreMapCast) const +{ + if (!AggNode::dsqlMatch(dsqlScratch, other, ignoreMapCast)) + return false; + + const RankAggNode* o = nodeAs(other); + fb_assert(o); + return PASS1_node_match(dsqlScratch, dsqlOrderClause, o->dsqlOrderClause, ignoreMapCast); +} + +void RankAggNode::make(DsqlCompilerScratch* /*dsqlScratch*/, dsc* desc) +{ + switch (type) + { + case RankAggNode::TYPE_RANK: + case RankAggNode::TYPE_DENSE_RANK: + desc->makeInt64(0); + break; + + default: + desc->makeDouble(); + break; + } +} + +void RankAggNode::genBlr(DsqlCompilerScratch* dsqlScratch) +{ + 
AggNode::genBlr(dsqlScratch); + + if (dsqlOrderClause) + GEN_sort(dsqlScratch, blr_within_group_order, dsqlOrderClause); +} + +void RankAggNode::getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc) +{ + switch (type) + { + case RankAggNode::TYPE_RANK: + case RankAggNode::TYPE_DENSE_RANK: + desc->makeInt64(0); + break; + + default: + desc->makeDouble(); + break; + } +} + +void RankAggNode::makeSortDesc(thread_db*, CompilerScratch*, dsc* desc) +{ + desc->makeInt64(0); +} + +ValueExprNode* RankAggNode::copy(thread_db* tdbb, NodeCopier& copier) const +{ + RankAggNode* node = FB_NEW_POOL(*tdbb->getDefaultPool()) RankAggNode(*tdbb->getDefaultPool(), type); + + node->nodScale = nodScale; + node->valueListArg = copier.copy(tdbb, valueListArg); + node->sort = sort->copy(tdbb, copier); + + return node; +} + +AggNode* RankAggNode::pass2(thread_db* tdbb, CompilerScratch* csb) +{ + AggNode::pass2(tdbb, csb); + + // impure area for calculate + impureArgsOffset = csb->allocImpure(); + m_impureOrder = csb->allocImpure(); + + return this; +} + +string RankAggNode::internalPrint(NodePrinter& printer) const +{ + AggNode::internalPrint(printer); + + NODE_PRINT(printer, type); + NODE_PRINT(printer, valueListArg); + + return "RankAggNode"; +} + +bool RankAggNode::dsqlInvalidReferenceFinder(InvalidReferenceFinder& visitor) +{ + bool invalid = false; + + if (!visitor.insideOwnMap) + { + // We are not in an aggregate from the same scope_level so + // check for valid fields inside this aggregate + invalid |= ExprNode::dsqlInvalidReferenceFinder(visitor); + } + + if (!visitor.insideHigherMap) + { + NodeRefsHolder holder(visitor.dsqlScratch->getPool()); + getChildren(holder, true); + + for (auto i : holder.refs) + { + // If there's another aggregate with the same scope_level or + // an higher one then it's a invalid aggregate, because + // aggregate-functions from the same context can't + // be part of each other. 
+ if (Aggregate2Finder::find(visitor.dsqlScratch->getPool(), visitor.context->ctx_scope_level, + FIELD_MATCH_TYPE_EQUAL, false, *i)) + { + // Nested aggregate functions are not allowed + ERRD_post(Arg::Gds(isc_sqlerr) << Arg::Num(-104) << + Arg::Gds(isc_dsql_agg_nested_err)); + } + } + + if (visitor.visit(**holder.refs.begin())) + { + // The percent argument must be constant within group + ERRD_post(Arg::Gds(isc_sqlerr) << Arg::Num(-104) << + Arg::Gds(isc_argmustbe_const_within_group) << + Arg::Str(getRankAggName(type))); + } + } + + return invalid; +} + +int RankAggNode::lookForChange(thread_db* tdbb, Request* request, UCHAR* data, impure_value* values) const +{ + unsigned cnt = 0; + for (auto desc : asb->descOrder) + { + int sortDirection = 1; + int nullsPlacement = 1; + + unsigned index = cnt++; + + if (sort->direction[index] == ORDER_DESC) + sortDirection = -1; + + if (sort->getEffectiveNullOrder(index) == NULLS_LAST) + nullsPlacement = -1; + + desc.dsc_address = data + (IPTR) desc.dsc_address; + + impure_value* vtemp = &values[index]; + + int n = 0; + + if (!vtemp->vlu_desc.dsc_address) + return 1 * nullsPlacement; + else if ((n = MOV_compare(tdbb, &desc, &vtemp->vlu_desc)) != 0) + return n * sortDirection; + } + + return 0; +} + +void RankAggNode::cacheValues(thread_db* tdbb, Request* request, UCHAR* data, impure_value* values) const +{ + unsigned cnt = 0; + for (auto desc : asb->descOrder) + { + unsigned index = cnt++; + + desc.dsc_address = data + (IPTR) desc.dsc_address; + + impure_value* target = &values[index]; + + EVL_make_value(tdbb, &desc, target); + } +} + +void RankAggNode::aggInit(thread_db* tdbb, Request* request) const +{ + AggNode::aggInit(tdbb, request); + + Impure* impureOrder = request->getImpure(m_impureOrder); + impureOrder->vlux_count = 0; + impureOrder->vlux_rank = 0; + impureOrder->vlux_dense_rank = 0; + + unsigned impureCount = sort ? 
sort->expressions.getCount() : 0; + if (!impureOrder->orderValues && impureCount > 0) + { + impureOrder->orderValues = FB_NEW_POOL(*tdbb->getDefaultPool()) impure_value[impureCount]; + memset(impureOrder->orderValues, 0, sizeof(impure_value) * impureCount); + } + + impure_value_ex* impure = request->getImpure(impureOffset); + switch (type) + { + case RankAggNode::TYPE_RANK: + case RankAggNode::TYPE_DENSE_RANK: + impure->make_int64(1); + break; + + default: + impure->make_double(1); + break; + } + impure->vlux_count = 1; + + impure_value_ex* impureArgs = request->getImpure(impureArgsOffset); + impureArgs->vlu_desc.dsc_dtype = 0; + impureArgs->vlux_count = 0; +} + +void RankAggNode::aggFinish(thread_db* tdbb, Request* request) const +{ + AggNode::aggFinish(tdbb, request); + Impure* impureOrder = request->getImpure(m_impureOrder); + if (impureOrder->orderValues) + { + delete[] impureOrder->orderValues; + impureOrder->orderValues = nullptr; + } +} + +bool RankAggNode::aggPass(thread_db* tdbb, Request* request) const +{ + // Put function argument to sort + impure_value_ex* impure = request->getImpure(impureOffset); + if (impure->vlux_count == 1 && sort) // first call to aggPass() + { + if (valueListArg->items.getCount() != sort->expressions.getCount()) + ERRD_post(Arg::Gds(isc_hypfun_args_non_equal_sort_item) << Arg::Str(getRankAggName(type))); + + NestConst findArg = MAKE_const_sint64(1, 0); + dsc* findValueDesc = EVL_expr(tdbb, request, findArg); + if (!findValueDesc) + return false; + + fb_assert(asb); + // "Put" the value to sort. 
+ impure_agg_sort* asbImpure = request->getImpure(asb->impure); + UCHAR* data = nullptr; + asbImpure->iasb_sort->put(tdbb, reinterpret_cast(&data)); + + MOVE_CLEAR(data, asb->length); + + auto descOrder = asb->descOrder.begin(); + auto keyItem = asb->keyItems.begin(); + + for (auto& nodeArg : valueListArg->items) + { + dsc toDesc = *(descOrder++); + toDesc.dsc_address = data + (IPTR) toDesc.dsc_address; + if (const auto fromDsc = EVL_expr(tdbb, request, nodeArg)) + { + if (IS_INTL_DATA(fromDsc)) + { + INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()), + fromDsc, &toDesc, INTL_KEY_UNIQUE); + } + else + MOV_move(tdbb, fromDsc, &toDesc); + } + else + *(data + keyItem->getSkdOffset()) = TRUE; + + // The first key for NULLS FIRST/LAST, the second key for the sorter + keyItem += 2; + } + + dsc toDesc = asb->desc; + toDesc.dsc_address = data + (IPTR) toDesc.dsc_address; + MOV_move(tdbb, findValueDesc, &toDesc); + } + + // Put WITHIN GROUP arguments to sort + NestConst otherArg = MAKE_const_sint64(0, 0); + dsc* desc = EVL_expr(tdbb, request, otherArg); + if (!desc) + return false; + + if (sort) + { + impure->vlux_count++; + + fb_assert(asb); + // "Put" the value to sort. 
+ impure_agg_sort* asbImpure = request->getImpure(asb->impure); + UCHAR* data = nullptr; + asbImpure->iasb_sort->put(tdbb, reinterpret_cast(&data)); + + MOVE_CLEAR(data, asb->length); + + auto descOrder = asb->descOrder.begin(); + auto keyItem = asb->keyItems.begin(); + + for (auto& nodeOrder : sort->expressions) + { + dsc toDesc = *(descOrder++); + toDesc.dsc_address = data + (IPTR) toDesc.dsc_address; + if (const auto fromDsc = EVL_expr(tdbb, request, nodeOrder)) + { + if (IS_INTL_DATA(fromDsc)) + { + INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()), + fromDsc, &toDesc, INTL_KEY_UNIQUE); + } + else + MOV_move(tdbb, fromDsc, &toDesc); + } + else + *(data + keyItem->getSkdOffset()) = TRUE; + + // The first key for NULLS FIRST/LAST, the second key for the sorter + keyItem += 2; + } + + dsc toDesc = asb->desc; + toDesc.dsc_address = data + (IPTR) toDesc.dsc_address; + MOV_move(tdbb, desc, &toDesc); + + return true; + } + + return true; +} + +dsc* RankAggNode::execute(thread_db* tdbb, Request* request) const +{ + impure_value_ex* impure = request->getImpure(impureOffset); + + impure_value_ex* argsImpure = request->getImpure(impureArgsOffset); + + if (sort) + { + Impure* const impureOrder = request->getImpure(m_impureOrder); + + impure_agg_sort* asbImpure = request->getImpure(asb->impure); + dsc desc = asb->desc; + + // Sort the values already "put" to sort. + asbImpure->iasb_sort->sort(tdbb); + + // Now get the sorted/projected values and compute the aggregate. + bool find = false; + while (true) + { + UCHAR* data = nullptr; + asbImpure->iasb_sort->get(tdbb, reinterpret_cast(&data)); + + if (!data) + { + // We are done, close the sort. 
+ delete asbImpure->iasb_sort; + asbImpure->iasb_sort = NULL; + break; + } + + if (impureOrder->vlux_count++ == 0) + { + impureOrder->vlux_dense_rank = 1; + impureOrder->vlux_rank = 1; + cacheValues(tdbb, request, data, impureOrder->orderValues); + } + else if (lookForChange(tdbb, request, data, impureOrder->orderValues)) + { + impureOrder->vlux_dense_rank++; + impureOrder->vlux_rank = impureOrder->vlux_count; + cacheValues(tdbb, request, data, impureOrder->orderValues); + find = false; + } + + desc.dsc_address = data + (IPTR) asb->desc.dsc_address; + EVL_make_value(tdbb, &desc, argsImpure); + find = find || (argsImpure->vlu_misc.vlu_int64 == 1); + + if (find) + aggPass(tdbb, request, &desc); + + } + } + + return aggExecute(tdbb, request); +} + +void RankAggNode::aggPass(thread_db* tdbb, Request* request, dsc* /* desc */) const +{ + impure_value_ex* impure = request->getImpure(impureOffset); + Impure* const impureOrder = request->getImpure(m_impureOrder); + switch (type) + { + case RankAggNode::TYPE_RANK: + impure->make_int64(impureOrder->vlux_rank); + break; + + case RankAggNode::TYPE_DENSE_RANK: + impure->make_int64(impureOrder->vlux_dense_rank); + break; + + case RankAggNode::TYPE_PERCENT_RANK: + impure->make_double(impureOrder->vlux_rank - 1); + break; + + case RankAggNode::TYPE_CUME_DIST: + impure->make_double(impureOrder->vlux_count); + break; + + default: + fb_assert(false); + break; + } +} + +dsc* RankAggNode::aggExecute(thread_db* tdbb, Request* request) const +{ + impure_value_ex* impure = request->getImpure(impureOffset); + + if (!impure->vlux_count || !impure->vlu_desc.dsc_dtype) + return nullptr; + + if (type == RankAggNode::TYPE_PERCENT_RANK) + { + double percent_rank = (impure->vlux_count > 1) ? 
impure->vlu_misc.vlu_double / (impure->vlux_count - 1) : 0; + impure->make_double(percent_rank); + } + + if (type == RankAggNode::TYPE_CUME_DIST) + { + double percent_rank = impure->vlu_misc.vlu_double / impure->vlux_count; + impure->make_double(percent_rank); + } + + return &impure->vlu_desc; +} + +AggNode* RankAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ +{ + AggNode* node = FB_NEW_POOL(dsqlScratch->getPool()) RankAggNode(dsqlScratch->getPool(), type, + doDsqlPass(dsqlScratch, valueListArg), + doDsqlPass(dsqlScratch, dsqlOrderClause)); + + return node; +} //-------------------- diff --git a/src/dsql/AggNodes.h b/src/dsql/AggNodes.h index defcd89e3e4..3fad27d0b42 100644 --- a/src/dsql/AggNodes.h +++ b/src/dsql/AggNodes.h @@ -197,6 +197,84 @@ class PercentileAggNode final : public AggNode ULONG percentileImpureOffset = 0; }; +class RankAggNode final : public AggNode +{ +public: + enum RankType : UCHAR + { + TYPE_RANK, + TYPE_DENSE_RANK, + TYPE_PERCENT_RANK, + TYPE_CUME_DIST + }; + + struct Impure + { + impure_value* orderValues; + int64_t vlux_count; + int64_t vlux_rank; + int64_t vlux_dense_rank; + }; + + explicit RankAggNode(MemoryPool& pool, RankType aType, + ValueListNode* aArgList = nullptr, ValueListNode* aOrderClause = nullptr); + + void parseArgs(thread_db* tdbb, CompilerScratch* csb, unsigned count) override; + + unsigned getCapabilities() const override + { + return 0; + } + + bool isVariadicArgs() const override + { + return true; + } + + bool dsqlMatch(DsqlCompilerScratch* dsqlScratch, const ExprNode* other, bool ignoreMapCast) const override; + + void getChildren(NodeRefsHolder& holder, bool dsql) const override + { + ValueExprNode::getChildren(holder, dsql); + + for (FB_SIZE_T i = 0; i < valueListArg->items.getCount(); i++) + holder.add(valueListArg->items[i]); + } + + bool dsqlInvalidReferenceFinder(InvalidReferenceFinder& visitor) override; + + Firebird::string internalPrint(NodePrinter& printer) const override; + void 
make(DsqlCompilerScratch* dsqlScratch, dsc* desc) override; + void genBlr(DsqlCompilerScratch* dsqlScratch) override; + + void makeSortDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc) override; + + void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc) override; + ValueExprNode* copy(thread_db* tdbb, NodeCopier& copier) const override; + AggNode* pass2(thread_db* tdbb, CompilerScratch* csb) override; + + int lookForChange(thread_db* tdbb, Request* request, UCHAR* data, impure_value* values) const; + void cacheValues(thread_db* tdbb, Request* request, UCHAR* data, impure_value* values) const; + + void aggInit(thread_db* tdbb, Request* request) const override; + void aggFinish(thread_db* tdbb, Request* request) const override; + bool aggPass(thread_db* tdbb, Request* request) const override; + dsc* execute(thread_db* tdbb, Request* request) const override; + + void aggPass(thread_db* tdbb, Request* request, dsc* desc) const override; + dsc* aggExecute(thread_db* tdbb, Request* request) const override; + +protected: + AggNode* dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ override; + +private: + const RankType type; + NestConst valueListArg; + NestConst dsqlOrderClause; + ULONG impureArgsOffset = 0; + ULONG m_impureOrder = 0; +}; + class CountAggNode final : public AggNode { public: diff --git a/src/dsql/Nodes.h b/src/dsql/Nodes.h index 778343b2bfd..e731c44e530 100644 --- a/src/dsql/Nodes.h +++ b/src/dsql/Nodes.h @@ -1151,6 +1151,11 @@ class AggNode : public TypedNode virtual void makeSortDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc); + virtual bool isVariadicArgs() const + { + return false; + } + virtual void aggInit(thread_db* tdbb, Request* request) const = 0; // pure, but defined virtual void aggFinish(thread_db* tdbb, Request* request) const; virtual bool aggPass(thread_db* tdbb, Request* request) const; diff --git a/src/dsql/parse.y b/src/dsql/parse.y index 8d66220d367..6285164701b 100644 --- a/src/dsql/parse.y +++ 
b/src/dsql/parse.y @@ -8723,6 +8723,7 @@ long_integer %type function function : aggregate_function { $$ = $1; } + | hypothetical_set_function { $$ = $1; } | non_aggregate_function | over_clause ; @@ -8909,6 +8910,30 @@ within_group_specification : WITHIN GROUP '(' order_clause ')' { $$ = $4; } ; +%type hypothetical_set_function +hypothetical_set_function + : hypothetical_set_function_prefix + | hypothetical_set_function_prefix FILTER '(' WHERE search_condition ')' + { + $$ = $1; + + fb_assert($$->arg); + $$->arg = newNode($5, $$->arg, NullNode::instance()); + } + ; + +%type hypothetical_set_function_prefix +hypothetical_set_function_prefix + : DENSE_RANK '(' value_list ')' within_group_specification + { $$ = newNode(RankAggNode::TYPE_DENSE_RANK, $3, $5); } + | RANK '(' value_list ')' within_group_specification + { $$ = newNode(RankAggNode::TYPE_RANK, $3, $5); } + | PERCENT_RANK '(' value_list ')' within_group_specification + { $$ = newNode(RankAggNode::TYPE_PERCENT_RANK, $3, $5); } + | CUME_DIST '(' value_list ')' within_group_specification + { $$ = newNode(RankAggNode::TYPE_CUME_DIST, $3, $5); } + ; + %type window_function window_function : DENSE_RANK '(' ')' diff --git a/src/include/firebird/impl/msg/jrd.h b/src/include/firebird/impl/msg/jrd.h index 340fd1fafdb..227f4c25242 100644 --- a/src/include/firebird/impl/msg/jrd.h +++ b/src/include/firebird/impl/msg/jrd.h @@ -1016,3 +1016,4 @@ FB_IMPL_MSG(JRD, 1013, not_defined_constant, -901, "42", "000", "The constant @1 FB_IMPL_MSG(JRD, 1014, const_name, -901, "42", "000", "CONSTANT @1") FB_IMPL_MSG(JRD, 1015, private_table, -901, "42", "000", "Table @1 is private to package @2") FB_IMPL_MSG(JRD, 1016, temp_space_invalid_pos, -901, "HY", "000", "Invalid position to read/write in a temporary file (positon: @1, size: @2)") +FB_IMPL_MSG(JRD, 1017, hypfun_args_non_equal_sort_item, -833, "42", "000", "Number of arguments hypothetical-set aggregate function @1 must match number of sort items in WITHIN GROUP") diff --git 
a/src/include/gen/Firebird.pas b/src/include/gen/Firebird.pas index 5303fde2004..41d4657082a 100644 --- a/src/include/gen/Firebird.pas +++ b/src/include/gen/Firebird.pas @@ -5973,6 +5973,7 @@ IPerformanceStatsImpl = class(IPerformanceStats) isc_const_name = 335545334; isc_private_table = 335545335; isc_temp_space_invalid_pos = 335545336; + isc_hypfun_args_non_equal_sort_item = 335545337; isc_gfix_db_name = 335740929; isc_gfix_invalid_sw = 335740930; isc_gfix_incmp_sw = 335740932; From 6ffdf3b97737a297bb2fafd1eeb6f92ac94371a0 Mon Sep 17 00:00:00 2001 From: Simonov Denis Date: Fri, 22 May 2026 12:40:16 +0300 Subject: [PATCH 2/3] Documentation added --- .../README.hypothetical_set_agg_functions.md | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 doc/sql.extensions/README.hypothetical_set_agg_functions.md diff --git a/doc/sql.extensions/README.hypothetical_set_agg_functions.md b/doc/sql.extensions/README.hypothetical_set_agg_functions.md new file mode 100644 index 00000000000..4b56e1e556e --- /dev/null +++ b/doc/sql.extensions/README.hypothetical_set_agg_functions.md @@ -0,0 +1,80 @@ +# Hypothetical-Set Aggregate Functions + +Hypothetical-set functions are the ranking and rank distribution functions that you already know as window functions, applied here to groups: each one is evaluated for a hypothetical row whose values are passed as the function arguments. + +There are two ordered-set ranking functions: `RANK` and `DENSE_RANK`. There are also two ordered-set rank distribution functions: `PERCENT_RANK` and `CUME_DIST`. Window functions and ordered-set functions differ in where the ordering is applied: the former order rows within a window, while the latter order rows within a group. 
+ +Syntax of hypothetical-set aggregate functions: + +``` +<hypothetical set function> ::= + RANK(<args>) WITHIN GROUP (ORDER BY <sorted_args>) + | DENSE_RANK(<args>) WITHIN GROUP (ORDER BY <sorted_args>) + | PERCENT_RANK(<args>) WITHIN GROUP (ORDER BY <sorted_args>) + | CUME_DIST(<args>) WITHIN GROUP (ORDER BY <sorted_args>) +``` + +Each of the "hypothetical-set" aggregates is associated with a window function of the same name. In each case, the aggregate's result is the value that the associated window function would have returned for the "hypothetical" row constructed from args, if such a row had been added to the sorted group of rows represented by the sorted_args. For each of these functions, the list of direct arguments given in args must match the number and types of the aggregated arguments given in sorted_args. Unlike most built-in aggregates, these aggregates are not strict, that is they do not drop input rows containing nulls. `NULL` values sort according to the rule specified in the `ORDER BY` clause. + +The values of the input arguments must be constant within each group. + +The number of input arguments of the hypothetical set functions must match the number of sorting elements in the `WITHIN GROUP (ORDER BY <sorted_args>)` clause. 
+ +An example of using hypothetical set functions without grouping: + +```sql +SELECT + RANK(35500) WITHIN GROUP(ORDER BY E.SALARY) AS R1, + DENSE_RANK(35500) WITHIN GROUP(ORDER BY E.SALARY) AS R2, + PERCENT_RANK(35500) WITHIN GROUP(ORDER BY E.SALARY) AS PR, + CUME_DIST(35500) WITHIN GROUP(ORDER BY E.SALARY) AS CD +FROM EMPLOYEE E; +``` + +``` + R1 R2 PR CD +===================== ===================== ======================= ======================= + 9 8 0.1904761904761905 0.2093023255813954 +``` + +An example of using hypothetical set functions with grouping: + +```sql +SELECT + E.JOB_COUNTRY, + RANK(35500) WITHIN GROUP(ORDER BY E.SALARY) AS R1, + DENSE_RANK(35500) WITHIN GROUP(ORDER BY E.SALARY) AS R2, + PERCENT_RANK(35500) WITHIN GROUP(ORDER BY E.SALARY) AS PR, + CUME_DIST(35500) WITHIN GROUP(ORDER BY E.SALARY) AS CD +FROM EMPLOYEE E +GROUP BY E.JOB_COUNTRY; +``` + +``` +JOB_COUNTRY R1 R2 PR CD +=============== ===================== ===================== ======================= ======================= +Canada 1 1 0.000000000000000 0.5000000000000000 +England 3 3 0.6666666666666666 0.7500000000000000 +France 1 1 0.000000000000000 0.5000000000000000 +Italy 2 2 1.000000000000000 1.000000000000000 +Japan 1 1 0.000000000000000 0.3333333333333333 +Switzerland 1 1 0.000000000000000 0.5000000000000000 +USA 6 5 0.1515151515151515 0.1764705882352941 +``` + +An example of using hypothetical set functions with two arguments: + +```sql +SELECT + RANK('England', 35500) WITHIN GROUP(ORDER BY E.JOB_COUNTRY, E.SALARY) AS R1, + DENSE_RANK('England', 35500) WITHIN GROUP(ORDER BY E.JOB_COUNTRY, E.SALARY) AS R2, + PERCENT_RANK('England', 35500) WITHIN GROUP(ORDER BY E.JOB_COUNTRY, E.SALARY) AS PR, + CUME_DIST('England', 35500) WITHIN GROUP(ORDER BY E.JOB_COUNTRY, E.SALARY) AS CD +FROM EMPLOYEE E; +``` + +``` + R1 R2 PR CD +===================== ===================== ======================= ======================= + 4 4 0.07142857142857142 0.09302325581395349 +``` From 
d896f6bb1534f7459e58df7fb3f61f321a82bbf8 Mon Sep 17 00:00:00 2001 From: Simonov Denis Date: Fri, 22 May 2026 13:05:55 +0300 Subject: [PATCH 3/3] correction documentation --- doc/sql.extensions/README.hypothetical_set_agg_functions.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/sql.extensions/README.hypothetical_set_agg_functions.md b/doc/sql.extensions/README.hypothetical_set_agg_functions.md index 4b56e1e556e..5f6ecd1e648 100644 --- a/doc/sql.extensions/README.hypothetical_set_agg_functions.md +++ b/doc/sql.extensions/README.hypothetical_set_agg_functions.md @@ -14,11 +14,9 @@ Syntax of hypothetical-set aggregate functions: | CUME_DIST(<args>) WITHIN GROUP (ORDER BY <sorted_args>) ``` -Each of the "hypothetical-set" aggregates is associated with a window function of the same name. In each case, the aggregate's result is the value that the associated window function would have returned for the "hypothetical" row constructed from args, if such a row had been added to the sorted group of rows represented by the sorted_args. For each of these functions, the list of direct arguments given in args must match the number and types of the aggregated arguments given in sorted_args. Unlike most built-in aggregates, these aggregates are not strict, that is they do not drop input rows containing nulls. `NULL` values sort according to the rule specified in the `ORDER BY` clause. +Each of the "hypothetical-set" aggregates is associated with a window function of the same name. In each case, the aggregate's result is the value that the associated window function would have returned for the "hypothetical" row constructed from `args`, if such a row had been added to the sorted group of rows represented by the `sorted_args`. For each of these functions, the list of direct arguments given in `args` must match the number and types of the aggregated arguments given in `sorted_args`. 
Unlike most built-in aggregates, these aggregates are not strict, that is, they do not drop input rows containing NULLs. `NULL` values sort according to the rule specified in the `ORDER BY` clause. -The values of the input arguments must be constant within each group. - -The number of input arguments of the hypothetical set functions must match the number of sorting elements in the `WITHIN GROUP (ORDER BY <sorted_args>)` clause. +The values of the input arguments `args` must be constant within each group. An example of using hypothetical set functions without grouping: