Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,6 @@ BUCKAROO_DEPS
# Vim
*.swp
*.swo

# clangd cache
/.cache/clangd
10 changes: 10 additions & 0 deletions clickhouse/columns/factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,26 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti

case TypeAst::Tuple: {
std::vector<ColumnRef> columns;
std::vector<std::string> names;

columns.reserve(ast.elements.size());
names.reserve(ast.elements.size());
bool any_named = false;
for (const auto& elem : ast.elements) {
if (auto col = CreateColumnFromAst(elem, settings)) {
columns.push_back(col);
names.push_back(elem.element_name);
if (!elem.element_name.empty()) {
any_named = true;
}
} else {
return nullptr;
}
}

if (any_named) {
return std::make_shared<ColumnTuple>(columns, std::move(names));
}
return std::make_shared<ColumnTuple>(columns);
}

Expand Down
53 changes: 48 additions & 5 deletions clickhouse/columns/tuple.cpp
Original file line number Diff line number Diff line change
@@ -1,33 +1,67 @@
#include "tuple.h"

namespace clickhouse {
namespace {

static std::vector<TypeRef> CollectTypes(const std::vector<ColumnRef>& columns) {
std::vector<TypeRef> CollectTypes(const std::vector<ColumnRef>& columns) {
std::vector<TypeRef> types;
for (const auto& col : columns) {
types.push_back(col->Type());
}
return types;
}

/// Checks whether data of `source_type` may be appended to a column of
/// `destination_type`.
///
/// When both types are tuples they must have the same number of elements and
/// every pair of elements must itself be appendable (checked recursively).
/// Only the element types returned by GetTupleType() are consulted here;
/// field names are not. In every other case the decision is delegated to
/// Type::IsEqual().
bool CanAppendType(const TypeRef& destination_type, const TypeRef& source_type) {
    const bool both_are_tuples = destination_type->GetCode() == Type::Tuple
                              && source_type->GetCode() == Type::Tuple;
    if (!both_are_tuples) {
        return destination_type->IsEqual(source_type);
    }

    const auto dst_items = destination_type->As<TupleType>()->GetTupleType();
    const auto src_items = source_type->As<TupleType>()->GetTupleType();

    const size_t item_count = dst_items.size();
    if (item_count != src_items.size()) {
        return false;
    }

    // Walk both element lists in lockstep and stop at the first mismatch.
    size_t index = 0;
    while (index < item_count && CanAppendType(dst_items[index], src_items[index])) {
        ++index;
    }
    return index == item_count;
}

}

/// Builds a tuple column from its element columns, without field names.
/// The column type is created by Type::CreateTuple() from the element types
/// gathered with CollectTypes(); the element columns themselves are stored
/// in columns_.
ColumnTuple::ColumnTuple(const std::vector<ColumnRef>& columns)
: Column(Type::CreateTuple(CollectTypes(columns)))
, columns_(columns)
{
}

/// Builds a tuple column from its element columns and the given field names.
/// `names` is moved into Type::CreateTuple() together with the element types
/// gathered with CollectTypes(); the element columns themselves are stored
/// in columns_.
ColumnTuple::ColumnTuple(const std::vector<ColumnRef>& columns,
std::vector<std::string> names)
: Column(Type::CreateTuple(CollectTypes(columns), std::move(names)))
, columns_(columns)
{
}

/// Returns the number of element columns held by the tuple.
size_t ColumnTuple::TupleSize() const {
return columns_.size();
}

void ColumnTuple::Reserve(size_t new_cap) {
for (auto& column : columns_) {
column->Reserve(new_cap);
}
}
}

void ColumnTuple::Append(ColumnRef column) {
if (!this->Type()->IsEqual(column->Type())) {
if (!CanAppendType(this->Type(), column->Type())) {
throw ValidationError(
"can't append column of type " + column->Type()->GetName() + " "
"to column type " + this->Type()->GetName());
Expand All @@ -37,6 +71,7 @@ void ColumnTuple::Append(ColumnRef column) {
columns_[ci]->Append((*source_tuple_column)[ci]);
}
}

/// Returns the size reported by the first element column, or 0 when the
/// tuple has no element columns.
size_t ColumnTuple::Size() const {
    if (columns_.empty()) {
        return 0;
    }
    return columns_.front()->Size();
}
Expand All @@ -48,7 +83,11 @@ ColumnRef ColumnTuple::Slice(size_t begin, size_t len) const {
sliced_columns.push_back(column->Slice(begin, len));
}

return std::make_shared<ColumnTuple>(sliced_columns);
const auto& names = this->Type()->As<TupleType>()->GetItemNames();
if (names.empty()) {
return std::make_shared<ColumnTuple>(sliced_columns);
}
return std::make_shared<ColumnTuple>(sliced_columns, names);
}

ColumnRef ColumnTuple::CloneEmpty() const {
Expand All @@ -59,7 +98,11 @@ ColumnRef ColumnTuple::CloneEmpty() const {
result_columns.push_back(column->CloneEmpty());
}

return std::make_shared<ColumnTuple>(result_columns);
const auto& names = this->Type()->As<TupleType>()->GetItemNames();
if (names.empty()) {
return std::make_shared<ColumnTuple>(result_columns);
}
return std::make_shared<ColumnTuple>(result_columns, names);
}

bool ColumnTuple::LoadPrefix(InputStream* input, size_t rows) {
Expand Down
2 changes: 2 additions & 0 deletions clickhouse/columns/tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ namespace clickhouse {
class ColumnTuple : public Column {
public:
ColumnTuple(const std::vector<ColumnRef>& columns);
ColumnTuple(const std::vector<ColumnRef>& columns,
std::vector<std::string> names);

/// Returns count of columns in the tuple.
size_t TupleSize() const;
Expand Down
8 changes: 8 additions & 0 deletions clickhouse/types/type_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ bool TypeAst::operator==(const TypeAst & other) const {
return meta == other.meta
&& code == other.code
&& name == other.name
&& element_name == other.element_name
&& value == other.value
&& value_string == other.value_string
&& std::equal(elements.begin(), elements.end(), other.elements.begin(), other.elements.end());
}

Expand Down Expand Up @@ -167,6 +169,12 @@ bool TypeParser::Parse(TypeAst* type) {
break;
}
case Token::Name:
if (!type_->name.empty()) {
// A second Name token on the same element means the
// previous one was a field name in a named-tuple element
// (e.g. "a" in "Tuple(a Int32, …)").
type_->element_name = std::move(type_->name);
}
type_->meta = GetTypeMeta(token.value);
type_->name = token.value.to_string();
type_->code = GetTypeCode(type_->name);
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/types/type_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ struct TypeAst {
/// Type's name.
/// Need to cache TypeAst, so can't use StringView for name.
std::string name;
/// Name of this element inside its parent (e.g. field name inside a named
/// Tuple). Empty for unnamed elements.
std::string element_name;
Comment on lines +34 to +36
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would make more sense to add a vector here, called all_names, store both the name and the type in it, and keep the name field set to the last value as it currently works.

/// Type's name.
/// Need to cache TypeAst, so can't use StringView for name.
std::string name;
/// List of all names assigned to the element. For example, an element of a
/// named tuple has both a field name and a type name: `Tuple(i Int64, s String)`.
std::vector<std::string> all_names;

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I don't really like that, because it:

  • Creates two places that hold the same information
  • Does not make it clear, through either the type system or the name, that the entries in all_names are quite different kinds of entity (field name vs. type name)

/// Value associated with the node,
/// used for fixed-width types and enum values.
int64_t value = 0;
Expand Down
32 changes: 25 additions & 7 deletions clickhouse/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,9 @@ TypeRef Type::CreateString(size_t n) {
return TypeRef(new FixedStringType(n));
}

TypeRef Type::CreateTuple(const std::vector<TypeRef>& item_types) {
return TypeRef(new TupleType(item_types));
/// Creates a Tuple type from the element types and the field names.
/// `item_names` is moved into the newly constructed TupleType.
TypeRef Type::CreateTuple(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names) {
return TypeRef(new TupleType(item_types, std::move(item_names)));
}

TypeRef Type::CreateEnum8(const std::vector<EnumItem>& enum_items) {
Expand Down Expand Up @@ -442,9 +443,17 @@ FixedStringType::FixedStringType(size_t n) : Type(FixedString), size_(n) {
NullableType::NullableType(TypeRef nested_type) : Type(Nullable), nested_type_(nested_type) {
}

/// class TupleType

TupleType::TupleType(const std::vector<TypeRef>& item_types) : Type(Tuple), item_types_(item_types) {
/// Constructs a Tuple type from its element types and field names.
///
/// An empty `item_names` is accepted as is. A non-empty `item_names` must
/// contain exactly one non-empty name per element type.
///
/// @throws ValidationError if `item_names` is non-empty and its size differs
///         from the number of element types, or if any name is empty.
TupleType::TupleType(const std::vector<TypeRef>& item_types,
                     std::vector<std::string> item_names)
    : Type(Tuple)
    , item_types_(item_types)
    , item_names_(std::move(item_names))
{
    const bool has_names = !item_names_.empty();
    if (has_names && item_names_.size() != item_types_.size()) {
        throw ValidationError("Tuple field names count doesn't match tuple element count");
    }

    for (size_t i = 0; i < item_names_.size(); ++i) {
        if (item_names_[i].empty()) {
            throw ValidationError("Tuple field names can't be empty");
        }
    }
}

/// class LowCardinalityType
Expand All @@ -456,13 +465,22 @@ LowCardinalityType::~LowCardinalityType() {

std::string TupleType::GetName() const {
Comment thread
IyeOnline marked this conversation as resolved.
std::string result("Tuple(");
bool has_complete_names = !item_names_.empty();

if (!item_types_.empty()) {
result += item_types_[0]->GetName();
if (has_complete_names) {
result += item_names_[0] + " " + item_types_[0]->GetName();
} else {
result += item_types_[0]->GetName();
}
}

for (size_t i = 1; i < item_types_.size(); ++i) {
result += ", " + item_types_[i]->GetName();
if (has_complete_names) {
result += ", " + item_names_[i] + " " + item_types_[i]->GetName();
} else {
result += ", " + item_types_[i]->GetName();
}
}

result += ")";
Expand Down
11 changes: 9 additions & 2 deletions clickhouse/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ class Type {

static TypeRef CreateString(size_t n);

static TypeRef CreateTuple(const std::vector<TypeRef>& item_types);
static TypeRef CreateTuple(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names = {});

Comment thread
IyeOnline marked this conversation as resolved.
static TypeRef CreateEnum8(const std::vector<EnumItem>& enum_items);

Expand Down Expand Up @@ -292,15 +293,21 @@ class NullableType : public Type {

/// Describes a Tuple type: an ordered list of element types with optional
/// field names.
class TupleType : public Type {
public:
explicit TupleType(const std::vector<TypeRef>& item_types);
/// Creates a tuple type from its element types and, optionally, field names.
/// `item_names` defaults to empty.
explicit TupleType(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names = {});

/// Returns the type name.
std::string GetName() const;

/// Returns the element types of the tuple. The vector is returned by value.
std::vector<TypeRef> GetTupleType() const { return item_types_; }

/// Field names for named tuples. Same length as GetTupleType() when
/// populated, or empty when the tuple has no field names.
const std::vector<std::string>& GetItemNames() const { return item_names_; }

private:
// Element types, in tuple order.
std::vector<TypeRef> item_types_;
// Field names; see GetItemNames().
std::vector<std::string> item_names_;
};

class LowCardinalityType : public Type {
Expand Down
83 changes: 83 additions & 0 deletions ut/columns_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,89 @@ TEST(ColumnsCase, TupleAppend){
ASSERT_EQ((*tuple2)[1]->As<ColumnString>()->At(0), "2");
}

// Appending succeeds when source and destination carry identical field names.
TEST(ColumnsCase, TupleAppendWithSameFieldNames){
    const std::vector<std::string> field_names{"a", "b"};

    auto source = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()},
        field_names);
    auto destination = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()},
        field_names);

    // Put a single row into the source tuple.
    (*source)[0]->As<ColumnUInt64>()->Append(2u);
    (*source)[1]->As<ColumnString>()->Append("2");

    destination->Append(source);

    ASSERT_EQ((*destination)[0]->As<ColumnUInt64>()->At(0), 2u);
    ASSERT_EQ((*destination)[1]->As<ColumnString>()->At(0), "2");
}

// A tuple without field names can be appended to a tuple that has them.
TEST(ColumnsCase, TupleAppendUnnamedSourceIntoNamedDestination){
    auto unnamed_source = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()});
    auto named_destination = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()},
        std::vector<std::string>{"a", "b"});

    // Put a single row into the source tuple.
    (*unnamed_source)[0]->As<ColumnUInt64>()->Append(2u);
    (*unnamed_source)[1]->As<ColumnString>()->Append("2");

    named_destination->Append(unnamed_source);

    ASSERT_EQ((*named_destination)[0]->As<ColumnUInt64>()->At(0), 2u);
    ASSERT_EQ((*named_destination)[1]->As<ColumnString>()->At(0), "2");
}

// Differing field names do not prevent an append between same-shaped tuples.
TEST(ColumnsCase, TupleAppendWithDifferentFieldNames){
    auto source = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()},
        std::vector<std::string>{"x", "y"});
    auto destination = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()},
        std::vector<std::string>{"a", "b"});

    // Put a single row into the source tuple.
    (*source)[0]->As<ColumnUInt64>()->Append(2u);
    (*source)[1]->As<ColumnString>()->Append("2");

    destination->Append(source);

    ASSERT_EQ((*destination)[0]->As<ColumnUInt64>()->At(0), 2u);
    ASSERT_EQ((*destination)[1]->As<ColumnString>()->At(0), "2");
}

// A tuple with field names can be appended to a tuple that has none.
TEST(ColumnsCase, TupleAppendNamedSourceIntoUnnamedDestination){
    auto named_source = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()},
        std::vector<std::string>{"a", "b"});
    auto unnamed_destination = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()});

    // Put a single row into the source tuple.
    (*named_source)[0]->As<ColumnUInt64>()->Append(2u);
    (*named_source)[1]->As<ColumnString>()->Append("2");

    unnamed_destination->Append(named_source);

    ASSERT_EQ((*unnamed_destination)[0]->As<ColumnUInt64>()->At(0), 2u);
    ASSERT_EQ((*unnamed_destination)[1]->As<ColumnString>()->At(0), "2");
}

// Appending a tuple whose element types differ from the destination's must throw.
TEST(ColumnsCase, TupleAppendRejectsIncompatibleStructure){
    // (UInt64, UInt64) source versus (UInt64, String) destination.
    auto mismatched_source = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnUInt64>()});
    auto destination = std::make_shared<ColumnTuple>(
        std::vector<ColumnRef>{std::make_shared<ColumnUInt64>(), std::make_shared<ColumnString>()});

    EXPECT_THROW(destination->Append(mismatched_source), ValidationError);
}

TEST(ColumnsCase, TupleSlice){
auto tuple1 = std::make_shared<ColumnTuple>(std::vector<ColumnRef>({
std::make_shared<ColumnUInt64>(),
Expand Down
Loading
Loading