Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/iceberg/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,9 @@ nlohmann::json ToJson(const Type& type) {
nlohmann::json ToJson(const Schema& schema) {
nlohmann::json json = ToJson(static_cast<const Type&>(schema));
SetOptionalField(json, kSchemaId, schema.schema_id());
// TODO(gangwu): add identifier-field-ids.
if (!schema.IdentifierFieldIds().empty()) {
json[kIdentifierFieldIds] = schema.IdentifierFieldIds();
}
return json;
}

Expand Down Expand Up @@ -473,8 +475,19 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json) {
return JsonParseError("Schema must be a struct type, but got {}", SafeDumpJson(json));
}

// Parse identifier-field-ids if present
ICEBERG_ASSIGN_OR_RAISE(
auto identifier_field_ids,
GetJsonValueOrDefault<std::vector<int32_t>>(json, kIdentifierFieldIds));

auto& struct_type = static_cast<StructType&>(*type);
return FromStructType(std::move(struct_type), schema_id);
std::vector<SchemaField> fields;
fields.reserve(struct_type.fields().size());
for (auto& field : struct_type.fields()) {
fields.emplace_back(std::move(field));
}
return std::make_unique<Schema>(std::move(fields), schema_id,
std::move(identifier_field_ids));
}

nlohmann::json ToJson(const PartitionField& partition_field) {
Expand Down
3 changes: 2 additions & 1 deletion src/iceberg/test/metadata_serde_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) {
std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
SchemaField::MakeRequired(2, "y", int64()),
SchemaField::MakeRequired(3, "z", int64())},
/*schema_id=*/1);
/*schema_id=*/1,
/*identifier_field_ids=*/std::vector<int32_t>{1, 2});

auto expected_spec_result = PartitionSpec::Make(
/*spec_id=*/0,
Expand Down
47 changes: 47 additions & 0 deletions src/iceberg/test/schema_json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,51 @@ TEST(SchemaJsonTest, RoundTrip) {
ASSERT_EQ(dumped_json, json);
}

TEST(SchemaJsonTest, IdentifierFieldIds) {
// Test schema with identifier-field-ids
constexpr std::string_view json_with_identifiers =
R"({"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"data","required":false,"type":"string"}],"identifier-field-ids":[1],"schema-id":1,"type":"struct"})";

// Deserialize
auto from_json_result = SchemaFromJson(nlohmann::json::parse(json_with_identifiers));
ASSERT_TRUE(from_json_result.has_value());
auto schema = std::move(from_json_result.value());
ASSERT_EQ(schema->fields().size(), 2);
ASSERT_EQ(schema->schema_id(), 1);
ASSERT_EQ(schema->IdentifierFieldIds().size(), 1);
ASSERT_EQ(schema->IdentifierFieldIds()[0], 1);

// Serialize back
auto serialized_json = ToJson(*schema).dump();
ASSERT_EQ(serialized_json, json_with_identifiers);

// Test schema without identifier-field-ids
constexpr std::string_view json_without_identifiers =
R"({"fields":[{"id":1,"name":"id","required":true,"type":"int"},{"id":2,"name":"name","required":false,"type":"string"}],"schema-id":1,"type":"struct"})";

auto from_json_result2 =
SchemaFromJson(nlohmann::json::parse(json_without_identifiers));
ASSERT_TRUE(from_json_result2.has_value());
auto schema2 = std::move(from_json_result2.value());
ASSERT_TRUE(schema2->IdentifierFieldIds().empty());

// Serialize - should not include identifier-field-ids field
auto serialized_json2 = ToJson(*schema2).dump();
ASSERT_EQ(serialized_json2, json_without_identifiers);

// Test schema with multiple identifier fields
constexpr std::string_view json_multi_identifiers =
R"({"fields":[{"id":1,"name":"user_id","required":true,"type":"long"},{"id":2,"name":"org_id","required":true,"type":"long"},{"id":3,"name":"data","required":false,"type":"string"}],"identifier-field-ids":[1,2],"schema-id":2,"type":"struct"})";

auto from_json_result3 = SchemaFromJson(nlohmann::json::parse(json_multi_identifiers));
ASSERT_TRUE(from_json_result3.has_value());
auto schema3 = std::move(from_json_result3.value());
ASSERT_EQ(schema3->IdentifierFieldIds().size(), 2);
ASSERT_EQ(schema3->IdentifierFieldIds()[0], 1);
ASSERT_EQ(schema3->IdentifierFieldIds()[1], 2);

auto serialized_json3 = ToJson(*schema3).dump();
ASSERT_EQ(serialized_json3, json_multi_identifiers);
}

} // namespace iceberg
Loading