欢迎您访问365答案网,请分享给你的朋友!
生活常识 学习资料

iceberg系列(2):存储详解-partition-2

时间:2023-05-13

下面查看V2格式下表的格式
创建一张表

CREATE TABLE local.db.sampleV2 ( id bigint, data string, category string) USING iceberg PARTITIONED BY (category) TBLPROPERTIES ('format-version'='2');

查看表结构文件:

{ "format-version" : 2, "table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809", "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2", "last-sequence-number" : 0, "last-updated-ms" : 1642173468635, "last-column-id" : 3, "current-schema-id" : 0, "schemas" : [ { "type" : "struct", "schema-id" : 0, "fields" : [ { "id" : 1, "name" : "id", "required" : false, "type" : "long" }, { "id" : 2, "name" : "data", "required" : false, "type" : "string" }, { "id" : 3, "name" : "category", "required" : false, "type" : "string" } ] } ], "default-spec-id" : 0, "partition-specs" : [ { "spec-id" : 0, "fields" : [ { "name" : "category", "transform" : "identity", "source-id" : 3, "field-id" : 1000 } ] } ], "last-partition-id" : 1000, "default-sort-order-id" : 0, "sort-orders" : [ { "order-id" : 0, "fields" : [ ] } ], "properties" : { "owner" : "liliwei" }, "current-snapshot-id" : -1, "snapshots" : [ ], "snapshot-log" : [ ], "metadata-log" : [ ]}

插入数据:

insert into local.db.sampleV2 values(1,'a','1');

查看manifest list文件

(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── v1.metadata.json
├── v2.metadata.json
└── version-hint.text

0 directories, 5 files

java -jar ~/plat/tools/avro-tools-1.10.2.jar tojson snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro

{ "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro", "manifest_length": 6833, "partition_spec_id": 0, "content": 0, "sequence_number": 1, "min_sequence_number": 1, "added_snapshot_id": 1504400791559924261, "added_data_files_count": 1, "existing_data_files_count": 0, "deleted_data_files_count": 0, "added_rows_count": 1, "existing_rows_count": 0, "deleted_rows_count": 0, "partitions": { "array": [{ "contains_null": false, "contains_nan": { "boolean": false }, "lower_bound": { "bytes": "1" }, "upper_bound": { "bytes": "1" } }] }}

进行变更

ALTER TABLE local.db.sampleV2 ADD PARTITION FIELD data;

查看目录结构 :

(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
└── version-hint.text

0 directories, 6 files

查看v3内容:

{ "format-version" : 2, "table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809", "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2", "last-sequence-number" : 1, "last-updated-ms" : 1642176605638, "last-column-id" : 3, "current-schema-id" : 0, "schemas" : [ { "type" : "struct", "schema-id" : 0, "fields" : [ { "id" : 1, "name" : "id", "required" : false, "type" : "long" }, { "id" : 2, "name" : "data", "required" : false, "type" : "string" }, { "id" : 3, "name" : "category", "required" : false, "type" : "string" } ] } ], "default-spec-id" : 1, "partition-specs" : [ { "spec-id" : 0, "fields" : [ { "name" : "category", "transform" : "identity", "source-id" : 3, "field-id" : 1000 } ] }, { "spec-id" : 1, "fields" : [ { "name" : "category", "transform" : "identity", "source-id" : 3, "field-id" : 1000 }, { "name" : "data", "transform" : "identity", "source-id" : 2, "field-id" : 1001 } ] } ], "last-partition-id" : 1001, "default-sort-order-id" : 0, "sort-orders" : [ { "order-id" : 0, "fields" : [ ] } ], "properties" : { "owner" : "liliwei" }, "current-snapshot-id" : 1504400791559924261, "snapshots" : [ { "sequence-number" : 1, "snapshot-id" : 1504400791559924261, "timestamp-ms" : 1642176476606, "summary" : { "operation" : "append", "spark.app.id" : "local-1642173017469", "added-data-files" : "1", "added-records" : "1", "added-files-size" : "874", "changed-partition-count" : "1", "total-records" : "1", "total-files-size" : "874", "total-data-files" : "1", "total-delete-files" : "0", "total-position-deletes" : "0", "total-equality-deletes" : "0" }, "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro", "schema-id" : 0 } ], "snapshot-log" : [ { "timestamp-ms" : 1642176476606, "snapshot-id" : 1504400791559924261 } ], "metadata-log" : [ { "timestamp-ms" : 1642173468635, "metadata-file" : 
"/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v1.metadata.json" }, { "timestamp-ms" : 1642176476606, "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v2.metadata.json" } ]}

插入数据:

insert into local.db.sampleV2 values(2,'b','2');

查看目录结构:

(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── 3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
├── v4.metadata.json
└── version-hint.text

0 directories, 9 files

查看v4文件:

{ "format-version" : 2, "table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809", "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2", "last-sequence-number" : 2, "last-updated-ms" : 1642176734997, "last-column-id" : 3, "current-schema-id" : 0, "schemas" : [ { "type" : "struct", "schema-id" : 0, "fields" : [ { "id" : 1, "name" : "id", "required" : false, "type" : "long" }, { "id" : 2, "name" : "data", "required" : false, "type" : "string" }, { "id" : 3, "name" : "category", "required" : false, "type" : "string" } ] } ], "default-spec-id" : 1, "partition-specs" : [ { "spec-id" : 0, "fields" : [ { "name" : "category", "transform" : "identity", "source-id" : 3, "field-id" : 1000 } ] }, { "spec-id" : 1, "fields" : [ { "name" : "category", "transform" : "identity", "source-id" : 3, "field-id" : 1000 }, { "name" : "data", "transform" : "identity", "source-id" : 2, "field-id" : 1001 } ] } ], "last-partition-id" : 1001, "default-sort-order-id" : 0, "sort-orders" : [ { "order-id" : 0, "fields" : [ ] } ], "properties" : { "owner" : "liliwei" }, "current-snapshot-id" : 506027699712535420, "snapshots" : [ { "sequence-number" : 1, "snapshot-id" : 1504400791559924261, "timestamp-ms" : 1642176476606, "summary" : { "operation" : "append", "spark.app.id" : "local-1642173017469", "added-data-files" : "1", "added-records" : "1", "added-files-size" : "874", "changed-partition-count" : "1", "total-records" : "1", "total-files-size" : "874", "total-data-files" : "1", "total-delete-files" : "0", "total-position-deletes" : "0", "total-equality-deletes" : "0" }, "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro", "schema-id" : 0 }, { "sequence-number" : 2, "snapshot-id" : 506027699712535420, "parent-snapshot-id" : 1504400791559924261, "timestamp-ms" : 1642176734997, "summary" : { "operation" : "append", "spark.app.id" : "local-1642173017469", 
"added-data-files" : "1", "added-records" : "1", "added-files-size" : "874", "changed-partition-count" : "1", "total-records" : "2", "total-files-size" : "1748", "total-data-files" : "2", "total-delete-files" : "0", "total-position-deletes" : "0", "total-equality-deletes" : "0" }, "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro", "schema-id" : 0 } ], "snapshot-log" : [ { "timestamp-ms" : 1642176476606, "snapshot-id" : 1504400791559924261 }, { "timestamp-ms" : 1642176734997, "snapshot-id" : 506027699712535420 } ], "metadata-log" : [ { "timestamp-ms" : 1642173468635, "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v1.metadata.json" }, { "timestamp-ms" : 1642176476606, "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v2.metadata.json" }, { "timestamp-ms" : 1642176605638, "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v3.metadata.json" } ]}

查看manifest list文件:

{ "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d-m0.avro", "manifest_length": 7036, "partition_spec_id": 1, "content": 0, "sequence_number": 2, "min_sequence_number": 2, "added_snapshot_id": 506027699712535420, "added_data_files_count": 1, "existing_data_files_count": 0, "deleted_data_files_count": 0, "added_rows_count": 1, "existing_rows_count": 0, "deleted_rows_count": 0, "partitions": { "array": [{ "contains_null": false, "contains_nan": { "boolean": false }, "lower_bound": { "bytes": "2" }, "upper_bound": { "bytes": "2" } }, { "contains_null": false, "contains_nan": { "boolean": false }, "lower_bound": { "bytes": "b" }, "upper_bound": { "bytes": "b" } }] }} { "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro", "manifest_length": 6833, "partition_spec_id": 0, "content": 0, "sequence_number": 1, "min_sequence_number": 1, "added_snapshot_id": 1504400791559924261, "added_data_files_count": 1, "existing_data_files_count": 0, "deleted_data_files_count": 0, "added_rows_count": 1, "existing_rows_count": 0, "deleted_rows_count": 0, "partitions": { "array": [{ "contains_null": false, "contains_nan": { "boolean": false }, "lower_bound": { "bytes": "1" }, "upper_bound": { "bytes": "1" } }] }}

Copyright © 2016-2020 www.365daan.com All Rights Reserved. 365答案网 版权所有 备案号:

部分内容来自互联网,版权归原作者所有,如有冒犯请联系我们,我们将在三个工作日内妥善处理。