-- Create an Iceberg-format table with two columns.
CREATE TABLE test (id BIGINT, name STRING) USING iceberg;
写数据
INSERT
-- Insert a literal row, then insert rows selected from another table.
-- Explicit column lists so the statements survive schema evolution.
INSERT INTO test (id, name) VALUES (1, 'hsm');

INSERT INTO test (id, name)
SELECT id, name
FROM test1
WHERE id = 1;
Spark + Iceberg 通过 MERGE INTO 和 DELETE FROM 支持行级别的更新和删除
-- Upsert: when the id matches, accumulate the count; otherwise insert the row.
MERGE INTO test t
USING (SELECT * FROM updates) u
ON t.id = u.id
WHEN MATCHED THEN
    UPDATE SET t.count = t.count + u.count
WHEN NOT MATCHED THEN
    INSERT *;
-- Create a struct column.
ALTER TABLE prod.db.sample ADD COLUMN point struct<x: double, y: double>;

-- Add a field to the struct.
ALTER TABLE prod.db.sample ADD COLUMN point.z double;
-- Rename a top-level column, then a nested struct field.
ALTER TABLE prod.db.sample RENAME COLUMN data TO payload;
ALTER TABLE prod.db.sample RENAME COLUMN location.lat TO latitude;
ALTER TABLE ... ALTER COLUMN
修改列的类型
-- Widen the column type (Iceberg only allows safe type promotions, e.g. float -> double).
ALTER TABLE prod.db.sample ALTER COLUMN measurement TYPE double;
修改列的类型和描述
-- Change the column type and its comment in one statement.
ALTER TABLE prod.db.sample ALTER COLUMN measurement TYPE double COMMENT 'unit is bytes per second';

-- Change only the column comment.
ALTER TABLE prod.db.sample ALTER COLUMN measurement COMMENT 'unit is kilobytes per second';
使用First和After
-- Reorder columns: move to the front, or place after another column (works for nested fields too).
ALTER TABLE prod.db.sample ALTER COLUMN col FIRST;
ALTER TABLE prod.db.sample ALTER COLUMN nested.col AFTER other_col;
设置 NOT NULL 和删除 NOT NULL 约束
-- Remove the NOT NULL constraint so the column becomes nullable.
ALTER TABLE prod.db.sample ALTER COLUMN id DROP NOT NULL;
-- Partition by the column value as-is (identity transform).
ALTER TABLE prod.db.sample ADD PARTITION FIELD catalog;
分区转换也支持
-- Partition transforms: hash-bucket, prefix-truncate, and time-based.
ALTER TABLE prod.db.sample ADD PARTITION FIELD bucket(16, id);
ALTER TABLE prod.db.sample ADD PARTITION FIELD truncate(data, 4);
ALTER TABLE prod.db.sample ADD PARTITION FIELD years(ts);

-- Use the optional AS keyword to give the partition field a custom name.
ALTER TABLE prod.db.sample ADD PARTITION FIELD bucket(16, id) AS shard;
-- Drop partition fields; the spec mirrors the one used when the field was added,
-- or the custom name given with AS.
ALTER TABLE prod.db.sample DROP PARTITION FIELD catalog;
ALTER TABLE prod.db.sample DROP PARTITION FIELD bucket(16, id);
ALTER TABLE prod.db.sample DROP PARTITION FIELD truncate(data, 4);
ALTER TABLE prod.db.sample DROP PARTITION FIELD years(ts);
ALTER TABLE prod.db.sample DROP PARTITION FIELD shard;
-- Set the table's write sort order.
ALTER TABLE prod.db.sample WRITE ORDERED BY category, id;

-- Use the optional ASC/DESC keyword to set the direction of each field (default ASC).
ALTER TABLE prod.db.sample WRITE ORDERED BY category ASC, id DESC;

-- Use the optional NULLS FIRST / NULLS LAST keyword to set null ordering (default FIRST).
ALTER TABLE prod.db.sample WRITE ORDERED BY category ASC NULLS LAST, id DESC NULLS FIRST;
// Time travel to October 26, 1986 at 01:21:00 (epoch milliseconds).
spark.read
  .option("as-of-timestamp", "499162860000")
  .format("iceberg")
  .load("path/to/table")

// Time travel to the snapshot with ID 8745438249199332230.
spark.read
  .option("snapshot-id", 8745438249199332230L)
  .format("iceberg")
  .load("iceberg_db.test")
  .show(20)