GreenPlum分区表维护
greenplum的分区管理方式。添加分区、重命名分区、添加一个默认的分区、删除一个分区、清空一个分区、交换一个分区、分割一个分区,修改一个分区的模板等。
本文记录的比较详细,非常适合来学习。
在greenplum中,分区,是一个非常好的工具。尤其适合大表,在查询的时候,能分区裁剪,避免扫描其他无用分区。一般原则是,使用关联的主键做分布,使用时间来进行分区裁剪。这在很多情况下非常的有用。
分区也不是越多越好。一个表最终是写到文件上,对于一个带子分区的列存储模式的表,表最终写入文件的数量是:分区数 * 子分区 * 列。所以,很容易导致文件量过多,如:1000*1000*100,影响到性能。
万能法则:建新表、挪数据。如何将一个未分区的表,变成一个分区表?如果可以拆分的话,直接拆分。当遇到其他,无法直接解决的方式。都可以通过新建一张表,然后将旧表,重新插入到新表中。并更换新的表名。
以下内容,需要postgres版本10以后,(但是貌似gp集成的postgres 9版本也可以)以下内容是对表继承的抽象、封装。也能按表的继承封装来实现。
资源
本文转载自https://www.itdaan.com/blog/2018/04/20/62f3bc29b7dc3521c557b7b3e75817a.html
对原内容有删改。
正文
添加分区
备注:以下分区因为没有默认分区,故可直接添加分区。如果有默认分区,则需要参考,分隔一个分区的内容。
CREATE TABLE sales (
trans_id int,
date date,
amount decimal(9,2),
region text
)
DISTRIBUTED BY (trans_id)
PARTITION BY RANGE (date)
SUBPARTITION BY LIST (region)
SUBPARTITION TEMPLATE
( SUBPARTITION usa VALUES ('usa'),
SUBPARTITION asia VALUES ('asia'),
SUBPARTITION europe VALUES ('europe'),
DEFAULT SUBPARTITION other_regions)
(START (date '2011-01-01') INCLUSIVE
END (date '2011-03-01') EXCLUSIVE
EVERY (INTERVAL '1 month'));
-- 如果原表的建表语句有SUBPARTITION TEMPLATE,那么新加的分区还是按照原来的template
ALTER TABLE sales ADD PARTITION
START (date '2017-02-01') INCLUSIVE
END (date '2017-03-01') EXCLUSIVE;
-- 以下语句无法正常添加,提示ERROR: subpartition configuration conflicts with subpartition template
ALTER TABLE sales ADD PARTITION
START (date '2011-05-01') INCLUSIVE
END (date '2017-06-01') EXCLUSIVE
( SUBPARTITION usa VALUES ('usa'),
SUBPARTITION asia VALUES ('asia'),
SUBPARTITION europe VALUES ('europe'),
DEFAULT SUBPARTITION other_regions );
重命名分区
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-----------------------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_1 | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_2 | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_usa | table | gpadmin | heap |
warehouse=# ALTER TABLE sales RENAME PARTITION FOR ('2011-01-01') TO jan11;
ALTER TABLE
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+---------------------------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_2 | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_jan11 | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_usa | table | gpadmin | heap |
(11 rows)
添加一个默认的分区
warehouse=# ALTER TABLE sales ADD DEFAULT PARTITION other;
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+---------------------------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_2 | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_jan11 | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_jan11_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_other | table | gpadmin | heap |
public | sales_1_prt_other_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_other_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_other_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_other_2_prt_usa | table | gpadmin | heap |
删除一个分区
-- 删除一个默认分区
warehouse=# alter table sales drop default partition;
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-----------------------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_1 | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_2 | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_usa | table | gpadmin | heap |
-- 删除rank值为2的分区,自分区会随之一起删除
warehouse=# select tablename,partitiontablename,partitionrank,partitionlevel from pg_partitions where tablename='sales';
tablename | partitiontablename | partitionrank | partitionlevel
-----------+-----------------------------------+---------------+----------------
sales | sales_1_prt_1 | 1 | 0
sales | sales_1_prt_2 | 2 | 0
sales | sales_1_prt_1_2_prt_usa | | 1
sales | sales_1_prt_1_2_prt_asia | | 1
sales | sales_1_prt_1_2_prt_europe | | 1
sales | sales_1_prt_1_2_prt_other_regions | | 1
sales | sales_1_prt_2_2_prt_usa | | 1
sales | sales_1_prt_2_2_prt_asia | | 1
sales | sales_1_prt_2_2_prt_europe | | 1
sales | sales_1_prt_2_2_prt_other_regions | | 1
(10 rows)
warehouse=# alter table sales drop partition for(rank(2));
NOTICE: dropped partition for rank 2 for relation "sales" and its children
ALTER TABLE
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-----------------------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_1 | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_usa | table | gpadmin | heap |
清空一个分区
alter table sales truncate default partition;
alter table sales truncate partition for(rank(1));
-- 直接使用分区的表名称清空。
truncate tableanme;
交换一个分区
CREATE TABLE sales (id int, date date, amt decimal(10,2))
DISTRIBUTED BY (id)
PARTITION BY RANGE (date)
( PARTITION Jan16 START (date '2016-01-01') INCLUSIVE ,
PARTITION Feb16 START (date '2016-02-01') INCLUSIVE ,
PARTITION Mar16 START (date '2016-03-01') INCLUSIVE ,
PARTITION Apr16 START (date '2016-04-01') INCLUSIVE ,
PARTITION May16 START (date '2016-05-01') INCLUSIVE ,
PARTITION Jun16 START (date '2016-06-01') INCLUSIVE ,
PARTITION Jul16 START (date '2016-07-01') INCLUSIVE ,
PARTITION Aug16 START (date '2016-08-01') INCLUSIVE ,
PARTITION Sep16 START (date '2016-09-01') INCLUSIVE ,
PARTITION Oct16 START (date '2016-10-01') INCLUSIVE ,
PARTITION Nov16 START (date '2016-11-01') INCLUSIVE ,
PARTITION Dec16 START (date '2016-12-01') INCLUSIVE
END (date '2017-01-01') EXCLUSIVE );
CREATE TABLE jan12 (LIKE sales) WITH (appendonly=true);
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_apr16 | table | gpadmin | heap |
public | sales_1_prt_aug16 | table | gpadmin | heap |
public | sales_1_prt_dec16 | table | gpadmin | heap |
public | sales_1_prt_feb16 | table | gpadmin | heap |
public | sales_1_prt_jan16 | table | gpadmin | heap |
public | sales_1_prt_jul16 | table | gpadmin | heap |
public | sales_1_prt_jun16 | table | gpadmin | heap |
public | sales_1_prt_mar16 | table | gpadmin | heap |
public | sales_1_prt_may16 | table | gpadmin | heap |
public | sales_1_prt_nov16 | table | gpadmin | heap |
public | sales_1_prt_oct16 | table | gpadmin | heap |
public | sales_1_prt_sep16 | table | gpadmin | heap |
(13 rows)
-- 插入数据
warehouse=# INSERT INTO jan12 SELECT * FROM sales_1_prt_jan16;
-- 用jan12替换sales_1_prt_jan16分区
warehouse=# ALTER TABLE sales EXCHANGE PARTITION FOR (DATE '2016-01-01') WITH TABLE jan12;
NOTICE: exchanged partition "jan16" of relation "sales" with relation "jan12"
ALTER TABLE
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-------------------+-------+---------+-------------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_apr16 | table | gpadmin | heap |
public | sales_1_prt_aug16 | table | gpadmin | heap |
public | sales_1_prt_dec16 | table | gpadmin | heap |
public | sales_1_prt_feb16 | table | gpadmin | heap |
public | sales_1_prt_jan16 | table | gpadmin | append only |
public | sales_1_prt_jul16 | table | gpadmin | heap |
public | sales_1_prt_jun16 | table | gpadmin | heap |
public | sales_1_prt_mar16 | table | gpadmin | heap |
public | sales_1_prt_may16 | table | gpadmin | heap |
public | sales_1_prt_nov16 | table | gpadmin | heap |
public | sales_1_prt_oct16 | table | gpadmin | heap |
public | sales_1_prt_sep16 | table | gpadmin | heap |
(13 rows)
分割一个分区
默认分区,分割出新的分区
-- 分割2016年1月的分区,分为半个月一个分区
ALTER TABLE sales SPLIT PARTITION FOR ('2016-01-01')
AT ('2016-01-16')
INTO (PARTITION jan161to15, PARTITION jan1616to31);
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-------------------------+-------+---------+-------------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_apr16 | table | gpadmin | heap |
public | sales_1_prt_aug16 | table | gpadmin | heap |
public | sales_1_prt_dec16 | table | gpadmin | heap |
public | sales_1_prt_feb16 | table | gpadmin | heap |
public | sales_1_prt_jan1616to31 | table | gpadmin | append only |
public | sales_1_prt_jan161to15 | table | gpadmin | append only |
public | sales_1_prt_jul16 | table | gpadmin | heap |
public | sales_1_prt_jun16 | table | gpadmin | heap |
public | sales_1_prt_mar16 | table | gpadmin | heap |
public | sales_1_prt_may16 | table | gpadmin | heap |
public | sales_1_prt_nov16 | table | gpadmin | heap |
public | sales_1_prt_oct16 | table | gpadmin | heap |
public | sales_1_prt_sep16 | table | gpadmin | heap |
-- 添加默认分区
warehouse=# ALTER TABLE sales ADD DEFAULT PARTITION other;
NOTICE: CREATE TABLE will create partition "sales_1_prt_other" for table "sales"
ALTER TABLE
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-------------------------+-------+---------+-------------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_apr16 | table | gpadmin | heap |
public | sales_1_prt_aug16 | table | gpadmin | heap |
public | sales_1_prt_dec16 | table | gpadmin | heap |
public | sales_1_prt_feb16 | table | gpadmin | heap |
public | sales_1_prt_jan1616to31 | table | gpadmin | append only |
public | sales_1_prt_jan161to15 | table | gpadmin | append only |
public | sales_1_prt_jul16 | table | gpadmin | heap |
public | sales_1_prt_jun16 | table | gpadmin | heap |
public | sales_1_prt_mar16 | table | gpadmin | heap |
public | sales_1_prt_may16 | table | gpadmin | heap |
public | sales_1_prt_nov16 | table | gpadmin | heap |
public | sales_1_prt_oct16 | table | gpadmin | heap |
public | sales_1_prt_other | table | gpadmin | heap |
public | sales_1_prt_sep16 | table | gpadmin | heap |
(15 rows)
-- 从默认分区拆分一个2017年1月的分区
ALTER TABLE sales SPLIT DEFAULT PARTITION
START ('2017-01-01') INCLUSIVE
END ('2017-02-01') EXCLUSIVE
INTO (PARTITION jan17, default partition);
NOTICE: exchanged partition "other" of relation "sales" with relation "pg_temp_77047924"
NOTICE: dropped partition "other" for relation "sales"
NOTICE: CREATE TABLE will create partition "sales_1_prt_jan17" for table "sales"
NOTICE: CREATE TABLE will create partition "sales_1_prt_other" for table "sales"
ALTER TABLE
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-------------------------+-------+---------+-------------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_apr16 | table | gpadmin | heap |
public | sales_1_prt_aug16 | table | gpadmin | heap |
public | sales_1_prt_dec16 | table | gpadmin | heap |
public | sales_1_prt_feb16 | table | gpadmin | heap |
public | sales_1_prt_jan1616to31 | table | gpadmin | append only |
public | sales_1_prt_jan161to15 | table | gpadmin | append only |
public | sales_1_prt_jan17 | table | gpadmin | heap |
public | sales_1_prt_jul16 | table | gpadmin | heap |
public | sales_1_prt_jun16 | table | gpadmin | heap |
public | sales_1_prt_mar16 | table | gpadmin | heap |
public | sales_1_prt_may16 | table | gpadmin | heap |
public | sales_1_prt_nov16 | table | gpadmin | heap |
public | sales_1_prt_oct16 | table | gpadmin | heap |
public | sales_1_prt_other | table | gpadmin | heap |
public | sales_1_prt_sep16 | table | gpadmin | heap |
修改一个子分区模板
-- 创建表:
CREATE TABLE sales (trans_id int, date date, amount decimal(9,2), region text)
DISTRIBUTED BY (trans_id)
PARTITION BY RANGE (date)
SUBPARTITION BY LIST (region)
SUBPARTITION TEMPLATE
( SUBPARTITION usa VALUES ('usa'),
SUBPARTITION asia VALUES ('asia'),
SUBPARTITION europe VALUES ('europe'),
DEFAULT SUBPARTITION other_regions )
( START (date '2014-01-01') INCLUSIVE
END (date '2014-04-01') EXCLUSIVE
EVERY (INTERVAL '1 month') );
-- 修改模板:
ALTER TABLE sales SET SUBPARTITION TEMPLATE
( SUBPARTITION usa VALUES ('usa'),
SUBPARTITION asia VALUES ('asia'),
SUBPARTITION europe VALUES ('europe'),
SUBPARTITION africa VALUES ('africa'),
DEFAULT SUBPARTITION regions );
-- 新增的分区会使用修改后的模板:
ALTER TABLE sales ADD PARTITION "4"
START ('2014-04-01') INCLUSIVE
END ('2014-05-01') EXCLUSIVE ;
\dt+ sales_1_prt_4*
public | sales_1_prt_4 | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_africa | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_regions | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_usa | table | gpadmin | heap |
-- 清空模板
ALTER TABLE sales SET SUBPARTITION TEMPLATE ();
-- 添加分区报错
ALTER TABLE sales ADD PARTITION "4"
START ('2014-04-01') INCLUSIVE
END ('2014-05-01') EXCLUSIVE ;
NOTICE: CREATE TABLE will create partition "sales_1_prt_4" for table "sales"
ERROR: no partitions specified at depth 2
warehouse=# \dt sales*
List of relations
Schema | Name | Type | Owner | Storage
--------+-----------------------------------+-------+---------+---------
public | sales | table | gpadmin | heap
public | sales_1_prt_1 | table | gpadmin | heap
public | sales_1_prt_1_2_prt_asia | table | gpadmin | heap
public | sales_1_prt_1_2_prt_europe | table | gpadmin | heap
public | sales_1_prt_1_2_prt_other_regions | table | gpadmin | heap
public | sales_1_prt_1_2_prt_usa | table | gpadmin | heap
public | sales_1_prt_2 | table | gpadmin | heap
public | sales_1_prt_2_2_prt_asia | table | gpadmin | heap
public | sales_1_prt_2_2_prt_europe | table | gpadmin | heap
public | sales_1_prt_2_2_prt_other_regions | table | gpadmin | heap
public | sales_1_prt_2_2_prt_usa | table | gpadmin | heap
public | sales_1_prt_3 | table | gpadmin | heap
public | sales_1_prt_3_2_prt_asia | table | gpadmin | heap
public | sales_1_prt_3_2_prt_europe | table | gpadmin | heap
public | sales_1_prt_3_2_prt_other_regions | table | gpadmin | heap
public | sales_1_prt_3_2_prt_usa | table | gpadmin | heap
(16 rows)
-- 指定相关子分区的值再次添加
ALTER TABLE sales ADD PARTITION "4"
START ('2014-04-01') INCLUSIVE
END ('2014-05-01') EXCLUSIVE
( SUBPARTITION usa VALUES ('usa'),
SUBPARTITION asia VALUES ('asia'),
SUBPARTITION europe VALUES ('europe'),
DEFAULT SUBPARTITION other_regions );
NOTICE: CREATE TABLE will create partition "sales_1_prt_4" for table "sales"
NOTICE: CREATE TABLE will create partition "sales_1_prt_4_2_prt_usa" for table "sales_1_prt_4"
NOTICE: CREATE TABLE will create partition "sales_1_prt_4_2_prt_asia" for table "sales_1_prt_4"
NOTICE: CREATE TABLE will create partition "sales_1_prt_4_2_prt_europe" for table "sales_1_prt_4"
NOTICE: CREATE TABLE will create partition "sales_1_prt_4_2_prt_other_regions" for table "sales_1_prt_4"
ALTER TABLE
-- 成功添加
warehouse=# \dt+ sales*
List of relations
Schema | Name | Type | Owner | Storage | Description
--------+-----------------------------------+-------+---------+---------+-------------
public | sales | table | gpadmin | heap |
public | sales_1_prt_1 | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_1_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_2 | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_2_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_3 | table | gpadmin | heap |
public | sales_1_prt_3_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_3_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_3_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_3_2_prt_usa | table | gpadmin | heap |
public | sales_1_prt_4 | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_asia | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_europe | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_other_regions | table | gpadmin | heap |
public | sales_1_prt_4_2_prt_usa | table | gpadmin | heap |
(21 rows)