1.最基础的建表(不带任何可选子句)
-- Minimal managed table: three STRING columns, no partitioning or storage clauses.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
);
2.指定字段分隔符,以逗号","分隔
-- Same columns as the minimal table, with fields delimited by a comma.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
3.创建分区表
创建单一分区
-- Table partitioned by a single column (day), comma-delimited fields.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    day STRING COMMENT 'day=yyyy-MM-dd'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
创建多分区
-- Table partitioned by three columns (year, month, day), comma-delimited fields.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    year  STRING COMMENT 'year=yyyy',
    month STRING COMMENT 'month=yyyy-MM',
    day   STRING COMMENT 'day=yyyy-MM-dd'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
4.创建外部表(使用 EXTERNAL 关键字;不指定时默认创建内部表)
-- External table (EXTERNAL keyword), partitioned by year/month/day,
-- comma-delimited fields.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE EXTERNAL TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    year  STRING COMMENT 'year=yyyy',
    month STRING COMMENT 'month=yyyy-MM',
    day   STRING COMMENT 'day=yyyy-MM-dd'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
5.创建几种压缩格式的表
普通默认text文本格式,没有压缩
-- External, partitioned table stored as plain text with no compression clause.
-- STORED AS TEXTFILE is spelled out so the example does not depend on the
-- cluster's hive.default.fileformat setting.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE EXTERNAL TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    year  STRING COMMENT 'year=yyyy',
    month STRING COMMENT 'month=yyyy-MM',
    day   STRING COMMENT 'day=yyyy-MM-dd'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
parquet格式,以及压缩
-- Parquet format, no compression specified.
-- Original note: a default codec may still apply depending on configuration
-- (stated to normally be snappy) -- NOTE(review): verify against the cluster config.
-- Codecs listed in the original note: lzo, gzip, snappy, uncompressed.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE EXTERNAL TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    year  STRING COMMENT 'year=yyyy',
    month STRING COMMENT 'month=yyyy-MM',
    day   STRING COMMENT 'day=yyyy-MM-dd'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS PARQUET;  -- select the Parquet file format

-- Parquet format with an explicit compression codec.
-- This is an alternative to the statement above: both target the same table,
-- so with IF NOT EXISTS only the first one executed takes effect.
CREATE EXTERNAL TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    year  STRING COMMENT 'year=yyyy',
    month STRING COMMENT 'month=yyyy-MM',
    day   STRING COMMENT 'day=yyyy-MM-dd'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS PARQUET                                 -- select the Parquet file format
TBLPROPERTIES ('parquet.compression'='SNAPPY');   -- compress with snappy

-- If compression was not specified at creation time, it can be added later
-- by changing the table properties (tabname is a placeholder):
ALTER TABLE tabname SET TBLPROPERTIES ('parquet.compression'='SNAPPY');

-- Alternatively, set the codec in the session before writing:
SET parquet.compression=SNAPPY;

-- Original note: either approach only affects data written afterwards; existing
-- data stays uncompressed and has to be reloaded to be compressed.
-- Original note: compression reduces storage size by roughly one third.
ORC格式(分桶并非必需;仅 Hive 3.0 之前的事务表 transactional=true 才要求分桶)
-- ORC table partitioned by year/month/day and bucketed by id into 5 buckets,
-- with the transactional property explicitly set to false.
-- Hive requires CLUSTERED BY to come before ROW FORMAT / STORED AS, so the
-- clauses are ordered accordingly.
-- `user` is a reserved keyword in Hive 1.2.0+, so the table name is backtick-quoted.
CREATE TABLE IF NOT EXISTS `user` (
    id   STRING COMMENT 'ID',
    name STRING COMMENT '名字',
    age  STRING COMMENT '年龄'
)
PARTITIONED BY (
    year  STRING COMMENT 'year=yyyy',
    month STRING COMMENT 'month=yyyy-MM',
    day   STRING COMMENT 'day=yyyy-MM-dd'
)
CLUSTERED BY (id) INTO 5 BUCKETS
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES ('transactional'='false');
6.创建Hbase映射表
-- Hive table mapped onto an HBase table through the HBase storage handler.
-- Column mapping: the first Hive column maps to the HBase row key (:key); the
-- remaining columns map to qualifiers id/name/age in column family f.
-- `user` and `row` are reserved keywords in Hive 1.2.0+, so both are backtick-quoted.
-- The handler class name is case-sensitive: HBaseStorageHandler.
-- NOTE(review): 'habse_user_tb' looks like a typo for 'hbase_user_tb'; it is left
-- unchanged here -- confirm the actual HBase table name.
CREATE TABLE IF NOT EXISTS `user` (
    `row` STRING COMMENT 'hbase rowkey',
    id    STRING COMMENT 'ID',
    name  STRING COMMENT '名字',
    age   STRING COMMENT '年龄'
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" =":key,f:id,f:name,f:age")
TBLPROPERTIES ("hbase.table.name"= "habse_user_tb");
7.复制表结构(不含数据)
-- Create user_temp with the same definition as `user`, without copying data.
-- `user` is a reserved keyword in Hive 1.2.0+, so it is backtick-quoted.
CREATE TABLE IF NOT EXISTS user_temp
LIKE `user`;
8.复制表结构和数据
-- Create user_temp from a query (CTAS): copies both the columns and the rows
-- of `user`. SELECT * is intentional here because the whole table is copied.
-- `user` is a reserved keyword in Hive 1.2.0+, so it is backtick-quoted.
CREATE TABLE IF NOT EXISTS user_temp AS
SELECT *
FROM `user`;