数据类型
- array
# 测试数据
1 huangbo guangzhou,xianggang,shenzhen a1:30,a2:20,a3:100 beijing,112233,13522334455,500
2 xuzheng xianggang b2:50,b3:40 tianjin,223344,13644556677,600
3 wangbaoqiang beijing,zhejinag c1:200 chongqinjg,334455,15622334455,20
create table t_data_type(
id int,
name string,
work_location array<string>,
piaofang map<string,bigint>,
address struct<location:string,zipcode:int,phone:string,value:int>
) row format delimited fields terminated by "\t" collection items terminated by "," map keys terminated by ":" lines terminated by "\n";
+-----+---------------+---------------------------------------+-----------------------------+------------------------------------------------------------------------------+--+
| id | name | work_location | piaofang | address |
+-----+---------------+---------------------------------------+-----------------------------+------------------------------------------------------------------------------+--+
| 1 | huangbo | ["guangzhou","xianggang","shenzhen"] | {"a1":30,"a2":20,"a3":100} | {"location":"beijing","zipcode":112233,"phone":"13522334455","value":500} |
| 2 | xuzheng | ["xianggang"] | {"b2":50,"b3":40} | {"location":"tianjin","zipcode":223344,"phone":"13644556677","value":600} |
| 3 | wangbaoqiang | ["beijing","zhejinag"] | {"c1":200} | {"location":"chongqinjg","zipcode":334455,"phone":"15622334455","value":20} |
+-----+---------------+---------------------------------------+-----------------------------+------------------------------------------------------------------------------+--+
select work_location[0] from t_data_type;
+-------------------+--+
| work_location[0] |
+-------------------+--+
| guangzhou |
| xianggang |
| beijing |
+-------------------+--+
- map
select piaofang,piaofang["a1"] from t_data_type;
+-----------------------------+---------------+--+
| piaofang | piaofang[a1] |
+-----------------------------+---------------+--+
| {"a1":30,"a2":20,"a3":100} | 30 |
| {"b2":50,"b3":40} | NULL |
| {"c1":200} | NULL |
+-----------------------------+---------------+--+
特殊分隔符处理
hive 读取数据的机制:
- 首先用 InputFormat<默认是:org.apache.hadoop.mapred.TextInputFormat >的一个具体实 现类读入文件数据,返回一条一条的记录(可以是行,或者是你逻辑中的“行”)
- 然后利用 SerDe<默认:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe>的一个具体 实现类,对上面返回的一条一条的记录进行字段切割
Hive 对文件中字段的分隔符默认情况下只支持单字节分隔符,对于多字符,可以参考使用RegexSerDe正则表达式解析
create table t_bi_reg(id string,name string)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'with serdeproperties('input.regex'='(.*)\\|\\|(.*)','output.format.string'='%1$s %2$s');
01||huangbo
02||xuzheng
03||wangbaoqiang