<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.43.50:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hadoop</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>xxxx</value>
    <description>password to use against metastore database</description>
  </property>
</configuration>
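With the metastore configured, the rel database used by all the statements below has to exist before any tables can be created in it; a minimal sketch:

create database if not exists rel;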
use rel;
create table student_info(
  student_id string comment 'student ID',
  name string comment 'name',
  age int comment 'age',
  origin string comment 'place of origin'
) comment 'student information table'
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as textfile;
hive> load data local inpath '/home/hadoop/apps/hive_test_data/student_info_data.txt' into table student_info;
Loading data to table rel.student_info
Table rel.student_info stats: [numFiles=1, totalSize=341]
OK
Time taken: 1.144 seconds
hive> load data inpath '/student_info_data.txt' overwrite into table student_info;
Loading data to table rel.student_info
Table rel.student_info stats: [numFiles=1, numRows=0, totalSize=341, rawDataSize=0]
OK
Time taken: 0.41 seconds
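Note the difference: load data local inpath copies a file from the local filesystem, while load data inpath moves the source file from its HDFS location into the table's directory, and overwrite first clears any existing data. The result can be inspected from the hive shell; the default warehouse path /user/hive/warehouse is an assumption for this setup:

hive> dfs -ls /user/hive/warehouse/rel.db/student_info/;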
10. Hive Data Types
Field name       Type    Comment
user_id          string  user ID
salary           int     salary
worked_citys     array   cities worked in
social_security  map     social security contributions (pension, medical insurance)
welfare          struct  benefits (meal allowance (float), regularized (boolean), commercial insurance (float))
Create the employee table
hive> create table rel.employee(
    >   user_id string,
    >   salary int,
    >   worked_citys array<string>,
    >   social_security map<string,float>,
    >   welfare struct<meal_allowance:float,if_regular:boolean,commercial_insurance:float>
    > )
    > row format delimited fields terminated by '\t'
    > collection items terminated by ','
    > map keys terminated by ':'
    > lines terminated by '\n'
    > stored as textfile;
OK
Time taken: 0.212 seconds
hive> show tables;
OK
employee
student_info
Time taken: 0.057 seconds, Fetched: 2 row(s)
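Given these delimiters (tab between fields, comma between collection items, colon between map keys and values), each line of employee_data.txt presumably looks like the following; the rows are reconstructed from the query output below, not taken from the actual file:

zhangsan	10800	beijing,shanghai	养老:1000,医疗:600	2000,true,500
lisi	20000	beijing,nanjing	养老:2000,医疗:1200	2000,false,500
wangwu	17000	shanghai,nanjing	养老:1800,医疗:1100	2000,true,500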
Load data from the local filesystem into the employee table
hive> load data local inpath '/home/hadoop/apps/hive_test_data/employee_data.txt' into table employee;
Loading data to table rel.employee
Table rel.employee stats: [numFiles=1, totalSize=206]
OK
Time taken: 0.388 seconds
hive> select * from employee;
OK
zhangsan	10800	["beijing","shanghai"]	{"养老":1000.0,"医疗":600.0}	{"meal_allowance":2000.0,"if_regular":true,"commercial_insurance":500.0}
lisi	20000	["beijing","nanjing"]	{"养老":2000.0,"医疗":1200.0}	{"meal_allowance":2000.0,"if_regular":false,"commercial_insurance":500.0}
wangwu	17000	["shanghai","nanjing"]	{"养老":1800.0,"医疗":1100.0}	{"meal_allowance":2000.0,"if_regular":true,"commercial_insurance":500.0}
Time taken: 0.127 seconds, Fetched: 3 row(s)
Query the user ID, salary, first city worked in, pension contribution, and meal-allowance benefit of employees who have been regularized
select user_id,
  salary,
  worked_citys[0],
  social_security['养老'],
  welfare.meal_allowance
from rel.employee
where welfare.if_regular=true;
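The array column can also be flattened so that each city becomes its own row; a sketch using Hive's lateral view explode, which is not part of the original walkthrough:

select user_id, city
from rel.employee
lateral view explode(worked_citys) city_list as city;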
Insert data into the static partition school_year='2017':

insert into table student_school_info_partition partition(school_year='2017')
select t1.student_id, t1.name, t1.institute_id, t1.major_id
from student_school_info t1
where t1.school_year='2017';
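The partitioned table itself is created in a step that falls outside this excerpt; reconstructed from the columns above, its DDL is presumably along these lines (an assumption, not the original DDL):

create table student_school_info_partition(
  student_id string,
  name string,
  institute_id string,
  major_id string
)
partitioned by (school_year string)
row format delimited fields terminated by '\t'
stored as textfile;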
13. View Partitions
hive> show partitions student_school_info_partition;
OK
school_year=2017
Time taken: 0.191 seconds, Fetched: 1 row(s)
hive> select * from student_school_info_partition where school_year='2017';
OK
1	xiaoming	information	software	2017
2	xiaobai	information	computer	2017
3	zhangfei	information	computer	2017
4	likui	information	bigdata	2017
5	zhaoyun	information	bigdata	2017
6	zhangsan	information	software	2017
7	lisi	information	bigdata	2017
8	wangwu	information	computer	2017
Time taken: 0.226 seconds, Fetched: 8 row(s)
14. Drop a Partition
hive> alter table student_school_info_partition drop partition (school_year='2017');
Dropped the partition school_year=2017
OK
Time taken: 0.71 seconds
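Because student_school_info_partition is a managed (internal) table, dropping the partition also deletes its data directory in HDFS. This can be confirmed from the hive shell; the default warehouse path /user/hive/warehouse is an assumption:

hive> dfs -ls /user/hive/warehouse/rel.db/student_school_info_partition/;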
15. Add Data Using Dynamic Partitions
Here the partition key is left without a value; Hive derives it from the last column of the select list:

set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table student_school_info_partition partition(school_year)
select t1.student_id, t1.name, t1.institute_id, t1.major_id, t1.school_year
from student_school_info t1;
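In the default strict mode Hive rejects inserts whose partition keys are all dynamic; nonstrict lifts that requirement. With at least one static key, an insert also runs in strict mode, as in this sketch for a hypothetical table partitioned by two keys:

-- sketch only: student_school_info_by_term and student_school_info_staging
-- are hypothetical names, not part of this walkthrough
insert overwrite table student_school_info_by_term
partition(school_year='2017', semester)  -- school_year static, semester dynamic
select student_id, name, institute_id, major_id, semester
from student_school_info_staging;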
View partitions
hive> show partitions student_school_info_partition;
OK
school_year=2017
Time taken: 0.12 seconds, Fetched: 1 row(s)
Add a partition to the external partitioned table student_school_info_external_partition; its data already sits in HDFS, and add partition makes it visible to Hive:

hive> alter table student_school_info_external_partition add partition(school_year='2017');
OK
Time taken: 0.111 seconds
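The external table itself is created in a step outside this excerpt; its DDL presumably resembles the following, with the location clause pointing at the directory that already holds the data (the path shown is hypothetical):

create external table student_school_info_external_partition(
  student_id string,
  name string,
  institute_id string,
  major_id string
)
partitioned by (school_year string)
row format delimited fields terminated by '\t'
stored as textfile
location '/hive_data/student_school_info_external';  -- hypothetical HDFS path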
hive> select * from student_school_info_external_partition;
OK
1	xiaoming	information	software	2017
2	xiaobai	information	computer	2017
3	zhangfei	information	computer	2017
4	likui	information	bigdata	2017
5	zhaoyun	information	bigdata	2017
6	zhangsan	information	software	2017
7	lisi	information	bigdata	2017
8	wangwu	information	computer	2017
Time taken: 0.127 seconds, Fetched: 8 row(s)
Drop the partition
hive> alter table student_school_info_external_partition drop partition(school_year='2017');
Dropped the partition school_year=2017
OK
Time taken: 0.19 seconds
View partitions; the partition has been dropped
hive> show partitions student_school_info_external_partition;
OK
Time taken: 0.168 seconds
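Unlike the managed table earlier, dropping an external partition only removes the metadata; the underlying files stay in HDFS, so re-adding the partition should make the data queryable again (a sketch):

hive> alter table student_school_info_external_partition add partition(school_year='2017');
hive> select count(*) from student_school_info_external_partition where school_year='2017';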