您好,欢迎访问一九零五行业门户网

Hive 创建表

hive 创建表时正则表达式与java代码中一致,regexserde 中用\\来表示反转码
1. hive 创建表时正则表达式与java代码中一致,regexserde 中用\\来表示反转码
create table inst_cn_3 (
  ip string,
  time string,
  mac string,
  lan string,
  ver string,
  lc string,
  pn string,
  reg string,
  vrf string,
  line string)
row format serde 'org.apache.hadoop.hive.contrib.serde2.regexserde'
with serdeproperties (
  input.regex = - *([\\d\\.]+) *\\[([\\d]{2}/[\\w]+/[\\d]{4}:[\\d]{2}:[\\d]{2}:[\\d]{2}\\s+\\+[\\d]+)\\] *get */mx3/inst/([0-9a-f]{12})/ver=([\\.\\d]+),lan=(0x[\\w]+)(?:,lc=([\\w]+))(?:,pn=([\\w]+))(?:,reg=([0-1]))(?:,vrf=([\\w]+))?.*
 )
stored as textfile;
alter table inst_cn_3 add columns(line string);
alter table inst_cn_3 set serdeproperties (
  input.regex = - ([\\d\\.]+) \\[([\\d]{2}/[\\w]+/[\\d]{4}:[\\d]{2}:[\\d]{2}:[\\d]{2}\\s+\\+[\\d]+)\\] get /mx3/inst/([0-9a-f]{12})/ver=([\\.\\d]+),lan=(0x[\\w]+)(?:,lc=([\\w]+))(?:,pn=([\\w]+))(?:,reg=([0-1]))(?:,vrf=([\\w]+))?.*|(.*)
 );
select * from inst_cn_3 limit 100;
select
  line
from inst_cn_3
where
  1=1
  and mac is null
  and line is not null
  and  !(line rlike '.*unknowuser00.*')
  ;
hadoop fs -cp /mnt/nfs/log/statcn/inst/inst_cn_3.*.txt /hive/warehouse/inst_cn_3/
2011.06.09 hive时间处理
1.select
  from_unixtime(unix_timestamp('02/may/2011:00:00:00 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss') ,
  from_unixtime(unix_timestamp('02/may/2011:23:59:59 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss')  ,
  from_unixtime(unix_timestamp('03/may/2011:00:00:00 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss') ,
  from_unixtime(unix_timestamp('03/may/2011:23:59:59 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss')
 from
  stat_c_log
where
  1=1
  and partkey = '20110503'
  and logtype = 'inst_cn_3'
  and url rlike '/mx3/inst/.*'
limit
  10
;
2.
select
  from_unixtime(unix_timestamp('02/may/2011:00:00:00 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss') ,
  from_unixtime(unix_timestamp('02/may/2011:23:59:59 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss')  ,
  from_unixtime(unix_timestamp('03/may/2011:00:00:00 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss') ,
  from_unixtime(unix_timestamp('03/may/2011:23:59:59 +0800','dd/mmmmm/yyyy:hh:mm:ss z'),'yyyy-mm-dd hh:mm:ss')  ,
  round((unix_timestamp('02/may/2011:00:00:00 +0800','dd/mmmmm/yyyy:hh:mm:ss z')-4*3600)/(3600*24))*(3600*24*1000),
  round((unix_timestamp('02/may/2011:23:59:59 +0800','dd/mmmmm/yyyy:hh:mm:ss z')-4*3600)/(3600*24))*(3600*24*1000)
 from
  stat_c_log
where
  1=1
  and partkey = '20110503'
  and logtype = 'inst_cn_3'
  and url rlike '/mx3/inst/.*'
limit
  10
;
2012.03.01
  1. hive 方法注册类 functionregistry
2012.06.14
  1. set hive.cli.print.header=true; 可以设置hive shell的输出.
2012.06.26
  1. hive cdh4b2 使用arichive 对表归档后, 使用select line 对归档后的partition查询时,, 报filenotfoundexception 异常。
    https://issues.apache.org/jira/browse/mapreduce-2704 是因为combinefileinputformat constructs new path objects by converting an existing path to a uri, and then only pulling out the path part of it. this drops the scheme and host, which makes combinefileinputformat fail if the paths are on a filesystem other than the default one.
 2012.07.16
  1. explain extended hive_query; 查看运行.
2012.07.29
  1. describe formatted mock; 显示columns, location, params等.
  2. /src/ql/src/test/queries/clientpositive/
其它类似信息

推荐信息