工作日常整理大数据分区创建表实例

来源:互联网 发布:魔剑生死棋刘依依知乎 编辑:程序博客网 时间:2024/06/18 15:57

查询表结构

desc formatted dm_hrssgps.hrss_clock_log_data;


----创建有分区的表结构

CREATE EXTERNAL TABLE `hrss_clock_log_data`(
  `clockid` int, 
  `empcode` string, 
  `empname` string, 
  `orgid` bigint, 
  `orgcode` string, 
  `orgname` string, 
  `posname` string, 
  `posid` bigint, 
  `equipmentno` string, 
  `clocktime` string, 
  `longitude` double, 
  `latitude` double, 
  `runid` string, 
  `addressid` bigint, 
  `addressname` string, 
  `status` int, 
  `remark` string, 
  `outsideclockaddress` string, 
  `macaddress` string, 
  `persontype` int, 
  `syncstatus` int, 
  `synctm` string, 
  `brandtype` string, 
  `clversion` string, 
  `reserve2` string, 
  `reserve3` string)
comment '打卡报表'
partitioned by(inc_day string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  'hdfs://sfbdp1/hive/warehouse/dm/dm_hrssgps/hrss_clock_log_data';


--同步考勤记录

注解:基础表如果是分区,那查询也按分区查,其它可以不加分区时间查询

insert overwrite table dm_hrssgps.hrss_clock_log_data  partition(inc_day='${yyyymmdd}') 
    SELECT 
   L.ID as clockId,
L.EMP_CODE AS empCode,
u.EMP_NAME As empName,
u.org_id As orgId,
tmo.ORG_CODE as orgCode,
tmo.ORG_NAME as orgName,
tmp.POSNAME as  posname,
tmp.POSID as POSID,
L.EQUIPMENT_NO AS equipmentNo,
L.CLOCK_TIME AS clockTime,
L.LONGITUDE AS longitude,
L.LATITUDE AS latitude,
L.RUN_ID AS runId,
L.ADDRESS_ID AS addressId,
A.NAME AS addressName,
L.STATUS AS status,
L.REMARK AS remark,
L.OUTSIDE_CLOCK_ADDRESS AS outsideClockAddress,
L.MAC_ADDRESS AS macAddress,
L.PERSON_TYPE AS personType,
L.SYNC_STATUS AS syncStatus,
L.SYNC_TM AS syncTm,
L.BRAND_TYPE As brandType,
        L.RESERVE1 AS clVersion
FROM  ods_hrssgps.TT_CLOCK_LOG L
left join  ods_hrssgps.tm_employee u on u.EMP_CODE=L.EMP_CODE
left join  ods_hrssgps.tm_org tmo on tmo.ORG_ID=u.ORG_ID
left join  ods_hrssgps.tm_position tmp on  tmp.POSID=u.POSITION_ID
left join  ods_hrssgps.TM_CLOCK_ADDRESS A on A.ID = L.ADDRESS_ID
WHERE  L.CLOCK_TIME = '${yesterday}'


后边按分区查询

left join  ods_hrssgps.tm_employee u on u.emp_code=L.emp_code and u.inc_day='${yyyymmdd}'
left join  ods_hrssgps.tm_org tmo on tmo.org_id=u.org_id  and tmo.inc_day='${yyyymmdd}'
left join  ods_hrssgps.tm_position tmp on  tmp.posid=u.position_id  and tmp.inc_day='${yyyymmdd}'
left join  ods_hrssgps.TM_CLOCK_ADDRESS A on A.id = L.address_id  and A.inc_day='${yyyymmdd}'
WHERE day(L.clock_time) =day('${yesterday}') and L.inc_day='${yyyypmmdd}'

原创粉丝点击