# Export the full `student` table to CSV: run the query through the Hive CLI
# and convert its tab-separated output to comma-separated, writing /tmp/student.csv.
# NOTE(review): GNU sed interprets \t as a tab, but BSD/macOS sed treats it as a
# literal 't' — confirm the target platform (tr '\t' ',' would be portable).
# NOTE(review): field values containing tabs or commas are not quoted/escaped,
# so the result is not RFC-4180-safe CSV.
hive -e "SELECT * from student" | sed 's/\t/,/g' > /tmp/student.csv
(copy code)
4. Hive-on-MapReduce parameter settings: enabling dynamic partitioning
-- Session settings for loading data with dynamic partitions on Hive-on-MR.
set hive.exec.dynamic.partition=true;                 -- allow dynamic partition inserts at all
set hive.exec.dynamic.partition.mode=nonstrict;       -- permit ALL partition columns to be dynamic (no static partition required)
set hive.exec.max.dynamic.partitions=500000;          -- job-wide cap on partitions created; default is far lower (1000)
set hive.exec.max.dynamic.partitions.pernode=100000;  -- per-mapper/reducer cap on partitions created
set mapreduce.reduce.memory.mb=4096;                  -- raise reducer container memory; many open partition writers increase memory pressure
(copy code)
5. Querying the Hive metastore (stored in MySQL) to regenerate CREATE TABLE DDL
-- Raise the GROUP_CONCAT result cap (default 1024 bytes) so wide tables'
-- column lists are not silently truncated mid-DDL.
set session group_concat_max_len = 20480;
-- Assemble a full CREATE TABLE statement per Hive table by stitching together
-- four fragments (body, table comment, partition clause, storage/compression),
-- separated by CHAR(10) newlines and terminated with ';'.
-- NOTE(review): this statement is truncated in this view — subqueries `b` and
-- `d` are referenced here but defined past the visible end of the file.
select concat_ws('',
a.create_body_str,
CHAR(10),
c.tbl_comment,
CHAR(10),
b.partition_str,
CHAR(10),
a.stored_format,
CHAR(10),
d.compress_str,
';') AS create_sql
FROM (
-- Subquery a: one row per table with the CREATE ... (col defs) body and the
-- STORED AS clause derived from the input format class name.
select t.TBL_ID,
t.TBL_NAME,
case when k.INPUT_FORMAT like '%.parquet%' then 'STORED AS PARQUET'
when k.INPUT_FORMAT like '%.SequenceFile%' then 'STORED AS SEQUENCEFILE'
when k.INPUT_FORMAT like '%.Text%' then ''
-- 'STORED AS NULL' is a deliberate sentinel: it makes unrecognized formats
-- fail visibly when the generated DDL is executed.
else 'STORED AS NULL'
end AS stored_format,
concat_ws('',
'CREATE',
CASE t.TBL_TYPE
WHEN 'EXTERNAL_TABLE' THEN ' EXTERNAL'
ELSE '' END,
-- ${schema} is a client-side substitution variable, resolved before execution.
' TABLE IF NOT EXISTS ${schema}.',
t.TBL_NAME,
'(',
CHAR(10),
-- Column list: "name type COMMENT 'text'", one per line, in declared order.
group_concat(concat_ws('',
g.COLUMN_NAME,
' ',
g.TYPE_NAME,
' COMMENT ',
'''',
-- Strip ';' from comments so they cannot terminate the generated DDL early.
-- NOTE(review): the outer REPLACE of '; ' is dead code — the inner call has
-- already removed every ';', so the two-character pattern can never match.
REPLACE(REPLACE(g.COMMENT,';',' '),'; ',' '),
'''',
CHAR(10)) ORDER BY g.INTEGER_IDX separator ','),
')'
) AS create_body_str
-- NOTE(review): implicit comma joins; ANSI INNER JOIN ... ON would be clearer.
from hive.TBLS t,hive.SDS k,hive.COLUMNS_V2 g,hive.DBS s
where t.SD_ID = k.SD_ID
and k.CD_ID = g.CD_ID
and s.DB_ID = t.DB_ID
and k.INPUT_FORMAT not like '%.kudu%'
and s.NAME = 'stg' -- restrict to this database
-- NOTE(review): selecting TBL_NAME/stored_format while grouping only by TBL_ID
-- relies on MySQL's relaxed GROUP BY (ONLY_FULL_GROUP_BY off); TBL_ID is the
-- primary key here, so values are functionally determined, but it is non-ANSI.
group by t.TBL_ID
-- limit 100
) a
-- Subquery c: optional table-level COMMENT clause from TABLE_PARAMS.
left join (select t.TBL_ID,
concat_ws('','COMMENT ','''',t.param_value,'''') AS tbl_comment
from hive.TABLE_PARAMS t
where t.param_key = 'comment'
group by t.TBL_ID
) c
on c.tbl_id = a.tbl_id
-- Subquery (aliased below): PARTITIONED BY (...) clause built from partition
-- key metadata, keys in declared order. Definition continues past this chunk.
left join (select t.TBL_ID,concat_ws('','PARTITIONED BY (',group_concat(concat_ws('',t.pkey_name,' ',t.pkey_type,' ','COMMENT ','''',t.pkey_comment,'''')
order by t.integer_idx separator ','),')') AS partition_str