quartz+oozie+sqoop批量导数
来源:互联网 发布:日本人用的软件 编辑:程序博客网 时间:2024/06/06 00:20
完成工作 通过quartz任务调度oozie,oozie调sqoop完成mysql数据到hive数据的导入
重点是workflow.xml:先创建表,然后再把mysql数据导入hdfs,即完成导入,注意两处路径要一致
package com.dpi.data_model.core.hadoopUtil;

import com.dpi.data_model.core.entity.mybeans.SchedulingParameters;
import com.dpi.data_model.core.entity.mybeans.TaskInfo;
import com.dpi.data_model.core.job.OozieJob;
import com.dpi.data_model.core.util.DateTimeUtil;
import org.apache.commons.httpclient.util.DateUtil;
import org.quartz.*;
import org.quartz.impl.StdSchedulerFactory;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Registers Quartz jobs that drive Oozie workflows (which in turn run the
 * Sqoop/Hive import defined in workflow.xml / script.hql).
 *
 * Created by xutao on 2017/8/2.
 */
public class QuartzUtil {

    private static Scheduler scheduler;

    static {
        try {
            scheduler = StdSchedulerFactory.getDefaultScheduler();
        } catch (SchedulerException e) {
            // Fail fast: the original swallowed this, leaving a null scheduler
            // that would only surface later as an NPE in quartzTest().
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Registers (or, when operate == 1, re-registers) a daily-repeating job
     * that executes {@link OozieJob} with the given parameters.
     *
     * @param parameters task name, schedule window, DB credentials and the
     *                   list of tables to import; operate == 1 means "modify
     *                   an existing task" (delete the old job first)
     * @throws InterruptedException if the 30-second post-schedule wait is interrupted
     */
    public static void quartzTest(SchedulingParameters parameters) throws InterruptedException {
        try {
            scheduler.start();

            // operate == 1 means "modify": drop the previously registered job
            // so it can be re-added below with the new parameters.
            if (parameters.getOperate() == 1) {
                scheduler.deleteJob(new JobKey(parameters.getTaskName(), "oozieGroup"));
            }

            JobDetail job = JobBuilder.newJob(OozieJob.class)
                    .withIdentity(parameters.getTaskName(), "oozieGroup")
                    .build();
            job.getJobDataMap().put("parameters", parameters);

            // Fire every 24 hours, up to 1000 repeats (was a raw ScheduleBuilder).
            ScheduleBuilder<SimpleTrigger> scheduleBuilder = SimpleScheduleBuilder.simpleSchedule()
                    .withIntervalInHours(24)
                    .withRepeatCount(1000);

            // BUG FIX: key the trigger by task name. The original hard-coded
            // "simpleTrigger", so scheduling a second task collided with the
            // first task's trigger key.
            Trigger trigger = TriggerBuilder.newTrigger()
                    .withIdentity(parameters.getTaskName() + "Trigger", "simpleTriggerGroup")
                    .withSchedule(scheduleBuilder)
                    .startAt(parameters.getStartTime())
                    .endAt(parameters.getEndTime())
                    .build();

            scheduler.scheduleJob(job, trigger);

            // NOTE(review): shutting down after 30s stops ALL scheduled jobs;
            // kept for parity with the original test flow — confirm intent.
            Thread.sleep(1000 * 30);
            scheduler.shutdown();
        } catch (SchedulerException e) {
            // Surface scheduling failures instead of silently printing them.
            throw new IllegalStateException(
                    "Failed to schedule Oozie task " + parameters.getTaskName(), e);
        }
    }

    /** Manual smoke test: schedules one mysql -> hive import for table tbl_mode. */
    public static void main(String[] args) throws InterruptedException {
        SchedulingParameters parameters = new SchedulingParameters();
        parameters.setTaskName("abc");
        parameters.setStartTime(new Date());
        parameters.setEndTime(new Date());
        parameters.setTaskName("123"); // NOTE(review): overrides "abc" above — kept from original, confirm which name is intended
        parameters.setDb_user("root");
        parameters.setPwd("dpibigdata");
        parameters.setUrl("jdbc:mysql://192.168.1.233:3306/dpimodel?useUnicode=true&characterEncoding=UTF-8");

        List<TaskInfo> taskInfoList = new ArrayList<TaskInfo>();
        TaskInfo taskInfo = new TaskInfo();
        taskInfo.setColumns("id string,mode_name string,technology_name string,mode_describe string,mode_arrangement string,status string,c_user_id string,c_user string,c_time string,u_time string");
        taskInfo.setWare_name("tbl_mode");
        taskInfo.setHive_tb_name("h_tbl_mode");
        taskInfoList.add(taskInfo);
        parameters.setTaskInfoList(taskInfoList);

        // quartzTest is static — no instance needed (original used new QuartzUtil()).
        QuartzUtil.quartzTest(parameters);
        System.out.println("==================");
    }
}
package com.dpi.data_model.core.job;

import com.dpi.data_model.core.entity.mybeans.SchedulingParameters;
import com.dpi.data_model.core.entity.mybeans.TaskInfo;
import com.dpi.data_model.core.hadoopUtil.WorkflowClient;
import org.apache.oozie.client.OozieClientException;
import org.quartz.Job;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;

/**
 * Quartz job that submits one Oozie workflow per {@link TaskInfo} in the
 * scheduling parameters and blocks until each workflow reaches a terminal state.
 *
 * Created by xutao on 2017/8/2.
 */
public class OozieJob implements Job {

    @Override
    public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
        WorkflowClient client = new WorkflowClient();
        try {
            JobDataMap dataMap = jobExecutionContext.getJobDetail().getJobDataMap();
            SchedulingParameters parameters = (SchedulingParameters) dataMap.get("parameters");
            // Run each table import sequentially: start the Oozie workflow,
            // then wait for it to finish before starting the next one.
            for (TaskInfo taskInfo : parameters.getTaskInfoList()) {
                String jobId = client.startHISJob(parameters, taskInfo);
                client.jobWait(jobId, client);
            }
        } catch (OozieClientException e) {
            // BUG FIX: propagate the failure so Quartz records the execution as
            // failed, instead of silently swallowing it via printStackTrace().
            throw new JobExecutionException("Oozie workflow submission failed", e);
        }
    }
}

// ---------------------------------------------------------------------------
// Second source file from the original paste: WorkflowClient.java
// ---------------------------------------------------------------------------

package com.dpi.data_model.core.hadoopUtil;

import com.dpi.data_model.core.entity.mybeans.SchedulingParameters;
import com.dpi.data_model.core.entity.mybeans.TaskInfo;
import com.dpi.oozie.etl.JDBCUtil;
import org.apache.log4j.Logger;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowJob;
import org.apache.oozie.client.WorkflowJob.Status;

import java.util.Properties;

/**
 * Thin wrapper around {@link OozieClient}: builds the shared workflow
 * configuration, submits the mysql -> hive import workflow
 * (oozie-table04/workflow.xml) and polls for completion.
 */
public class WorkflowClient {

    private static final Logger logger = Logger.getLogger(WorkflowClient.class);

    // Cluster endpoints (oozie, hadoop).
    // NOTE(review): consider moving these to external configuration.
    private static String USER_NAME = "hadoop";
    private static String OOZIE_URL = "http://192.168.1.237:11000/oozie/";
    private static String JOB_TRACKER = "rm1";
    private static String NAMENAME = "hdfs://192.168.1.235:9000/";
    private static String QUEUENAME = "default";
    private static String OOZIE_LIBPATH =
            "hdfs://192.168.1.235:9000/share/lib_oozie/share/lib/hive,hdfs://192.168.1.235:9000/share/lib_oozie/share/lib/sqoop";
    private static String jdbcUrl = "jdbc:mysql://192.168.1.235:3306/dbview?useUnicode=true&characterEncoding=UTF-8";
    private static String jdbcUser = "hive";
    private static String jdbcPassword = "hive";
    private static String DRIVER = "com.mysql.jdbc.Driver";

    // BUG FIX: conf and wc were static yet reassigned in every constructor
    // call, so a second WorkflowClient instance clobbered the first one's
    // state. They are per-instance now; the public interface is unchanged.
    private Properties conf = null;
    private OozieClient wc = null;

    private static JDBCUtil jdbc = null;
    private static WorkflowClient client = null;
    private static String ENV = "TEST";

    /** Creates the Oozie client and the base workflow configuration. */
    public WorkflowClient() {
        wc = new OozieClient(OOZIE_URL);
        conf = wc.createConfiguration();
        conf.setProperty(OozieClient.USER_NAME, USER_NAME);
        // setting workflow parameters
        conf.setProperty(OozieClient.CHANGE_VALUE_CONCURRENCY, "2");
        conf.setProperty("nameNode", NAMENAME);
        conf.setProperty("jobTracker", JOB_TRACKER);
        conf.setProperty("queueName", QUEUENAME);
        conf.setProperty("oozie.libpath", OOZIE_LIBPATH);
        conf.setProperty("env", ENV);
    }

    /**
     * Polls the given workflow job every 30 seconds until it reaches a
     * terminal state (SUCCEEDED / FAILED / KILLED). Returns immediately with a
     * log message if the job is not RUNNING when first checked.
     *
     * @param jobId  Oozie workflow job id returned by {@link #startHISJob}
     * @param client client used for status lookups (kept for signature
     *               compatibility with existing callers)
     */
    public void jobWait(String jobId, WorkflowClient client) {
        try {
            Status status = client.getJobStatus(jobId);
            if (status == Status.RUNNING) {
                while (true) {
                    try {
                        Thread.sleep(30 * 1000);
                    } catch (InterruptedException e) {
                        // BUG FIX: restore the interrupt flag and stop waiting
                        // (the original swallowed the interrupt and kept looping).
                        Thread.currentThread().interrupt();
                        return;
                    }
                    status = client.getJobStatus(jobId);
                    if (status == Status.SUCCEEDED || status == Status.FAILED
                            || status == Status.KILLED) {
                        break;
                    }
                }
                logger.info("Workflow job running .");
            } else {
                logger.info("Problem starting Workflow job .");
            }
        } catch (OozieClientException e) {
            // Log with context instead of printStackTrace().
            logger.error("Failed to poll Oozie job " + jobId, e);
        }
    }

    /**
     * Submits the oozie-table04 workflow (Hive table recreation + Sqoop
     * import) for one table and returns the Oozie job id.
     *
     * @param parameters connection info (jdbc url / user / password)
     * @param taskInfo   source table, target Hive table and column spec
     * @return the Oozie workflow job id
     * @throws OozieClientException if the submission fails
     */
    public String startHISJob(SchedulingParameters parameters, TaskInfo taskInfo)
            throws OozieClientException {
        conf.setProperty(OozieClient.APP_PATH,
                "hdfs://192.168.1.235:9000/user/workflows/oozie-table04/workflow.xml");
        conf.setProperty("datareportRoot", "user/workflows");
        conf.setProperty("jdbcUrl", parameters.getUrl());
        conf.setProperty("jdbcUser", parameters.getDb_user());
        conf.setProperty("jdbcPassword", parameters.getPwd());
        conf.setProperty("table", taskInfo.getWare_name());
        conf.setProperty("htable", taskInfo.getHive_tb_name());
//        conf.setProperty("basis_one",taskInfo.getBasis_one());
//        conf.setProperty("basis_two",taskInfo.getBasis_two());
//        conf.setProperty("basis_three",taskInfo.getBasis_three());
//        conf.setProperty("exportPath","/hive/warehouse/dpimodel/thm/"+taskInfo.getWare_name());
        conf.setProperty("columns", taskInfo.getColumns());
        return wc.run(conf);
    }

    /** Returns the current status of the given Oozie workflow job. */
    public Status getJobStatus(String jobID) throws OozieClientException {
        WorkflowJob job = wc.getJobInfo(jobID);
        return job.getStatus();
    }

    public static void main(String[] args) throws OozieClientException, InterruptedException {
        // Create client
        client = new WorkflowClient();
//        String jobId = client.startHISJob();
        Thread.sleep(30 * 1000);
//        client.jobWait(jobId);
    }
}
workflow.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Oozie workflow "oozie-table04":
     step 1 (statHive)  — run script.hql to (re)create the external Hive table;
     step 2 (sqoop)     — append the MySQL table into that table's HDFS directory.
     The Sqoop target-dir below must match the LOCATION in script.hql. -->
<workflow-app xmlns="uri:oozie:workflow:0.2" name="oozie-table04">
    <start to="statHive"/>
    <!-- Step 1: Hive action — recreate the external target table. -->
    <action name="statHive">
        <hive xmlns="uri:oozie:hive-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- hive-site.xml must be readable on HDFS for the Hive action. -->
            <job-xml>${nameNode}/user/oozie/conf/hive-site.xml</job-xml>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <script>script.hql</script>
            <!-- htable / columns are injected by WorkflowClient.startHISJob(). -->
            <param>htable=${htable}</param>
            <param>columns=${columns}</param>
        </hive>
        <ok to="exportSqoop_THM_LIPS_01"/>
        <error to="fail"/>
    </action>
    <!-- Step 2: Sqoop action — import the MySQL table into the Hive table's
         backing directory (single mapper, append mode). -->
    <action name="exportSqoop_THM_LIPS_01">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- target-dir must equal the LOCATION used in script.hql. -->
            <command>import --connect ${jdbcUrl} --username ${jdbcUser} --password ${jdbcPassword} --table ${table} --append --target-dir /hive/warehouse/zcyj_db.db/thm/tmp_thm_13/${htable} -m 1</command>
        </sqoop>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>xutaotest workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
script.hql
-- Recreates the external Hive table ${htable} with the column spec ${columns}
-- (both substituted by the Oozie Hive action's <param> values).
-- Dropping an EXTERNAL table leaves the data directory intact; the Sqoop
-- action then appends fresh rows into that same directory.
use default;
drop table IF EXISTS ${htable};
-- LOCATION must match the Sqoop action's --target-dir in workflow.xml.
-- Fields are delimited with a backtick (`).
create external table ${htable}(${columns})ROW FORMAT DELIMITED FIELDS TERMINATED BY '\`'LOCATION '/hive/warehouse/zcyj_db.db/thm/tmp_thm_13/${htable}';
阅读全文
0 0
- quartz+oozie+sqoop批量导数
- Oozie 调用sqoop导数据出现NoClassDefFoundError问题
- Oozie Sqoop Action Extension
- sqoop导数据
- sqoop导数据总结
- Sqoop安装及操作以及oozie&sqoop
- hue下oozie调度sqoop
- sqoop+hive+shell+oozie 示例
- Oozie调度sqoop导入hive
- Sqoop部署和导数据
- Hive / pig / Sqoop/ Oozie 学习资料
- oozie sqoop 定位到出错的行
- Oozie中Sqoop的配置及应用
- CDH中oozie 定时调度sqoop job
- Sqoop导数据出现的问题
- sqoop导数据到hive失败
- sqoop hive与mysql互导数据
- 【hadoop Sqoop】Sqoop从mysql导数据到hive
- C语言学习
- Unity官方案例精讲笔记-unity脚本事件的执行顺序
- C++中for循环语句简析
- Zabbix Agent active主动模式监控日志(多关键字)
- 在web中判断是否有app,没有则跳转,包括读取a标签参数,获取后台数据
- quartz+oozie+sqoop批量导数
- JSONObject使用方法
- 【整理】I/O复用模型中的 select、poll、epoll
- 采药
- mapdb实现分析
- 闲聊软件测试自动化(3): 我们期望什么样的测试自动化?
- maven出现 invalid LOC header (bad signature)的解决办法
- 矩阵快速幂与快速幂模板 整理
- Java的反射机制