Quartz + Oozie + Sqoop batch data import


Goal: use Quartz to schedule an Oozie workflow, and have Oozie call Sqoop to import MySQL data into Hive.
The key piece is workflow.xml: it first creates the Hive table, then imports the MySQL table into HDFS, which completes the import. Note that the HDFS directory written by the Sqoop import must match the table's LOCATION. The Quartz utility, the Quartz job, the Oozie client, workflow.xml, and script.hql are listed below.
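Before wiring Quartz in, it can be easier to verify workflow.xml on its own by submitting it from the Oozie command line with a properties file. The sketch below is only an example: the values mirror the hosts, paths, and parameter names used by the Java code later in this post, and the column list is abbreviated; adjust everything to your cluster.

# job.properties (sketch; values assumed from the code below)
nameNode=hdfs://192.168.1.235:9000
jobTracker=rm1
queueName=default
oozie.libpath=hdfs://192.168.1.235:9000/share/lib_oozie/share/lib/hive,hdfs://192.168.1.235:9000/share/lib_oozie/share/lib/sqoop
oozie.wf.application.path=hdfs://192.168.1.235:9000/user/workflows/oozie-table04/workflow.xml
jdbcUrl=jdbc:mysql://192.168.1.233:3306/dpimodel?useUnicode=true&characterEncoding=UTF-8
jdbcUser=root
jdbcPassword=dpibigdata
table=tbl_mode
htable=h_tbl_mode
columns=id string,mode_name string,technology_name string
env=TEST

# submit and check status:
#   oozie job -oozie http://192.168.1.237:11000/oozie -config job.properties -run
#   oozie job -oozie http://192.168.1.237:11000/oozie -info <job-id>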
QuartzUtil.java
package com.dpi.data_model.core.hadoopUtil;

import com.dpi.data_model.core.entity.mybeans.SchedulingParameters;
import com.dpi.data_model.core.entity.mybeans.TaskInfo;
import com.dpi.data_model.core.job.OozieJob;
import org.quartz.*;
import org.quartz.impl.StdSchedulerFactory;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Created by xutao on 2017/8/2.
 */
public class QuartzUtil {

    private static Scheduler scheduler;

    static {
        try {
            scheduler = StdSchedulerFactory.getDefaultScheduler();
        } catch (SchedulerException e) {
            e.printStackTrace();
        }
    }

    public static void quartzTest(SchedulingParameters parameters) throws InterruptedException {
        try {
            // start the scheduler
            scheduler.start();
            // operate == 1 means update an existing task, 2 means add a new one
            if (parameters.getOperate() == 1) {
                scheduler.deleteJob(new JobKey(parameters.getTaskName(), "oozieGroup"));
            }
            // create the JobDetail describing the task
            JobDetail myjob = JobBuilder.newJob(OozieJob.class)
                    .withIdentity(parameters.getTaskName(), "oozieGroup")
                    .build();
            myjob.getJobDataMap().put("parameters", parameters);
            SimpleScheduleBuilder scheduleBuilder = SimpleScheduleBuilder.simpleSchedule()
                    .withIntervalInHours(24)
                    .withRepeatCount(1000);
            // define the trigger
            Trigger trigger = TriggerBuilder.newTrigger()
                    .withIdentity("simpleTrigger", "simpleTriggerGroup")
                    .withSchedule(scheduleBuilder)
                    .startAt(parameters.getStartTime())
                    .endAt(parameters.getEndTime())
                    .build();
            // register the job and trigger with the scheduler
            scheduler.scheduleJob(myjob, trigger);
            // NOTE: the scheduler is shut down 30 seconds later, so only triggers
            // that fire within that window will actually run
            Thread.sleep(1000 * 30);
            scheduler.shutdown();
        } catch (SchedulerException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws InterruptedException {
        SchedulingParameters parameters = new SchedulingParameters();
        parameters.setTaskName("abc");
        parameters.setStartTime(new Date());
        parameters.setEndTime(new Date());
        parameters.setTaskName("123");
        parameters.setDb_user("root");
        parameters.setPwd("dpibigdata");
        parameters.setUrl("jdbc:mysql://192.168.1.233:3306/dpimodel?useUnicode=true&characterEncoding=UTF-8");
        List<TaskInfo> taskInfoList = new ArrayList<TaskInfo>();
        TaskInfo taskInfo = new TaskInfo();
        taskInfo.setColumns("id string,mode_name string,technology_name string,mode_describe string,mode_arrangement string,status string,c_user_id string,c_user string,c_time string,u_time string");
        taskInfo.setWare_name("tbl_mode");
        taskInfo.setHive_tb_name("h_tbl_mode");
        taskInfoList.add(taskInfo);
        parameters.setTaskInfoList(taskInfoList);
        quartzTest(parameters);
        System.out.println("==================");
    }
}
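The trigger above repeats every 24 hours starting from startTime. If the import should instead run at a fixed time of day, Quartz's CronScheduleBuilder is a common alternative. A minimal sketch (the 02:00 schedule is only an example, and the scheduler would have to stay running rather than be shut down after 30 seconds as in quartzTest):

// Sketch: a cron-based trigger that fires every day at 02:00.
Trigger cronTrigger = TriggerBuilder.newTrigger()
        .withIdentity("dailyTrigger", "simpleTriggerGroup")
        .withSchedule(CronScheduleBuilder.cronSchedule("0 0 2 * * ?"))
        .startAt(parameters.getStartTime())
        .endAt(parameters.getEndTime())
        .build();
scheduler.scheduleJob(myjob, cronTrigger);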
OozieJob.java
package com.dpi.data_model.core.job;

import com.dpi.data_model.core.entity.mybeans.SchedulingParameters;
import com.dpi.data_model.core.hadoopUtil.WorkflowClient;
import org.apache.oozie.client.OozieClientException;
import org.quartz.Job;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;

/**
 * Created by xutao on 2017/8/2.
 */
public class OozieJob implements Job {

    @Override
    public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
        // entry point invoked by Quartz on each trigger fire
        WorkflowClient client = new WorkflowClient();
        String jobId = null;
        try {
            JobDataMap dataMap = jobExecutionContext.getJobDetail().getJobDataMap();
            SchedulingParameters parameters = (SchedulingParameters) dataMap.get("parameters");
            // for each table: 1. run the data cleanup (table creation) task  2. run the data import task
            for (int i = 0; i < parameters.getTaskInfoList().size(); i++) {
                jobId = client.startHISJob(parameters, parameters.getTaskInfoList().get(i));
                client.jobWait(jobId, client);
            }
        } catch (OozieClientException e) {
            e.printStackTrace();
        }
    }
}
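jobWait() blocks until the workflow leaves the RUNNING state, but the loop above ignores the final status, so a failed import still looks like a successful Quartz execution. If that matters, one option is to check the terminal status after waiting and rethrow. A sketch of the loop body (getJobStatus already exists on WorkflowClient below):

// Sketch: surface an Oozie failure as a Quartz failure instead of swallowing it.
jobId = client.startHISJob(parameters, parameters.getTaskInfoList().get(i));
client.jobWait(jobId, client);
org.apache.oozie.client.WorkflowJob.Status status = client.getJobStatus(jobId);
if (status != org.apache.oozie.client.WorkflowJob.Status.SUCCEEDED) {
    throw new JobExecutionException("Oozie workflow " + jobId + " finished with status " + status);
}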
WorkflowClient.java
package com.dpi.data_model.core.hadoopUtil;

import com.dpi.data_model.core.entity.mybeans.SchedulingParameters;
import com.dpi.data_model.core.entity.mybeans.TaskInfo;
import com.dpi.oozie.etl.JDBCUtil;
import org.apache.log4j.Logger;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowJob;
import org.apache.oozie.client.WorkflowJob.Status;

import java.util.Properties;

public class WorkflowClient {

    private static final Logger logger = Logger.getLogger(WorkflowClient.class);

    // oozie, hadoop
    private static String USER_NAME = "hadoop";
    private static String OOZIE_URL = "http://192.168.1.237:11000/oozie/";
    private static String JOB_TRACKER = "rm1";
    private static String NAMENAME = "hdfs://192.168.1.235:9000/";
    private static String QUEUENAME = "default";
    private static String OOZIE_LIBPATH = "hdfs://192.168.1.235:9000/share/lib_oozie/share/lib/hive,hdfs://192.168.1.235:9000/share/lib_oozie/share/lib/sqoop";
    private static String jdbcUrl = "jdbc:mysql://192.168.1.235:3306/dbview?useUnicode=true&characterEncoding=UTF-8";
    private static String jdbcUser = "hive";
    private static String jdbcPassword = "hive";
    private static String DRIVER = "com.mysql.jdbc.Driver";

    private static Properties conf = null;
    private static OozieClient wc = null;
    private static JDBCUtil jdbc = null;
    private static WorkflowClient client = null;
    private static String ENV = "TEST";

    public WorkflowClient() {
        wc = new OozieClient(OOZIE_URL);
        conf = wc.createConfiguration();
        conf.setProperty(OozieClient.USER_NAME, USER_NAME);
        // setting workflow parameters
        conf.setProperty(OozieClient.CHANGE_VALUE_CONCURRENCY, "2");
        conf.setProperty("nameNode", NAMENAME);
        conf.setProperty("jobTracker", JOB_TRACKER);
        conf.setProperty("queueName", QUEUENAME);
        conf.setProperty("oozie.libpath", OOZIE_LIBPATH);
        conf.setProperty("env", ENV);
    }

    public void jobWait(String jobId, WorkflowClient client) {
        Status status = null;
        try {
            status = client.getJobStatus(jobId);
            if (status == Status.RUNNING) {
                // poll every 30 seconds until the workflow reaches a terminal state
                while (true) {
                    try {
                        Thread.sleep(30 * 1000);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                    status = client.getJobStatus(jobId);
                    if (status == Status.SUCCEEDED
                            || status == Status.FAILED
                            || status == Status.KILLED) {
                        break;
                    }
                }
                logger.info("Workflow job running.");
            } else {
                logger.info("Problem starting Workflow job.");
            }
        } catch (OozieClientException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    public String startHISJob(SchedulingParameters parameters, TaskInfo taskInfo) throws OozieClientException {
        conf.setProperty(OozieClient.APP_PATH, "hdfs://192.168.1.235:9000/user/workflows/oozie-table04/workflow.xml");
        conf.setProperty("datareportRoot", "user/workflows");
        conf.setProperty("jdbcUrl", parameters.getUrl());
        conf.setProperty("jdbcUser", parameters.getDb_user());
        conf.setProperty("jdbcPassword", parameters.getPwd());
        conf.setProperty("table", taskInfo.getWare_name());
        conf.setProperty("htable", taskInfo.getHive_tb_name());
//      conf.setProperty("basis_one", taskInfo.getBasis_one());
//      conf.setProperty("basis_two", taskInfo.getBasis_two());
//      conf.setProperty("basis_three", taskInfo.getBasis_three());
//      conf.setProperty("exportPath", "/hive/warehouse/dpimodel/thm/" + taskInfo.getWare_name());
        conf.setProperty("columns", taskInfo.getColumns());
        return wc.run(conf);
    }

    // query the workflow job status
    public Status getJobStatus(String jobID) throws OozieClientException {
        WorkflowJob job = wc.getJobInfo(jobID);
        return job.getStatus();
    }

    public static void main(String[] args) throws OozieClientException, InterruptedException {
        // Create client
        client = new WorkflowClient();
//      String jobId = client.startHISJob();
        Thread.sleep(30 * 1000);
//      client.jobWait(jobId);
    }
}
workflow.xml
<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.2" name="oozie-table04">
    <start to="statHive"/>

    <action name="statHive">
        <hive xmlns="uri:oozie:hive-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <job-xml>${nameNode}/user/oozie/conf/hive-site.xml</job-xml>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <script>script.hql</script>
            <param>htable=${htable}</param>
            <param>columns=${columns}</param>
        </hive>
        <ok to="exportSqoop_THM_LIPS_01"/>
        <error to="fail"/>
    </action>

    <action name="exportSqoop_THM_LIPS_01">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <command>import --connect ${jdbcUrl} --username ${jdbcUser} --password ${jdbcPassword} --table ${table} --append --target-dir /hive/warehouse/zcyj_db.db/thm/tmp_thm_13/${htable} -m 1</command>
        </sqoop>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <kill name="fail">
        <message>xutaotest workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
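The --target-dir in the Sqoop command above is hard-coded to the same directory as the LOCATION in script.hql; this is the "paths must match" point from the introduction. To keep the two from drifting apart, the directory could be factored into a single workflow parameter. In the sketch below, targetDir is a new, assumed parameter that would also need a conf.setProperty("targetDir", ...) call in WorkflowClient, and script.hql would then use LOCATION '${targetDir}/${htable}':

<!-- Sketch: pass one shared directory to both actions instead of repeating the literal path. -->
<param>targetDir=${targetDir}</param>   <!-- added inside the <hive> action -->
<command>import --connect ${jdbcUrl} --username ${jdbcUser} --password ${jdbcPassword} --table ${table} --append --target-dir ${targetDir}/${htable} -m 1</command>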
script.hql
use default;
drop table IF EXISTS ${htable};
create external table ${htable} (${columns})
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\`'
LOCATION '/hive/warehouse/zcyj_db.db/thm/tmp_thm_13/${htable}';
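One thing to watch: the table above declares FIELDS TERMINATED BY '\`', but the Sqoop command in workflow.xml does not set --fields-terminated-by, so Sqoop writes its default comma-separated text files and the imported rows will not split into columns the way this table expects. Either pass --fields-terminated-by to the Sqoop import or align the DDL with Sqoop's default. A minimal sketch of the latter (assuming the source columns contain no embedded commas):

-- Sketch: match Sqoop's default comma delimiter instead of the backtick.
use default;
drop table IF EXISTS ${htable};
create external table ${htable} (${columns})
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/hive/warehouse/zcyj_db.db/thm/tmp_thm_13/${htable}';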
 