Sqoop 导出:支持多字符分隔符

来源:互联网 发布:mysql 事务回滚 编辑:程序博客网 时间:2024/05/21 14:56
package org.apache.sqoop.mapreduce;

import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.*;

/**
 * An {@link OutputFormat} that writes plain text files.
 * Only writes the key. Does not write any delimiter/newline after the key.
 */
public class RawKeyTextOutputFormat<K, V> extends FileOutputFormat<K, V> {

  /**
   * RecordWriter to write to plain text files.
   */
  public static class RawKeyRecordWriter<K, V> extends RecordWriter<K, V> {

    private static final String UTF8 = "UTF-8";

    protected DataOutputStream out;

    public RawKeyRecordWriter(DataOutputStream out) {
      this.out = out;
    }

    /**
     * Write the object to the byte stream, handling Text as a special
     * case.
     * @param o the object to print
     * @throws IOException if the write throws, we pass it on
     */
    private void writeObject(Object o) throws IOException {
      if (o instanceof Text) {
        Text to = (Text) o;
        String s = to.toString();
        to.set(s.replaceAll("\001", "\\|\\|"));
        out.write(to.getBytes(), 0, to.getLength());
      } else {
        out.write(o.toString().getBytes(UTF8));
      }
    }

    public synchronized void write(K key, V value) throws IOException {
      writeObject(key);
    }

    public synchronized void close(TaskAttemptContext context)
        throws IOException {
      out.close();
    }

  }

  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
      // create the named codec
      Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(context, GzipCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);

      ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
      ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new RawKeyRecordWriter<K, V>(ostream);
  }

}



package org.apache.sqoop.mapreduce;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;
import com.cloudera.sqoop.lib.SqoopRecord;
import com.cloudera.sqoop.mapreduce.AutoProgressMapper;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Converts an input record from a string representation to a parsed Sqoop
 * record and emits that DBWritable to the OutputFormat for writeback to the
 * database.
 */
public class TextExportMapper
    extends AutoProgressMapper<LongWritable, Text, SqoopRecord, NullWritable> {

  public static final Log LOG =
    LogFactory.getLog(TextExportMapper.class.getName());

  private SqoopRecord recordImpl;

  public TextExportMapper() {
  }

  protected void setup(Context context)
      throws IOException, InterruptedException {
    super.setup(context);

    Configuration conf = context.getConfiguration();

    // Instantiate a copy of the user's class to hold and parse the record.
    String recordClassName = conf.get(
        ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
    if (null == recordClassName) {
      throw new IOException("Export table class name ("
          + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY
          + ") is not set!");
    }

    try {
      Class cls = Class.forName(recordClassName, true,
          Thread.currentThread().getContextClassLoader());
      recordImpl = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }

    if (null == recordImpl) {
      throw new IOException("Could not instantiate object of type "
          + recordClassName);
    }
  }


  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    try {
    String s = val.toString().replaceAll("\\|\\|", "|");
        val.set(s);
      recordImpl.parse(val);
      context.write(recordImpl, NullWritable.get());
    } catch (Exception e) {
      // Something bad has happened
      LOG.error("");
      LOG.error("Exception raised during data export");
      LOG.error("");

      LOG.error("Exception: ", e);
      LOG.error("On input: " + val);

      InputSplit is = context.getInputSplit();
      if (is instanceof FileSplit) {
        LOG.error("On input file: " + ((FileSplit)is).getPath());
      } else if (is instanceof CombineFileSplit) {
        LOG.error("On input file: "
          + context.getConfiguration().get("map.input.file"));
      }
      LOG.error("At position " + key);

      LOG.error("");
      LOG.error("Currently processing split:");
      LOG.error(is);

      LOG.error("");
      LOG.error("This issue might not necessarily be caused by current input");
      LOG.error("due to the batching nature of export.");
      LOG.error("");

      throw new IOException("Can't export data, please check task tracker logs",
        e);
    }
  }
}



原创粉丝点击
热门问题 老师的惩罚 人脸识别 我在镇武司摸鱼那些年 重生之率土为王 我在大康的咸鱼生活 盘龙之生命进化 天生仙种 凡人之先天五行 春回大明朝 姑娘不必设防,我是瞎子 孩子不爱吃水果蔬菜怎么办 一岁宝宝不吃水果怎么办 一岁宝宝不爱吃水果怎么办 1岁多宝宝不吃水果怎么办 中学生不爱与家长交流怎么办 孩子一直37度多怎么办 小孩39度3算高烧怎么办 7岁发烧怎么办如何退烧 7岁反复发烧7天怎么办 一岁发烧39.8度怎么办 反复发烧39度4天怎么办 大人发烧到39度怎么办 小孩高烧39度多怎么办 孩子嗓子发炎发烧怎么办吃什么药 猫咪吃了点桃子怎么办 猫吃了牛油果怎么办 苹果手机死机开不了机怎么办 新生儿出生第一天没奶水怎么办 第一天断奶奶水一直流出怎么办 3岁宝宝不长个子怎么办 5个月宝宝不长个怎么办 孩子比同龄人矮很多怎么办 孕妇做春梦宫缩怎么办 减肥掉头发很厉害怎么办 孕妇吃了金枪鱼罐头怎么办 怀孕吃了烂水果怎么办 快递水果压坏了怎么办 谈对象被骗了钱怎么办 如果遇到半夜坏人敲门怎么办 小孩晚上不进房间睡觉怎么办 房子照不到阳光潮湿怎么办 4个月婴儿不拉屎怎么办 小孩六个月奶不够吃怎么办 到晚上就没奶水怎么办 20多天奶水不够怎么办 四个月奶不够吃怎么办 7个月宝宝不肯吃东西怎么办 6个月宝宝不肯吃东西怎么办 十一个月宝宝不肯吃东西怎么办 九个月宝宝不肯吃辅食怎么办 八个月宝宝不肯吃辅食怎么办