MySQL数据库导入BIN格式定长文件

来源:互联网 发布:上证综合指数季度数据 编辑:程序博客网 时间:2024/04/19 13:07

环境:MySQL5.7 widows10 jdk1.8

工具:Navicat Premium 11.1.14.0  eclipse mar2 


实现思路:

因为没有分隔符对数据进行分割,所以不能使用 load data infile 命令导入。故此需要自己写程序将数据解析导入。

将BIN格式的定长文件按照行读取,根据字段长度解析后,导入数据库。

步骤:

1.确定数据文件中的行长度,确定每一个字段的长度

2.将数据库表按照需求建好

3.创建bean对应数据库的每一个字段,私有变量的处理setter/getter 和 toString。

public class InfoBean {private String STRU_ID;public String getSTRU_ID() {return STRU_ID;}public void setSTRU_ID(String sTRU_ID) {STRU_ID = sTRU_ID;}}
4.创建数据库的操作类
import java.lang.reflect.Field;import java.sql.Connection;import java.sql.PreparedStatement;import java.sql.SQLException;public class OperationDB {private Connection con = null;public void addRcorder(InfoBean infoBean) throws SQLException, Exception {if (con == null) {con = BeyondbConnection.getConnection();}String sql = "insert into bom_cmpstruinfo (STRU_ID,STRU_FNAME,STRU_SNAME,OLDSYS_STRUID,FLICENCE_ID,STRU_ADDR,ZIPCODE,PHONE,STRU_SIGN,STRU_LV,ADMIN_LV,SUP_STRU,SETUP_TIME,LST_ALT_TYPE,LST_ALT_TIME,STRU_STATE,BLICENCE_ID,REVOKE_TIME,DIST_SIGN,STRU_GRADE,CODECERT_ID,TOWN_FLAG,BUSI_AREA,BUSI_SITE_USE,FEXCHANGE_FLAG,MAN_GRADE,CHARGE_PROP,PROFESSION_LEVEL,NODE_TYPE,ECON_AREA,IS_HUN_CITY,IS_HUN_COUNTY,COUNTRY,VILLAGE,NP_OPER_TYPE,MANAGE_STRU_ID,SPECIALTY_PROP,FINANCE_STRUID,PBANK_STRUID,ADMIN_CODE,ADMIN_VALUE,LST_OPTIMIZE_TYPE,OPTIMIZE_PLAN,BUSI_TYPE,MEMO,IS_NEW_BUSI_NODE,CREATE_DATE,STRU_FOREIGN_FNAME,STRU_FOREIGN_SNAME,STRU_CHN_FNAME,STRU_CHN_SNAME,BACK1,BACK2,BACK3,BACK4,BACK5,BACK6,BACK7,BACK8,BACK9,BACK10)"+ " values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";PreparedStatement pstmt = con.prepareStatement(sql);String[] arr = new String[]{"STRU_ID","STRU_FNAME","STRU_SNAME","OLDSYS_STRUID","FLICENCE_ID","STRU_ADDR","ZIPCODE","PHONE","STRU_SIGN","STRU_LV","ADMIN_LV","SUP_STRU","SETUP_TIME","LST_ALT_TYPE","LST_ALT_TIME","STRU_STATE","BLICENCE_ID","REVOKE_TIME","DIST_SIGN","STRU_GRADE","CODECERT_ID","TOWN_FLAG","BUSI_AREA","BUSI_SITE_USE","FEXCHANGE_FLAG","MAN_GRADE","CHARGE_PROP","PROFESSION_LEVEL","NODE_TYPE","ECON_AREA","IS_HUN_CITY","IS_HUN_COUNTY","COUNTRY","VILLAGE","NP_OPER_TYPE","MANAGE_STRU_ID","SPECIALTY_PROP","FINANCE_STRUID","PBANK_STRUID","ADMIN_CODE","ADMIN_VALUE","LST_OPTIMIZE_TYPE","OPTIMIZE_PLAN","BUSI_TYPE","MEMO","IS_NEW_BUSI_NODE","CREATE_DATE","STRU_FOREIGN_FNAME","STRU_FOREIGN_SNAME","STRU_CHN_FNAME","STRU_CHN_SNAME","BACK1","BACK2","BACK3","BACK4","BACK5","BACK6","BACK7","BACK8","BACK9","BACK10"};for (int i = 0; i < arr.length; i++) {Field field = infoBean.getClass().getDeclaredField(arr[i]);field.setAccessible(true);Object object = field.get(infoBean);pstmt.setObject(i+1,object);}//返回更新数据的条数int executeUpdate = pstmt.executeUpdate();System.out.println("本次更新数据:"+executeUpdate+"条。");
//下面这种是笨方法,一个个的赋值。已废弃。//pstmt.setString(1,infoBean.getSTRU_ID());//pstmt.setString(2,infoBean.getSTRU_CHN_FNAME());//pstmt.setString(3,infoBean.getSTRU_SNAME());//pstmt.setString(4,infoBean.getOLDSYS_STRUID());//pstmt.setString(5,infoBean.getFLICENCE_ID());//pstmt.setString(6,infoBean.getSTRU_ADDR());//pstmt.setString(7,infoBean.getZIPCODE());//pstmt.setString(8,infoBean.getPHONE());//pstmt.setString(9,infoBean.getSTRU_SIGN());//pstmt.setString(1,infoBean.getSTRU_LV());//pstmt.setString(1,infoBean.getADMIN_LV());//pstmt.setString(1,infoBean.getSUP_STRU());//pstmt.setString(1,infoBean.getSETUP_TIME());//pstmt.setString(1,infoBean.getLST_ALT_TYPE();//pstmt.setString(1,infoBean.getLST_ALT_TIME();//pstmt.setString(1,infoBean.getSTRU_STATE();//pstmt.setString(1,infoBean.getBLICENCE_ID();//pstmt.setString(1,infoBean.getREVOKE_TIME();//pstmt.setString(1,infoBean.getDIST_SIGN();//pstmt.setString(1,infoBean.getSTRU_GRADE();//pstmt.setString(1,infoBean.getCODECERT_ID();//pstmt.setString(1,infoBean.getTOWN_FLAG();//pstmt.setString(1,infoBean.getBUSI_AREA();//pstmt.setString(1,infoBean.getBUSI_SITE_USE();//pstmt.setString(1,infoBean.getFEXCHANGE_FLAG();//pstmt.setString(1,infoBean.getMAN_GRADE();//pstmt.setString(1,infoBean.getCHARGE_PROP();//pstmt.setString(1,infoBean.getPROFESSION_LEVEL();//pstmt.setString(1,infoBean.getNODE_TYPE();//pstmt.setString(1,infoBean.getECON_AREA();//pstmt.setString(1,infoBean.getIS_HUN_CITY();//pstmt.setString(1,infoBean.getIS_HUN_COUNTY();//pstmt.setString(1,infoBean.getCOUNTRY();//pstmt.setString(1,infoBean.getVILLAGE();//pstmt.setString(1,infoBean.getNP_OPER_TYPE();//pstmt.setString(1,infoBean.getMANAGE_STRU_ID();//pstmt.setString(1,infoBean.getSPECIALTY_PROP();//pstmt.setString(1,infoBean.getFINANCE_STRUID();//pstmt.setString(1,infoBean.getPBANK_STRUID();//pstmt.setString(1,infoBean.getADMIN_CODE();//pstmt.setString(1,infoBean.getADMIN_VALUE();//pstmt.setString(1,infoBean.getLST_OPTIMIZE_TYPE();//pstmt.setString(1,infoBean.getOPTIMIZE_PLAN();//pstmt.setString(1,infoBean.getBUSI_TYPE();//pstmt.setString(1,infoBean.getMEMO();//pstmt.setString(1,infoBean.getIS_NEW_BUSI_NODE();//pstmt.setString(1,infoBean.getCREATE_DATE();//pstmt.setString(1,infoBean.getSTRU_FOREIGN_FNAME();//pstmt.setString(1,infoBean.getSTRU_FOREIGN_SNAME();//pstmt.setString(1,infoBean.getSTRU_CHN_FNAME();//pstmt.setString(1,infoBean.getSTRU_CHN_SNAME();//pstmt.setString(1,infoBean.getBACK1();//pstmt.setString(1,infoBean.getBACK2();//pstmt.setString(1,infoBean.getBACK3();//pstmt.setString(1,infoBean.getBACK4();//pstmt.setString(1,infoBean.getBACK5();//pstmt.setString(1,infoBean.getBACK6();//pstmt.setString(1,infoBean.getBACK7();//pstmt.setString(1,infoBean.getBACK8();//pstmt.setString(1,infoBean.getBACK9();//pstmt.setString(1,infoBean.getBACK10();}}
5.jdbc链接数据库的类,此时使用的是最简单的JDBC,(并未使用连接池.)

import java.sql.Connection;import java.sql.DriverManager;import java.sql.SQLException;import java.util.logging.Level;import java.util.logging.Logger;public class BeyondbConnection {public static Connection getConnection() {Connection con = null;String driver = "com.mysql.jdbc.Driver";String url = "jdbc:mysql://localhost:3306/数据库名称";String user = "root";String password = "root";try {con = DriverManager.getConnection(url, user, password);} catch (SQLException ex) {Logger.getLogger(BeyondbConnection.class.getName()).log(Level.SEVERE, null, ex);}return con;}}

6.编写主方法,读取文件并解析数据,将数据解析后的各字段插入到对应的字段中,之后通过数据库连接, 将数据导入到MySQL数据库中。

该方法使用反射通过字段名和各字段的指定长度,获取到对应的数据bean的属性,解决了表中太多字段造成代码冗长,重复率太高。

注:反射获取私有变量时需要加上

field.setAccessible(true);//解决掉不能设置私有变量的限制

而Field field = infoBean.getClass().getDeclaredField(arr[i]);  则是获取到该数据bean中所有属性(包括私有)

import java.io.File;import java.io.UnsupportedEncodingException;import java.lang.reflect.Field;import org.apache.commons.io.FileUtils;import org.apache.commons.io.LineIterator;import sun.reflect.misc.FieldUtil;public class test {public static void main(String[] args) throws Exception{InfoBean infoBean = new InfoBean();OperationDB operationDB = new OperationDB();//  String filePath = "D:\\data1.txt";//      ReadFile readFile=new ReadFile();  //      readFile.readTxtFile(filePath);//缺点:最大支持1G的文件,一次读取所有的数据加载到内存中,容易导致崩溃。//List<String> strings = FileUtils.readLines(new File("D://data1.txt"), "gbk");//        for (String string : strings) {//            System.out.println(string + string.length());//        }//优点:分段读取数据文件,没有最大文件限制,不会造成太大的压力。        LineIterator it = FileUtils.lineIterator(new File("D://data1.txt"), "GBK");//        LineIterator it = FileUtils.lineIterator(new File("D://BOM_CMPSTRUINFO0000000000.BIN"), "GBK");        try {            while (it.hasNext()) {                String lineTxt = it.nextLine();                lineTxt=new String(lineTxt.getBytes("GBK"), "ISO8859-1");                //优势方法,使用反射的方法获取和设置原来字段的值。                String[] arr = new String[]{"STRU_ID","STRU_FNAME","STRU_SNAME","OLDSYS_STRUID","FLICENCE_ID","STRU_ADDR","ZIPCODE","PHONE","STRU_SIGN","STRU_LV","ADMIN_LV","SUP_STRU","SETUP_TIME","LST_ALT_TYPE","LST_ALT_TIME","STRU_STATE","BLICENCE_ID","REVOKE_TIME","DIST_SIGN","STRU_GRADE","CODECERT_ID","TOWN_FLAG","BUSI_AREA","BUSI_SITE_USE","FEXCHANGE_FLAG","MAN_GRADE","CHARGE_PROP","PROFESSION_LEVEL","NODE_TYPE","ECON_AREA","IS_HUN_CITY","IS_HUN_COUNTY","COUNTRY","VILLAGE","NP_OPER_TYPE","MANAGE_STRU_ID","SPECIALTY_PROP","FINANCE_STRUID","PBANK_STRUID","ADMIN_CODE","ADMIN_VALUE","LST_OPTIMIZE_TYPE","OPTIMIZE_PLAN","BUSI_TYPE","MEMO","IS_NEW_BUSI_NODE","CREATE_DATE","STRU_FOREIGN_FNAME","STRU_FOREIGN_SNAME","STRU_CHN_FNAME","STRU_CHN_SNAME","BACK1","BACK2","BACK3","BACK4","BACK5","BACK6","BACK7","BACK8","BACK9","BACK10"};                int[] nums = new int[]{10, 80, 80, 15, 40, 120, 20, 40, 3, 3, 3, 10, 6, 3, 6, 3, 36, 6, 3, 3, 36, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 100, 40, 3, 10, 80, 14, 14, 6, 100, 3, 4, 3, 100, 1, 8, 80, 80, 80, 80, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100};                int sumNum=0;                for (int i = 0; i < arr.length; i++) {                //反射获取成员变量的私有字段                Field field = infoBean.getClass().getDeclaredField(arr[i]);                //解决掉不能设置私有变量的限制                field.setAccessible(true);                if(arr[i]=="BUSI_AREA"){        field.set(infoBean,Integer.parseInt(lineTxt.substring(sumNum,sumNum+nums[i]).trim()));        sumNum+=nums[i];                }else{        field.set(infoBean,translateStr(lineTxt.substring(sumNum,sumNum+nums[i]).trim()));        sumNum+=nums[i];                }}                                //繁琐方法,一个个计算截取字符串的长度。已废弃//                infoBean.setSTRU_ID(translateStr(lineTxt.substring(0, 10)));//infoBean.setSTRU_CHN_FNAME(translateStr(lineTxt.substring(10, 80)));//infoBean.setSTRU_SNAME(translateStr(lineTxt.substring(80, 170)));//infoBean.setOLDSYS_STRUID(translateStr(lineTxt.substring(170, 185)));//infoBean.setFLICENCE_ID(translateStr(lineTxt.substring(185, 225)));//infoBean.setSTRU_ADDR(translateStr(lineTxt.substring(225, 345)));//infoBean.setZIPCODE(translateStr(lineTxt.substring(345, 365)));//infoBean.setPHONE(translateStr(lineTxt.substring(365, 405)));//infoBean.setSTRU_SIGN(translateStr(lineTxt.substring(405, 408)));//infoBean.setSTRU_LV(translateStr(lineTxt.substring(408, 411)));//infoBean.setADMIN_LV(translateStr(lineTxt.substring(411, 414)));//infoBean.setSUP_STRU(translateStr(lineTxt.substring(414, 424)));//infoBean.setSETUP_TIME(translateStr(lineTxt.substring(424, 430)));//infoBean.setLST_ALT_TYPE(translateStr(lineTxt.substring(430, 433)));//infoBean.setLST_ALT_TIME(translateStr(lineTxt.substring(433, 439)));//infoBean.setSTRU_STATE(translateStr(lineTxt.substring(439, 442)));//infoBean.setBLICENCE_ID(translateStr(lineTxt.substring(442, 478)));//infoBean.setREVOKE_TIME(translateStr(lineTxt.substring(478, 484)));//infoBean.setDIST_SIGN(translateStr(lineTxt.substring(484, 487)));//infoBean.setSTRU_GRADE(translateStr(lineTxt.substring(487, 490)));//infoBean.setCODECERT_ID(translateStr(lineTxt.substring(490, 526)));//infoBean.setTOWN_FLAG(translateStr(lineTxt.substring(526, 529)));//infoBean.setBUSI_AREA(Integer.parseInt(translateStr(lineTxt.substring(529, 538))));//infoBean.setBUSI_SITE_USE(translateStr(lineTxt.substring(538, 541)));//infoBean.setFEXCHANGE_FLAG(translateStr(lineTxt.substring(541, 544)));//infoBean.setMAN_GRADE(translateStr(lineTxt.substring(544, 547)));//infoBean.setCHARGE_PROP(translateStr(lineTxt.substring(547, 550)));//infoBean.setPROFESSION_LEVEL(translateStr(lineTxt.substring(550, 553)));//infoBean.setNODE_TYPE(translateStr(lineTxt.substring(553, 556)));//infoBean.setECON_AREA(translateStr(lineTxt.substring(556, 559)));//infoBean.setIS_HUN_CITY(translateStr(lineTxt.substring(559, 562)));//infoBean.setIS_HUN_COUNTY(translateStr(lineTxt.substring(562, 565)));//infoBean.setCOUNTRY(translateStr(lineTxt.substring(565, 665)));//infoBean.setVILLAGE(translateStr(lineTxt.substring(665, 705)));//infoBean.setNP_OPER_TYPE(translateStr(lineTxt.substring(705, 708)));//infoBean.setMANAGE_STRU_ID(translateStr(lineTxt.substring(708, 718)));//infoBean.setSPECIALTY_PROP(translateStr(lineTxt.substring(718, 798)));//infoBean.setFINANCE_STRUID(translateStr(lineTxt.substring(798, 812)));//infoBean.setPBANK_STRUID(translateStr(lineTxt.substring(812, 826)));//infoBean.setADMIN_CODE(translateStr(lineTxt.substring(826, 828)));//infoBean.setADMIN_VALUE(translateStr(lineTxt.substring(828, 928)));//infoBean.setLST_OPTIMIZE_TYPE(translateStr(lineTxt.substring(928, 931)));//infoBean.setOPTIMIZE_PLAN(translateStr(lineTxt.substring(931, 935)));//infoBean.setBUSI_TYPE(translateStr(lineTxt.substring(935, 938)));//infoBean.setMEMO(translateStr(lineTxt.substring(938, 1038)));//infoBean.setIS_NEW_BUSI_NODE(translateStr(lineTxt.substring(1038, 1039)));//infoBean.setCREATE_DATE(translateStr(lineTxt.substring(1039, 1047)));//infoBean.setSTRU_FOREIGN_FNAME(translateStr(lineTxt.substring(1047, 1127)));//infoBean.setSTRU_FOREIGN_SNAME(translateStr(lineTxt.substring(1127, 1207)));//infoBean.setSTRU_CHN_FNAME(translateStr(lineTxt.substring(1207, 1287)));//infoBean.setSTRU_CHN_SNAME(translateStr(lineTxt.substring(1287, 1367)));//infoBean.setBACK1(translateStr(lineTxt.substring(1367, 1467)));//infoBean.setBACK2(translateStr(lineTxt.substring(1467, 1567)));//infoBean.setBACK3(translateStr(lineTxt.substring(1571, 1667)));//infoBean.setBACK4(translateStr(lineTxt.substring(1667, 1767)));//infoBean.setBACK5(translateStr(lineTxt.substring(1767, 1867)));//infoBean.setBACK6(translateStr(lineTxt.substring(1867, 1967)));//infoBean.setBACK7(translateStr(lineTxt.substring(1967, 2067)));//infoBean.setBACK8(translateStr(lineTxt.substring(2067, 2167)));//infoBean.setBACK9(translateStr(lineTxt.substring(2167, 2267)));//infoBean.setBACK10(translateStr(lineTxt.substring(2267, 2367)));                System.out.println(infoBean.toString());                //将数据bean写入到数据库                operationDB.addRcorder(infoBean);            }        } finally {            LineIterator.closeQuietly(it);        }                }//转换为指定格式的方法public static String translateStr(String paramStr) throws Exception{String str =new String(paramStr.getBytes("ISO8859-1"),"GBK");return str;}}

另注:eclipse读取二进制BIN文件时,需要注意获取带有中午字符的字符串的真是长度, 因为我的BIIN文件是GBK格式,所以我使用ISO8859-1对数据进行解码,截取到正确的字符串片段,再将正确片段编码为GBK.


后续改进思路:

可以将该代码稍作改变,加上连接池可以提高导入速度,或使用框架结构对其进行优化。

此处200W数据量的导入时间约一小时左右。


原创粉丝点击