MySQL数据库导入BIN格式定长文件
来源:互联网 发布:上证综合指数季度数据 编辑:程序博客网 时间:2024/04/19 13:07
环境:MySQL5.7 widows10 jdk1.8
另注:eclipse读取二进制BIN文件时,需要注意获取带有中午字符的字符串的真是长度, 因为我的BIIN文件是GBK格式,所以我使用ISO8859-1对数据进行解码,截取到正确的字符串片段,再将正确片段编码为GBK.
工具:Navicat Premium 11.1.14.0 eclipse mar2
实现思路:
因为没有分隔符对数据进行分割,所以不能使用 load data infile 命令导入。故此需要自己写程序将数据解析导入。
将BIN格式的定长文件按照行读取,根据字段长度解析后,导入数据库。
步骤:
1.确定数据文件中的行长度,确定每一个字段的长度
2.将数据库表按照需求建好
3.创建bean对应数据库的每一个字段,私有变量的处理setter/getter 和 toString。
public class InfoBean {private String STRU_ID;public String getSTRU_ID() {return STRU_ID;}public void setSTRU_ID(String sTRU_ID) {STRU_ID = sTRU_ID;}}4.创建数据库的操作类
import java.lang.reflect.Field;import java.sql.Connection;import java.sql.PreparedStatement;import java.sql.SQLException;public class OperationDB {private Connection con = null;public void addRcorder(InfoBean infoBean) throws SQLException, Exception {if (con == null) {con = BeyondbConnection.getConnection();}String sql = "insert into bom_cmpstruinfo (STRU_ID,STRU_FNAME,STRU_SNAME,OLDSYS_STRUID,FLICENCE_ID,STRU_ADDR,ZIPCODE,PHONE,STRU_SIGN,STRU_LV,ADMIN_LV,SUP_STRU,SETUP_TIME,LST_ALT_TYPE,LST_ALT_TIME,STRU_STATE,BLICENCE_ID,REVOKE_TIME,DIST_SIGN,STRU_GRADE,CODECERT_ID,TOWN_FLAG,BUSI_AREA,BUSI_SITE_USE,FEXCHANGE_FLAG,MAN_GRADE,CHARGE_PROP,PROFESSION_LEVEL,NODE_TYPE,ECON_AREA,IS_HUN_CITY,IS_HUN_COUNTY,COUNTRY,VILLAGE,NP_OPER_TYPE,MANAGE_STRU_ID,SPECIALTY_PROP,FINANCE_STRUID,PBANK_STRUID,ADMIN_CODE,ADMIN_VALUE,LST_OPTIMIZE_TYPE,OPTIMIZE_PLAN,BUSI_TYPE,MEMO,IS_NEW_BUSI_NODE,CREATE_DATE,STRU_FOREIGN_FNAME,STRU_FOREIGN_SNAME,STRU_CHN_FNAME,STRU_CHN_SNAME,BACK1,BACK2,BACK3,BACK4,BACK5,BACK6,BACK7,BACK8,BACK9,BACK10)"+ " values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";PreparedStatement pstmt = con.prepareStatement(sql);String[] arr = new String[]{"STRU_ID","STRU_FNAME","STRU_SNAME","OLDSYS_STRUID","FLICENCE_ID","STRU_ADDR","ZIPCODE","PHONE","STRU_SIGN","STRU_LV","ADMIN_LV","SUP_STRU","SETUP_TIME","LST_ALT_TYPE","LST_ALT_TIME","STRU_STATE","BLICENCE_ID","REVOKE_TIME","DIST_SIGN","STRU_GRADE","CODECERT_ID","TOWN_FLAG","BUSI_AREA","BUSI_SITE_USE","FEXCHANGE_FLAG","MAN_GRADE","CHARGE_PROP","PROFESSION_LEVEL","NODE_TYPE","ECON_AREA","IS_HUN_CITY","IS_HUN_COUNTY","COUNTRY","VILLAGE","NP_OPER_TYPE","MANAGE_STRU_ID","SPECIALTY_PROP","FINANCE_STRUID","PBANK_STRUID","ADMIN_CODE","ADMIN_VALUE","LST_OPTIMIZE_TYPE","OPTIMIZE_PLAN","BUSI_TYPE","MEMO","IS_NEW_BUSI_NODE","CREATE_DATE","STRU_FOREIGN_FNAME","STRU_FOREIGN_SNAME","STRU_CHN_FNAME","STRU_CHN_SNAME","BACK1","BACK2","BACK3","BACK4","BACK5","BACK6","BACK7","BACK8","BACK9","BACK10"};for (int i = 0; i < arr.length; i++) {Field field = infoBean.getClass().getDeclaredField(arr[i]);field.setAccessible(true);Object object = field.get(infoBean);pstmt.setObject(i+1,object);}//返回更新数据的条数int executeUpdate = pstmt.executeUpdate();System.out.println("本次更新数据:"+executeUpdate+"条。");
//下面这种是笨方法,一个个的赋值。已废弃。//pstmt.setString(1,infoBean.getSTRU_ID());//pstmt.setString(2,infoBean.getSTRU_CHN_FNAME());//pstmt.setString(3,infoBean.getSTRU_SNAME());//pstmt.setString(4,infoBean.getOLDSYS_STRUID());//pstmt.setString(5,infoBean.getFLICENCE_ID());//pstmt.setString(6,infoBean.getSTRU_ADDR());//pstmt.setString(7,infoBean.getZIPCODE());//pstmt.setString(8,infoBean.getPHONE());//pstmt.setString(9,infoBean.getSTRU_SIGN());//pstmt.setString(1,infoBean.getSTRU_LV());//pstmt.setString(1,infoBean.getADMIN_LV());//pstmt.setString(1,infoBean.getSUP_STRU());//pstmt.setString(1,infoBean.getSETUP_TIME());//pstmt.setString(1,infoBean.getLST_ALT_TYPE();//pstmt.setString(1,infoBean.getLST_ALT_TIME();//pstmt.setString(1,infoBean.getSTRU_STATE();//pstmt.setString(1,infoBean.getBLICENCE_ID();//pstmt.setString(1,infoBean.getREVOKE_TIME();//pstmt.setString(1,infoBean.getDIST_SIGN();//pstmt.setString(1,infoBean.getSTRU_GRADE();//pstmt.setString(1,infoBean.getCODECERT_ID();//pstmt.setString(1,infoBean.getTOWN_FLAG();//pstmt.setString(1,infoBean.getBUSI_AREA();//pstmt.setString(1,infoBean.getBUSI_SITE_USE();//pstmt.setString(1,infoBean.getFEXCHANGE_FLAG();//pstmt.setString(1,infoBean.getMAN_GRADE();//pstmt.setString(1,infoBean.getCHARGE_PROP();//pstmt.setString(1,infoBean.getPROFESSION_LEVEL();//pstmt.setString(1,infoBean.getNODE_TYPE();//pstmt.setString(1,infoBean.getECON_AREA();//pstmt.setString(1,infoBean.getIS_HUN_CITY();//pstmt.setString(1,infoBean.getIS_HUN_COUNTY();//pstmt.setString(1,infoBean.getCOUNTRY();//pstmt.setString(1,infoBean.getVILLAGE();//pstmt.setString(1,infoBean.getNP_OPER_TYPE();//pstmt.setString(1,infoBean.getMANAGE_STRU_ID();//pstmt.setString(1,infoBean.getSPECIALTY_PROP();//pstmt.setString(1,infoBean.getFINANCE_STRUID();//pstmt.setString(1,infoBean.getPBANK_STRUID();//pstmt.setString(1,infoBean.getADMIN_CODE();//pstmt.setString(1,infoBean.getADMIN_VALUE();//pstmt.setString(1,infoBean.getLST_OPTIMIZE_TYPE();//pstmt.setString(1,infoBean.getOPTIMIZE_PLAN();//pstmt.setString(1,infoBean.getBUSI_TYPE();//pstmt.setString(1,infoBean.getMEMO();//pstmt.setString(1,infoBean.getIS_NEW_BUSI_NODE();//pstmt.setString(1,infoBean.getCREATE_DATE();//pstmt.setString(1,infoBean.getSTRU_FOREIGN_FNAME();//pstmt.setString(1,infoBean.getSTRU_FOREIGN_SNAME();//pstmt.setString(1,infoBean.getSTRU_CHN_FNAME();//pstmt.setString(1,infoBean.getSTRU_CHN_SNAME();//pstmt.setString(1,infoBean.getBACK1();//pstmt.setString(1,infoBean.getBACK2();//pstmt.setString(1,infoBean.getBACK3();//pstmt.setString(1,infoBean.getBACK4();//pstmt.setString(1,infoBean.getBACK5();//pstmt.setString(1,infoBean.getBACK6();//pstmt.setString(1,infoBean.getBACK7();//pstmt.setString(1,infoBean.getBACK8();//pstmt.setString(1,infoBean.getBACK9();//pstmt.setString(1,infoBean.getBACK10();}}5.jdbc链接数据库的类,此时使用的是最简单的JDBC,(并未使用连接池.)
import java.sql.Connection;import java.sql.DriverManager;import java.sql.SQLException;import java.util.logging.Level;import java.util.logging.Logger;public class BeyondbConnection {public static Connection getConnection() {Connection con = null;String driver = "com.mysql.jdbc.Driver";String url = "jdbc:mysql://localhost:3306/数据库名称";String user = "root";String password = "root";try {con = DriverManager.getConnection(url, user, password);} catch (SQLException ex) {Logger.getLogger(BeyondbConnection.class.getName()).log(Level.SEVERE, null, ex);}return con;}}
6.编写主方法,读取文件并解析数据,将数据解析后的各字段插入到对应的字段中,之后通过数据库连接, 将数据导入到MySQL数据库中。
该方法使用反射通过字段名和各字段的指定长度,获取到对应的数据bean的属性,解决了表中太多字段造成代码冗长,重复率太高。
注:反射获取私有变量时需要加上
field.setAccessible(true);//解决掉不能设置私有变量的限制
而Field field = infoBean.getClass().getDeclaredField(arr[i]); 则是获取到该数据bean中所有属性(包括私有)
import java.io.File;import java.io.UnsupportedEncodingException;import java.lang.reflect.Field;import org.apache.commons.io.FileUtils;import org.apache.commons.io.LineIterator;import sun.reflect.misc.FieldUtil;public class test {public static void main(String[] args) throws Exception{InfoBean infoBean = new InfoBean();OperationDB operationDB = new OperationDB();// String filePath = "D:\\data1.txt";// ReadFile readFile=new ReadFile(); // readFile.readTxtFile(filePath);//缺点:最大支持1G的文件,一次读取所有的数据加载到内存中,容易导致崩溃。//List<String> strings = FileUtils.readLines(new File("D://data1.txt"), "gbk");// for (String string : strings) {// System.out.println(string + string.length());// }//优点:分段读取数据文件,没有最大文件限制,不会造成太大的压力。 LineIterator it = FileUtils.lineIterator(new File("D://data1.txt"), "GBK");// LineIterator it = FileUtils.lineIterator(new File("D://BOM_CMPSTRUINFO0000000000.BIN"), "GBK"); try { while (it.hasNext()) { String lineTxt = it.nextLine(); lineTxt=new String(lineTxt.getBytes("GBK"), "ISO8859-1"); //优势方法,使用反射的方法获取和设置原来字段的值。 String[] arr = new String[]{"STRU_ID","STRU_FNAME","STRU_SNAME","OLDSYS_STRUID","FLICENCE_ID","STRU_ADDR","ZIPCODE","PHONE","STRU_SIGN","STRU_LV","ADMIN_LV","SUP_STRU","SETUP_TIME","LST_ALT_TYPE","LST_ALT_TIME","STRU_STATE","BLICENCE_ID","REVOKE_TIME","DIST_SIGN","STRU_GRADE","CODECERT_ID","TOWN_FLAG","BUSI_AREA","BUSI_SITE_USE","FEXCHANGE_FLAG","MAN_GRADE","CHARGE_PROP","PROFESSION_LEVEL","NODE_TYPE","ECON_AREA","IS_HUN_CITY","IS_HUN_COUNTY","COUNTRY","VILLAGE","NP_OPER_TYPE","MANAGE_STRU_ID","SPECIALTY_PROP","FINANCE_STRUID","PBANK_STRUID","ADMIN_CODE","ADMIN_VALUE","LST_OPTIMIZE_TYPE","OPTIMIZE_PLAN","BUSI_TYPE","MEMO","IS_NEW_BUSI_NODE","CREATE_DATE","STRU_FOREIGN_FNAME","STRU_FOREIGN_SNAME","STRU_CHN_FNAME","STRU_CHN_SNAME","BACK1","BACK2","BACK3","BACK4","BACK5","BACK6","BACK7","BACK8","BACK9","BACK10"}; int[] nums = new int[]{10, 80, 80, 15, 40, 120, 20, 40, 3, 3, 3, 10, 6, 3, 6, 3, 36, 6, 3, 3, 36, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 100, 40, 3, 10, 80, 14, 14, 6, 100, 3, 4, 3, 100, 1, 8, 80, 80, 80, 80, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100}; int sumNum=0; for (int i = 0; i < arr.length; i++) { //反射获取成员变量的私有字段 Field field = infoBean.getClass().getDeclaredField(arr[i]); //解决掉不能设置私有变量的限制 field.setAccessible(true); if(arr[i]=="BUSI_AREA"){ field.set(infoBean,Integer.parseInt(lineTxt.substring(sumNum,sumNum+nums[i]).trim())); sumNum+=nums[i]; }else{ field.set(infoBean,translateStr(lineTxt.substring(sumNum,sumNum+nums[i]).trim())); sumNum+=nums[i]; }} //繁琐方法,一个个计算截取字符串的长度。已废弃// infoBean.setSTRU_ID(translateStr(lineTxt.substring(0, 10)));//infoBean.setSTRU_CHN_FNAME(translateStr(lineTxt.substring(10, 80)));//infoBean.setSTRU_SNAME(translateStr(lineTxt.substring(80, 170)));//infoBean.setOLDSYS_STRUID(translateStr(lineTxt.substring(170, 185)));//infoBean.setFLICENCE_ID(translateStr(lineTxt.substring(185, 225)));//infoBean.setSTRU_ADDR(translateStr(lineTxt.substring(225, 345)));//infoBean.setZIPCODE(translateStr(lineTxt.substring(345, 365)));//infoBean.setPHONE(translateStr(lineTxt.substring(365, 405)));//infoBean.setSTRU_SIGN(translateStr(lineTxt.substring(405, 408)));//infoBean.setSTRU_LV(translateStr(lineTxt.substring(408, 411)));//infoBean.setADMIN_LV(translateStr(lineTxt.substring(411, 414)));//infoBean.setSUP_STRU(translateStr(lineTxt.substring(414, 424)));//infoBean.setSETUP_TIME(translateStr(lineTxt.substring(424, 430)));//infoBean.setLST_ALT_TYPE(translateStr(lineTxt.substring(430, 433)));//infoBean.setLST_ALT_TIME(translateStr(lineTxt.substring(433, 439)));//infoBean.setSTRU_STATE(translateStr(lineTxt.substring(439, 442)));//infoBean.setBLICENCE_ID(translateStr(lineTxt.substring(442, 478)));//infoBean.setREVOKE_TIME(translateStr(lineTxt.substring(478, 484)));//infoBean.setDIST_SIGN(translateStr(lineTxt.substring(484, 487)));//infoBean.setSTRU_GRADE(translateStr(lineTxt.substring(487, 490)));//infoBean.setCODECERT_ID(translateStr(lineTxt.substring(490, 526)));//infoBean.setTOWN_FLAG(translateStr(lineTxt.substring(526, 529)));//infoBean.setBUSI_AREA(Integer.parseInt(translateStr(lineTxt.substring(529, 538))));//infoBean.setBUSI_SITE_USE(translateStr(lineTxt.substring(538, 541)));//infoBean.setFEXCHANGE_FLAG(translateStr(lineTxt.substring(541, 544)));//infoBean.setMAN_GRADE(translateStr(lineTxt.substring(544, 547)));//infoBean.setCHARGE_PROP(translateStr(lineTxt.substring(547, 550)));//infoBean.setPROFESSION_LEVEL(translateStr(lineTxt.substring(550, 553)));//infoBean.setNODE_TYPE(translateStr(lineTxt.substring(553, 556)));//infoBean.setECON_AREA(translateStr(lineTxt.substring(556, 559)));//infoBean.setIS_HUN_CITY(translateStr(lineTxt.substring(559, 562)));//infoBean.setIS_HUN_COUNTY(translateStr(lineTxt.substring(562, 565)));//infoBean.setCOUNTRY(translateStr(lineTxt.substring(565, 665)));//infoBean.setVILLAGE(translateStr(lineTxt.substring(665, 705)));//infoBean.setNP_OPER_TYPE(translateStr(lineTxt.substring(705, 708)));//infoBean.setMANAGE_STRU_ID(translateStr(lineTxt.substring(708, 718)));//infoBean.setSPECIALTY_PROP(translateStr(lineTxt.substring(718, 798)));//infoBean.setFINANCE_STRUID(translateStr(lineTxt.substring(798, 812)));//infoBean.setPBANK_STRUID(translateStr(lineTxt.substring(812, 826)));//infoBean.setADMIN_CODE(translateStr(lineTxt.substring(826, 828)));//infoBean.setADMIN_VALUE(translateStr(lineTxt.substring(828, 928)));//infoBean.setLST_OPTIMIZE_TYPE(translateStr(lineTxt.substring(928, 931)));//infoBean.setOPTIMIZE_PLAN(translateStr(lineTxt.substring(931, 935)));//infoBean.setBUSI_TYPE(translateStr(lineTxt.substring(935, 938)));//infoBean.setMEMO(translateStr(lineTxt.substring(938, 1038)));//infoBean.setIS_NEW_BUSI_NODE(translateStr(lineTxt.substring(1038, 1039)));//infoBean.setCREATE_DATE(translateStr(lineTxt.substring(1039, 1047)));//infoBean.setSTRU_FOREIGN_FNAME(translateStr(lineTxt.substring(1047, 1127)));//infoBean.setSTRU_FOREIGN_SNAME(translateStr(lineTxt.substring(1127, 1207)));//infoBean.setSTRU_CHN_FNAME(translateStr(lineTxt.substring(1207, 1287)));//infoBean.setSTRU_CHN_SNAME(translateStr(lineTxt.substring(1287, 1367)));//infoBean.setBACK1(translateStr(lineTxt.substring(1367, 1467)));//infoBean.setBACK2(translateStr(lineTxt.substring(1467, 1567)));//infoBean.setBACK3(translateStr(lineTxt.substring(1571, 1667)));//infoBean.setBACK4(translateStr(lineTxt.substring(1667, 1767)));//infoBean.setBACK5(translateStr(lineTxt.substring(1767, 1867)));//infoBean.setBACK6(translateStr(lineTxt.substring(1867, 1967)));//infoBean.setBACK7(translateStr(lineTxt.substring(1967, 2067)));//infoBean.setBACK8(translateStr(lineTxt.substring(2067, 2167)));//infoBean.setBACK9(translateStr(lineTxt.substring(2167, 2267)));//infoBean.setBACK10(translateStr(lineTxt.substring(2267, 2367))); System.out.println(infoBean.toString()); //将数据bean写入到数据库 operationDB.addRcorder(infoBean); } } finally { LineIterator.closeQuietly(it); } }//转换为指定格式的方法public static String translateStr(String paramStr) throws Exception{String str =new String(paramStr.getBytes("ISO8859-1"),"GBK");return str;}}
另注:eclipse读取二进制BIN文件时,需要注意获取带有中午字符的字符串的真是长度, 因为我的BIIN文件是GBK格式,所以我使用ISO8859-1对数据进行解码,截取到正确的字符串片段,再将正确片段编码为GBK.
后续改进思路:
可以将该代码稍作改变,加上连接池可以提高导入速度,或使用框架结构对其进行优化。
此处200W数据量的导入时间约一小时左右。
阅读全文
1 0
- MySQL数据库导入BIN格式定长文件
- Spring Batch 例子: 导入定长文件到数据库
- csv格式数据导入mysql数据库
- 将csv格式数据导入MySql数据库
- 将csv格式数据导入MySql数据库
- **.sql文件导入mysql数据库
- mysql 数据库导入frm文件
- sql文件导入mysql数据库
- MySQL数据库导入.sql文件
- csv文件导入mysql数据库
- SQL文件导入MySQL数据库
- mysql数据库文件夹中的mysql-bin.00001是什么文件?
- 使用mysql-bin.0000xx日志文件恢复数据库
- 关于.bin格式的文件
- mysql数据库sql文件导入导出命令
- 如何将frm文件导入MySql数据库
- 导入csv文件到mysql数据库
- 如何将frm文件导入MySql数据库
- oracle自增长主键(润乾报表)
- Android 事件处理
- 多线程的简介及方法
- 百度地图---dlopen failed: "/data/data/com.zoommax.car/files/libs/libBaiduMapSDK_base_v4_2_1.so" is 32-bi
- 实体忘记close就无法选择
- MySQL数据库导入BIN格式定长文件
- TCP服务器编程步骤
- 洛谷P1514 引水入城(深搜,贪心)
- Web开发之用canvas2image.js将canvas保存为图片(实现页面截图下载功能)
- 学习资源
- 数据源不定时间段后连接中断(解决方法:连接关闭后设置自启动,重新自动连接;Jboss异常:SQL Error 17002和SQL Error 17008
- 掌握那么多Word技巧有什么用?还不如掌握这几个比较实在!
- codeforces 864A 之 Fair Game
- 《Spring技术内幕》学习笔记6——IoC容器的高级特性