python3脚本使用sql loader批量导入字节文件并二次处理
来源:互联网 发布:mac osx yosemite.vdi 编辑:程序博客网 时间:2024/06/06 03:09
由于使用cx_oracle连接oracle然后导入数据文件速度太慢(要导的数据有几百G),出于速度的考虑,选择了oracle自带的sql loader工具,使用python脚本写界面,与用户交互,然后使用system()去批量调用sql loader,对于导入失败的bad数据以及日志数据,我用了c++写了一个程序,把bad文件转换成了ok的数据,然后又写了一个脚本处理这些bad数据。
1、python3调用sqlldr工具导入数据的脚本实现。
#!C:\Python34\python.exe'''by dairen 20170703'''from tkinter import *import tkinter.messagebox as messageboximport cx_Oracleimport timeimport datetimeimport osclass Application(Frame): def __init__(self, master=None): Frame.__init__(self, master,height=2000,width=4000) self.pack() self.createWidgets() def createWidgets(self): self.label_1 = Label(self,text = "文件起始日期:") self.label_2 = Label(self,text = "文件结束日期:") self.label_1.grid(row = 0,column = 0) self.label_2.grid(row = 1,column = 0) self.Input1 = Entry(self) self.Input2 = Entry(self) self.Input1.grid(row = 0,column = 1) self.Input2.grid(row = 1,column = 1) self.Button1 = Button(self, text='开始导入',width=20, command=self.runmain) self.Button1.grid(row =2,columnspan=2) def runmain(self): begin_date = self.Input1.get() or '20170607' end_date = self.Input2.get() or '20170607' messagebox.showinfo('Tips!', '您的输入是:%s 和 %s \n 导入即将开始!' %(begin_date,end_date)) #offset:358O b_Info 40-60 aaa=[' CHAR(42)',' CHAR(42)',' CHAR(42)',' CHAR(8)',' CHAR(3)', ' CHAR(1)',' CHAR(1)',' CHAR(1)',' CHAR(1)',' CHAR(1)', ' CHAR(6)',' CHAR(10)',' CHAR(11)',' CHAR(11)',' CHAR(11)', ' CHAR(11)',' CHAR(11)',' CHAR(21)',' CHAR(21)',' CHAR(21)', ' CHAR(4)',' CHAR(6)',' CHAR(12)',' CHAR(4)',' CHAR(6)', ' CHAR(4)',' CHAR(3)',' CHAR(2)',' CHAR(12)',' CHAR(6)', ' CHAR(8)',' CHAR(15)',' CHAR(65)',' CHAR(3)',' CHAR(4)', ' CHAR(6)',' CHAR(10)',' CHAR(1)',' CHAR(1)',' CHAR(5)' ,' CHAR(2)',' CHAR(2)',' CHAR(2)',' CHAR(2)',' CHAR(4)' ,' CHAR(4)',' CHAR(11)',' CHAR(11)',' NUMBER(12,0)',' NUMBER(12,0)' ,' NUMBER(12,0)',' NUMBER(12,0)',' CHAR(3)',' CHAR(3)',' NUMBER(8,0)' ,' CHAR(1)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)' ,' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)' ,' NUMBER(8,0)',' CHAR(8)',' CHAR(8)',' CHAR(8)',' NUMBER(8,0)' ,' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)' ,' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' CHAR(16)',' CHAR(2)' ,' CHAR(1)',' CHAR(2)',' CHAR(14)',' CHAR(4)',' CHAR(3)' ,' CHAR(9)',' NUMBER(12,0)',' CHAR(3)',' CHAR(1)',' CHAR(8)' ,' CHAR(1)',' CHAR(12)',' CHAR(15)',' CHAR(6)',' CHAR(4)' ,' CHAR(100)'] lenlist=[42,42,42,8,3,1,1,1,1,1, 6,10,11,11,11,11,11,21,21,21, 4,6,12,4,6,4,3,2,12,6, 8,15,40,3,4,6,10,1,1,5, 2,2,2,2,4,4,11,11,12,12, 12,12,3,3,8,1,8,8,8,8, 8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,16,2, 1,2,14,4,3,9,12,3,1,8, 1,12,15,6,4,100] title=[ 'Main_Key','Origin_Key','Er_Key','PostDate','Curr', 'If_Curr','If_Cen','Cups_Sett','If_Cvm_In','If_Single', 'Ssn_Trans','Time_Trans','Ob_Code','Trans_Code','Rec_Code', 'Cb_Code','Cvm_Code','Account11','Account_In','Account_Out', 'MsgType','ProcCode','TranAmt','ActDate','ActTime', 'MerChant_Type','Sevr_Point_In','Sevr_Point_Con','Search','AuthCode', 'Term','Ob_Id','Ob_Info','Curr_Tran','Reason_Code', 'Ssn_Trans_Ori','Time_Trans_Ori','Status_Snd','Status_Rec','Status_Trans', 'RejCode1','RejCode2','RejCode3','RejCode4','Sett_Snd_AreaCode', 'Sett_Rec_AreaCode','Sett_Snd_Code','Sett_Rec_Code' ,'Amt_Snd_Sett_dr','Amt_Snd_Sett_cr', 'Amt_Rec_Sett_dr','Amt_Rec_Sett_cr','Curr_Sett_Snd','Curr_Sett_Rec','Comm', 'Comm_Way','Comm_Snd_dr','Comm_Snd_cr','Comm_Rec_dr','Comm_Rec_cr', 'Fee_Snd_dr','Fee_Snd_cr','Fee_Rec_dr','Fee_Rec_cr','Comm_Cen_dr', 'Comm_Cen_cr','Fee_Snd_Branch_dr','Fee_Snd_Branch_cr','Fee_Rec_Branch_dr','Fee_Rec_Branch_cr', 'Fee_CanCel_Snd_dr','Fee_CanCel_Snd_cr','Fee_CanCel_Rec_dr','Fee_CanCel_Rec_cr','Fee_CanCel_Cen_dr', 'Fee_CanCel_Cen_cr','Fee_Cl_Snd_Branch_dr','Fee_Cl_Snd_Branch_cr','Fee_Cl_Rec_Branch_dr','Fee_Cl_Rec_Branch_cr', 'TranType','CardKind','CardType','Bin','Card', 'Er_Max','Ssn_Cen','TranAmt_Ori','TranCode_Ori','Reserved', 'variab1','variab2','variab3','variab4','variab5','variab6' ] x=len(title) fields=[] fields = [title[i]+aaa[i] for i in range(0,x)] fields_str = ', '.join(fields) #begin_date=input("input start date(格式:20170506):") #end_date=input("input end date(格式:20170606):") date_list = [] begin_date = datetime.datetime.strptime(begin_date, "%Y%m%d") end_date = datetime.datetime.strptime(end_date, "%Y%m%d") if (begin_date > end_date): messagebox.showinfo('error!', 'begin_date or end_date error!)') while( begin_date <= end_date): date_str = begin_date.strftime("%Y%m%d") date_list.append(date_str) begin_date += datetime.timedelta(days=1) #print(date_list) file_name_list=[] numlist=['01','02','03','04'] for datename in date_list: for num in numlist: bsi_str='BSI%s01_%s'%(datename,num) file_name_list.append(bsi_str) for num in numlist: bsi_str='BSI%s51_%s'%(datename,num) file_name_list.append(bsi_str) file_name_list.append('BSI%s88_01'%datename) file_name_list.append('BSI%s99_01'%datename) for num in numlist: bsa_str='BSA%s01_%s'%(datename,num) file_name_list.append(bsa_str) for num in numlist: bsa_str='BSA%s51_%s'%(datename,num) file_name_list.append(bsa_str) file_name_list.append('BSA%s88_01'%datename) file_name_list.append('BSA%s99_01'%datename) #print(file_name_list) table_list=[] for datename in date_list: bsi_str='BSI_%s'%datename table_list.append(bsi_str) bsa_str='BSA_%s'%datename table_list.append(bsa_str) #print(table_list) #开始计时 starttime=time.clock() try: conn = cx_Oracle.connect('xxx/xxx@xxx/orcl') #print("connect oracle success!!!") except: #print("connect oracle failed!!!") messagebox.showinfo('warning!', 'failed connect oracle!') return cursor = conn.cursor() data_num=len(date_list) file_num=len(file_name_list) table_num=len(table_list) #create table for k in range(0,table_num): table_name= table_list[k] #print("当前表名:%s "%table_name) sql_createtb = "create table %s (%s)" % (table_name, fields_str) sql_droptb='drop table %s purge'%table_name #print(sql_createtb) #print(sql_droptb) try: #print("开始建表!") cursor.execute(sql_createtb) #print("建表成功!") except: #print("表已存在!开始删表!") cursor.execute(sql_droptb) #print("删表成功!开始重新建表!") cursor.execute(sql_createtb) #print("重新建表成功!") conn.commit() cursor.close() conn.close() #execute cmd according to fiel_name succ=0 errornum=0 for i in range(0,file_num): try: file_name=file_name_list[i] if os.path.isfile(file_name): table_name=table_list[i//10] #CHARACTERSET ZHS16GBK #edit ctl files according to table_name lines=open('bsa.ctl','r').readlines() for k in range(len(lines)): if 'INTO TABLE' in lines[k]: lines[k]='INTO TABLE %s\n'%table_name f=open('bsa.ctl','w') f.writelines(lines) f.close() basedir=os.getcwd() newname=file_name+".dat" os.rename(file_name,newname) bad_dir='%s\\bad_files'%basedir log_dir='%s\\log_files'%basedir if not os.path.exists(bad_dir): os.mkdir(bad_dir) if not os.path.exists(log_dir): os.mkdir(log_dir) #import cmd_str="sqlldr xxx/xxx@xxx/orcl control=%s\\bsa.ctl data=%s "%(basedir,newname) cmd_str+="bad=%s\\bad_files\\%s.bad log=%s\\log_files\\%s.log direct=true"%(basedir,file_name,basedir,file_name) #print(cmd_str) os.system(cmd_str) succ+=1 #process bad file except: errornum+=1 endtime=time.clock() messagebox.showinfo('完成!!','耗时约:%f秒,success in %d files'%((endtime-starttime),succ))app = Application()# 设置窗口标题:app.master.title('CUPS IMPORTER')# 主消息循环:app.mainloop()
2、sqlldr规则文件的编写.
sqlldr读取文件有好几种方式,我要处理的源文件是字节流的,所以使用了按字节分隔的控制文件,如下:
options(ERRORS=20000)load DATAAPPENDINTO TABLE BSA_20170607(Main_Key POSITION(01:42) CHAR,Origin_Key POSITION(43:84) CHAR,Er_Key POSITION(85:126) CHAR,PostDate POSITION(127:134) CHAR,Curr POSITION(135:137) CHAR,If_Curr POSITION(138:138) CHAR,If_Cen POSITION(139:139) CHAR,Cups_Sett POSITION(140:140) CHAR,If_Cvm_In POSITION(141:141) CHAR,If_Single POSITION(142:142) CHAR,Ssn_Trans POSITION(143:148) CHAR,Time_Trans POSITION(149:158) CHAR,Ob_Code POSITION(159:169) CHAR,Trans_Code POSITION(170:180) CHAR,Rec_Code POSITION(181:191) CHAR,Cb_Code POSITION(192:202) CHAR,Cvm_Code POSITION(203:213) CHAR,Account11 POSITION(214:234) CHAR,Account_In POSITION(235:255) CHAR,Account_Out POSITION(256:276) CHAR,MsgType POSITION(277:280) CHAR,ProcCode POSITION(281:286) CHAR,TranAmt POSITION(287:298) CHAR,ActDate POSITION(299:302) CHAR,ActTime POSITION(303:308) CHAR,MerChant_Type POSITION(309:312) CHAR,Sevr_Point_In POSITION(313:315) CHAR,Sevr_Point_Con POSITION(316:317) CHAR,Search POSITION(318:329) CHAR,AuthCode POSITION(330:335) CHAR,Term POSITION(336:343) CHAR,Ob_Id POSITION(344:358) CHAR,Ob_Info POSITION(359:398) CHAR,Curr_Tran POSITION(399:401) CHAR,Reason_Code POSITION(402:405) CHAR,Ssn_Trans_Ori POSITION(406:411) CHAR,Time_Trans_Ori POSITION(412:421) CHAR,Status_Snd POSITION(422:422) CHAR,Status_Rec POSITION(423:423) CHAR,Status_Trans POSITION(424:428) CHAR,RejCode1 POSITION(429:430) CHAR,RejCode2 POSITION(431:432) CHAR,RejCode3 POSITION(433:434) CHAR,RejCode4 POSITION(435:436) CHAR,Sett_Snd_AreaCode POSITION(437:440) CHAR,Sett_Rec_AreaCode POSITION(441:444) CHAR,Sett_Snd_Code POSITION(445:455) CHAR,Sett_Rec_Code POSITION(456:466) CHAR,Amt_Snd_Sett_dr POSITION(467:478),Amt_Snd_Sett_cr POSITION(479:490),Amt_Rec_Sett_dr POSITION(491:502),Amt_Rec_Sett_cr POSITION(503:514),Curr_Sett_Snd POSITION(515:517) CHAR,Curr_Sett_Rec POSITION(518:520) CHAR,Comm POSITION(521:528) ,Comm_Way POSITION(529:529) CHAR,Comm_Snd_dr POSITION(530:537) ,Comm_Snd_cr POSITION(538:545) ,Comm_Rec_dr POSITION(546:553) ,Comm_Rec_cr POSITION(554:561) ,Fee_Snd_dr POSITION(562:569) ,Fee_Snd_cr POSITION(570:577) ,Fee_Rec_dr POSITION(578:585) ,Fee_Rec_cr POSITION(586:593) ,Comm_Cen_dr POSITION(594:601) ,Comm_Cen_cr POSITION(602:609) ,Fee_Snd_Branch_dr POSITION(610:617) CHAR,Fee_Snd_Branch_cr POSITION(618:625) CHAR,Fee_Rec_Branch_dr POSITION(626:633) CHAR,Fee_Rec_Branch_cr POSITION(634:641) ,Fee_CanCel_Snd_dr POSITION(642:649) ,Fee_CanCel_Snd_cr POSITION(650:657) ,Fee_CanCel_Rec_dr POSITION(658:665) ,Fee_CanCel_Rec_cr POSITION(666:673) ,Fee_CanCel_Cen_dr POSITION(674:681) ,Fee_CanCel_Cen_cr POSITION(682:689) ,Fee_Cl_Snd_Branch_dr POSITION(690:697) ,Fee_Cl_Snd_Branch_cr POSITION(698:705) ,Fee_Cl_Rec_Branch_dr POSITION(706:721) CHAR,Fee_Cl_Rec_Branch_cr POSITION(722:723) CHAR,TranType POSITION(724:724) CHAR,CardKind POSITION(725:726) CHAR,CardType POSITION(727:740) CHAR,Bin POSITION(741:744) CHAR,Card POSITION(745:747) CHAR,Er_Max POSITION(748:756) CHAR,Ssn_Cen POSITION(757:768) ,TranAmt_Ori POSITION(769:771) CHAR,TranCode_Ori POSITION(772:772) CHAR,Reserved POSITION(773:780) CHAR,variab1 POSITION(781:781) CHAR,variab2 POSITION(782:793) CHAR,variab3 POSITION(794:808) CHAR,variab4 POSITION(809:814) CHAR,variab5 POSITION(815:818) CHAR,variab6 POSITION(819:918) CHAR)
其中,我每次要针对不同的文件,把文件数据插入到不同的表,我都会读取ctl文件,修改其中的表名,然后再调用sqlldr命令。
3、c++处理sqlldr生成的bad文件,使之变成可以再次导入的文件.
头文件和cpp文件如下:
先是从网上找到的一个把gbk编码的字符数组转换成unicode编码的简单类:
// UtfFile.h: interface for the UtfFile class.///////////////////////////////////////////////////////////#if !defined(AFX_UTFFILE_H__8ED10D8A_D1A3_412F_A600_124F521CE4F1__INCLUDED_)#define AFX_UTFFILE_H__8ED10D8A_D1A3_412F_A600_124F521CE4F1__INCLUDED_#if _MSC_VER > 1000#pragma once#endif // _MSC_VER > 1000#include <windows.h> #include <stdio.h>#include <locale.h>#include <IOSTREAM>#include <FSTREAM>using namespace std;class UtfFile {public: static char * UnicodeToGB2312(WCHAR uData, char buffer[2]); // Unicode 转换成 GB2312 static WCHAR * UTF_8ToUnicode(char *pText, WCHAR &unicode); // 把UTF-8 转化成 Unicode static char * TranslateUTF8ToGB(char *str, size_t len); // 把UTF-8字符串转化成ANSI(GB2312)编码形式 char * GetString(char *str, int maxLen = 1024); // 读取一个字符串,以换行符为结束标示 char * GetLine(char *str, int maxLen); // 读取一行字符 void close(); // 关闭文件流 int open(const char *sFileName); // 用于打开一个文件 UtfFile(const char *sour); virtual ~UtfFile();public: ifstream inf;};#endif // !defined(AFX_UTFFILE_H__8ED10D8A_D1A3_412F_A600_124F521CE4F1__INCLUDED_)
该类的源文件:
/*-----------------------------------------------以下为.cpp文件内容------------------------------------------*/// UtfFile.cpp: implementation of the UtfFile class.////////////////////////////////////////////////////////////////////////#include "UtfFile.h"//////////////////////////////////////////////////////////////////////// Construction/Destruction//////////////////////////////////////////////////////////////////////UtfFile::UtfFile(const char *sour):inf(sour){ if( !inf.is_open() ) { char str[1024]; sprintf(str, "错误的文件路径,文件无法打开:%s", sour); throw runtime_error(str); }}UtfFile::~UtfFile(){}void UtfFile::close(){ inf.close();}char * UtfFile::GetLine(char *str, int maxLen){ if ( inf.eof() ) { str[0] = '\0'; } else { inf.getline(str, maxLen); TranslateUTF8ToGB(str, maxLen); } return str;}char * UtfFile::GetString(char *str, int maxLen){ if ( inf.eof() ) { str[0] = '\0'; } else { inf >> str; TranslateUTF8ToGB(str, maxLen); } return str;}char * UtfFile::UnicodeToGB2312(WCHAR uData, char buffer[2]){ WideCharToMultiByte(CP_ACP,NULL, &uData, 1,buffer,sizeof(WCHAR),NULL,NULL); return buffer;}WCHAR * UtfFile::UTF_8ToUnicode(char *pText, WCHAR &unicode){ /* http://blog.csdn.net/liuzhiyuan1982/article/details/3911150UTF-8是一种多字节编码的字符集,表示一个Unicode字符时,它可以是1个至多个字节,在表示上有规律:1字节:0xxxxxxx2字节:110xxxxx 10xxxxxx3字节:1110xxxx 10xxxxxx 10xxxxxx4字节:11110xxx 10xxxxxx 10xxxxxx 10xxxxxx*/ char *uchar = (char *)&unicode; uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F); uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F); return &unicode;}char * UtfFile::TranslateUTF8ToGB(char *str, size_t len){ char * newCharBuffer = new char[len]; int index =0; int nCBIndex = 0; WCHAR wTemp = 0; char cTemp[2] = " "; while(index < len) { if ( str[index] == 0 ) break; else if(str[index] > 0) // 如果是GB2312的字符 { newCharBuffer[nCBIndex] = str[index]; //直接复制 index += 1; //源字符串偏移量1 nCBIndex += 1; //目标字符串偏移量1 } else //如果是UTF-8的字符 { UTF_8ToUnicode(str + index, wTemp); //先把UTF-8转成Unicode UnicodeToGB2312(wTemp, &newCharBuffer[nCBIndex]); //再把Unicode 转成 GB2312 index += 3; //源字符串偏移量3 nCBIndex += 2; //目标字符串偏移量2 因为一个中文UTF-8占3个字节,GB2312占两个字节 } } newCharBuffer[nCBIndex] = '\0'; //结束符 strcpy( str, newCharBuffer ); delete newCharBuffer; //避免内存泄漏,这是对源代码的稍许修改 newCharBuffer = NULL; return str; }int UtfFile::open(const char *sFileName){ inf.open(sFileName); return inf.is_open();}
下面是我完成的main文件,大致就是对要处理的文件,读出一行,转码,替换里面的半中文字符‘?’,然后写入要输出的文件:
#include "UtfFile.h"#include<string>#include<vector>#include<io.h>#include<cstdio>using namespace std;void getFiles(string path, vector<string>& files){ //文件句柄 intptr_t hFile = 0; //文件信息 struct _finddata_t fileinfo; string p; if ((hFile = _findfirst(p.assign(path).append("\\*").c_str(), &fileinfo)) != -1) { do { //如果是目录,迭代之 //如果不是,加入列表 if ((fileinfo.attrib & _A_SUBDIR)) { if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0) getFiles(p.assign(path).append("\\").append(fileinfo.name), files); } else { files.push_back(p.assign(path).append("\\").append(fileinfo.name)); } } while (_findnext(hFile, &fileinfo) == 0); _findclose(hFile); }}char* G2U(const char* gb2312){ int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0); wchar_t* wstr = new wchar_t[len + 1]; memset(wstr, 0, len + 1); MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len); len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL); char* str = new char[len + 1]; memset(str, 0, len + 1); WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL); if (wstr) delete[] wstr; return str;}void do_bad(string in_file, string out_file){ UtfFile myfile(in_file.c_str()); //读bad文件 ofstream outfile; outfile.open(out_file, ios::trunc); //写文件 string tempstr; char* mystr; while (std::getline(myfile.inf, tempstr)) { mystr = const_cast<char*>(tempstr.c_str()); mystr = G2U(mystr); UtfFile::TranslateUTF8ToGB(mystr, 918); char* str_bk = mystr; for (int j = 0; j<918; j++)//可用find函数优化 { if (*mystr == '?') //replace { *mystr = '1'; break; } mystr++; } outfile << str_bk << '\n';// } outfile.close(); myfile.close();}int main(){ //char strbuffer[1024]={0}; //mystr=strbuffer; // char * filePath = "C:\\pythonfile\\sqlldr_bsibsa\\bad_process"; //\\bad_files vector<string> files; vector<string> in_files; vector<string> out_files; ////获取该路径下的所有文件 getFiles(filePath, files); //BSA2017060701_01.bad int total_size = files.size(); for (int i = 0; i < total_size; i++) { //cout << files[i] << endl; int pos = files[i].find_last_of('\\'); string name_with_exp(files[i].substr(pos + 1)); string base_dir(files[i].substr(0,pos+1)); cout << name_with_exp.size() << endl; if (name_with_exp.size() == 20) { in_files.push_back(files[i]); //cout << s << endl; string name_only = name_with_exp.substr(0, 16); // char str[50]; cout << name_only << endl; // sprintf(str,"%s.bad",name_only); // cout << str << endl; string full_path; full_path = base_dir + name_only + ".dat"; //cout << full_path << endl; out_files.push_back(full_path); } } int input_size = in_files.size(); for (int i = 0; i < input_size; i++) { do_bad(in_files[i], out_files[i]); } //C:\\pythonfile\\sql_loader\\bad_test\\BSA2016010101.dat //C:\\pythonfile\\sql_loader\\bad_test\\BSA2016010101.bad // //UtfFile myfile("C:\\pythonfile\\sql_loader\\bad_test\\1.dat"); //读bad文件 return 0;}
4、c++程序处理好bad数据使之可以用之后,我又写了一个脚本,和专门的ctl文件,再次把这些bad文件批量导进去:
import osbasedir=os.getcwd()#print(basedir)files_list=os.listdir(basedir)bad_dir='%s\\badd_files'%basedirlog_dir='%s\\logg_files'%basedirif not os.path.exists(bad_dir): os.mkdir(bad_dir)if not os.path.exists(log_dir): os.mkdir(log_dir)succ=0for file_name in files_list: if len(file_name) != 20: continue if ('BS' in file_name) and ('dat' in file_name): #print(file_name) table_name=file_name[0:3]+'_'+file_name[3:11] #print(table_name) #newname = file_name[:-4] + '.dat' #os.rename(file_name,newname) lines=open('bsa_bad.ctl','r').readlines() for k in range(len(lines)): if 'INTO TABLE' in lines[k]: lines[k]='INTO TABLE %s\n'%table_name f=open('bsa_bad.ctl','w') f.writelines(lines) f.close() file_base=file_name[0:16] #print(file_base) #import cmd_str="sqlldr xxx/xxx@xxx/orcl control=%s\\bsa_bad.ctl data=%s "%(basedir,file_name) cmd_str+="bad=%s\\badd_files\\%s.bad log=%s\\logg_files\\%s.log direct=true"%(basedir,file_base,basedir,file_base) #print(cmd_str) os.system(cmd_str) succ+=1 else: continueprint("succ:%d"%succ)
这个文件又生成了极小的bad文件,我分析了下,都是个别在源文件就是乱码的数据,可以分门别类留存下来,不必理会。
注意:其中oracle登陆远程服务器的用户名,密码,ip地址都被我用xxx代替了
欢迎提问!
阅读全文
1 0
- python3脚本使用sql loader批量导入字节文件并二次处理
- 批量导入sql文件(shell脚本)
- 批量导入Sql脚本
- SQL*LOADER导入CSV文件
- 使用SQL*Loader将CSV文件数据导入ORACLE数据表
- 使用SQL*Loader将CSV文件数据导入ORACLE数据表
- 使用SQL*Loader将CSV文件数据导入ORACLE
- 使用SQL*LOADER导入数据
- Sql server批量处理脚本
- SQL中批量处理脚本
- 检测压缩包并处理的Linux脚本(解压、批量修改文件名、导入)
- SQL*Loader: 从文件导入数据库
- mysql批量导入sql文件
- Mysql批量导入sql文件
- MySQL 批量导入sql文件
- oracle sql*loader批量导入多个CSV
- oracle如何批量导入sql脚本
- oracle使用bat脚本批量执行sql文件
- C#读取excel文件数据丢失问题
- 浅谈团队项目管理
- Android Butter Knife 框架——最好用的View注入
- 从输入网址到显示网页的全过程分析
- WEB前端使用 webpack + reack 搭建框架 02 本地服务 与 热替换(react-hot-loader)
- python3脚本使用sql loader批量导入字节文件并二次处理
- Python3 socketserver 实现局域网通信
- spark企业经典案例之手机app流量统计
- 操作系统——页面置换算法
- Myeclipse 更改web项目的访问名
- 首页学习--banner的动画及轮播
- 【BZOJ】2431 [HAOI2009]逆序对数列 递推
- Mac OS X 安装Tensorflow
- Qt Qsyntaxhighlighter给QTextEdit设置丰富的语法高亮格式