Pro*C实现导出Oracle表为文本文件

来源:互联网 发布:淘宝客服上下班时间 编辑:程序博客网 时间:2024/05/18 07:24

旧文重发,原文刊载于2006年6月8日。

近日工作需要,要大批量导出数据,开始用SQL*Plus的SPOOL功能。结果老是提示“xrealloc: cannot reallocate XXX bytes”。而且每次发生的时间还不同,有时候是到文件1G就出错,有时候是2G多出错。怀疑是32位Linux问题,换到Solaris 8上还是如故。客户端都是9201的,到Metalink上搜索没有结果。郁闷之余到Oracle-l上发了个邮件,收到一位高手的指点,说可能是内存泄露问题,SQL*Plus的开发人员估计也不会想到有人会导出如此大量数据。在邮件往复之间,那位高手提醒可以用UTL_FILE来导出。忽然想起Tom的两本书里都有提到用PL/SQL来导出,于是翻出书里的那个网址http://asktom.oracle.com/~tkyte/flat/index.html,进去一看,PL/SQL的效率比SQL*Plus要高,缺点是生成的文件必须在主机上。SQL*Plus可以调整arraysize来提高效率(当然经我实践大数据量时还有内存泄露之虞)。Pro*C是高效的方法,缺点是需要写程序、编译。
硬着头皮看Pro*C咯,好在Tom给了原始代码,抓下来放到一台Linux的机器上proc报错,应该是proc配置include库的问题,找到一个makefile,倒是没啥问题了,但是最后链接失败,报无法识别’sqlca’。到网上一搜,和我一样晕的人还有,要修改代码,多include进,但是看Tom的帖子似乎没提到这个问题,不知道为什么。
下面是修改后的源代码,我顺便在里面修改了会话的一些参数,以处理中文和时间格式:

#include#include#include#include#define MAX_VNAME_LEN     30#define MAX_INAME_LEN     30static char *   USERID = NULL;static char *   SQLSTMT = NULL;static char *   ARRAY_SIZE = "10";#define vstrcpy( a, b ) \(strcpy( a.arr, b ), a.len = strlen( a.arr ), a.arr)EXEC SQL INCLUDE SQLCA;EXEC SQL INCLUDE sqlda;extern SQLDA *sqlald();extern void   sqlclu();static void die( char * msg ){fprintf( stderr, "%s\n", msg );exit(1);}/*this array contains a default mappingI am using to constrain thelengths of returned columns.  It is mapping,for example, the OracleNUMBER type (type code = 2) to be 45 characterslong in a string.*/static int lengths[] ={ -1, 0, 45, 0, 0, 0, 0, 0, 2000, 0, 0,18, 25, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 512, 2000 };static void process_parms( argc, argv )int    argc;char *    argv[];{int    i;for( i = 1; i < argc; i++ ){if ( !strncmp( argv[i], "userid=", 7 ) )USERID = argv[i]+7;elseif ( !strncmp( argv[i], "sqlstmt=", 8 ) )SQLSTMT = argv[i]+8;elseif ( !strncmp( argv[i], "arraysize=", 10 ) )ARRAY_SIZE = argv[i]+10;else{fprintf( stderr,"usage: %s %s %s\n",argv[0],"userid=xxx/xxx sqlstmt=query ","arraysize=\n" );exit(1);}}if ( USERID == NULL  || SQLSTMT == NULL ){fprintf( stderr,"usage: %s %s %s\n",argv[0],"userid=xxx/xxx sqlstmt=query ","arraysize=\n" );exit(1);}}static void sqlerror_hard(){EXEC SQL WHENEVER SQLERROR CONTINUE;fprintf(stderr,"\nORACLE error detected:");fprintf(stderr,"\n% .70s \n", sqlca.sqlerrm.sqlerrmc);EXEC SQL ROLLBACK WORK RELEASE;exit(1);}static SQLDA * process_1(char * sqlstmt, int array_size ){SQLDA *    select_dp;int     i;int        j;int        null_ok;int        precision;int        scale;int        size = 10;fprintf( stderr, "Unloading '%s'\n", sqlstmt );fprintf( stderr, "Array size = %d\n", array_size );EXEC SQL WHENEVER SQLERROR DO sqlerror_hard();EXEC SQL PREPARE S FROM :sqlstmt;EXEC SQL DECLARE C CURSOR FOR S;if ((select_dp = sqlald(size,MAX_VNAME_LEN,MAX_INAME_LEN))== NULL )die( "Cannot allocate  memory for select descriptor." );select_dp->N = size;EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp;if ( !select_dp->F ) return NULL;if (select_dp->F < 0)     {         size = -select_dp->F;sqlclu( select_dp );if ((select_dp =sqlald (size, MAX_VNAME_LEN, MAX_INAME_LEN))== NULL )die( "Cannot allocate  memory for descriptor." );EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp;}select_dp->N = select_dp->F;for (i = 0; i < select_dp->N; i++)select_dp->I[i] = (short *) malloc(sizeof(short) *array_size );for (i = 0; i < select_dp->F; i++){sqlnul (&(select_dp->T[i]),&(select_dp->T[i]), &null_ok);if ( select_dp->T[i] <                      sizeof(lengths)/sizeof(lengths[0]) )         {             if ( lengths[select_dp->T[i]] )select_dp->L[i]  = lengths[select_dp->T[i]];else select_dp->L[i] += 5;}else select_dp->L[i] += 5;select_dp->T[i] = 5;select_dp->V[i] = (char *)malloc( select_dp->L[i] *array_size );for( j = MAX_VNAME_LEN-1;j > 0 && select_dp->S[i][j] == ' ';j--);fprintf (stderr,"%s%.*s", i?",":"", j+1, select_dp->S[i]);}fprintf( stderr, "\n" );EXEC SQL OPEN C;return select_dp;}static void process_2( SQLDA * select_dp, int array_size ){int    last_fetch_count;int        row_count = 0;short    ind_value;char    * char_ptr;int    i,j;for ( last_fetch_count = 0;;last_fetch_count = sqlca.sqlerrd[2] ){EXEC SQL FOR :array_size FETCH CUSING DESCRIPTOR select_dp;for( j=0; j < sqlca.sqlerrd[2]-last_fetch_count; j++ ){for (i = 0; i < select_dp->F; i++){ind_value = *(select_dp->I[i]+j);char_ptr  = select_dp->V[i] +(j*select_dp->L[i]);printf( "%s%s", i?",":"",ind_value?"(null)":char_ptr );}row_count++;printf( "\n" );}if ( sqlca.sqlcode > 0 ) break;}sqlclu(select_dp);EXEC SQL CLOSE C;EXEC SQL COMMIT WORK;fprintf( stderr, "%d rows extracted\n", row_count );}main( argc, argv )int    argc;char *    argv[];{EXEC SQL BEGIN DECLARE SECTION;VARCHAR   oracleid[50];EXEC SQL END DECLARE SECTION;SQLDA    * select_dp;process_parms( argc, argv );/* Connect to ORACLE. */vstrcpy( oracleid, USERID );EXEC SQL WHENEVER SQLERROR DO sqlerror_hard();EXEC SQL CONNECT :o racleid;fprintf(stderr, "\nConnected to ORACLE as user: %s\n\n",oracleid.arr);EXEC SQL ALTER SESSIONSET NLS_DATE_FORMAT = 'YYYYMMDDHH24:MI:SS';EXEC SQL ALTER SESSIONSET NLS_LANGUAGE = "SIMPLIFIED CHINESE";select_dp = process_1( SQLSTMT, atoi(ARRAY_SIZE) );process_2( select_dp , atoi(ARRAY_SIZE));/* Disconnect from ORACLE. */EXEC SQL COMMIT WORK RELEASE;exit(0);}

顺便附上我的makefile,在RHEL AS3上用的:

CC=gccLD=ldOBJ=obj/##########数据库编译选项###########PROCFLAGS=unsafe_null=yes dynamic=ansi mode=oracle dbms=V8 parse=full\release_cursor=no sqlcheck=SEMANTICS ireclen=512 include=./.\sys_include=/usr/include sys_include=/usr/lib/gcc-lib/i386-redhat-linux/3.2.3/include \def_sqlcode=yesINCL=-I/usr/include -I./. -I${ORACLE_HOME}/precomp/publicBIN_LIBS= -lc -L${ORACLE_HOME}/lib -lclntsh##########编译规则################.SUFFIXES: .pc .o .c.c.o:${CC} -g -c $*.c ${INCL}.pc.o:proc $(PROCFLAGS) iname=$*.pc$(CC) -g -c $*.c ${INCL}rm -f $*.c $*.lisall:unloadunload:unload.orm -f $@${CC} -o $@ unload.o  \$(BIN_LIBS)##########################################################编译公用目标文件#################################################################清理目标文件###########################clean:rm -f *.o *.lis

编译以后生成unload程序,执行如下:

unload userid=XXX/XXX sqlstmt='select * from dual' arraysize=100>XXX.DAT

这样DAT文件里就是数据了。

经我测试,在数据库主机上,30分钟导出了7GB的文件,强啊!效率上可以调整的一个是arraysize参数,这个一般100-200都可以,更大的除非你IO超强,不然也没有意义;另一个是所谓并行,其实这个C程序也就是执行SELECT语句,因此将原表改为PARALLEL就可以充分利用系统资源了^_^。


转载自:http://www.readern.com/export-oracle-table-to-flat-text-in-proc.html