// Determines the type/encoding of a script file from its leading bytes.
//
// Reads up to sizeof( HXScriptFileCheck ) bytes from the current position of
// hFile and classifies the file:
//   - HXSF_CODETYPE_BIN               a full header was read and IsBinFile() accepts it
//   - HXSF_CODETYPE_UTF8              the bytes start with EF BB BF
//   - HXSF_CODETYPE_UNICODE           the bytes start with FF FE
//   - HXSF_CODETYPE_UNICODE_BIGENDIAN the bytes start with FE FF
//   - HXSF_CODETYPE_ANSI              anything else (including a failed or empty read)
//
// For the text types the file pointer is moved to just past the BOM (or to
// offset 0 for ANSI). For HXSF_CODETYPE_BIN the file pointer is left where
// the header read ended.
// NOTE(review): the BOM test only makes sense if hFile is positioned at the
// start of the file on entry - confirm against callers.
DWORD CHXScriptReal::CheckFileType( HANDLE hFile )
{
    HXScriptFileCheck sfc;
    DWORD dwcbSize = 0;

    // The binary check needs the complete header. ReadFile never returns more
    // than the requested byte count, so the old "> sizeof( sfc )" comparison
    // could never succeed and binary files were never recognised.
    if( ReadFile( hFile, &sfc, sizeof( sfc ), &dwcbSize, NULL ) && ( dwcbSize >= sizeof( sfc )))
    {
        if( IsBinFile( &sfc ))
            return HXSF_CODETYPE_BIN;
    }

    // Only inspect as many BOM bytes as were actually read.
    if( dwcbSize >= 3 && sfc.byBom[ 0 ] == 0xEF && sfc.byBom[ 1 ] == 0xBB && sfc.byBom[ 2 ] == 0xBF )
    {
        SetFilePointer( hFile, 3, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_UTF8;
    }
    else if( dwcbSize >= 2 && sfc.byBom[ 0 ] == 0xFF && sfc.byBom[ 1 ] == 0xFE )
    {
        SetFilePointer( hFile, 2, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_UNICODE;
    }
    else if( dwcbSize >= 2 && sfc.byBom[ 0 ] == 0xFE && sfc.byBom[ 1 ] == 0xFF )
    {
        SetFilePointer( hFile, 2, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_UNICODE_BIGENDIAN;
    }
    else
    {
        // No BOM: rewind so the caller reads the text from the first byte.
        SetFilePointer( hFile, 0, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_ANSI;
    }
}
// Reads the next character from m_hFile and returns it as a single WCHAR,
// decoding according to m_dwCodeType.
//
// Returns 0 when nothing could be read (end of file or read error), when the
// bytes could not be converted, or when m_dwCodeType is not one of the
// handled HXSF_CODETYPE_* values. A genuine NUL character in the input is
// therefore indistinguishable from those conditions.
//
// NOTE(review): a 4-byte UTF-8 sequence converts to two UTF-16 code units;
// only the first unit is returned and the second is dropped.
WCHAR CHXLexer::ReadNextCharFromFile()
{
    char btChar;
    WCHAR ch = 0;           // also the result for an unrecognised m_dwCodeType
    DWORD dwReaded = 0;

    assert( m_hFile != NULL && m_hFile != INVALID_HANDLE_VALUE );

    switch( m_dwCodeType )
    {
    case HXSF_CODETYPE_UNICODE:
        // UTF-16 little endian: the two bytes are already a WCHAR.
        if(( ! ReadFile( m_hFile, &ch, sizeof( WCHAR ), &dwReaded, NULL )) || ( dwReaded != sizeof( WCHAR )))
            ch = 0;
        break;

    case HXSF_CODETYPE_UNICODE_BIGENDIAN:
        // UTF-16 big endian: swap the two bytes after reading.
        if( ReadFile( m_hFile, &ch, sizeof( WCHAR ), &dwReaded, NULL ) && ( dwReaded == sizeof( WCHAR )))
        {
            WCHAR chTmp = ch;
            ch = chTmp << 8;
            ch |= ( chTmp >> 8 );
        }
        else
            ch = 0;
        break;

    case HXSF_CODETYPE_UTF8:
        if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
        {
            char szch[ 8 ];
            WCHAR szwch[ 2 ];
            int n, i;

            // The lead byte's high bits give the total sequence length.
            if(( btChar & 0x80 ) == 0x00 )
                n = 1;
            else if(( btChar & 0xE0 ) == 0xC0 )
                n = 2;
            else if(( btChar & 0xF0 ) == 0xE0 )
                n = 3;
            else if(( btChar & 0xF8 ) == 0xF0 )
                n = 4;
            else if(( btChar & 0xFC ) == 0xF8 )
                n = 5;
            else if(( btChar & 0xFE ) == 0xFC )
                n = 6;
            else
                n = 0;      // not a valid lead byte; converted on its own below

            szch[ 0 ] = btChar;
            // Collect the remaining bytes; stop early if the file ends.
            for( i = 1; i < n; ++ i )
            {
                if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
                    szch[ i ] = btChar;
                else
                    break;
            }
            szch[ i ] = 0;

            if( ::MultiByteToWideChar( CP_UTF8, 0, szch, i, szwch, 2 ) != 0 )
                ch = *szwch;
            else
                ch = 0;
        }
        else
            ch = 0;
        break;

    case HXSF_CODETYPE_ANSI:
        if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
        {
            char szch[ 4 ];
            WCHAR szwch[ 2 ];
            int n = 1;

            szch[ 0 ] = btChar;
            // Only a real DBCS lead byte is followed by a trail byte. Testing
            // just the high bit would swallow the next byte on single-byte
            // code pages and make the conversion below fail.
            if( ::IsDBCSLeadByte( static_cast< BYTE >( btChar )))
            {
                if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
                {
                    szch[ 1 ] = btChar;
                    ++ n;
                }
            }
            szch[ n ] = 0;

            // Exactly one wide character is expected for one ANSI character.
            if( ::MultiByteToWideChar( CP_ACP, 0, szch, n, szwch, 2 ) == 1 )
                ch = *szwch;
            else
                ch = 0;
        }
        else
            ch = 0;
        break;

    default:
        // Unknown encoding: report "no character" and do not return garbage.
        ch = 0;
        break;
    }

    return ch;
}
// Checks whether a byte buffer is structurally valid UTF-8.
//
// pszBuffer  bytes to examine
// ncb        number of bytes in pszBuffer
//
// Returns TRUE when the whole buffer consists of complete 1- to 4-byte UTF-8
// sequences (so pure ASCII and an empty buffer both yield TRUE). Returns
// FALSE on an invalid lead byte, a bad continuation byte, a sequence
// truncated by the end of the buffer, or a negative ncb.
//
// Only the lead/continuation bit patterns are checked; overlong encodings
// and surrogate code points are not rejected.
BOOL CHXScriptReal::IsTextUTF8( BYTE * pszBuffer, int ncb )
{
    int i = 0;

    while( i < ncb )
    {
        const BYTE lead = pszBuffer[ i ];
        int step;

        // Sequence length from the lead byte.
        if(( lead & 0x80 ) == 0x00 )
            step = 1;
        else if(( lead & 0xe0 ) == 0xc0 )
            step = 2;
        else if(( lead & 0xf0 ) == 0xe0 )
            step = 3;
        else if(( lead & 0xf8 ) == 0xf0 && lead <= 0xf4 )
            step = 4;       // supplementary planes; lead bytes above 0xF4 are not valid UTF-8
        else
            return FALSE;

        // The sequence must fit entirely inside the buffer.
        if( i + step > ncb )
            return FALSE;

        // Every following byte must look like 10xxxxxx.
        for( int k = 1; k < step; ++ k )
        {
            if(( pszBuffer[ i + k ] & 0xc0 ) != 0x80 )
                return FALSE;
        }

        i += step;
    }

    // Equal unless ncb was negative, in which case the loop never ran.
    return ( i == ncb ) ? TRUE : FALSE;
}
- UTF8/ANSI/UNICODE文件读取
- [C/C++]_[Unicode转Utf8,Ansi转Unicode,Ansi文件转Utf8文件]
- [C/C++]_[Unicode转Utf8,Ansi转Unicode,Ansi文件转Utf8文件]
- Unicode, ANSI, UTF8
- Unicode,Ansi和UTF8
- ANSI Unicode,UTF8 转换
- Ansi Unicode UTF8
- ANSI - Unicode - UTF8 转换
- utf8,unicode ,ansi转换
- ansi-unicode-utf8
- Ansi,UTF8,Unicode编码
- ANSI UTF8 UNICODE
- Ansi,UTF8,Unicode编码
- Ansi,UTF8,Unicode编码
- unicode,utf8,ansi转换
- UTF8/ANSI to Unicode
- Ansi,UTF8,Unicode编码
- ANSI UNICODE UTF8 专辑
- 关于zend studio的汉化
- hdu 4099 Revenge of Fibonacci
- 青年文摘-往世书
- AIX运行级别介绍
- matlab错误:Warning: Variable 'a' cannot be saved to a MAT-file whose version is older than 7.3.
- UTF8/ANSI/UNICODE文件读取
- 数字图像处理图库
- CODE 106: Remove Duplicates from Sorted Array
- C#中类的成员
- google
- hdu 2845 Beans (最大不连续子序列的和)
- 黑马程序员:线程间通信介绍:wait()、notify()、Lock、Condition等介绍
- 吃小吃列
- Google map 地图分块算法的解析 EPSG:900913