获取GB2312编码汉字的首字母【转】

来源:互联网 发布:广西地税优化服务 编辑:程序博客网 时间:2024/05/17 20:24

 http://www.xemean.net/blog/user1/4/archives/2006/88.html

获取GB2312编码汉字的首字母

如斯 发表于 2006-9-17 11:08:35 

 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int isHZ = 0;  // Pending-byte flag used by getCharPY: set to 1 once the first byte of a two-byte character has been stored in hz, and back to 0 when the second byte arrives
unsigned short hz; // Working storage used by getCharPY for the two-byte character currently being assembled
// Decide whether a byte is treated as plain single-byte text by this program.
// Returns 1 when ch lies in the inclusive range 8..126, and 0 otherwise.
// Because the range starts at 8, tab and newline are accepted as well as
// the printable characters.
int isASCII(char ch)
{
 return (ch < 8 || ch > 126) ? 0 : 1;
}
// Report whether the first two bytes at strIn form a GB2312 Chinese
// character (hanzi).
//
// strIn: points at the candidate bytes; may be NULL. The second byte is
//        only read when the first one is a valid hanzi lead byte, so a
//        shorter NUL-terminated string is safe to pass, and a two-byte
//        buffer does not need a terminator.
//
// Returns 1 when the lead byte is in 0xB0..0xF7 and the trail byte is in
// 0xA1..0xFE, and 0 otherwise (including NULL or a string shorter than two
// bytes).
int isGB2312(unsigned char * strIn)
{
    unsigned char lead;
    unsigned char trail;

    if (strIn == NULL)
        return 0;

    // Inspect the bytes one at a time rather than calling strlen(): the
    // buffer may be exactly two bytes long with no terminator, and strlen()
    // would then read past its end.
    lead = strIn[0];
    if (lead < 0xB0 || lead > 0xF7)
        return 0;

    // GB2312 trail bytes start at 0xA1; 0xA0 is not part of the code set.
    trail = strIn[1];
    if (trail < 0xA1 || trail > 0xFE)
        return 0;

    return 1;
}

// Feed one byte of GB2312 text and get back the pinyin initial of the
// character it completes.
//
// The function is stateful: a two-byte character is delivered in two
// consecutive calls, and the globals isHZ / hz carry the pending first byte
// between them.
//
// ch: the next byte of the input stream.
//
// Returns:
//   - ch itself when isASCII(ch) is true (any pending first byte is dropped);
//   - 0 when ch was stored as the first byte of a two-byte character;
//   - 'a'..'z' when ch completes a level-1 GB2312 hanzi (0xB0A1..0xD7F9),
//     which are ordered by pinyin;
//   - '?' when the completed pair is not a hanzi, or is a hanzi outside the
//     pinyin-ordered level-1 area (level-2 hanzi are not ordered by pinyin,
//     so no initial can be derived from the code value).
char getCharPY(unsigned char ch)
{
 // First GB2312 code of every pinyin initial, in ascending order. Each
 // range runs up to the code just before the next entry; the last range
 // ('z') ends at lastLevel1. There are no entries for i, u and v.
 static const struct
 {
  unsigned short first;
  char letter;
 } initials[] = {
  {0xB0A1, 'a'}, {0xB0C5, 'b'}, {0xB2C1, 'c'}, {0xB4EE, 'd'},
  {0xB6EA, 'e'}, {0xB7A2, 'f'}, {0xB8C1, 'g'}, {0xB9FE, 'h'},
  {0xBBF7, 'j'}, {0xBFA6, 'k'}, {0xC0AC, 'l'}, {0xC2E8, 'm'},
  {0xC4C3, 'n'}, {0xC5B6, 'o'}, {0xC5BE, 'p'}, {0xC6DA, 'q'},
  {0xC8BB, 'r'}, {0xC8F6, 's'}, {0xCBFA, 't'}, {0xCDDA, 'w'},
  {0xCEF4, 'x'}, {0xD1B9, 'y'}, {0xD4D1, 'z'}
 };
 const unsigned short lastLevel1 = 0xD7F9; // last pinyin-ordered hanzi
 unsigned char pair[3];
 unsigned short code;
 unsigned i;

 if(isASCII((char)ch))
 {
  // A single-byte character cannot be the second half of a hanzi, so a
  // pending first byte (if any) is orphaned and must not be paired later.
  isHZ = 0;
  return (char)ch;
 }

 if(isHZ == 0)
 {
  // First byte of a two-byte character: remember it and wait for the next.
  isHZ = 1;
  hz = ch;
  return 0;
 }

 // Second byte: assemble the pair in stream order, NUL-terminated, so the
 // validity check does not depend on host byte order or read past the data.
 isHZ = 0;
 pair[0] = (unsigned char)hz;
 pair[1] = ch;
 pair[2] = 0;
 code = (unsigned short)((pair[0] << 8) | pair[1]);

 // Leave hz holding the byte-swapped pair, as the original code did.
 hz = (unsigned short)((pair[1] << 8) | pair[0]);

 if(!isGB2312(pair))
  return '?'; // not a hanzi (for example a symbol)

 if(code > lastLevel1)
  return '?'; // level-2 hanzi: not ordered by pinyin

 // Scan from the end for the last range that starts at or below the code.
 for(i = sizeof initials / sizeof initials[0]; i-- > 0; )
 {
  if(code >= initials[i].first)
   return initials[i].letter;
 }

 return '?';
}
 
char * getStrPY(unsigned char *strIn, char *strOut)
{
 unsigned i = 0, j = 0;
 char c = 0;
 unsigned char * pWork = strIn;
 for(i = 0; i < strlen(strIn); i++)
 {
  pWork = strIn + i;
  // 我们将可见的ASCII字符直接输出
  if(!isASCII(*pWork))
  {
   c = getCharPY(*pWork);
  }
  else
  {
   c = *pWork;
  }
  if(isASCII(c))
  {
   strOut[j] = c;
   j++;
  }
 }
 return strOut;
}

// Demo driver: prints a sample string and the pinyin initials derived from it.
// The hanzi in strTest are only recognised if the bytes of the literal are
// GB2312-encoded in the compiled program.
int main(void)
{
 char strTest[] = "IBM Sametime是定位于企业市场的即时通讯协作平台。据介绍,IBM Lotus Sametime 7.5\n\t增加了诸如音频/视频集成技术、多用户IP语音呼叫功能等,并可实现基于位置的感知。";
 // getStrPY writes at most one character per input byte, so a buffer the
 // size of the input (terminator included) is always large enough.
 char strOut[sizeof strTest] = {0};

 printf("in:\t%s\n", strTest);
 getStrPY((unsigned char *)strTest, strOut);
 printf("out:\t%s\n", strOut);
 return 0;
}

原创粉丝点击