UTF-8 编码规则

来源:互联网 发布:韩版女装淘宝网 编辑:程序博客网 时间:2024/05/18 07:26
    byte[] bytes = System.Text.Encoding.Unicode.GetBytes("胡");
         string lowCode = "", temp = "";
         for (int i = 0; i < bytes.Length; i++)
         {
             if (i % 2 == 0)
             {
                 temp = System.Convert.ToString(bytes[i], 16);//取出元素4编码内容(两位16进制)
                 if (temp.Length < 2) temp = "0" + temp;
             }
             else
             {
                 string mytemp = Convert.ToString(bytes[i], 16);
                 if (mytemp.Length < 2) mytemp = "0" + mytemp; lowCode =  mytemp + temp;//取出元素4编码内容(两位16进制)
             }
         }
           int s = (int)'A';


字符集之间转换(UTF-8,UNICODE,Gb2312)
特搜集了UTF-8,UNICODE,Gb2312他们3个之间的相互转换.
UTF-8:   1~4字节可变(ASCII占1字节,常用汉字占3字节)
UNICODE: 此处指UTF-16,基本平面字符2字节一个字符(增补字符占4字节)
GB2312:  汉字2字节一个字符(ASCII字符占1字节)
例子: “你”字的UTF-8编码: E4 BD A0        11100100 10111101 10100000
“你”的Unicode编码: 4F 60            01001111 01100000
按照UTF-8的编码规则,分解如下:xxxx0100 xx111101 xx100000
把除了x之外的数字拼接在一起,就变成“你”的Unicode编码了。
注意UTF-8首字节最前面的3个1(其后跟一个0),表示该字符的UTF-8编码由3个字节构成。
多字节字符经过UTF-8编码之后,每个字节的最高位始终为1,因此编码结果中不会出现与ASCII敏感字符(如 %、&、? 等)相同的字节。

C#:

 private int CharCode(char  ch)
        {
            byte[] by = System.Text.Encoding.GetEncoding("utf-8").GetBytes(ch.ToString());
if(by.Length==3)
            return Convert.ToInt32(Convert.ToString(by[0], 2).Substring(4) + Convert.ToString(by[1], 2).Substring(2)  +Convert.ToString(by[2], 2).Substring(2), 2);
else return by[0];
        }
        private void test()
        {
            int a = (int)'中';
            int a1 = CharCode('中');
            int a2 = CharCode('a');

}





 public static string GB2312(string str)
        {
            str = ReplaceSpecialChar(str);
            StringBuilder sb = new StringBuilder();
            Encoding en = Encoding.GetEncoding("GB2312");
            for (int i = 0; i < str.Length; i++)
            {
                byte[] byteCode = en.GetBytes(str[i].ToString());
                if (byteCode.Length == 2)
                {
                    sb.Append("%" + Convert.ToString(byteCode[0], 16) + "%" + Convert.ToString(byteCode[1], 16));
                }
                else
                {
                    sb.Append(str[i]);
                }
            }

            return sb.ToString();
        }


        /// <summary>
        /// 把汉字按utf-8 编码
        /// </summary>
        /// <param name="str"></param>
        /// <returns></returns>
        public static string UTF8(string str)
        {
            str = ReplaceSpecialChar(str) ;
            StringBuilder sb = new StringBuilder();
            Encoding en = Encoding.GetEncoding("UTF-8");
            for (int i = 0; i < str.Length; i++)
            {
                byte[] byteCode = en.GetBytes(str[i].ToString());
                if (byteCode.Length == 3)
                {
                    sb.Append("%" + Convert.ToString(byteCode[0], 16) + "%" + Convert.ToString(byteCode[1], 16) + "%" + Convert.ToString(byteCode[2], 16));
                }
                else
                {
                    sb.Append(str[i]);
                }
            }
            return sb.ToString();
        }
        /// <summary>
        /// 替换特殊字符
        /// </summary>
        /// <returns></returns>
        public static string ReplaceChar(string str)
        {
            return str.Replace("%", "%25").Replace(" ", "%20").Replace("&", "%26").Replace("?", "%3F").Replace("=", "%3D");
        }
       /// <summary>
       /// 替换特殊字符
       /// </summary>
       /// <returns></returns>
       public static string ReplaceSpecialChar(string str)
       {
           return str.Replace("%", "%25").Replace(" ", "%20").Replace("&", "%26").Replace("?", "%3F").Replace(":", "%3A").Replace("=", "%3D").Replace("/", "%2F").Replace("+", "%2B").Replace("@", "%40");
       }


原创粉丝点击