UTF-8 编码规则
来源:互联网 发布:韩版女装淘宝网 编辑:程序博客网 时间:2024/05/18 07:26
// Walks the byte array and, for every (even, odd) index pair, builds a
// four-digit hex string with the odd-indexed byte first.
// Note: lowCode is overwritten on each odd index, so after the loop it
// holds only the last complete pair.
string lowCode = "", temp = "";
for (int i = 0; i < bytes.Length; i++)
{
    // Lowercase hex of the current byte, left-padded to two digits.
    string hex = Convert.ToString(bytes[i], 16).PadLeft(2, '0');
    if (i % 2 != 0)
    {
        // Odd index: this byte's hex goes in front of the preceding even byte's hex.
        lowCode = hex + temp;
    }
    else
    {
        // Even index: remember the hex until the next byte arrives.
        temp = hex;
    }
}
int s = (int)'A';
字符集之间转换(UTF-8,UNICODE,Gb2312)
这里搜集了UTF-8、Unicode、GB2312三种编码之间相互转换的方法。
UTF-8: 1~4字节可变(ASCII为1字节,常用汉字为3字节,基本多文种平面之外的字符为4字节)
UNICODE(此处指UTF-16): 基本多文种平面内的字符2字节一个,其余字符用代理对占4字节
GB2312: 汉字2字节一个字符,ASCII字符1字节
例子: “你”字的UTF-8编码: E4 BD A0 11100100 10111101 10100000
“你”的Unicode编码: 4F 60 01001111 01100000
按照UTF-8的编码规则,分解如下:xxxx0100 xx111101 xx100000
把除了x之外的数字拼接在一起,就变成“你”的Unicode编码了。
注意UTF-8首字节最前面的3个1(即前缀1110),表示这个字符的UTF-8编码由3个字节构成;后续字节均以10开头。
经过UTF-8编码之后,多字节字符的各个字节中再也不会出现敏感字符(ASCII字符)了,因为这些字节的最高位始终为1;而ASCII字符本身仍编码为最高位为0的单字节。
C#:
/// <summary>
/// Recovers the numeric code of a character by encoding it as UTF-8 and
/// reassembling the payload bits of the resulting bytes.
/// </summary>
/// <param name="ch">The character to decode.</param>
/// <returns>
/// The value rebuilt from the UTF-8 bytes of <paramref name="ch"/>. One-, two-
/// and three-byte sequences are all decoded; previously a two-byte sequence
/// returned its raw lead byte.
/// </returns>
private int CharCode(char ch)
{
    byte[] by = System.Text.Encoding.UTF8.GetBytes(ch.ToString());
    switch (by.Length)
    {
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx -> 4 + 6 + 6 payload bits
            return ((by[0] & 0x0F) << 12) | ((by[1] & 0x3F) << 6) | (by[2] & 0x3F);
        case 2:
            // 110xxxxx 10xxxxxx -> 5 + 6 payload bits
            return ((by[0] & 0x1F) << 6) | (by[1] & 0x3F);
        default:
            // Single byte: the byte value is returned as-is.
            return by[0];
    }
}
/// <summary>
/// Scratch method that computes the code of a character two ways: by a
/// direct cast and through CharCode. The results are stored in locals and
/// then discarded.
/// </summary>
private void test()
{
    // Direct cast of the char to its numeric value.
    int viaCast = (int)'中';

    // Same character, this time through CharCode.
    int viaUtf8 = CharCode('中');

    // An ASCII character through CharCode.
    int asciiCode = CharCode('a');
}
/// <summary>
/// Escapes a string character by character using the GB2312 encoding.
/// The input is first passed through ReplaceSpecialChar; each character that
/// encodes to exactly two bytes is written as two "%" + hex groups, and every
/// other character is copied unchanged.
/// </summary>
/// <param name="str">The text to escape.</param>
/// <returns>The escaped text.</returns>
public static string GB2312(string str)
{
    string escaped = ReplaceSpecialChar(str);
    StringBuilder output = new StringBuilder();
    Encoding gb = Encoding.GetEncoding("GB2312");
    foreach (char c in escaped)
    {
        byte[] raw = gb.GetBytes(c.ToString());
        if (raw.Length != 2)
        {
            // Not a two-byte character: keep it as it is.
            output.Append(c);
            continue;
        }

        output.Append("%").Append(Convert.ToString(raw[0], 16));
        output.Append("%").Append(Convert.ToString(raw[1], 16));
    }
    return output.ToString();
}
/// <summary>
/// Percent-encodes the non-ASCII characters of a string using their UTF-8 bytes.
/// The input is first passed through ReplaceSpecialChar. Every character whose
/// UTF-8 form is longer than one byte is then written as a run of
/// "%" + two-digit lowercase hex groups; single-byte characters are copied
/// unchanged.
/// </summary>
/// <remarks>
/// Surrogate pairs are encoded as one unit so that characters outside the
/// Basic Multilingual Plane yield their real four-byte sequence, and two-byte
/// characters are escaped as well. Output for three-byte characters is
/// unchanged from the earlier implementation.
/// </remarks>
/// <param name="str">The text to encode.</param>
/// <returns>The encoded text.</returns>
public static string UTF8(string str)
{
    str = ReplaceSpecialChar(str);
    StringBuilder sb = new StringBuilder(str.Length);
    Encoding en = Encoding.UTF8;
    int i = 0;
    while (i < str.Length)
    {
        // A high/low surrogate pair must be encoded together; encoding each
        // half on its own would produce the replacement character U+FFFD.
        int unitLength = char.IsSurrogatePair(str, i) ? 2 : 1;
        byte[] byteCode = en.GetBytes(str.Substring(i, unitLength));
        if (byteCode.Length > 1)
        {
            foreach (byte b in byteCode)
            {
                sb.Append('%').Append(b.ToString("x2"));
            }
        }
        else
        {
            sb.Append(str[i]);
        }
        i += unitLength;
    }
    return sb.ToString();
}
/// <summary>
/// Replaces a fixed set of special characters with their percent escapes:
/// "%", space, "&amp;", "?" and "=".
/// </summary>
/// <param name="str">The text to escape.</param>
/// <returns>The text with those characters replaced.</returns>
public static string ReplaceChar(string str)
{
    // "%" must be handled first so the "%" introduced by later escapes is not escaped again.
    string[,] escapes =
    {
        { "%", "%25" },
        { " ", "%20" },
        { "&", "%26" },
        { "?", "%3F" },
        { "=", "%3D" }
    };

    string result = str;
    for (int row = 0; row < escapes.GetLength(0); row++)
    {
        result = result.Replace(escapes[row, 0], escapes[row, 1]);
    }
    return result;
}
/// <summary>
/// Replaces a fixed set of special characters with their percent escapes:
/// "%", space, "&amp;", "?", ":", "=", "/", "+" and "@".
/// </summary>
/// <param name="str">The text to escape.</param>
/// <returns>The text with those characters replaced.</returns>
public static string ReplaceSpecialChar(string str)
{
    // "%" must be handled first so the "%" introduced by later escapes is not escaped again.
    string[][] escapes =
    {
        new[] { "%", "%25" },
        new[] { " ", "%20" },
        new[] { "&", "%26" },
        new[] { "?", "%3F" },
        new[] { ":", "%3A" },
        new[] { "=", "%3D" },
        new[] { "/", "%2F" },
        new[] { "+", "%2B" },
        new[] { "@", "%40" }
    };

    string result = str;
    foreach (string[] pair in escapes)
    {
        result = result.Replace(pair[0], pair[1]);
    }
    return result;
}
- UTF-8编码规则
- UTF-8编码规则
- utf-8编码规则
- UTF-8编码规则
- UTF-8编码规则
- UTF-8编码规则
- UTF-8编码规则
- UTF-8编码规则
- UTF-8 编码规则
- UTF-8编码规则
- UTF 8 编码规则
- UTF-8编码规则
- UTF-8编码规则
- UTF-8编码规则 .
- UTF - 8编码规则
- UTF-8编码规则
- UTF-8编码规则
- UTF-8 编码规则
- sizeof与strlen的区别
- iOS中正则表达式的使用--NSPredicate
- java关键字final
- 64 位SQL 2008 上打开 Excel
- pythonPkg_re
- UTF-8 编码规则
- Ipsec Nat-Traversal
- 【Q&A】getline读取行的行尾处理
- 求递归算法时间复杂度:递归树
- 谁之错
- 调试arm下的 Alignment trap
- easyui 验证表单
- C#中的DateTimePicker控件焦点默认在日期中的日位置
- Android 4.1 Surface系统变化说明