javaseday24补充(基本数据类型流编码表)

来源：互联网发布：网络情歌《恋恋红尘》编辑：程序博客网时间：2024/06/04 18:16

// For reading and writing primitive data types, think of the Data streams
// (DataInputStream / DataOutputStream).
/**
 * Reads one string from "data.txt" with {@link DataInputStream#readUTF()} and prints it.
 *
 * @throws IOException if the file cannot be opened or read
 */
private static void readData() throws IOException {
    // try-with-resources closes the stream even when readUTF() throws.
    try (DataInputStream dis = new DataInputStream(new FileInputStream("data.txt"))) {
        String str = dis.readUTF();
        System.out.println(str);
    }
}

/**
 * Writes the string "你好" to "data.txt" with {@link DataOutputStream#writeUTF(String)}.
 *
 * @throws IOException if the file cannot be opened or written
 */
private static void writeData() throws IOException {
    // try-with-resources flushes and closes the stream even when writeUTF() throws.
    try (DataOutputStream dos = new DataOutputStream(new FileOutputStream("data.txt"))) {
        dos.writeUTF("你好"); // English letters are written the same way
    }
}

// When the source is memory, use ByteArrayInputStream.
// When the destination is memory, use ByteArrayOutputStream.
// This applies the stream model to arrays; the data handled this way is usually small.
ByteArrayInputStream source = new ByteArrayInputStream("abcdsd".getBytes());
ByteArrayOutputStream sink = new ByteArrayOutputStream();
// Copy one byte at a time until read() reports end of data with -1.
for (int next = source.read(); next != -1; next = source.read()) {
    sink.write(next);
}
String copied = sink.toString();
System.out.println(copied);

还有操作字符数组的 CharArrayReader 和 CharArrayWriter

操作字符串的 StringReader 和 StringWriter

/*
* 编码表的由来
* 计算机只能识别二进制数据早期由来是电信号
* 为了方便应用计算机让他可以识别各个国家的文字
* 就将各个国家的文字用数字来表示并一一对应形成一张表
* 这就是编码表
* ASCII 美国标准信息交换码键盘上差不多
* 用一个字节的7位可以表示打头为0一般都是ASCII
* ISO8859-1 拉丁码表欧洲码表
* 用一个字节的8位表示
* GB2312 中国的中文编码表
* GBK 中国的中文编码表升级融合了更多的中文文字符号 2万多的中文符号
* Unicode 国际标准码融合了多种文字
* UTF-8 变长编码用一到四个字节来表示一个字符(常用汉字占三个字节)
* 一般都用GBK 或者Unicode 别的巨多很麻烦
* ANSI 是本地码表
*/
/**
 * Small demonstrations of encoding a string to bytes and decoding bytes back to a string
 * with different character sets.
 */
public class Demo_01 {
    /**
     * Encodes "你好" as GBK, decodes those bytes with the wrong charset (UTF-8), then tries
     * to undo the mistake by re-encoding with UTF-8 and decoding with GBK.
     *
     * <p>Notes carried over from the original author:
     * <ul>
     *   <li>String -> byte[] is encoding; byte[] -> String is decoding.</li>
     *   <li>"你好" in GBK is {@code -60 -29 -70 -61}; in UTF-8 it is
     *       {@code -28 -67 -96 -27 -91 -67}.</li>
     *   <li>If the text was encoded with the wrong charset it cannot be decoded.</li>
     *   <li>If it was encoded correctly but decoded wrongly it may still be recoverable:
     *       ISO8859-1 is a single-byte charset that does not know Chinese, so re-encoding
     *       with it hands back the original bytes. UTF-8 recognises 1-, 2- and 3-byte
     *       sequences, so the bytes recovered here are not the original GBK bytes and the
     *       final decode fails.</li>
     * </ul>
     *
     * @param args unused
     * @throws IOException if one of the charset names is not supported
     */
    public static void main(String[] args) throws IOException {
        String str = "你好";
        byte[] buf = str.getBytes("GBK");

        // Wrong decode on purpose: GBK bytes interpreted as UTF-8.
        String s1 = new String(buf, "utf-8");
        System.out.println("s1=" + s1);

        // Attempt to get the source bytes back from the wrongly decoded string.
        byte[] buf2 = s1.getBytes("utf-8");
        printBytes(buf2);

        String s2 = new String(buf2, "GBK");
        System.out.println("s2=" + s2);
    }

    /**
     * Encodes "你好" as UTF-8, prints the bytes, decodes them with the same charset and
     * prints the result.
     *
     * @throws UnsupportedEncodingException if "utf-8" is not supported
     */
    public static void Demo_01() throws UnsupportedEncodingException {
        String str = "你好";
        // encode
        byte[] buf = str.getBytes("utf-8");
        printBytes(buf);
        // decode
        String s1 = new String(buf, "utf-8");
        System.out.println("s1=" + s1);
    }

    /**
     * Prints every byte as a signed decimal followed by a space, then ends the line so that
     * later output does not run into the dump.
     *
     * <p>Bytes whose highest bit is 1 print as negative numbers; per the original note this
     * is why the bytes of Chinese characters show up as negatives.
     *
     * @param buf bytes to print
     */
    private static void printBytes(byte[] buf) {
        for (byte b : buf) {
            System.out.print(b + " ");
        }
        System.out.println();
    }
}

String str = "联通";
byte[] buf = str.getBytes("GBK");
/*
 * Bit patterns of the four GBK bytes, as recorded by the original author:
 * 11000001
 * 10101010
 * 11001101
 * 10101000
 * These patterns happen to match the way UTF-8 lays out its bytes, so the two encodings
 * conflict: the data gets treated as UTF-8 and what comes back is the UTF-8 decoding.
 */

for (int i = 0; i < buf.length; i++) {
    // Mask with 0xFF to keep only the low 8 bits of the sign-extended byte.
    int lowEightBits = buf[i] & 0xFF;
    System.out.println(Integer.toBinaryString(lowEightBits));
}

/**
 * Cuts a string by a maximum number of encoded bytes without splitting a character.
 *
 * <p>In Java, "abcd" and "ab你好" both have length four, but their byte counts differ
 * once encoded. The methods here take the longest prefix that fits in the requested
 * number of bytes and drop any character that would only fit partially: for "ab你好" in
 * GBK, three bytes give "ab", four bytes give "ab你" and five bytes still give "ab你".
 */
public class Test {
    /**
     * Prints the GBK cut of a sample string for every byte count from 1 to its full length.
     * The sample starts with a character whose GBK bytes are -84 105, i.e. one with a
     * positive second byte.
     *
     * @param args unused
     * @throws IOException if the "gbk" charset is not supported
     */
    public static void main(String[] args) throws IOException {
        String str = "琲asa你好";
        int len = str.getBytes("gbk").length;
        for (int i = 0; i < len; i++) {
            System.out.println("截取" + (i + 1) + "字节的结果是" + cutStringByByte(str, i + 1));
        }
    }

    /**
     * Returns the longest prefix of {@code str} whose GBK encoding fits in {@code len} bytes.
     *
     * @param str text to cut
     * @param len maximum number of GBK bytes to keep; values beyond the encoded length
     *            return the whole string
     * @return the prefix, never ending in half a character
     * @throws IOException if the "gbk" charset is not supported
     * @throws IllegalArgumentException if {@code len} is negative
     */
    public static String cutStringByByte(String str, int len) throws IOException {
        byte[] buf = str.getBytes("gbk");
        int limit = clampLength(len, buf.length);

        // Count the run of negative bytes that ends at the cut position. A non-negative
        // byte is either ASCII or the second byte of a two-byte character, so the run
        // always starts on a character boundary and consists of whole two-byte pairs,
        // plus one dangling first byte when the run length is odd.
        int negatives = 0;
        for (int i = limit - 1; i >= 0 && buf[i] < 0; i--) {
            negatives++;
        }

        int keep = (negatives % 2 == 0) ? limit : limit - 1;
        return new String(buf, 0, keep, "gbk");
    }

    /**
     * Returns the longest prefix of {@code str} whose UTF-8 encoding fits in {@code len}
     * bytes. Characters of any encoded width (one to four bytes) are handled.
     *
     * @param str text to cut
     * @param len maximum number of UTF-8 bytes to keep; values beyond the encoded length
     *            return the whole string
     * @return the prefix, never ending in a partial character
     * @throws IOException if the "utf-8" charset is not supported
     * @throws IllegalArgumentException if {@code len} is negative
     */
    public static String cutStringByU8tf(String str, int len) throws IOException {
        byte[] buf = str.getBytes("utf-8");
        int limit = clampLength(len, buf.length);

        // Step back over continuation bytes (10xxxxxx) to just after the first byte of
        // the last character that starts before the cut position.
        int afterLead = limit;
        while (afterLead > 0 && (buf[afterLead - 1] & 0xC0) == 0x80) {
            afterLead--;
        }

        if (afterLead > 0) {
            int leadIndex = afterLead - 1;
            int needed = utf8SequenceLength(buf[leadIndex] & 0xFF);
            int present = limit - leadIndex;
            if (present < needed) {
                // The last character is incomplete: cut in front of its first byte.
                limit = leadIndex;
            }
        }
        return new String(buf, 0, limit, "utf-8");
    }

    /** Rejects a negative byte count and caps it at the number of bytes available. */
    private static int clampLength(int len, int available) {
        if (len < 0) {
            throw new IllegalArgumentException("len must not be negative: " + len);
        }
        return Math.min(len, available);
    }

    /** Returns how many bytes a UTF-8 sequence occupies, judged by its first byte. */
    private static int utf8SequenceLength(int lead) {
        if ((lead & 0xE0) == 0xC0) {
            return 2;
        }
        if ((lead & 0xF0) == 0xE0) {
            return 3;
        }
        if ((lead & 0xF8) == 0xF0) {
            return 4;
        }
        return 1;
    }
}

阅读全文

0 0

javaseday24补充(基本数据类型流 编码表)

javaseday24补充(基本数据类型流编码表)