截取字符串:中英文混合无乱码,一个汉字当作一个字符。

来源:互联网 发布:wind开发数据接口 编辑:程序博客网 时间:2024/06/01 10:38
<?php
// Truncate mixed Chinese/English UTF-8 text without producing garbled output.
// One Chinese character counts as one character.

// Only send the header while that is still possible: output that precedes
// this script would otherwise trigger a "headers already sent" warning.
if (!headers_sent()) {
    header('Content-Type: text/html; charset=UTF-8');
}

$str = '浙江zju大学';

/**
 * Return the first $len characters of a UTF-8 encoded string.
 *
 * The string is walked byte-wise. The lead byte of each sequence decides how
 * many bytes belong to the current character, so a multi-byte character is
 * never cut in the middle:
 *
 *   0xxxxxxx -> 1 byte (ASCII)
 *   110xxxxx -> 2 bytes
 *   1110xxxx -> 3 bytes
 *   11110xxx -> 4 bytes
 *   111110xx -> 5 bytes (legacy form, kept for backward compatibility)
 *   1111110x -> 6 bytes (legacy form, kept for backward compatibility)
 *
 * Any other byte (a stray continuation byte 10xxxxxx, 0xFE or 0xFF) is not a
 * valid lead byte; it is counted as a single one-byte character so the scan
 * always advances.
 *
 * @param string $str UTF-8 encoded input.
 * @param int    $len Maximum number of characters to keep.
 *
 * @return string The leading part of $str holding at most $len characters;
 *                an empty string when $len is zero or negative.
 */
function utf8sub($str, $len)
{
    $str = (string) $str;
    $len = (int) $len;

    if ($len <= 0) {
        return '';
    }

    $length = strlen($str); // size of the input in bytes
    $offset = 0;            // byte position of the next character
    $chars = 0;             // characters consumed so far

    while ($chars < $len && $offset < $length) {
        $byte = ord($str[$offset]);

        // Classify the lead byte with bit masks instead of comparing the
        // textual binary representation.
        if (($byte & 0x80) === 0x00) {
            $count = 1;
        } elseif (($byte & 0xE0) === 0xC0) {
            $count = 2;
        } elseif (($byte & 0xF0) === 0xE0) {
            $count = 3;
        } elseif (($byte & 0xF8) === 0xF0) {
            $count = 4;
        } elseif (($byte & 0xFC) === 0xF8) {
            $count = 5;
        } elseif (($byte & 0xFE) === 0xFC) {
            $count = 6;
        } else {
            // Invalid lead byte: never reuse a stale width, step one byte.
            $count = 1;
        }

        $offset += $count;
        $chars++;
    }

    // The consumed characters are contiguous from the start of the string.
    // substr() clamps the length when the last sequence is truncated.
    return substr($str, 0, $offset);
}

echo utf8sub($str, 1), '<br>';
echo utf8sub($str, 10), '<br>';
?>

0 0