CodeIgniter框架源码学习之utf-8编码的环境支持类--Utf8.php

来源:互联网 发布:页游广告知乎 编辑:程序博客网 时间:2024/05/18 16:16
文件位置:./system/core/Utf8.php
<?php
/**
* CodeIgniter
*
* An open source application development framework for PHP
*
* This content is released under the MIT License (MIT)
*
* Copyright (c) 2014 - 2017, British Columbia Institute of Technology
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @package CodeIgniter
* @author EllisLab Dev Team
* @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
* @copyright Copyright (c) 2014 - 2017, British Columbia Institute of Technology (http://bcit.ca/)
* @license http://opensource.org/licenses/MIT MIT License
* @link https://codeigniter.com
* @since Version 2.0.0
* @filesource
*/
// Direct-access guard: if the BASEPATH constant has not been defined by the
// time this file is loaded, stop the script with the message below.
// NOTE(review): BASEPATH is defined outside this file (presumably by the
// framework's front controller) - confirm.
defined('BASEPATH') OR exit('No direct script access allowed');

/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		EllisLab Dev Team
 * @link		https://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Class constructor
	 *
	 * Determines whether UTF-8 support is to be enabled and records the
	 * decision in the UTF8_ENABLED constant (TRUE or FALSE).
	 *
	 * UTF-8 support is enabled only when all of the following hold:
	 *  - the PREG_BAD_UTF8_ERROR constant is defined,
	 *  - ICONV_ENABLED or MB_ENABLED is strictly TRUE,
	 *  - the configured 'charset' item, upper-cased, equals 'UTF-8'.
	 *
	 * ICONV_ENABLED, MB_ENABLED, config_item() and log_message() are not
	 * defined in this file; they must be available before instantiation.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		if (
			defined('PREG_BAD_UTF8_ERROR')				// PCRE must support UTF-8
			&& (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE)	// iconv or mbstring must be installed
			&& strtoupper(config_item('charset')) === 'UTF-8'	// Application charset must be UTF-8
		)
		{
			define('UTF8_ENABLED', TRUE);
			log_message('debug', 'UTF-8 Support Enabled');
		}
		else
		{
			define('UTF8_ENABLED', FALSE);
			log_message('debug', 'UTF-8 Support Disabled');
		}

		log_message('info', 'Utf8 Class Initialized');
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings contain only valid UTF-8 characters.
	 *
	 * Pure 7-bit ASCII input is returned untouched. Anything else is
	 * re-encoded from UTF-8 to UTF-8, using mbstring when MB_ENABLED is
	 * truthy and otherwise iconv (with //IGNORE) when ICONV_ENABLED is
	 * truthy. When neither extension is flagged as available the string is
	 * returned unchanged.
	 *
	 * NOTE(review): iconv() is documented to return FALSE on failure and that
	 * value is passed straight through, so on the iconv path the result may
	 * be FALSE rather than a string.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function clean_string($str)
	{
		// Only strings containing non-ASCII bytes need to be re-encoded.
		if ($this->is_ascii($str) === FALSE)
		{
			if (MB_ENABLED)
			{
				$str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
			}
			elseif (ICONV_ENABLED)
			{
				// '@' silences the notices iconv raises on invalid input.
				$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
			}
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML.
	 *
	 * The work is delegated to the global remove_invisible_characters()
	 * helper, which is defined outside this class, called with FALSE as its
	 * second argument.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8.
	 *
	 * mbstring is preferred when MB_ENABLED is truthy; iconv is used as the
	 * fallback when ICONV_ENABLED is truthy. If neither is available the
	 * conversion cannot be performed and FALSE is returned.
	 *
	 * @param	string	$str		Input string
	 * @param	string	$encoding	Input encoding
	 * @return	string	$str encoded in UTF-8 or FALSE on failure
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (MB_ENABLED)
		{
			return mb_convert_encoding($str, 'UTF-8', $encoding);
		}
		elseif (ICONV_ENABLED)
		{
			// '@' silences iconv's notices; a failed conversion yields FALSE.
			return @iconv($encoding, 'UTF-8', $str);
		}

		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not.
	 *
	 * The pattern searches for any byte outside the range 0x00-0x7F; the
	 * string counts as ASCII only when preg_match() reports exactly zero
	 * matches. An empty string is therefore ASCII.
	 *
	 * @param	string	$str	String to check
	 * @return	bool
	 */
	public function is_ascii($str)
	{
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

}

阅读全文
0 0
原创粉丝点击