jEditor 安全的使用FCKEditor

来源：互联网发布：淘宝发布宝贝教程编辑：程序博客网时间：2024/06/06 12:24

在 Web 开发中，尤其是博客类应用中，要允许用户输入大段带格式的文本，目前通常有两种方案：一种是 UBB 代码，其优缺点这里不做评价；另一种是 HTML 代码。HTML 编辑器中使用最为广泛的莫过于 FCKeditor 了。在使用 FCKeditor 的应用中，安全风险值得注意。这里说的安全，是指网友可能通过自己编写的 HTML 代码来攻击其他网友，也就是 HTML 注入问题。

HTML代码中有很多不安全的代码，典型的如<script>,通过在页面上输入script之后，别的网友访问这个页面的时候就会执行这个script，从而达到广告或者恶意的目标。目前我们所总结的不安全的HTML代码包括：

1．脚本：<script *>*</script>

2. 事件 : <element on* >

3. 样式 :<style *></style> <link * />

4. 框架 : <iframe *></iframe>

5. 表单 : <form *></form> <input */> <select */> <textarea * />

6. 其他html标记：<html> <body> ，如果输入HTML标记造成页面的DOM结构不完整，会破坏原有页面的显示及script 的运行。

7. 样式:url,expression等，目前很多网站都未针对此类隐患进行过滤

例如：可执行script 的样式定义 <DIV STYLE="background-image: url(javascript:alert(1))"></div>

8. Flash 标签，比如加载一个flash 就立刻跳转到黄色网页。

其中第 3、7、8 项很容易被忽略，新浪博客也不例外。CSS 注入可以修改整个博客版面，并在页面打开后执行代码。

针对使用Html 输入的Web应用，以下几个方面值得关注。

一． FCKeditor 的使用。

我写了一个基于 jQuery 的 jeditor.js，用来简化 FCKeditor 的加载。

/**
 * jeditor 1.0
 *
 * FCKeditor loader, packaged as a jQuery plugin.
 *
 * Author: neil.mao
 * Email: maoxiang@gmail.com
 */
(function($){

	// Base URL of the FCKeditor distribution; the per-toolbar config files
	// (<name>.config.js) are resolved against it as well.
	var fckeditor_root = "/shaike/component/fckeditor_2.5.1/";
	var fckeditor_js = fckeditor_root+"fckeditor.js";

	// Built-in configurations, indexed by the numeric `type` given to
	// loadEditor: 0 = standard, 1 = minimal, 2 = extended, 3 = UBB.
	var presetNames = ["standard", "simple", "extend", "ubb"];

	// Load fckeditor.js. "<\/script>" produces exactly the same markup as
	// "</script>" but cannot terminate an enclosing inline <script> block.
	document.write('<script type="text/javascript" src="'+fckeditor_js+'"><\/script>');

	/**
	 * Points an FCKeditor instance at the configuration file and toolbar set
	 * selected by `type`.
	 *
	 * @param {Object} editor FCKeditor instance being configured.
	 * @param {Number|String} [type] 0-3 selects a built-in preset; any other
	 *        value is used verbatim as the toolbar-set / config-file name.
	 *        A missing value selects preset 0.
	 * @param {String} [barId] Id of an external toolbar container. Only used
	 *        for custom (non-preset) types.
	 */
	function applyToolbarConfig(editor, type, barId){
		if (type === undefined || type === null) {
			type = 0;
		}

		// Presets are matched on real numbers only, exactly like the strict
		// `case 0:` ... `case 3:` labels this table replaces.
		var isPreset = typeof type === "number" && presetNames[type] !== undefined;
		var name = isPreset ? presetNames[type] : type;

		editor.Config['CustomConfigurationsPath'] = fckeditor_root + name + '.config.js';
		editor.ToolbarSet = name;

		if (isPreset) {
			// The extended preset docks its toolbar in #xToolbar when the page
			// provides one. A jQuery object is always truthy, so existence has
			// to be tested through .length.
			if (type === 2 && $("#xToolbar").length) {
				editor.Config['ToolbarLocation'] = 'Out:xToolbar';
			}
		} else if (barId != null && barId.length) {
			editor.Config['ToolbarLocation'] = 'Out:' + barId;
		}
	}

	/**
	 * Replaces the content of every matched element with an FCKeditor
	 * instance named after the element's id.
	 *
	 * @param {Number|String} [type] Preset index (0-3) or custom toolbar-set name.
	 * @param {String} [barId] External toolbar container id (custom types only).
	 * @returns {jQuery} The original selection, for chaining.
	 */
	$.fn.loadEditor = function(type, barId){
		return this.each(function(){
			// Elements without an id are skipped: the id is used as the
			// editor instance name.
			if (!this.id) {
				return;
			}

			try {
				var host = $(this);
				host.html("正在初始化编辑器，请稍后....");

				var oFCKeditor = new FCKeditor(this.id);
				oFCKeditor.BasePath = fckeditor_root;
				oFCKeditor.Width = host.width();
				oFCKeditor.Height = host.height();
				applyToolbarConfig(oFCKeditor, type, barId);

				host.html(oFCKeditor.CreateHtml());
			} catch (e) {
				// `message` is a string property, not a method; calling it
				// would throw inside this handler and hide the real error.
				alert("加载编辑器出错:"+e.message);
			}
		});
	};

	/**
	 * Returns the formatted XHTML content of the editor attached to the first
	 * matched element.
	 *
	 * @returns {String} The editor content, or "" when the selection is empty
	 *          or its first element has no id.
	 */
	$.fn.getEditorHtml = function (){
		var el = this[0];
		if (!el || !el.id) {
			return "";
		}

		var oEditor = FCKeditorAPI.GetInstance(el.id);
		// "true" asks for formatted output.
		return oEditor.GetXHTML(true);
	};

	/**
	 * Passes `html` to the editor attached to the first matched element.
	 *
	 * NOTE(review): this calls InsertHtml, which presumably inserts at the
	 * current selection rather than replacing the whole content. If a full
	 * replacement was intended, check the FCKeditor API for the setter.
	 *
	 * @param {String} html Markup handed to the editor.
	 * @returns {jQuery} The original selection, for chaining.
	 */
	$.fn.setEditorHtml = function (html){
		var el = this[0];
		if (el && el.id) {
			var oEditor = FCKeditorAPI.GetInstance(el.id);
			oEditor.InsertHtml(html);
		}
		return this;
	};

})(jQuery);

通过这个js来调用fckeditor就会很简单。

<script type="text/javascript">
// Once the DOM is ready, turn #htmleditor into an editor using preset 0
// (the standard configuration).
$(document).ready(function (){
	$("#htmleditor").loadEditor(0);
});
</script>

还可以编写一个安全插件，实时过滤 HTML。

/**
 * Security filtering of HTML code: filters risky HTML.
 * 2008-01-04
 * maoxiang@gmail.com
 */

/**
 * Strips inline event-handler attributes (onclick, onerror, ...) from every
 * tag in an HTML string.
 *
 * Handles double-quoted, single-quoted and unquoted values, matches the
 * attribute name case-insensitively, and treats quoted values as units so a
 * ">" inside a handler does not cut the tag short.
 *
 * This is regex-based, best-effort filtering of editor content; it does not
 * replace filtering on the server.
 *
 * @param {String} html Markup to filter.
 * @returns {String} The markup without on* attributes.
 */
function processEvents(html){
	// One complete tag. Quoted attribute values are consumed whole.
	var tagPattern = /<[^<>"']+(?:(?:"[^"]*"|'[^']*')[^<>"']*)*>/g;
	// Whitespace, an on* attribute name, "=", then a quoted or bare value.
	var eventAttrPattern = /\s+on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"']+)/gi;

	function stripEventAttributes(tagMatch){
		return tagMatch.replace(eventAttrPattern, '');
	}

	return html.replace(tagPattern, stripEventAttributes);
}

/**
 * Removes elements that must never appear in user-supplied HTML.
 *
 * Removed: HTML comments; script, noscript, object, style, frameset, iframe,
 * form, select, option and textarea elements together with their content;
 * link and input tags. Opening or closing tags of those elements that are
 * left without a partner are removed as well.
 *
 * Removal is repeated until the markup stops changing, because deleting one
 * match can join the surrounding text into a new forbidden tag
 * (e.g. "<scr<script></script>ipt>").
 *
 * NOTE(review): stripping comments also strips any placeholder comments the
 * editor may have inserted for protected source; confirm that is acceptable.
 *
 * @param {String} html Markup to filter.
 * @returns {String} The markup without the forbidden elements.
 */
function processTags(html){
	// Elements deleted together with everything between their tags.
	var containerTags = [
		"script", "noscript", "object",
		"style",
		"frameset", "iframe",
		"form", "select", "option", "textarea"
	];
	// Void elements: there is no closing tag to wait for.
	var voidTags = ["link", "input"];

	var patterns = [ /<!--[\s\S]*?-->/g ];
	var i;

	// Complete elements first, so their content goes with them.
	for (i = 0; i < containerTags.length; i++) {
		patterns.push(new RegExp(
			"<" + containerTags[i] + "\\b[\\s\\S]*?<\\/" + containerTags[i] + "\\b[^>]*>", "gi"));
	}
	// Then whatever unpaired opening or closing tags are left.
	for (i = 0; i < containerTags.length; i++) {
		patterns.push(new RegExp("<\\/?" + containerTags[i] + "\\b[^>]*>", "gi"));
	}
	for (i = 0; i < voidTags.length; i++) {
		patterns.push(new RegExp("<\\/?" + voidTags[i] + "\\b[^>]*>", "gi"));
	}

	// Every replacement only deletes text, so the string shrinks on each
	// productive pass and the loop is guaranteed to end.
	var previous;
	do {
		previous = html;
		for (i = 0; i < patterns.length; i++) {
			html = html.replace(patterns[i], "");
		}
	} while (html !== previous);

	return html;
}

/**
 * Decides whether a URL taken from an attribute value may be kept.
 *
 * The value is normalised before the check: numeric character references and
 * the &colon; / &Tab; / &NewLine; entities are decoded, whitespace and
 * control characters are dropped, and the result is lower-cased. A URL is
 * rejected when the normalised text contains "javascript:" or "vbscript:"
 * anywhere, so everything the plain "javascript:" substring test rejected is
 * still rejected.
 *
 * @param {String} url Attribute value, with or without surrounding quotes.
 * @returns {Boolean} false for script URLs, true otherwise.
 */
function isValidUrl(url){
	var normalized = String(url)
		.replace(/&#x([0-9a-f]+);?/gi, function (match, hex) {
			return String.fromCharCode(parseInt(hex, 16));
		})
		.replace(/&#(\d+);?/g, function (match, dec) {
			return String.fromCharCode(parseInt(dec, 10));
		})
		.replace(/&(?:tab|newline);/gi, "")
		.replace(/&colon;/gi, ":")
		.replace(/[\u0000-\u0020]+/g, "")
		.toLowerCase();

	return normalized.indexOf("javascript:") < 0 && normalized.indexOf("vbscript:") < 0;
}

/**
 * Shared worker for the URL filters: in every <tagName ...> tag, replaces the
 * value of attrName with "#" when isValidUrl() rejects it.
 *
 * Quoted values keep their original quote character; unquoted values are
 * rewritten as "#" in double quotes.
 *
 * @param {String} html Markup to filter.
 * @param {String} tagName Tag to inspect, e.g. "a".
 * @param {String} attrName URL-carrying attribute, e.g. "href".
 * @returns {String} The filtered markup.
 */
function neutralizeUrlAttribute(html, tagName, attrName){
	// One complete opening tag; quoted values are consumed whole so a ">"
	// inside a value does not end the match early.
	var tagPattern = new RegExp(
		"<" + tagName + "(?=\\s)[^<>\"']*(?:(?:\"[^\"]*\"|'[^']*')[^<>\"']*)*>", "gi");
	// Group 1: ' attr=' including whitespace. Group 2: the raw value.
	var attrPattern = new RegExp(
		"(\\s" + attrName + "\\s*=\\s*)(\"[^\"]*\"|'[^']*'|[^\\s>\"']+)", "gi");

	function fixAttribute(whole, prefix, rawValue){
		var quote = rawValue.charAt(0);
		var isQuoted = quote === '"' || quote === "'";
		var url = isQuoted ? rawValue.substring(1, rawValue.length - 1) : rawValue;

		if (isValidUrl(url)) {
			return whole;
		}
		return prefix + (isQuoted ? quote + "#" + quote : '"#"');
	}

	return html.replace(tagPattern, function (tag) {
		return tag.replace(attrPattern, fixAttribute);
	});
}

/**
 * Neutralises script URLs in the href of <a> tags.
 * @param {String} html Markup to filter.
 * @returns {String} The filtered markup.
 */
function processUrlA(html){
	return neutralizeUrlAttribute(html, "a", "href");
}

/**
 * Neutralises script URLs in the src of <img> tags.
 * @param {String} html Markup to filter.
 * @returns {String} The filtered markup.
 */
function processUrlImg(html){
	return neutralizeUrlAttribute(html, "img", "src");
}

/**
 * Neutralises script URLs in the href of <area> tags.
 * @param {String} html Markup to filter.
 * @returns {String} The filtered markup.
 */
function processUrlArea(html){
	return neutralizeUrlAttribute(html, "area", "href");
}

/**
 * Runs all URL filters (<a href>, <img src>, <area href>) over the markup.
 * @param {String} html Markup to filter.
 * @returns {String} The filtered markup.
 */
function processUrl(html){
	html = processUrlA(html);
	html = processUrlImg(html);
	html = processUrlArea(html);
	return html;
}

/**
 * Runs the complete filter chain (URLs, event attributes, forbidden tags)
 * over a piece of HTML.
 *
 * The chain is repeated until the markup stops changing. A single pass is
 * not enough: a removal made by a later filter can join the surrounding text
 * into something an earlier filter should have caught, for example
 * '<a hr<script></script>ef="javascript:...">'.
 *
 * @param {String} html Markup to filter. Non-string or empty input is
 *        returned unchanged.
 * @returns {String} The filtered markup.
 */
function processHtml(html){
	if (typeof html !== "string" || html.length === 0) {
		return html;
	}

	// The pass budget is tied to the input length rather than a fixed number,
	// so deeply nested input cannot simply outlast it. It assumes every pass
	// that changes the markup also shortens it.
	var passesLeft = html.length + 1;
	var previous;
	do {
		previous = html;
		html = processUrl(html);
		html = processEvents(html);
		html = processTags(html);
		passesLeft--;
	} while (html !== previous && passesLeft > 0);

	return html;
}

/**
 * Data processor installed on FCK. Each of its three methods passes the
 * markup through processHtml() in addition to the conversion work it does.
 */
FCK.DataProcessor =
{
	/**
	 * Returns a string representing the HTML format of "data". The returned
	 * value will be loaded in the editor.
	 * The HTML must be from <html> to </html>, including <head>, <body> and
	 * eventually the DOCTYPE.
	 * Note: HTML comments may already be part of the data because of the
	 * pre-processing made with ProtectedSource.
	 *
	 * @param {String} data The data to be converted in the
	 *        DataProcessor specific format.
	 * @returns {String} A complete HTML document built around the filtered data.
	 */
	ConvertToHtml : function( data )
	{
		// Filter before anything is handed to the editing document.
		data = processHtml( data ) ;

		// The default data processor must handle two different cases depending
		// on the FullPage setting. Custom Data Processors will not be
		// compatible with FullPage, much probably.
		if ( FCKConfig.FullPage )
		{
			// Save the DOCTYPE.
			FCK.DocTypeDeclaration = data.match( FCKRegexLib.DocTypeTag ) ;

			// Check if the <body> tag is available.
			if ( !FCKRegexLib.HasBodyTag.test( data ) )
				data = '<body>' + data + '</body>' ;

			// Check if the <html> tag is available.
			if ( !FCKRegexLib.HtmlOpener.test( data ) )
				data = '<html dir="' + FCKConfig.ContentLangDirection + '">' + data + '</html>' ;

			// Check if the <head> tag is available.
			if ( !FCKRegexLib.HeadOpener.test( data ) )
				data = data.replace( FCKRegexLib.HtmlOpener, '$&<head><title></title></head>' ) ;

			return data ;
		}
		else
		{
			var html =
				FCKConfig.DocType +
				'<html dir="' + FCKConfig.ContentLangDirection + '"' ;

			// On IE, if you are using a DOCTYPE different of HTML 4 (like
			// XHTML), you must force the vertical scroll to show, otherwise
			// the horizontal one may appear when the page needs vertical scrolling.
			// TODO : Check it with IE7 and make it IE6- if it is the case.
			if ( FCKBrowserInfo.IsIE && FCKConfig.DocType.length > 0 && !FCKRegexLib.Html4DocType.test( FCKConfig.DocType ) )
				html += ' style="overflow-y: scroll"' ;

			html += '><head><title></title></head>' +
				'<body' + FCKConfig.GetBodyAttributes() + '>' +
				data +
				'</body></html>' ;

			return html ;
		}
	},

	/**
	 * Converts a DOM (sub-)tree to a string in the data format.
	 *
	 * @param {Object} rootNode The node that contains the DOM tree to be
	 *        converted to the data format.
	 * @param {Boolean} excludeRoot Indicates that the root node must not
	 *        be included in the conversion, only its children.
	 * @param {Boolean} ignoreIfEmptyParagraph When true, '' is returned if
	 *        the filtered data matches FCKRegexLib.EmptyOutParagraph.
	 * @param {Boolean} format Indicates that the data must be formatted
	 *        for human reading. Not all Data Processors may provide it.
	 * @returns {String} The filtered data.
	 */
	ConvertToDataFormat : function( rootNode, excludeRoot, ignoreIfEmptyParagraph, format )
	{
		var data = FCKXHtml.GetXHTML( rootNode, !excludeRoot, format ) ;

		// Filter again on the way out of the editor.
		data = processHtml( data ) ;

		if ( ignoreIfEmptyParagraph && FCKRegexLib.EmptyOutParagraph.test( data ) )
			return '' ;

		return data ;
	},

	/**
	 * Makes any necessary changes to a piece of HTML for insertion in the
	 * editor selection position. Here that means filtering it.
	 *
	 * @param {String} html The HTML to be fixed.
	 * @returns {String} The filtered HTML.
	 */
	FixHtml : function( html )
	{
		html = processHtml( html ) ;
		return html ;
	}
} ;

// Work around the Flash issue: for <embed> elements whose type attribute is
// the Flash MIME type, append allowscriptaccess="never" and append the type
// attribute to the output node. Other embeds are returned untouched.
FCKXHtml.TagProcessors["embed"] = function(node,htmlNode){
	var flashType = "application/x-shockwave-flash";

	if (htmlNode.getAttribute("type") != flashType) {
		return node;
	}

	FCKXHtml._AppendAttribute(node, 'allowscriptaccess', "never");
	FCKXHtml._AppendAttribute(node, 'type', flashType);
	return node;
};

以上就是前台部分了，不过还有更重要的问题需要解决。

二、后台的处理

1. html过滤

     当用户提交 html 到后台时，我们当然不能假定它已经是安全的 html，还需要在后台再精确地过滤一次。经过技术研究（限于篇幅，不能一一赘述），最终的实现大致是这样的：
   1）把用户提交的HTML代码进行格式处理，使得用户输入的html能够完整的解析成一个DOM结构。
   2）针对这个DOM结构，对每个HTML Tag 和每个HTML Attribute 进行精准的过滤。比如 <a> 不允许外站链接，<embed> 如果是 flash ，加上安全属性 , 检查 CSS 里是否有 expression 属性，url 里是否包含 javascript 等。

2. html 截字
博客里经常有摘要要显示，如果输入的html 的话，这个就比较麻烦了。比如有段html代码
<a href="http://www.jteam.cn">专注web开发，为您创造价值</a> ,要显示前5 个字符，如果直接采用subString，结果就是"<a hre " ,肯定就不对了,这个时候同样采用html过滤的原理，先解析成DOM结构，然后截字，比较理想的结果应该是 "<a href="http://www.jteam.cn">专注web...</a>" 。

如果感觉这个比较难实现，也可以全部输出后，在前台用javascript 或者样式实现，这里就不复述了。

欢迎任何建议，如果要看实例，请看太平洋女性网晒客