纯文本中识别URI地址并转换成HTML

来源:互联网 发布:淘宝客进入等级 编辑:程序博客网 时间:2024/04/30 07:53

问题

有一段纯文本text, 欲将其插入DOM节点div中. text中可能有超链接, 邮件地址等. 如果有, 识别之.

分析

  1. 如果只是纯文本, 插入div中, 只要将div.innerText设置为text即可.
  2. text中的URI地址可以用正则识别, 并将其替换为<a/>标签组成的字符串. 此时text变成了HTML字符串html.
  3. HTML字符串html可以赋值给div.innerHTML. 但如果原text中存在HTML语义的字符串呢? 因此, 在识别URI之前, 需要将原text作转义.

解决

uri-recognition.js

/**
 * uri-recognition.js
 *
 * Recognises URIs (http/https/ssh/ftp/"www." prefixed) and e-mail addresses
 * inside a piece of plain text and turns them into <a> elements, while
 * HTML-escaping everything else so the result is safe to assign to innerHTML.
 *
 * Exposes two globals:
 *   - uriRecognition(text) -> { content: String, isPlainText: Boolean }
 *   - setContentWithURIRecognition(el, text)
 *
 * @param {Object} root Global object the public functions are attached to
 *     (`window` in a browser, `globalThis` elsewhere).
 */
(function (root) {
    'use strict';

    // A URI starts with a known scheme (or a bare "www.") followed by a run
    // of URL characters. The hyphen is escaped so that "+-?" is NOT read as
    // a character range (which would also admit "<", ">", ";" and ",").
    var URI_REG = /(?:https?:\/\/|www\.|ssh:\/\/|ftp:\/\/)[a-z0-9&_+\-?\/.=#]+/,
        MAIL_REG = /[a-z0-9_+\-.]+@[a-z0-9_+\-.]+/,

        /**
         * @param {*} v
         * @returns {Boolean} true when v is null or undefined.
         */
        isNil = function (v) {
            return v === null || v === undefined;
        },

        /**
         * Strips leading/trailing whitespace, including the non-breaking
         * space (\xA0) and the ideographic (Chinese) space (\u3000), which
         * older engines do not include in \s.
         *
         * @param {*} s Value to trim; null/undefined/'' yield ''.
         * @returns {String}
         */
        trim = function (s) {
            if (isNil(s) || s === '') {
                return '';
            }
            return String(s)
                .replace(/^[\s\xA0\u3000]+/, '')
                .replace(/[\s\xA0\u3000]+$/, '');
        },

        /**
         * @param {*} s
         * @param {String} sub
         * @returns {Boolean} true when String(s) begins with sub.
         */
        startsWith = function (s, sub) {
            return String(s).indexOf(sub) === 0;
        },

        /**
         * Escapes the characters that are significant in HTML text and in
         * double-quoted attribute values. "&" must be replaced first so the
         * entities produced by the later replacements are not re-escaped.
         *
         * @param {*} s
         * @returns {String}
         */
        escapeHTML = function (s) {
            return String(s)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;');
        },

        /**
         * Builds a fresh global matcher: group 1 is a URI, group 2 a mail
         * address. A new object per call avoids sharing `lastIndex` state
         * between invocations.
         *
         * @returns {RegExp}
         */
        createLinkReg = function () {
            return new RegExp(
                '(' + URI_REG.source + ')|(' + MAIL_REG.source + ')',
                'gi'
            );
        },

        /**
         * @param {*} str
         * @returns {Boolean} true when str contains a URI or mail address.
         */
        test = function (str) {
            return createLinkReg().test(trim(str));
        },

        /**
         * Walks str once, handing every URI / mail address to `replacer`
         * and every stretch of ordinary text to `plain`. Because it is a
         * single pass, markup produced for one match is never re-scanned.
         *
         * @param {String} str
         * @param {Function} replacer Receives {mail, fullURI, match}; its
         *     return value replaces the match. Returning null/undefined
         *     keeps the match and treats it as ordinary text.
         * @param {Function} [plain] Transform applied to text outside of
         *     matches; defaults to the identity function.
         * @returns {String}
         */
        replace = function (str, replacer, plain) {
            var reg = createLinkReg(),
                out = '',
                last = 0,
                found,
                match,
                isMail,
                fullURI,
                newStr;

            if (typeof plain !== 'function') {
                plain = function (s) {
                    return s;
                };
            }
            str = trim(str);

            // Both alternatives need at least one character, so a match is
            // never empty and reg.lastIndex always advances.
            found = reg.exec(str);
            while (found) {
                match = found[0];
                // Group 1 is empty/undefined when the mail branch matched.
                isMail = !found[1];
                if (isMail) {
                    fullURI = 'mailto:' + match;
                } else if (startsWith(match.toLowerCase(), 'www.')) {
                    fullURI = 'http://' + match;
                } else {
                    fullURI = match;
                }

                out += plain(str.slice(last, found.index));
                newStr = replacer({
                    mail: isMail,
                    fullURI: fullURI,
                    match: match
                });
                out += isNil(newStr) ? plain(match) : newStr;

                last = found.index + match.length;
                found = reg.exec(str);
            }
            return out + plain(str.slice(last));
        },

        /**
         * Converts plain text into HTML with recognised links.
         *
         * @param {*} text Plain text; null/undefined are treated as ''.
         * @returns {{content: String, isPlainText: Boolean}} When nothing
         *     was recognised, `content` is the trimmed, UNESCAPED text and
         *     `isPlainText` is true (assign it to innerText/textContent).
         *     Otherwise `content` is an HTML string in which every
         *     non-link character has been escaped and `isPlainText` is
         *     false (safe to assign to innerHTML).
         */
        uriRecognition = function (text) {
            text = trim(text);
            if (!test(text)) {
                return {
                    content: text,
                    isPlainText: true
                };
            }
            return {
                // The anchor is assembled as an escaped string rather than
                // through document.createElement, so no DOM is required.
                content: replace(text, function (info) {
                    return '<a href="' + escapeHTML(info.fullURI) + '">' +
                        escapeHTML(info.match) + '</a>';
                }, escapeHTML),
                isPlainText: false
            };
        },

        /**
         * Renders text into el, linkifying URIs and mail addresses.
         *
         * @param {Element} el Target element.
         * @param {*} text Plain text to display.
         */
        setContentWithURIRecognition = function (el, text) {
            var result = uriRecognition(text);
            if (!result) {
                return;
            }
            if (!result.isPlainText) {
                el.innerHTML = result.content;
                return;
            }
            // Prefer innerText where the host provides it, else fall back
            // to the standard textContent.
            if (!isNil(el.innerText)) {
                el.innerText = result.content;
            } else if (!isNil(el.textContent)) {
                el.textContent = result.content;
            }
        };

    root.uriRecognition = uriRecognition;
    root.setContentWithURIRecognition = setContentWithURIRecognition;
}(typeof window !== 'undefined' ? window : globalThis));


test.html

<!DOCTYPE HTML>
<!-- Manual test page for uri-recognition.js: open it in a browser and check
     that the <a ...> markup in the sample text is shown literally while the
     e-mail address becomes a clickable mailto: link. -->
<html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=utf-8">
        <title>uri recognition</title>
    </head>
    <body>
        <script src="./uri-recognition.js" type="text/javascript"></script>
        <script type="text/javascript">
            // Sample input deliberately contains HTML so the escaping can be
            // verified, plus a mail address that should be recognised.
            var text = '<a href="http://china.haiwainet.cn/n/2014/0509/c232587-20619235.html" ' +
                    'mon="ct=1&a=2&c=top&pn=8" target="_blank">' +
                    '纽约时报:阿里巴巴IPO将风险推向全新水平</a>' +
                    ' send to example@example.com xxxx',
                div = document.createElement('div');
            window.setContentWithURIRecognition(div, text);
            document.body.appendChild(div);
        </script>
    </body>
</html>


Chrome下测试OK.

0 0