mysql本身没有去除html代码的内置函数,但是在一些情况下,不得不在数据库层次提取一些去除了html代码的纯文本。
经过谷歌后,找到了以下两个函数,经测试,均可用。
函数1:
代码如下复制代码SET GLOBAL log_bin_trust_function_creators=1;
DROP FUNCTION IF EXISTS fnStripTags;
DELIMITER |
CREATE FUNCTION fnStripTags( Dirty varchar(4000) )
RETURNS varchar(4000)
DETERMINISTIC
BEGIN
DECLARE iStart, iEnd, iLength int;
WHILE Locate( '<', Dirty ) > 0 And Locate( '>', Dirty, Locate( '<', Dirty )) > 0 DO
BEGIN
SET iStart = Locate( '<', Dirty ), iEnd = Locate( '>', Dirty, Locate('<', Dirty ));
SET iLength = ( iEnd - iStart) + 1;
IF iLength > 0 THEN
BEGIN
SET Dirty = Insert( Dirty, iStart, iLength, '');
END;
END IF;
END;
END WHILE;
RETURN Dirty;
END;
|
DELIMITER ;
SELECT fnStripTags('this <html>is <b>a test</b>, nothing more</html>');
函数2:
代码如下复制代码CREATE FUNCTION `strip_tags`($str text) RETURNS text
BEGIN
DECLARE $start, $end INT DEFAULT 1;
LOOP
SET $start = LOCATE("<", $str, $start);
IF (!$start) THEN RETURN $str; END IF;
SET $end = LOCATE(">", $str, $start);
IF (!$end) THEN SET $end = $start; END IF;
SET $str = INSERT($str, $start, $end - $start + 1, "");
END LOOP;
END;
select strip_tags('<span>hel<b>lo <a href="world">wo<>rld</a> <<x>again<.');
严格来说,以上两个函数只是过滤掉了"<"开头,">"结尾的字符串,并没有对HTML标签,其他标签进行区分,也没有过滤" "之类的空白字符,是不完美的方案,以下对第一个函数进行改进,添加对" "的过滤,还是不完美方案
SET GLOBAL log_bin_trust_function_creators=1; DROP FUNCTION IF EXISTS fnStripTags; DELIMITER | CREATE FUNCTION fnStripTags( Dirty TEXT ) RETURNS TEXT DETERMINISTIC BEGIN DECLARE iStart, iEnd, iLength INT; WHILE LOCATE( '<', Dirty ) > 0 AND LOCATE( '>', Dirty, LOCATE( '<', Dirty )) > 0 DO BEGIN SET iStart = LOCATE( '<', Dirty ), iEnd = LOCATE( '>', Dirty, LOCATE('<', Dirty )); SET iLength = ( iEnd - iStart) + 1; IF iLength > 0 THEN BEGIN SET Dirty = INSERT( Dirty, iStart, iLength, ''); END; END IF; END; END WHILE; WHILE LOCATE( '&nb和谐显示sp;', Dirty ) > 0 DO BEGIN SET iStart = LOCATE( '&nb和谐显示sp;', Dirty ); SET iLength = 6; SET Dirty = INSERT( Dirty, iStart, iLength, ''); END; END WHILE; RETURN Dirty; END; | DELIMITER ;