htmlwrap - Safely wrap HTML formatted text 1.1

来源：互联网发布：泰达网络下线编辑：程序博客网时间：2024/05/16 02:03
Built for use in the Orca Forum and Blog, the htmlwrap() function safely wraps HTML formatted text by breaking strings of characters over a certain length. It's great for use anywhere where generated HTML output is built from user input.

htmlwrap() won't insert line-breaks within HTML tags or entities, and each entity is treated as a single character when calculating break-points. It will also try to find logical line-break insertion points (like after periods or slashes) instead of pounding them in robotically every certain number of characters. Additionally, you can protect entire elements from line-breaks just by adding them to the protect list.

Now optionally works on multi-byte characters in version 1.1
    <?php /* ************************************************************** 
    * htmlwrap() function - v1.1 
    * Copyright (c) 2004 Brian Huisman AKA GreyWyvern 
    * 
    * This program may be distributed under the terms of the GPL 
    *   - http://www.gnu.org/licenses/gpl.txt 
    * 
    * 
    * htmlwrap -- Safely wraps a string containing HTML formatted text (not 
    * a full HTML document) to a specified width 
    * 
    * 
    * Changelog 
    * 1.1  - Now optionally works with multi-byte characters 
    * 
    * 
    * Description 
    * 
    * string htmlwrap ( string str [, int width [, string break [, string 
    * nobreak [, string nobr [, bool utf]]]]]) 
    * 
    * htmlwrap() is a function which wraps HTML by breaking long words and 
    * preventing them from damaging your layout.  This function will NOT 
    * insert <br /> tags every "width" characters as in the PHP wordwrap() 
    * function.  HTML wraps automatically, so this function only ensures 
    * wrapping at "width" characters is possible.  Use in places where a 
    * page will accept user input in order to create HTML output like in 
    * forums or blog comments. 
    * 
    * htmlwrap() won't break text within HTML tags and also preserves any 
    * existing HTML entities within the string, like &nbsp; and &lt;.  It 
    * counts each of these entities as one character.  Output is auto- 
    * matically nl2br()'ed. 
    * 
    * The function also allows you to specify "protected" elements, where 
    * line-breaks, block-returns or both are not inserted.  This is useful 
    * for elements like <pre> where you don't want the code to be damaged 
    * by the insertion of HTML block-returns.  Add the names of the 
    * elements you wish to protect from line-breaks (nobreak) and/or block- 
    * returns (nobr) as space separated lists.  Only names of valid HTML 
    * tags are accepted.  (eg. "code pre blockquote") 
    * 
    * The optional "utf" parameter enables the function to treat multi- 
    * byte characters in UTF-8 as single characters.  The default is false. 
    * "This modifier is available from PHP 4.1.0 or greater on Unix and 
    * from PHP 4.2.3 on win32." 
    *  - http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php 
    * 
    * htmlwrap() will *always* break long strings of characters at the 
    * specified width.  In this way, the function behaves as if the 
    * wordwrap() "cut" flag is always set.  However, the function will try 
    * to find "safe" characters within strings it breaks, where inserting a 
    * line-break would make more sense.  You may edit these characters by 
    * adding or removing them from the $lbrks variable. 
    * 
    * htmlwrap() is safe to use on strings containing multi-byte 
    * characters as of version 1.1. 
    * 
    * See the inline comments and http://www.greywyvern.com/php.php 
    * for more info 
    ******************************************************************** */ 
      
    /**
     * Make HTML-formatted text wrappable by inserting $break into runs of
     * non-whitespace characters that reach $width characters.
     *
     * The input is split on < and >; text between them is treated as tag
     * content and is never modified.  Text outside tags has its newlines
     * converted to "<br />\n" (unless inside an element listed in $nobr) and
     * its long runs broken (unless inside an element listed in $nobreak).
     * HTML entities such as &amp; or &#160; count as a single character.
     *
     * Character counts are not carried across tags: each text segment is
     * examined on its own, and a run of exactly $width characters that ends
     * a segment also receives a break, since the word may continue after the
     * following tag.
     *
     * @param string $str     HTML fragment (not a full document)
     * @param int    $width   Maximum run length; values below 1 disable breaking
     * @param string $break   String inserted as the break ("\r" is mapped to "\n")
     * @param string $nobreak Space separated element names protected from breaks
     * @param string $nobr    Space separated element names protected from <br />
     * @param bool   $utf     Treat the input as UTF-8 (adds the PCRE "u" modifier)
     * @return string The processed fragment
     */
    function htmlwrap($str, $width = 60, $break = "\n", $nobreak = "", $nobr = "pre", $utf = false) { 
      
      // Split HTML content into an array delimited by < and > 
      // The flags keep the delimiters and drop empty pieces 
      $content = preg_split("/([<>])/", (string) $str, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); 
      
      // Transform protected element lists into arrays.  Empty names are 
      // dropped so that a tag-like fragment without a name (eg. "< b >") 
      // can never match an empty list entry and switch protection on 
      $nobreak = preg_split('/\s+/', trim((string) $nobreak), -1, PREG_SPLIT_NO_EMPTY); 
      $nobr = preg_split('/\s+/', trim((string) $nobr), -1, PREG_SPLIT_NO_EMPTY); 
      
      // Variable setup 
      $intag = false; 
      $innbk = array();   // stack of open elements protected from line-breaks 
      $innbr = array();   // stack of open elements protected from <br /> 
      $drain = ""; 
      $utf = ($utf) ? "u" : ""; 
      $width = (int) $width; 
      $break = (string) $break; 
      
      // List of characters it is "safe" to insert line-breaks at 
      // Do not add ampersand (&) as it will mess up HTML Entities 
      // It is not necessary to add < and > 
      $lbrks = "/?!%)-}]\\\"':;"; 
      $lbrkCount = strlen($lbrks); 
      
      // We use \r for adding <br /> in the right spots so just switch to \n 
      if ($break === "\r") $break = "\n"; 
      
      // Both patterns are loop-invariant, so build them once. 
      // $longRun captures: 1 = everything up to the start of the run, 
      // 2 = a run of exactly $width characters/entities, 6 = the remainder. 
      // The look-ahead skips runs already followed by $break or whitespace 
      $tagName = '/^(.*?)(\s|$)/' . $utf; 
      $longRun = '/^(.*?\s|^)(([^\s&]|&(\w{2,5}|#\d{2,4});){' . $width . '})(?!(' 
               . preg_quote($break, '/') . '|\s))(.*)$/s' . $utf; 
      
      foreach ($content as $value) { 
      
        // If a < is encountered, set the "in-tag" flag 
        if ($value === "<") { 
          $intag = true; 
      
        // If a > is encountered, remove the flag 
        } else if ($value === ">") { 
          $intag = false; 
      
        // If we are currently within a tag... 
        } else if ($intag) { 
      
          // If the first character is not a / then this is an opening tag 
          if ($value[0] != "/") { 
      
            // Collect the tag name (everything before the first whitespace) 
            $name = (preg_match($tagName, $value, $t) && isset($t[1])) ? $t[1] : ""; 
      
            // If this is a protected element, activate the associated protection flag. 
            // Protection starts only when none is active; while active, only 
            // nested copies of an already protected element are tracked 
            if ((!count($innbk) && in_array($name, $nobreak, true)) || in_array($name, $innbk, true)) $innbk[] = $name; 
            if ((!count($innbr) && in_array($name, $nobr, true)) || in_array($name, $innbr, true)) $innbr[] = $name; 
      
          // Otherwise this is a closing tag 
          } else { 
      
            // If this is a closing tag for a protected element, pop one 
            // level off the matching protection stack 
            $name = substr($value, 1); 
            if (in_array($name, $innbk, true)) array_pop($innbk); 
            if (in_array($name, $innbr, true)) array_pop($innbr); 
          } 
      
        // Else we're outside any tags... 
        } else { 
      
          // If unprotected, remove all existing \r, replace all existing \n with \r 
          if (!count($innbr)) $value = str_replace("\n", "\r", str_replace("\r", "", $value)); 
      
          // If unprotected, enter the line-break loop 
          // (a width below 1 would never terminate, so it disables breaking) 
          if (!count($innbk) && $width > 0) { 
            do { 
              $store = $value; 
      
              // Find the first stretch of characters reaching the $width limit 
              if (preg_match($longRun, $value, $match)) { 
      
                // Determine the last "safe line-break" character within this match. 
                // Offsets are in bytes, which is still UTF-8 safe because every 
                // safe character is ASCII and the fallback is the end of the run 
                $ledge = 0; 
                for ($x = 0; $x < $lbrkCount; $x++) { 
                  $pos = strrpos($match[2], $lbrks[$x]); 
                  if ($pos !== false && $pos > $ledge) $ledge = $pos; 
                } 
      
                // No safe character (or only at the very start): cut at the end of the run 
                if (!$ledge) $ledge = strlen($match[2]) - 1; 
      
                // Insert the modified string 
                $value = $match[1].substr($match[2], 0, $ledge + 1).$break.substr($match[2], $ledge + 1).$match[6]; 
              } 
      
            // Loop while overlimit strings are still being found 
            } while ($store != $value); 
          } 
      
          // If unprotected, replace all \r with <br />\n to finish 
          if (!count($innbr)) $value = str_replace("\r", "<br />\n", $value); 
        } 
      
        // Send the (possibly modified) segment down the drain 
        $drain .= $value; 
      } 
      
      // Return contents of the drain 
      return $drain; 
    } 
    ?>