vendor/uvdesk/core-framework/Utils/HTMLFilter.php line 351

Open in your IDE?
  1. <?php
  2. /**
  3.  * htmlfilter.inc
  4.  * ---------------
  5.  * This set of functions allows you to filter html in order to remove
  6.  * any malicious tags from it. Useful in cases when you need to filter
  7.  * user input for any cross-site-scripting attempts.
  8.  *
  9.  * Copyright (C) 2002-2004 by Duke University
  10.  *
  11.  * This library is free software; you can redistribute it and/or
  12.  * modify it under the terms of the GNU Lesser General Public
  13.  * License as published by the Free Software Foundation; either
  14.  * version 2.1 of the License, or (at your option) any later version.
  15.  *
  16.  * This library is distributed in the hope that it will be useful,
  17.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     See the GNU
  19.  * Lesser General Public License for more details.
  20.  *
  21.  * You should have received a copy of the GNU Lesser General Public
  22.  * License along with this library; if not, write to the Free Software
  23.  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  24.  * 02110-1301  USA
  25.  *
  26.  * @Author    Konstantin Riabitsev <icon@linux.duke.edu>
  27.  * @Author  Jim Jagielski <jim@jaguNET.com / jimjag@gmail.com>
  28.  */
  29. namespace Webkul\UVDesk\CoreFrameworkBundle\Utils;
  30. use Symfony\Component\DomCrawler\Crawler;
  31. class HTMLFilter
  32. {
  /**
   * Builds the textual form of a single tag from its name, its attributes
   * and its type. Called internally by tln_sanitize.
   *
   * @param string $tagname the name of the tag.
   * @param array|boolean $attary map of attribute name => already quoted
   *                   value; anything that is not a non-empty array is
   *                   ignored.
   * @param integer $tagtype 1 = opening tag, 2 = closing tag,
   *                   3 = XHTML-style content-less tag.
   * @return string A string with the final tag representation.
   */
  public function tln_tagprint($tagname, $attary, $tagtype) {
      if ($tagtype == 2) {
          // Closing tags never carry attributes.
          return '</' . $tagname . '>';
      }

      $fulltag = '<' . $tagname;
      if (is_array($attary) && count($attary) > 0) {
          $atts = array();
          // foreach rather than each(): each() no longer exists in PHP 8 and
          // depended on the array's internal pointer being at the start.
          foreach ($attary as $attname => $attvalue) {
              $atts[] = "$attname=$attvalue";
          }
          $fulltag .= ' ' . implode(' ', $atts);
      }
      if ($tagtype == 3) {
          $fulltag .= ' /';
      }

      return $fulltag . '>';
  }
  /**
   * Small helper meant to be used with array_walk: lowercases the value
   * it receives by reference.
   *
   * @param string $val a value passed by-ref.
   * @return        void since it modifies a by-ref value.
   */
  public function tln_casenormalize(&$val) {
      $val = strtolower($val);
  }
  /**
   * Skips any whitespace from the given position within a string and
   * returns the position of the next non-whitespace character.
   *
   * @param string $body the string
   * @param integer $offset the offset within the string where we should start
   *                   looking for the next non-whitespace character.
   * @return integer          the location within the $body where the next
   *                   non-whitespace char is located ($offset itself when
   *                   there is no whitespace at $offset).
   */
  public function tln_skipspace($body, $offset) {
      $matches = array();
      preg_match('/^(\s*)/s', substr($body, $offset), $matches);
      // $matches[1] is a string (the run of leading whitespace), never an
      // array, so it must be measured with strlen(). Testing it with
      // is_array() made this method a no-op.
      if (isset($matches[1])) {
          $offset += strlen($matches[1]);
      }
      return $offset;
  }
  /**
   * Looks for the next occurrence of a string within another string. It is
   * essentially strpos() with a "not found" result that callers can use
   * directly as a position.
   *
   * @param string $body   The string to look for needle in.
   * @param integer $offset Start looking from this position.
   * @param string $needle The character/string to look for.
   * @return integer           location of the next occurrence of the needle, or
   *                   strlen($body) if needle wasn't found.
   */
  public function tln_findnxstr($body, $offset, $needle) {
      $length = strlen($body);
      // strpos() throws a ValueError on PHP 8 when the offset lies beyond the
      // end of the haystack; nothing can be found there anyway.
      if ($offset > $length) {
          return $length;
      }
      $pos = strpos($body, $needle, $offset);
      return ($pos === false) ? $length : $pos;
  }
  /**
   * Takes a PCRE-style regexp and tries to match it within the string,
   * starting at the given offset.
   *
   * @param string $body   The string to look for needle in.
   * @param integer $offset Start looking from here.
   * @param string $reg       A PCRE-style regex to match (without delimiters;
   *                   it is embedded in a pattern delimited by "%").
   * @return array|boolean  Returns a false if no matches found, or an array
   *                   with the following members:
   *                   - integer with the location of the match within $body
   *                   - string with whatever content between offset and the match
   *                   - string with whatever it is we matched
   */
  public function tln_findnxreg($body, $offset, $reg) {
      $matches = array();
      // Group 1 lazily swallows everything up to the first place where $reg
      // matches; group 2 is the match itself.
      $preg_rule = '%^(.*?)(' . $reg . ')%s';
      preg_match($preg_rule, substr($body, $offset), $matches);
      if (!isset($matches[0])) {
          return false;
      }
      return array(
          $offset + strlen($matches[1]),
          $matches[1],
          $matches[2],
      );
  }
  /**
   * Looks for the next tag in the body, starting at the given offset, and
   * parses its name and attributes.
   *
   * @param string $body   String where to look for the next tag.
   * @param integer $offset Start looking from here.
   * @return array|boolean false if no more tags exist in the body, or
   *                   an array with the following members:
   *                   - string with the name of the tag
   *                   - array with attributes and their values
   *                   - integer with tag type (1, 2, or 3)
   *                   - integer where the tag starts (starting "<")
   *                   - integer where the tag ends (ending ">")
   *                   first three members will be false, if the tag is invalid.
   */
  public function tln_getnxtag($body, $offset) {
      if ($offset > strlen($body)) {
          return false;
      }
      $lt = $this->tln_findnxstr($body, $offset, '<');
      if ($lt == strlen($body)) {
          return false;
      }
      /**
       * We are here:
       * blah blah <tag attribute="value">
       * \---------^
       */
      $pos = $this->tln_skipspace($body, $lt + 1);
      if ($pos >= strlen($body)) {
          return array(false, false, false, $lt, strlen($body));
      }
      /**
       * There are 3 kinds of tags:
       * 1. Opening tag, e.g.:
       *      <a href="blah">
       * 2. Closing tag, e.g.:
       *      </a>
       * 3. XHTML-style content-less tag, e.g.:
       *      <img src="blah"/>
       */
      switch (substr($body, $pos, 1)) {
          case '/':
              $tagtype = 2;
              $pos++;
              break;
          case '!':
              /**
               * A comment or an SGML declaration. Either way it is reported
               * as an invalid tag so that the caller drops it.
               */
              if (substr($body, $pos + 1, 2) == '--') {
                  $gt = strpos($body, '-->', $pos);
                  if ($gt === false) {
                      $gt = strlen($body);
                  } else {
                      // Point at the ">" of the closing "-->".
                      $gt += 2;
                  }
                  return array(false, false, false, $lt, $gt);
              }
              $gt = $this->tln_findnxstr($body, $pos, '>');
              return array(false, false, false, $lt, $gt);
          default:
              /**
               * Assume tagtype 1 for now. If it's type 3, we'll switch values
               * later.
               */
              $tagtype = 1;
              break;
      }
      /**
       * Look for next [\W-_], which will indicate the end of the tag name.
       */
      $regary = $this->tln_findnxreg($body, $pos, '[^\w\-_]');
      if ($regary == false) {
          return array(false, false, false, $lt, strlen($body));
      }
      list($pos, $tagname, $match) = $regary;
      $tagname = strtolower($tagname);
      /**
       * $match can be either of these:
       * '>'    indicating the end of the tag entirely.
       * '\s' indicating the end of the tag name.
       * '/'    indicating that this is type-3 xhtml tag.
       *
       * Whatever else we find there indicates an invalid tag.
       */
      switch ($match) {
          case '/':
              /**
               * This is an xhtml-style tag with a closing / at the
               * end, like so: <img src="blah"/>. Check if it's followed
               * by the closing bracket. If not, then this tag is invalid
               */
              if (substr($body, $pos, 2) == '/>') {
                  $pos++;
                  $tagtype = 3;
              } else {
                  $gt = $this->tln_findnxstr($body, $pos, '>');
                  return array(false, false, false, $lt, $gt);
              }
              //intentional fall-through
          case '>':
              return array($tagname, false, $tagtype, $lt, $pos);
          default:
              /**
               * Anything but whitespace after the tag name makes this an
               * invalid tag: look for the next closing ">".
               */
              if (!preg_match('/\s/', $match)) {
                  $gt = $this->tln_findnxstr($body, $lt, '>');
                  return array(false, false, false, $lt, $gt);
              }
      }
      /**
       * At this point we're here:
       * <tagname     attribute='blah'>
       * \-------^
       *
       * At this point we loop in order to find all attributes.
       */
      $attary = array();
      while ($pos <= strlen($body)) {
          $pos = $this->tln_skipspace($body, $pos);
          if ($pos == strlen($body)) {
              /**
               * Non-closed tag.
               */
              return array(false, false, false, $lt, $pos);
          }
          /**
           * See if we arrived at a ">" or "/>", which means that we reached
           * the end of the tag.
           */
          $matches = array();
          preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches);
          if (isset($matches[0]) && $matches[0]) {
              /**
               * Yep. So we did.
               */
              $pos += strlen($matches[1]);
              if ($matches[2] == '/>') {
                  $tagtype = 3;
                  $pos++;
              }
              return array($tagname, $attary, $tagtype, $lt, $pos);
          }
          /**
           * There are several types of attributes, with optional
           * [:space:] between members.
           * Type 1:
           *     attrname[:space:]=[:space:]'CDATA'
           * Type 2:
           *     attrname[:space:]=[:space:]"CDATA"
           * Type 3:
           *     attr[:space:]=[:space:]CDATA
           * Type 4:
           *     attrname
           *
           * We leave types 1 and 2 the same, type 3 we check for
           * '"' and convert to "&quot" if needed, then wrap in
           * double quotes. Type 4 we convert into:
           * attrname="yes".
           */
          $regary = $this->tln_findnxreg($body, $pos, '[^\w\-_]');
          if ($regary == false) {
              /**
               * Looks like body ended before the end of tag.
               */
              return array(false, false, false, $lt, strlen($body));
          }
          list($pos, $attname, $match) = $regary;
          $attname = strtolower($attname);
          /**
           * We arrived at the end of attribute name. Several things possible
           * here:
           * '>'    means the end of the tag and this is attribute type 4
           * '/'    if followed by '>' means the same thing as above
           * '\s' means a lot of things -- look what it's followed by.
           *        anything else means the attribute is invalid.
           */
          switch ($match) {
              case '/':
                  /**
                   * This is an xhtml-style tag with a closing / at the
                   * end, like so: <img src="blah"/>. Check if it's followed
                   * by the closing bracket. If not, then this tag is invalid
                   */
                  if (substr($body, $pos, 2) == '/>') {
                      $pos++;
                      $tagtype = 3;
                  } else {
                      $gt = $this->tln_findnxstr($body, $pos, '>');
                      return array(false, false, false, $lt, $gt);
                  }
                  //intentional fall-through
              case '>':
                  // Square brackets: the {} offset syntax is gone in PHP 8.
                  $attary[$attname] = '"yes"';
                  return array($tagname, $attary, $tagtype, $lt, $pos);
              default:
                  /**
                   * Skip whitespace and see what we arrive at.
                   */
                  $pos = $this->tln_skipspace($body, $pos);
                  $char = substr($body, $pos, 1);
                  /**
                   * Two things are valid here:
                   * '=' means this is attribute type 1 2 or 3.
                   * \w means this was attribute type 4.
                   * anything else we ignore and re-loop. End of tag and
                   * invalid stuff will be caught by our checks at the beginning
                   * of the loop.
                   */
                  if ($char == '=') {
                      $pos++;
                      $pos = $this->tln_skipspace($body, $pos);
                      /**
                       * Here are 3 possibilities:
                       * "'"    attribute type 1
                       * '"'    attribute type 2
                       * everything else is the content of tag type 3
                       */
                      $quot = substr($body, $pos, 1);
                      if ($quot == '\'') {
                          $regary = $this->tln_findnxreg($body, $pos + 1, '\'');
                          if ($regary == false) {
                              return array(false, false, false, $lt, strlen($body));
                          }
                          list($pos, $attval, $match) = $regary;
                          // Step over the closing quote.
                          $pos++;
                          $attary[$attname] = '\'' . $attval . '\'';
                      } elseif ($quot == '"') {
                          $regary = $this->tln_findnxreg($body, $pos + 1, '\"');
                          if ($regary == false) {
                              return array(false, false, false, $lt, strlen($body));
                          }
                          list($pos, $attval, $match) = $regary;
                          // Step over the closing quote.
                          $pos++;
                          $attary[$attname] = '"' . $attval . '"';
                      } else {
                          /**
                           * These are hateful. Look for \s, or >.
                           */
                          $regary = $this->tln_findnxreg($body, $pos, '[\s>]');
                          if ($regary == false) {
                              return array(false, false, false, $lt, strlen($body));
                          }
                          list($pos, $attval, $match) = $regary;
                          /**
                           * If it's ">" it will be caught at the top.
                           */
                          $attval = preg_replace('/\"/s', '&quot;', $attval);
                          $attary[$attname] = '"' . $attval . '"';
                      }
                  } elseif (preg_match('|[\w/>]|', $char)) {
                      /**
                       * That was attribute type 4.
                       */
                      $attary[$attname] = '"yes"';
                  } else {
                      /**
                       * An illegal character. Find next '>' and return.
                       */
                      $gt = $this->tln_findnxstr($body, $pos, '>');
                      return array(false, false, false, $lt, $gt);
                  }
          }
      }
      /**
       * The fact that we got here indicates that the tag end was never
       * found. Return invalid tag indication so it gets stripped.
       */
      return array(false, false, false, $lt, strlen($body));
  }
  /**
   * Translates numeric entities into literal characters so the value can be
   * checked.
   *
   * @param string $attvalue the by-ref value to check; it is rewritten in
   *                   place when there are matches.
   * @param string $regex    the regular expression to check against; its
   *                   first capture group must hold the numeric code.
   * @param boolean $hex        whether the entities are hexadecimal.
   * @return boolean            True or False depending on whether there were matches.
   */
  public function tln_deent(&$attvalue, $regex, $hex = false) {
      $matches = array();
      preg_match_all($regex, $attvalue, $matches);
      if (!is_array($matches) || count($matches[0]) === 0) {
          return false;
      }

      $repl = array();
      foreach ($matches[0] as $i => $entity) {
          $numval = $matches[1][$i];
          if ($hex) {
              $numval = hexdec($numval);
          }
          // chr() works modulo 256. Casting and masking explicitly keeps
          // that result for ordinary codes while preventing absurdly large
          // codes from reaching chr() as out-of-range floats or numeric
          // strings, which PHP 8 rejects.
          $repl[$entity] = chr(((int) $numval) & 0xFF);
      }
      $attvalue = strtr($attvalue, $repl);
      return true;
  }
  /**
   * Checks an attribute value for entity-encoded and backslash-encoded
   * characters and translates them into 8-bit strings so we can run
   * checks on them.
   *
   * @param string $attvalue A string to run entity check against.
   * @return             Void, modifies a reference value.
   */
  public function tln_defang(&$attvalue) {
      /**
       * Skip this if there aren't ampersands or backslashes.
       */
      if (strpos($attvalue, '&') === false
          && strpos($attvalue, '\\') === false
      ) {
          return;
      }
      // Keep decoding until a whole pass finds nothing: decoding one layer
      // may reveal another encoded layer underneath. The || short-circuit
      // means at most one successful decoder runs per pass.
      do {
          $m = false;
          $m = $m || $this->tln_deent($attvalue, '/\&#0*(\d+);*/s');
          $m = $m || $this->tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true);
          $m = $m || $this->tln_deent($attvalue, '/\\\\(\d+)/s', true);
      } while ($m == true);
      $attvalue = stripslashes($attvalue);
  }
  /**
   * Kills any tabs, newlines, carriage returns, NUL bytes and spaces, so
   * that values such as "java[tab]script" cannot slip past the checks that
   * look for "javascript".
   *
   * @param string $attvalue     The attribute value before extraneous spaces removed.
   * @return     Void, modifies a reference value.
   */
  public function tln_unspace(&$attvalue) {
      // Only rewrite the value when it actually contains one of the
      // characters to strip.
      if (strcspn($attvalue, "\t\r\n\0 ") != strlen($attvalue)) {
          $attvalue = str_replace(
              array("\t", "\r", "\n", "\0", " "),
              array('', '', '', '', ''),
              $attvalue
          );
      }
  }
  /**
   * Runs various checks against the attributes of one tag: removes
   * forbidden attributes, rewrites dangerous values and appends mandatory
   * attributes.
   *
   * @param string $tagname            String with the name of the tag.
   * @param array $attary            Array with all tag attributes
   *                   (attribute name => quoted value).
   * @param array $rm_attnames        Map of tag-name regex => list of
   *                   attribute-name regexes to remove.
   * @param array $bad_attvals        Map of tag-name regex => map of
   *                   attribute-name regex => array(patterns, replacements)
   *                   handed to preg_replace.
   * @param array $add_attr_to_tag Map of tag-name regex => attributes to
   *                   merge into matching tags.
   * @return                    Array with modified attributes.
   */
  public function tln_fixatts(
      $tagname,
      $attary,
      $rm_attnames,
      $bad_attvals,
      $add_attr_to_tag
  ) {
      // foreach walks a snapshot of $attary, so removing or replacing
      // entries of $attary inside the loop is safe (each() no longer exists
      // in PHP 8).
      foreach ($attary as $attname => $attvalue) {
          /**
           * See if this attribute should be removed.
           */
          foreach ($rm_attnames as $matchtag => $matchattrs) {
              if (preg_match($matchtag, $tagname)) {
                  foreach ($matchattrs as $matchattr) {
                      if (preg_match($matchattr, $attname)) {
                          unset($attary[$attname]);
                          // Move on to the next attribute: a removed
                          // attribute must not reach the value checks
                          // below, which could add it back.
                          continue 3;
                      }
                  }
              }
          }
          /**
           * Remove any backslashes, entities, or extraneous whitespace.
           */
          $this->tln_defang($attvalue);
          $this->tln_unspace($attvalue);
          /**
           * Now let's run checks on the attvalues.
           */
          foreach ($bad_attvals as $matchtag => $matchattrs) {
              if (preg_match($matchtag, $tagname)) {
                  foreach ($matchattrs as $matchattr => $valary) {
                      if (preg_match($matchattr, $attname)) {
                          /**
                           * There are two arrays in valary.
                           * First is matches.
                           * Second one is replacements
                           */
                          list($valmatch, $valrepl) = $valary;
                          $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                          // Only store the value when a replacement really
                          // happened; otherwise the original, un-defanged
                          // text is kept as is.
                          if ($newvalue != $attvalue) {
                              $attary[$attname] = $newvalue;
                          }
                      }
                  }
              }
          }
      }
      /**
       * See if we need to append any attributes to this tag.
       */
      foreach ($add_attr_to_tag as $matchtag => $addattary) {
          if (preg_match($matchtag, $tagname)) {
              $attary = array_merge($attary, $addattary);
          }
      }
      return $attary;
  }
  /**
   * Filters an HTML string: drops or keeps tags according to $tag_list,
   * removes some tags together with their content, cleans attributes and
   * optionally closes tags that were left open.
   *
   * @param string $body                    The HTML you wish to filter
   * @param array $tag_list                First element is the mode flag
   *                   (false = remove the listed tags, true = allow only the
   *                   listed tags); the remaining elements are tag names.
   * @param array $rm_tags_with_content Tags removed together with everything
   *                   up to their closing tag.
   * @param array $self_closing_tags    Tags always printed as content-less
   *                   (type 3) tags.
   * @param boolean $force_tag_closing    Whether to append closing tags for
   *                   tags still open at the end of the body.
   * @param array $rm_attnames            Passed to tln_fixatts.
   * @param array $bad_attvals            Passed to tln_fixatts.
   * @param array $add_attr_to_tag        Passed to tln_fixatts.
   * @return string                       Sanitized html safe to show on your pages.
   */
  public function tln_sanitize(
      $body,
      $tag_list,
      $rm_tags_with_content,
      $self_closing_tags,
      $force_tag_closing,
      $rm_attnames,
      $bad_attvals,
      $add_attr_to_tag
  ) {
      /**
       * Normalize rm_tags and rm_tags_with_content.
       *
       * See if tag_list is of tags to remove or tags to allow.
       * false  means remove these tags
       * true      means allow these tags
       */
      $rm_tags = array_shift($tag_list);
      @array_walk($tag_list, [$this, 'tln_casenormalize']);
      @array_walk($rm_tags_with_content, [$this, 'tln_casenormalize']);
      @array_walk($self_closing_tags, [$this, 'tln_casenormalize']);

      $curpos = 0;
      // Tag name => number of currently unclosed occurrences.
      $open_tags = array();
      $trusted = "<!-- begin tln_sanitized html -->\n";
      // false, or the name of the tag whose content is being discarded.
      $skip_content = false;
      /**
       * Take care of netscape's stupid javascript entities like
       * &{alert('boo')};
       */
      $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);
      while (($curtag = $this->tln_getnxtag($body, $curpos)) != false) {
          list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
          // Text between the previous tag and this one.
          $free_content = substr($body, $curpos, $lt - $curpos);
          if ($skip_content == false) {
              $trusted .= $free_content;
          }
          if ($tagname != false) {
              if ($tagtype == 2) {
                  if ($skip_content == $tagname) {
                      /**
                       * Got to the end of tag we needed to remove.
                       */
                      $tagname = false;
                      $skip_content = false;
                  } elseif ($skip_content == false) {
                      if (isset($open_tags[$tagname]) &&
                          $open_tags[$tagname] > 0
                      ) {
                          $open_tags[$tagname]--;
                      } else {
                          // Closing tag without a matching opening tag.
                          $tagname = false;
                      }
                  }
              } elseif ($skip_content == false) {
                  /**
                   * See if this is a self-closing type and change
                   * tagtype appropriately.
                   */
                  if ($tagtype == 1
                      && in_array($tagname, $self_closing_tags)
                  ) {
                      $tagtype = 3;
                  }
                  /**
                   * See if we should skip this tag and any content
                   * inside it.
                   */
                  if ($tagtype == 1
                      && in_array($tagname, $rm_tags_with_content)
                  ) {
                      $skip_content = $tagname;
                  } elseif (($rm_tags == false
                          && in_array($tagname, $tag_list)) ||
                      ($rm_tags == true
                          && !in_array($tagname, $tag_list))
                  ) {
                      $tagname = false;
                  } else {
                      if ($tagtype == 1) {
                          if (isset($open_tags[$tagname])) {
                              $open_tags[$tagname]++;
                          } else {
                              $open_tags[$tagname] = 1;
                          }
                      }
                      /**
                       * This is where we run other checks.
                       */
                      if (is_array($attary) && sizeof($attary) > 0) {
                          $attary = $this->tln_fixatts(
                              $tagname,
                              $attary,
                              $rm_attnames,
                              $bad_attvals,
                              $add_attr_to_tag
                          );
                      }
                  }
              }
              if ($tagname != false && $skip_content == false) {
                  $trusted .= $this->tln_tagprint($tagname, $attary, $tagtype);
              }
          }
          // Continue right after the ">" that closed this tag.
          $curpos = $gt + 1;
      }
      // Whatever follows the last tag.
      $trusted .= substr($body, $curpos, strlen($body) - $curpos);
      if ($force_tag_closing == true) {
          foreach ($open_tags as $tagname => $opentimes) {
              while ($opentimes > 0) {
                  $trusted .= '</' . $tagname . '>';
                  $opentimes--;
              }
          }
          $trusted .= "\n";
      }
      $trusted .= "<!-- end tln_sanitized html -->\n";
      return $trusted;
  }
  702.     // 
  703.     // Use the nifty htmlfilter library
  704.     //
  /**
   * Sanitizes an HTML fragment using a fixed set of tag and attribute rules.
   *
   * Builds the rule tables below and passes them, together with $body, to
   * tln_sanitize(); the value returned by that call is returned unchanged.
   *
   * @param string $body                  HTML to sanitize.
   * @param string $trans_image_path      Value interpolated into the replacement
   *                                      strings for blocked src/background values
   *                                      (and for blocked external style url()s).
   * @param bool   $block_external_images When truthy, extra patterns matching
   *                                      http/https values are appended to the
   *                                      src/background and style rules.
   * @return string Whatever tln_sanitize() returns for these rules.
   */
  public function HTMLFilter($body, $trans_image_path, $block_external_images = false) {
      // Tag list handed to tln_sanitize().
      // NOTE(review): the leading `false` is presumably the keep/remove mode
      // flag that tln_sanitize() reads as $rm_tags — confirm against its head.
      $tag_list = array(
          false,
          "object",
          "meta",
          "html",
          "head",
          "base",
          "link",
          "frame",
          "iframe",
          "plaintext",
          "marquee"
      );

      // Opening tags in this list make tln_sanitize() skip the tag and its content.
      $rm_tags_with_content = array(
          "script",
          "applet",
          "embed",
          "title",
          "frameset",
          "xmp",
          "xml"
      );

      // Tags tln_sanitize() may re-type as self-closing (tag type 3).
      $self_closing_tags = array(
          "img",
          "br",
          "hr",
          "input",
          "outbind"
      );

      // Ask tln_sanitize() to append closing tags for anything left open.
      $force_tag_closing = true;

      // Attribute-name patterns, keyed by a tag-name pattern; passed through
      // to tln_fixatts() by tln_sanitize().
      $rm_attnames = array(
          "/.*/" =>
              array(
                  // "/target/i",
                  "/^on.*/i",
                  "/^dynsrc/i",
                  "/^data.*/i",
                  "/^lowsrc.*/i"
              )
      );

      // Attribute-value rules: tag pattern => attribute pattern =>
      // array(list of value patterns, list of replacements).
      // NOTE(review): several replacement lists are longer than their pattern
      // lists (4 vs 3, 11 vs 9). If tln_fixatts() pairs them by index, as
      // preg_replace() does, the entries pushed below for
      // $block_external_images do not line up with the replacements pushed
      // alongside them. Left untouched here — confirm before realigning.
      $bad_attvals = array(
          "/.*/" =>
              array(
                  "/^src|background/i" =>
                      array(
                          array(
                              '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                              '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                              '/^([\'"])\s*about\s*:.*([\'"])/si'
                          ),
                          array(
                              "\\1$trans_image_path\\2",
                              "\\1$trans_image_path\\2",
                              "\\1$trans_image_path\\2",
                              "\\1$trans_image_path\\2"
                          )
                      ),
                  "/^href|action/i" =>
                      array(
                          array(
                              '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                              '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                              '/^([\'"])\s*about\s*:.*([\'"])/si'
                          ),
                          array(
                              "\\1#\\1",
                              "\\1#\\1",
                              "\\1#\\1",
                              "\\1#\\1"
                          )
                      ),
                  "/^style/i" =>
                      array(
                          array(
                              "/expression/i",
                              "/binding/i",
                              "/behaviou*r/i",
                              "/include-source/i",
                              '/position\s*:\s*absolute/i',
                              '/url\s*\(\s*([\'"])\s*\S+script\s*:.*([\'"])\s*\)/si',
                              '/url\s*\(\s*([\'"])\s*mocha\s*:.*([\'"])\s*\)/si',
                              '/url\s*\(\s*([\'"])\s*about\s*:.*([\'"])\s*\)/si',
                              '/(.*)\s*:\s*url\s*\(\s*([\'"]*)\s*\S+script\s*:.*([\'"]*)\s*\)/si'
                          ),
                          array(
                              "idiocy",
                              "idiocy",
                              "idiocy",
                              "idiocy",
                              "",
                              "url(\\1#\\1)",
                              "url(\\1#\\1)",
                              "url(\\1#\\1)",
                              "url(\\1#\\1)",
                              "url(\\1#\\1)",
                              "\\1:url(\\2#\\3)"
                          )
                      )
              )
      );

      if ($block_external_images) {
          // Square-bracket offsets: the curly-brace form ($a{'k'}) is
          // deprecated in PHP 7.4 and a fatal error from PHP 8.0.
          array_push(
              $bad_attvals['/.*/']['/^src|background/i'][0],
              '/^([\'\"])\s*https*:.*([\'\"])/si'
          );
          array_push(
              $bad_attvals['/.*/']['/^src|background/i'][1],
              "\\1$trans_image_path\\1"
          );
          array_push(
              $bad_attvals['/.*/']['/^style/i'][0],
              '/url\(([\'\"])\s*https*:.*([\'\"])\)/si'
          );
          array_push(
              $bad_attvals['/.*/']['/^style/i'][1],
              "url(\\1$trans_image_path\\1)"
          );
      }

      // Attributes to add, keyed by tag-name pattern; passed through to
      // tln_fixatts() by tln_sanitize().
      $add_attr_to_tag = array(
          "/^a$/i" =>
              array('target' => '"_blank"')
      );

      return $this->tln_sanitize(
          $body,
          $tag_list,
          $rm_tags_with_content,
          $self_closing_tags,
          $force_tag_closing,
          $rm_attnames,
          $bad_attvals,
          $add_attr_to_tag
      );
  }
  /**
   * Removes quoted-reply markup left by several mail clients and returns
   * the remaining HTML as produced by Crawler::html().
   *
   * Handled markers, in order: first `.gmail_extra`, first `.qtdSeparateBR`,
   * first `.yahoo_quoted`, every `blockquote[type=cite]`, the parent of every
   * `hr[stop=Spelling]`, and the parent of every `div[name=quote]`.
   *
   * @param string $html HTML of the message.
   * @return string HTML with the quoted parts removed; '' for blank input.
   */
  public function removeEmailReplyQuote($html) {
      // Same blank-input guard as addClassEmailReplyQuote(): there is nothing
      // to parse, and the crawler would hold no nodes to serialise.
      if (trim($html) == '') {
          return '';
      }

      $crawler = new Crawler();
      $crawler->addHtmlContent($html);

      // Detaches $node from its parent. A missing node or a node without a
      // parent is ignored instead of triggering a call on null.
      $detach = function ($node) {
          if ($node !== null && $node->parentNode !== null) {
              $node->parentNode->removeChild($node);
          }
      };

      // Gmail, then Yahoo: only the first match of each selector is removed.
      foreach (array('.gmail_extra', '.qtdSeparateBR', '.yahoo_quoted') as $selector) {
          foreach ($crawler->filter($selector)->first() as $node) {
              $detach($node);
          }
      }

      // Zimbra: cite blockquotes. The node has to be removed through its
      // parent; calling removeChild() on the node itself with itself as the
      // argument can never succeed because a node is not its own child.
      foreach ($crawler->filter('blockquote') as $node) {
          if ($node->getAttribute('type') == 'cite') {
              $detach($node);
          }
      }

      // Windows Live Mail: drop the element that contains the marker <hr>.
      foreach ($crawler->filter('hr') as $node) {
          if ($node->getAttribute('stop') == 'Spelling') {
              $detach($node->parentNode);
          }
      }

      // GMX: drop the element that contains the quote <div>.
      foreach ($crawler->filter('div') as $node) {
          if ($node->getAttribute('name') == 'quote') {
              $detach($node->parentNode);
          }
      }

      return $crawler->html();
  }
  /**
   * Marks quoted-reply markup from several mail clients by overwriting the
   * class attribute of the matching element (or of an ancestor, depending on
   * the client) and returns the HTML as produced by Crawler::html().
   *
   * The first `.qtdSeparateBR` element is removed rather than marked.
   *
   * @param string $html HTML of the message.
   * @return string The re-serialised HTML; '' for blank input.
   */
  public function addClassEmailReplyQuote($html) {
      if (trim($html) == '') {
          return '';
      }

      $crawler = new Crawler();
      $crawler->addHtmlContent($html, 'UTF-8');

      $delimiterClass = 'uv-delimiter-dXZkZXNr';

      // Overwrites the class attribute, but only on real elements: an
      // ancestor lookup can yield null or the DOMDocument, neither of which
      // has setAttribute().
      $setClass = function ($node, $class) {
          if ($node instanceof \DOMElement) {
              $node->setAttribute('class', $class);
          }
      };

      // Applies $class to the first element matching $selector, if any.
      $tagFirst = function ($selector, $class) use ($crawler, $setClass) {
          foreach ($crawler->filter($selector)->first() as $node) {
              $setClass($node, $class);
          }
      };

      $tagFirst('.gmail_quote', 'gmail_quote helpdesk_blockquote');
      $tagFirst('*[class*="' . $delimiterClass . '"]', 'yahoo_quoted helpdesk_blockquote');
      $tagFirst('.zmail_extra', 'yahoo_quoted helpdesk_blockquote');

      // Yahoo: remove the first separator element, then mark the first quote
      // container. The order matters because the steps above may already have
      // given an element the yahoo_quoted class.
      foreach ($crawler->filter('.qtdSeparateBR')->first() as $node) {
          if ($node->parentNode !== null) {
              $node->parentNode->removeChild($node);
          }
      }
      $tagFirst('.yahoo_quoted', 'yahoo_quoted helpdesk_blockquote');

      // Zimbra: every cite blockquote is marked.
      foreach ($crawler->filter('blockquote') as $node) {
          if ($node->getAttribute('type') == 'cite') {
              $setClass($node, 'helpdesk_blockquote');
          }
      }

      // Windows Live Mail: mark the grandparent of <hr stop="Spelling">, or
      // the parent of <hr id="stopSpelling">.
      foreach ($crawler->filter('hr') as $node) {
          $parent = $node->parentNode;
          if ($node->getAttribute('stop') == 'Spelling') {
              $setClass($parent !== null ? $parent->parentNode : null, 'helpdesk_blockquote');
          } elseif ($node->getAttribute('id') == 'stopSpelling') {
              $setClass($parent, 'helpdesk_blockquote');
          }
      }

      // Mark the parent of the first #divRplyFwdMsg element.
      foreach ($crawler->filter('#divRplyFwdMsg')->first() as $node) {
          $setClass($node->parentNode, 'helpdesk_blockquote');
      }

      // GMX: every <div name="quote"> is marked.
      foreach ($crawler->filter('div') as $node) {
          if ($node->getAttribute('name') == 'quote') {
              $setClass($node, 'helpdesk_blockquote');
          }
      }

      return $crawler->html();
  }
  /**
   * Turns e-mail addresses and bare URLs in $str into HTML links.
   *
   * E-mail addresses are handled first by AutoEmailUrls(). Then every URL
   * that starts with http://, https:// or www. and is preceded by the start
   * of the string, whitespace or "(" is wrapped in an <a> element. A trailing
   * "." is kept outside the link.
   *
   * Each match is rewritten in place in a single pass, so linking one URL
   * cannot alter other occurrences of the same text elsewhere in the string.
   *
   * @param string $str   Text to process.
   * @param bool   $popup When truthy, links get target="_blank".
   * @return string|null The processed text; the result of AutoEmailUrls() is
   *                     returned as is when it is not a string.
   */
  public function AutoLinkUrls($str, $popup = FALSE) {
      $str = $this->AutoEmailUrls($str);
      if (!is_string($str)) {
          return $str;
      }

      $pop = ($popup == TRUE) ? " target=\"_blank\" " : "";

      $linked = preg_replace_callback(
          "#(^|\s|\()((http(s?)://)|(www\.))(\w+[^\s\)\<]+)#i",
          function ($m) use ($pop) {
              // $m[1] leading delimiter, $m[4] "s" for https (else ''),
              // $m[5] "www." when no scheme was given (else ''), $m[6] the rest.
              $rest = $m[6];
              $period = '';
              if (substr($rest, -1) === '.') {
                  // Sentence-ending dot: keep it after the link, not in it.
                  $period = '.';
                  $rest = substr($rest, 0, -1);
              }
              $url = 'http' . $m[4] . '://' . $m[5] . $rest;

              return $m[1] . '<a href="' . $url . '"' . $pop . '>' . $url . '</a>' . $period;
          },
          $str
      );

      // preg_replace_callback() yields null on a PCRE error; fall back to
      // the text as it was after e-mail linking.
      return $linked === null ? $str : $linked;
  }
  /**
   * Replaces "<p>__</p>" with "<hr />" and wraps e-mail addresses in
   * mailto: links.
   *
   * The top-level domain may be 2 to 63 letters long, so addresses under
   * longer TLDs are linked in full rather than cut off after four letters.
   *
   * @param string $string Text to process.
   * @return string|null Result of preg_replace(); null on a PCRE error.
   */
  public function AutoEmailUrls($string) {
      $search  = array(
          '/<p>__<\/p>/',
          '/([a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,63})/',
      );
      $replace = array(
          '<hr />',
          '<a href="mailto:$1">$1</a>',
      );

      return preg_replace($search, $replace, $string);
  }
  995. }