diff options
| author | Nick Palmer <nick@sluggardy.net> | 2007-05-17 14:14:31 +0000 |
|---|---|---|
| committer | Nick Palmer <nick@sluggardy.net> | 2007-05-17 14:14:31 +0000 |
| commit | 43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8 (patch) | |
| tree | 7d651afa18a29e871010453f8ff8c3661b06e257 | |
| parent | 0b3f88362e221a1613e6bca8596f013b0942abbd (diff) | |
| download | liberty-43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8.tar.gz liberty-43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8.tar.bz2 liberty-43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8.zip | |
Integrate HTMLPurifier into liberty.
| -rwxr-xr-x | LibertySystem.php | 143 | ||||
| -rw-r--r-- | admin/admin_liberty_inc.php | 55 | ||||
| -rw-r--r-- | admin/schema_inc.php | 2 | ||||
| -rw-r--r-- | icons/bitweaver/htmlpurifier.png | bin | 0 -> 297 bytes | |||
| -rw-r--r-- | plugins/format.bbcode.php | 10 | ||||
| -rw-r--r-- | plugins/format.bithtml.php | 50 | ||||
| -rw-r--r-- | plugins/format.tikiwiki.php | 16 | ||||
| -rw-r--r-- | templates/admin_liberty.tpl | 51 |
8 files changed, 250 insertions, 77 deletions
diff --git a/LibertySystem.php b/LibertySystem.php index 13ea58c..0faade0 100755 --- a/LibertySystem.php +++ b/LibertySystem.php @@ -3,7 +3,7 @@ * System class for handling the liberty package * * @package liberty -* @version $Header: /cvsroot/bitweaver/_bit_liberty/LibertySystem.php,v 1.68 2007/04/07 18:42:22 wjames5 Exp $ +* @version $Header: /cvsroot/bitweaver/_bit_liberty/LibertySystem.php,v 1.69 2007/05/17 14:14:29 nickpalmer Exp $ * @author spider <spider@steelsun.com> */ @@ -106,6 +106,147 @@ class LibertySystem extends LibertyBase { } } + /** + * Return the types of purification supported by purifyHtml + * @returns an array of strings with the types + */ + function purifyHtmlMethods() { + return array('htmlpurifier' => "HTML Purifier", + 'simple' => "Simple Purifier"); + } + + /** + * Purify HTML from a string. + * + * @param string The string to be cleaned. + * @returns string The sanitized string + */ + function purifyHtml($pString) { + global $gBitSystem; + switch($gBitSystem->getConfig('liberty_html_purifier', 'simple')) { + case 'htmlpurifier': + $pString = $this->advancedPurifyHtml($pString); + break; + + case 'simple': + default: + $pString = $this->simplePurifyHtml($pString); + break; + } + + return $pString; + } + + function advancedPurifyHtml($pString) { + global $gHtmlPurifier, $gBitSystem; + if (!isset($gHtmlPurifier)) { + $blacklistedTags = $gBitSystem-> + getConfig('blacklisted_html_tags', ''); + require_once(UTIL_PKG_PATH . 'htmlpurifier/HTMLPurifier.auto.php'); + $config = HTMLPurifier_Config::createDefault(); + + if ($gBitSystem->getConfig('liberty_html_pure_escape_bad', 'y') == 'y') { + $config->set('Core', 'EscapeInvalidTags', true); + $config->set('Core', 'EscapeInvalidChildren', true); + } + if ($gBitSystem->getConfig('liberty_html_pure_disable_extern') == 'y') { + $config->set('URI', 'DisableExternal', true); + } + if ($gBitSystem->getConfig('liberty_html_pure_disable_extern_res', 'y') == 'y') { + $config->set('URI', 'DisableExternalResources', true); + } + if ($gBitSystem->getConfig('liberty_html_pure_disable_res') == 'y') { + $config->set('URI', 'DisableResources', true); + } + if ($gBitSystem->getConfig('liberty_html_pure_disable_uri') == 'y') { + $config->set('URI', 'Disable', true); + } + if ($gBitSystem->getConfig('liberty_html_pure_use_redirect') == 'y') { + $config->set('URI', 'Munge', LIBERTY_PKG_URL.'redirect.php?q=%s'); + } + if ($gBitSystem->getConfig('liberty_html_pure_strict_html', 'y') == 'y') { + $config->set('HTML', 'Strict', true); + } + if ($gBitSystem->getConfig('liberty_html_pure_xhtml', 'n') == 'n') { + $config->set('Core', 'XHTML', true); + } + + $def =& $config->getHTMLDefinition(); + // HTMLPurifier doesn't have a blacklist feature. Duh guys! + // Note that this has to come last since the other configs + // may tweak the def. + foreach (explode(',',$blacklistedTags) as $tag) { + unset($def->info[$tag]); + } + + $gHtmlPurifier = new HTMLPurifier($config); + } + $pString = $gHtmlPurifier->purify($pString); + + /* There isn't an easy way to disable an attribute in HTMLPurifier */ + $pString = $this->purifyStyle($pString); + + return $pString; + } + + /** + * Removes all style both inline and attributes unless the user + * has permission to edit styles. + */ + function purifyStyle( $pText ) { + global $gBitUser; + + $text = $pText; + // Yank style - both tag and inline attributes + // strip_tags has doesn't recognize that css within the style tags are not document text. To fix this do something similar to the following: + if( !$gBitUser->hasPermission( 'p_liberty_edit_html_style' ) ) { + $text = preg_replace( "/<style[^>]*>.*<\/style>/siU", '', $text ); + } + $text = stripslashes($text); + if( !$gBitUser->hasPermission( 'p_liberty_edit_html_style' ) ) { + $text = preg_replace( "/ (style|class)=[\"]?([^\"]*)[\"]?/i", '', $text); + } + + return $text; + } + + // This function is a menagerie of the techniques of the comments listed at + // http://www.php.net/manual/en/function.strip-tags.php - spiderr + function simplePurifyHtml( $pText ) { + global $gBitSystem, $gBitUser; + + // convert all HTML entites to catch people trying to sneak stuff by with things like { etc.. + if( function_exists( 'html_entity_decode' ) ) { + // quieten this down since it causes an error in PHP4 + // http://bugs.php.net/bug.php?id=25670 + $text = @html_entity_decode( $pText, ENT_COMPAT, 'UTF-8' ); + } else { + $trans_tbl = get_html_translation_table(HTML_ENTITIES); + $trans_tbl = array_flip($trans_tbl); + $text = strtr($pText, $trans_tbl); + } + + // strip_tags() appears to become nauseated at the site of a <!DOCTYPE> declaration + $text = str_replace( '<!DOCTYPE', '<DOCTYPE', $text ); + + $text = $this->purifyStyle($text); + + // Strip all evil tags that remain + // this comes out of gBitSystem->getConfig() set in Liberty Admin + $acceptableTags = $gBitSystem->getConfig( 'approved_html_tags', DEFAULT_ACCEPTABLE_TAGS ); + + // Destroy all script code "manually" - strip_tags will leave code inline as plain text + if( !preg_match( '/\<script\>/', $acceptableTags ) ) { + $text = preg_replace( "/(\<script)(.*?)(script\>)/si", '', $text ); + } + + $text = strip_tags( $text, $acceptableTags ); + $text = str_replace("<!--", "<!--", $text); + $text = preg_replace("/(\<)(.*?)(--\>)/mi", "".nl2br("\\2")."", $text); + + return( $text ); + } + // ****************************** Plugin Functions /** * Load only active plugins from disk diff --git a/admin/admin_liberty_inc.php b/admin/admin_liberty_inc.php index 2fe7f56..efcc085 100644 --- a/admin/admin_liberty_inc.php +++ b/admin/admin_liberty_inc.php @@ -18,6 +18,50 @@ $formLibertyFeatures = array( ), ); +$formLibertyHtmlPurifierFeatures = array( + 'liberty_html_pure_escape_bad' => array( + 'label' => 'Escape invalid HTML', + 'note' => ' Escapes invlid HTML as text. Otherwise invalid HTML is silently dropped. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#Core.EscapeInvalidTags">this</a> and <a href="http://htmlpurifier.org/live/configdoc/plain.html#Core.EscapeInvalidChildren">this</a> for more information.', + 'default' => 'y' + ), + 'liberty_html_pure_disable_extern' => array( + 'label' => 'Disable External Links', + 'note' => 'Disables links to external websites which is effective against spam. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.DisableExternal">this</a> for more information.', + 'default' => 'n' + ), + 'liberty_html_pure_disable_extern_res' => array( + 'label' => 'Disable External Resounces', + 'note' => 'Disables the embedding of external resource like images from other hosts. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.DisableExternalResources">this</a> for more information.', + 'default' => 'y' + ), + 'liberty_html_pure_disable_res' => array( + 'label' => 'Disable All Resources', + 'note' => 'Disables the embedding of all resources preventing users from including pictures at all. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.DisableResources">this</a> for more information.', + 'default' => 'n' + ), + 'liberty_html_pure_disable_uri' => array( + 'label' => 'Disable all URIs', + 'note' => 'Disables all URIs in all forms within submitted content. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.Disable">this</a> for more information.', + 'default' => 'n' + ), + 'liberty_html_pure_use_redirect' => array( + 'label' => 'Use Redirect', + 'note' => 'Uses the redirect service in the Redirect URI. This can be handy to track clicks out and prevent leacks of PageRank. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.Munge">this</a> for more information.', + 'default' => 'n' + ), + 'liberty_html_pure_strict_html' => array( + 'label' => 'Force Strict', + 'note' => 'Determines if the purification matches the Transitional or Strict rule sets. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#HTML.Strict">this</a> for more information.', + 'default' => 'y' + ), + 'liberty_html_pure_xhtml' => array( + 'label' => 'Force XHTML', + 'note' => 'Determine if purification forces only XHTML tags or if it allows standard HTML.', + 'default' => 'n' + ) +); +$gBitSmarty->assign( 'formLibertyHtmlPurifierFeatures', $formLibertyHtmlPurifierFeatures ); + $formLibertyTextareaFeatures = array( "liberty_textarea_height" => array( 'label' => 'Default Textarea Height', @@ -99,17 +143,26 @@ $formValues = array( 'image_processor', 'liberty_attachment_link_format', 'comme if( !empty( $_REQUEST['change_prefs'] ) ) { $errors = array(); - $formFeatures = array_merge( $formLibertyFeatures, $formImageFeatures, $formCaptcha ); + $formFeatures = array_merge( $formLibertyFeatures, $formImageFeatures, $formCaptcha, $formLibertyHtmlPurifierFeatures ); foreach( $formFeatures as $item => $data ) { simple_set_toggle( $item, LIBERTY_PKG_NAME ); } foreach( $formLibertyTextareaFeatures as $item => $data ) { simple_set_value( $item, LIBERTY_PKG_NAME ); } + simple_set_value('liberty_html_purifier', LIBERTY_PKG_NAME ); simple_set_value( 'liberty_attachment_style', LIBERTY_PKG_NAME ); $gBitSystem->storeConfig('liberty_cache', $_REQUEST['liberty_cache'], LIBERTY_PKG_NAME ); $gBitSystem->storeConfig('liberty_auto_display_attachment_thumbs', $_REQUEST['liberty_auto_display_attachment_thumbs'], LIBERTY_PKG_NAME ); + if( !empty($_REQUEST['blacklisted_html_tags']) ) { + $tags = preg_replace( '/\s/', '', $_REQUEST['blacklisted_html_tags'] ); + if (strlen( $tags ) > 250) { + $tags = substr( $tags, 0, 250 ); + $errors['blacklist'] = 'The blacklisted tags list has been shortened. You can only have 250 characters for blacklisted tags.'; + } + $gBitSystem->storeConfig('blacklisted_html_tags', $tags , LIBERTY_PKG_NAME ); + } if( $_REQUEST['approved_html_tags'] != DEFAULT_ACCEPTABLE_TAGS ) { $tags = preg_replace( '/\s/', '', $_REQUEST['approved_html_tags'] ); $lastAngle = strrpos( $tags, '>' ) + 1; diff --git a/admin/schema_inc.php b/admin/schema_inc.php index 6af69d0..1c95086 100644 --- a/admin/schema_inc.php +++ b/admin/schema_inc.php @@ -249,6 +249,8 @@ $gBitInstaller->registerPreferences( LIBERTY_PKG_NAME, array( array(LIBERTY_PKG_NAME, 'liberty_action_log', 'y'), // array(LIBERTY_PKG_NAME, 'liberty_attachment_link_format', 'wiki') not needed anymore since we use js in the edit page now (depends on format of content) // array(LIBERTY_PKG_NAME, 'liberty_attachment_style', 'standard'), + // The default for new installs is htmlpurifier old stays simple + array(LIBERTY_PKG_NAME, 'liberty_html_purifier', 'htmlpurifier'), ) ); $gBitInstaller->registerSchemaDefault( LIBERTY_PKG_NAME, array( diff --git a/icons/bitweaver/htmlpurifier.png b/icons/bitweaver/htmlpurifier.png Binary files differnew file mode 100644 index 0000000..119c4b9 --- /dev/null +++ b/icons/bitweaver/htmlpurifier.png diff --git a/plugins/format.bbcode.php b/plugins/format.bbcode.php index ba37155..17e58f9 100644 --- a/plugins/format.bbcode.php +++ b/plugins/format.bbcode.php @@ -1,6 +1,6 @@ <?php /** - * @version $Revision: 1.8 $ + * @version $Revision: 1.9 $ * @package liberty * @subpackage plugins_format */ @@ -9,7 +9,6 @@ global $gLibertySystem; /** * run 'pear install Text_Wiki_BBCode-alpha' to install the library, - * you also need to enable the HTML plugin for now to due to dependency on the purge_html function */ require_once('PEAR.php'); @@ -35,12 +34,9 @@ $pluginParams = array ( $gLibertySystem->registerPlugin( PLUGIN_GUID_BBCODE, $pluginParams ); function bbcode_verify_data( &$pParamHash ) { + global $gLibertySystem; $errorMsg = NULL; - if( !function_exists( 'purge_html' ) && include_once( LIBERTY_PKG_PATH.'plugins/format.bithtml.php' ) ) { - $pParamHash['content_store']['data'] = purge_html( $pParamHash['edit'] ); - } else { - $pParamHash['content_store']['data'] = $pParamHash['edit']; - } + $pParamHash['content_store']['data'] = $gLibertySystem->purifyHtml( $pParamHash['edit']); return $errorMsg; } diff --git a/plugins/format.bithtml.php b/plugins/format.bithtml.php index 1cd856b..bde0529 100644 --- a/plugins/format.bithtml.php +++ b/plugins/format.bithtml.php @@ -1,6 +1,6 @@ <?php /** - * @version $Revision: 1.13 $ + * @version $Revision: 1.14 $ * @package liberty * @subpackage plugins_format */ @@ -26,56 +26,12 @@ $pluginParams = array ( $gLibertySystem->registerPlugin( PLUGIN_GUID_BITHTML, $pluginParams ); function bithtml_verify_data( &$pParamHash ) { + global $gLibertySystem; $errorMsg = NULL; - $pParamHash['content_store']['data'] = purge_html( $pParamHash['edit'] ); + $pParamHash['content_store']['data'] = $gLibertySystem->purifyHtml( $pParamHash['edit'] ); return $errorMsg; } -// This function is a menagerie of the techniques of the comments listed at -// http://www.php.net/manual/en/function.strip-tags.php - spiderr -function purge_html( $pText ) { - global $gBitSystem, $gBitUser; - - // convert all HTML entites to catch people trying to sneak stuff by with things like { etc.. - if( function_exists( 'html_entity_decode' ) ) { - // quieten this down since it causes an error in PHP4 - // http://bugs.php.net/bug.php?id=25670 - $text = @html_entity_decode( $pText, ENT_COMPAT, 'UTF-8' ); - } else { - $trans_tbl = get_html_translation_table(HTML_ENTITIES); - $trans_tbl = array_flip($trans_tbl); - $text = strtr($pText, $trans_tbl); - } - - // strip_tags() appears to become nauseated at the site of a <!DOCTYPE> declaration - $text = str_replace( '<!DOCTYPE', '<DOCTYPE', $text ); - - // Yank style - both tag and inline attributes - // strip_tags has doesn't recognize that css within the style tags are not document text. To fix this do something similar to the following: - if( !$gBitUser->hasPermission( 'p_liberty_edit_html_style' ) ) { - $text = preg_replace( "/<style[^>]*>.*<\/style>/siU", '', $text ); - } - $text = stripslashes($text); - if( !$gBitUser->hasPermission( 'p_liberty_edit_html_style' ) ) { - $text = preg_replace( "/ (style|class)=[\"]?([^\"]*)[\"]?/i", '', $text); - } - - // Strip all evil tags that remain - // this comes out of gBitSystem->getConfig() set in Liberty Admin - $acceptableTags = $gBitSystem->getConfig( 'approved_html_tags', DEFAULT_ACCEPTABLE_TAGS ); - - // Destroy all script code "manually" - strip_tags will leave code inline as plain text - if( !preg_match( '/\<script\>/', $acceptableTags ) ) { - $text = preg_replace( "/(\<script)(.*?)(script\>)/si", '', $text ); - } - - $text = strip_tags( $text, $acceptableTags ); - $text = str_replace("<!--", "<!--", $text); - $text = preg_replace("/(\<)(.*?)(--\>)/mi", "".nl2br("\\2")."", $text); - - return( $text ); -} - function bithtml_save_data( &$pParamHash ) { static $parser; if( empty( $parser ) ) { diff --git a/plugins/format.tikiwiki.php b/plugins/format.tikiwiki.php index df591e7..e2fd12a 100644 --- a/plugins/format.tikiwiki.php +++ b/plugins/format.tikiwiki.php @@ -1,6 +1,6 @@ <?php /** - * @version $Revision: 1.93 $ + * @version $Revision: 1.94 $ * @package liberty */ global $gLibertySystem; @@ -759,7 +759,7 @@ class TikiWikiParser extends BitBase { } function parse_data( $pParseHash, &$pCommonObject ) { - global $gBitSystem, $gBitUser, $page; + global $gBitSystem, $gLibertySystem, $gBitUser, $page; $data = $pParseHash['data']; $contentId = $pParseHash['content_id']; @@ -783,17 +783,7 @@ class TikiWikiParser extends BitBase { // disable HTML in wiki page for now - very disruptive. should be changed into a per page setting - xing if( !empty( $contentPrefs['content_enter_html'] ) ) { - // this is copied and pasted from format.bithtml.php - xing - // Strip all evil tags that remain - // this comes out of gBitSystem->getConfig() set in Liberty Admin - $acceptableTags = $gBitSystem->getConfig( 'approved_html_tags', DEFAULT_ACCEPTABLE_TAGS ); - - // Destroy all script code "manually" - strip_tags will leave code inline as plain text - if( !preg_match( '/\<script\>/', $acceptableTags ) ) { - $data = preg_replace( "/(\<script)(.*?)(script\>)/si", '', $data ); - } - - $data = strip_tags( $data, $acceptableTags ); + $data = $gLibertySystem->purifyHtml($data); } elseif( !$gBitSystem->isFeatureActive( 'content_allow_html' ) ) { // convert HTML to chars $data = htmlspecialchars( $data, ENT_NOQUOTES, 'UTF-8' ); diff --git a/templates/admin_liberty.tpl b/templates/admin_liberty.tpl index 93ad3fc..46c6ec3 100644 --- a/templates/admin_liberty.tpl +++ b/templates/admin_liberty.tpl @@ -1,5 +1,5 @@ {strip} -{form} +{form} {legend legend="General Settings"} {foreach from=$formLibertyFeatures key=item item=output} <div class="row"> @@ -38,21 +38,56 @@ </div> <div class="row"> - {formlabel label="Acceptable HTML tags" for="approved_html_tags"} - {formfeedback warning=$errors.warning} + {formlabel label="Liberty Cache" for="liberty_cache"} {forminput} - <input type="text" id="approved_html_tags" name="approved_html_tags" size="50" maxlength="250" value="{$approved_html_tags|escape}" /> - {formhelp note="List of allowed HTML tags. All other tags will be stripped when users save content. This will affect all format plugins."} + {html_options name=liberty_cache id=liberty_cache values=$cacheTimes options=$cacheTimes selected=$gBitSystem->getConfig('liberty_cache')} + {formhelp note='Cache all parsed content. This will dramatically reduce load on the server if pages are called frequently.' page=''} {/forminput} </div> + {/legend} + {legend legend="HTML Cleanup"} <div class="row"> - {formlabel label="Liberty Cache" for="liberty_cache"} + {formlabel label="Purification System"} {forminput} - {html_options name=liberty_cache id=liberty_cache values=$cacheTimes options=$cacheTimes selected=$gBitSystem->getConfig('liberty_cache')} - {formhelp note='Cache all parsed content. This will dramatically reduce load on the server if pages are called frequently.' page=''} + {html_options name=liberty_html_purifier options=$gLibertySystem->purifyHtmlMethods() selected=$gBitSystem->getConfig('liberty_html_purifier', 'simple')} + {formhelp note="Which system should be used to purify incoming HTML. The simple algorithm is faster but <strong>far less</strong> robust and secure than <a href=http://htmlpurifier.org>HTML Purifier</a> which has a much richer feature set. HTMLPurifier is recommended to protect against the most XSS attacks. The Simple system is known to <strong>fail XSS smoke tests</strong> and is therefore not recommended."} {/forminput} </div> + + + {legend legend="Simple Purifier Features"} + <div class="row"> + {formlabel label="Acceptable HTML tags" for="approved_html_tags"} + {formfeedback warning=$errors.warning} + {forminput} + <input type="text" id="approved_html_tags" name="approved_html_tags" size="50" maxlength="250" value="{$approved_html_tags|escape}" /> + {formhelp note="List of allowed HTML tags. All other tags will be stripped when users save content. This will affect all format plugins and all purification systems."} + {/forminput} + </div> + {/legend} + + {legend legend="HTMLPurifier Features"} + <div class="row"> + {formlabel label="Blacklisted HTML tags" for="blacklisted_html_tags"} + {formfeedback warning=$errors.blacklist} + {forminput} + <input type="text" id="blacklisted_html_tags" name="blacklisted_html_tags" size="50" maxlength="250" value="{$gBitSystem->getConfig('blacklisted_html_tags')|escape}" /> + {formhelp note="A comma seperated list of tags that should NOT be allowed in any content."} + {/forminput} + </div> + + {foreach from=$formLibertyHtmlPurifierFeatures key=item item=output} + <div class="row"> + {formlabel label=`$output.label` for=$item} + {forminput} + {html_checkboxes name="$item" values="y" checked=$gBitSystem->getConfig($item, $output.default) labels=false id=$item} + {formhelp note=`$output.note` page=`$output.page`} + {/forminput} + </div> + {/foreach} + {/legend} + {/legend} {legend legend="Captcha Settings"} |
