summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Palmer <nick@sluggardy.net>2007-05-17 14:14:31 +0000
committerNick Palmer <nick@sluggardy.net>2007-05-17 14:14:31 +0000
commit43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8 (patch)
tree7d651afa18a29e871010453f8ff8c3661b06e257
parent0b3f88362e221a1613e6bca8596f013b0942abbd (diff)
downloadliberty-43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8.tar.gz
liberty-43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8.tar.bz2
liberty-43ca21cfbdb6a9419b6fc1ade9523f4c81bc58e8.zip
Integrate HTMLPurifier into liberty.
-rwxr-xr-xLibertySystem.php143
-rw-r--r--admin/admin_liberty_inc.php55
-rw-r--r--admin/schema_inc.php2
-rw-r--r--icons/bitweaver/htmlpurifier.pngbin0 -> 297 bytes
-rw-r--r--plugins/format.bbcode.php10
-rw-r--r--plugins/format.bithtml.php50
-rw-r--r--plugins/format.tikiwiki.php16
-rw-r--r--templates/admin_liberty.tpl51
8 files changed, 250 insertions, 77 deletions
diff --git a/LibertySystem.php b/LibertySystem.php
index 13ea58c..0faade0 100755
--- a/LibertySystem.php
+++ b/LibertySystem.php
@@ -3,7 +3,7 @@
* System class for handling the liberty package
*
* @package liberty
-* @version $Header: /cvsroot/bitweaver/_bit_liberty/LibertySystem.php,v 1.68 2007/04/07 18:42:22 wjames5 Exp $
+* @version $Header: /cvsroot/bitweaver/_bit_liberty/LibertySystem.php,v 1.69 2007/05/17 14:14:29 nickpalmer Exp $
* @author spider <spider@steelsun.com>
*/
@@ -106,6 +106,147 @@ class LibertySystem extends LibertyBase {
}
}
+ /**
+ * List the purification back ends that purifyHtml() knows how to dispatch to.
+ *
+ * @return array map of preference value => human readable name
+ */
+ function purifyHtmlMethods() {
+ $methods = array();
+ $methods['htmlpurifier'] = 'HTML Purifier';
+ $methods['simple'] = 'Simple Purifier';
+ return $methods;
+ }
+
+ /**
+ * Sanitize a chunk of HTML with the purifier selected in the site settings.
+ *
+ * The 'liberty_html_purifier' preference is read with 'simple' passed as the
+ * fallback argument. Only the value 'htmlpurifier' routes to
+ * advancedPurifyHtml(); every other value goes through simplePurifyHtml().
+ *
+ * @param string $pString The markup to be cleaned.
+ * @return string The sanitized markup.
+ */
+ function purifyHtml($pString) {
+ global $gBitSystem;
+ $method = $gBitSystem->getConfig('liberty_html_purifier', 'simple');
+
+ // loose comparison on purpose - this is what a switch statement does
+ if ($method == 'htmlpurifier') {
+ return $this->advancedPurifyHtml($pString);
+ }
+
+ return $this->simplePurifyHtml($pString);
+ }
+
+ /**
+ * Purify HTML using the HTML Purifier library.
+ *
+ * The purifier is configured from the liberty_html_pure_* preferences the
+ * first time this is called and is then kept in the global $gHtmlPurifier
+ * so later calls reuse it. The purified markup is finally passed through
+ * purifyStyle().
+ *
+ * @param string $pString The string to be cleaned.
+ * @return string The sanitized string
+ */
+ function advancedPurifyHtml($pString) {
+ global $gHtmlPurifier, $gBitSystem;
+ if (!isset($gHtmlPurifier)) {
+ $blacklistedTags = $gBitSystem->getConfig('blacklisted_html_tags', '');
+ require_once(UTIL_PKG_PATH . 'htmlpurifier/HTMLPurifier.auto.php');
+ $config = HTMLPurifier_Config::createDefault();
+
+ if ($gBitSystem->getConfig('liberty_html_pure_escape_bad', 'y') == 'y') {
+ $config->set('Core', 'EscapeInvalidTags', true);
+ $config->set('Core', 'EscapeInvalidChildren', true);
+ }
+ if ($gBitSystem->getConfig('liberty_html_pure_disable_extern') == 'y') {
+ $config->set('URI', 'DisableExternal', true);
+ }
+ if ($gBitSystem->getConfig('liberty_html_pure_disable_extern_res', 'y') == 'y') {
+ $config->set('URI', 'DisableExternalResources', true);
+ }
+ if ($gBitSystem->getConfig('liberty_html_pure_disable_res') == 'y') {
+ $config->set('URI', 'DisableResources', true);
+ }
+ if ($gBitSystem->getConfig('liberty_html_pure_disable_uri') == 'y') {
+ $config->set('URI', 'Disable', true);
+ }
+ if ($gBitSystem->getConfig('liberty_html_pure_use_redirect') == 'y') {
+ $config->set('URI', 'Munge', LIBERTY_PKG_URL.'redirect.php?q=%s');
+ }
+ if ($gBitSystem->getConfig('liberty_html_pure_strict_html', 'y') == 'y') {
+ $config->set('HTML', 'Strict', true);
+ }
+ // Set Core.XHTML explicitly in both directions so the 'Force XHTML'
+ // preference actually controls the output: on when the preference is
+ // 'y', off otherwise. Only ever setting it to true would leave the
+ // toggle without effect if the library already defaults to XHTML.
+ $forceXhtml = ($gBitSystem->getConfig('liberty_html_pure_xhtml', 'n') == 'y');
+ $config->set('Core', 'XHTML', $forceXhtml);
+
+ $def =& $config->getHTMLDefinition();
+ // HTMLPurifier doesn't have a blacklist feature, so remove the
+ // unwanted elements from the definition by hand.
+ // Note that this has to come last since the other configs
+ // may tweak the def.
+ foreach (explode(',', $blacklistedTags) as $tag) {
+ // An empty preference or a stray comma produces blank entries - skip
+ // them. Names are lower-cased on the assumption that the definition
+ // is keyed by lower case element names - TODO confirm.
+ $tag = strtolower(trim($tag));
+ if ($tag !== '') {
+ unset($def->info[$tag]);
+ }
+ }
+
+ $gHtmlPurifier = new HTMLPurifier($config);
+ }
+ $pString = $gHtmlPurifier->purify($pString);
+
+ /* There isn't an easy way to disable an attribute in HTMLPurifier */
+ $pString = $this->purifyStyle($pString);
+
+ return $pString;
+ }
+
+ /**
+ * Removes all style both inline and attributes unless the user
+ * has permission to edit styles.
+ *
+ * Users lacking 'p_liberty_edit_html_style' lose every <style> element and
+ * every style/class attribute. stripslashes() is applied for all users.
+ *
+ * @param string $pText markup to filter
+ * @return string the filtered markup
+ */
+ function purifyStyle( $pText ) {
+ global $gBitUser;
+
+ $canEditStyle = $gBitUser->hasPermission( 'p_liberty_edit_html_style' );
+
+ $text = $pText;
+ if( !$canEditStyle ) {
+ // strip_tags() doesn't recognize that css within style tags is not document text, so yank the whole element here
+ $text = preg_replace( "/<style[^>]*>.*<\/style>/siU", '', $text );
+ }
+ // NOTE(review): slashes are stripped unconditionally - presumably a holdover for magic-quoted input; confirm it is still wanted
+ $text = stripslashes($text);
+ if( !$canEditStyle ) {
+ // The value is matched as double quoted, single quoted or bare so the
+ // match always ends with the attribute itself. A pattern that only
+ // knows about double quotes runs on to the next double quote in the
+ // document when it meets style='...' or style=foo and eats content.
+ $text = preg_replace( '/\s(?:style|class)\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*)/i', '', $text );
+ }
+
+ return $text;
+ }
+
+ /**
+ * Tag stripping purifier built around strip_tags().
+ *
+ * This is a menagerie of the techniques of the comments listed at
+ * http://www.php.net/manual/en/function.strip-tags.php - spiderr
+ *
+ * @param string $pText markup to clean
+ * @return string the cleaned markup
+ */
+ function simplePurifyHtml( $pText ) {
+ global $gBitSystem;
+
+ // Decode all HTML entities first to catch people trying to sneak stuff by with things like &#123; etc..
+ if( !function_exists( 'html_entity_decode' ) ) {
+ $entityMap = array_flip( get_html_translation_table( HTML_ENTITIES ) );
+ $clean = strtr( $pText, $entityMap );
+ } else {
+ // silenced since this causes an error in PHP4
+ // http://bugs.php.net/bug.php?id=25670
+ $clean = @html_entity_decode( $pText, ENT_COMPAT, 'UTF-8' );
+ }
+
+ // strip_tags() appears to become nauseated at the sight of a <!DOCTYPE> declaration
+ $clean = str_replace( '<!DOCTYPE', '<DOCTYPE', $clean );
+
+ $clean = $this->purifyStyle( $clean );
+
+ // The whitelist of tags comes out of gBitSystem->getConfig() set in Liberty Admin
+ $allowedTags = $gBitSystem->getConfig( 'approved_html_tags', DEFAULT_ACCEPTABLE_TAGS );
+
+ // Destroy all script code "manually" - strip_tags will leave code inline as plain text
+ $scriptAllowed = preg_match( '/\<script\>/', $allowedTags );
+ if( !$scriptAllowed ) {
+ $clean = preg_replace( "/(\<script)(.*?)(script\>)/si", '', $clean );
+ }
+
+ // Strip all evil tags that remain, then deal with comment markers
+ $clean = strip_tags( $clean, $allowedTags );
+ $clean = str_replace( "<!--", "&lt;!--", $clean );
+ $replacement = nl2br( "\\2" );
+
+ return preg_replace( "/(\<)(.*?)(--\>)/mi", $replacement, $clean );
+ }
+
// ****************************** Plugin Functions
/**
* Load only active plugins from disk
diff --git a/admin/admin_liberty_inc.php b/admin/admin_liberty_inc.php
index 2fe7f56..efcc085 100644
--- a/admin/admin_liberty_inc.php
+++ b/admin/admin_liberty_inc.php
@@ -18,6 +18,50 @@ $formLibertyFeatures = array(
),
);
+// Toggles shown in the "HTMLPurifier Features" section of the liberty admin
+// screen. Each key is the preference name; 'label' and 'note' are displayed
+// next to the checkbox and 'default' is the value used when nothing is stored.
+$formLibertyHtmlPurifierFeatures = array(
+ 'liberty_html_pure_escape_bad' => array(
+ 'label' => 'Escape invalid HTML',
+ 'note' => 'Escapes invalid HTML as text. Otherwise invalid HTML is silently dropped. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#Core.EscapeInvalidTags">this</a> and <a href="http://htmlpurifier.org/live/configdoc/plain.html#Core.EscapeInvalidChildren">this</a> for more information.',
+ 'default' => 'y'
+ ),
+ 'liberty_html_pure_disable_extern' => array(
+ 'label' => 'Disable External Links',
+ 'note' => 'Disables links to external websites which is effective against spam. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.DisableExternal">this</a> for more information.',
+ 'default' => 'n'
+ ),
+ 'liberty_html_pure_disable_extern_res' => array(
+ 'label' => 'Disable External Resources',
+ 'note' => 'Disables the embedding of external resources like images from other hosts. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.DisableExternalResources">this</a> for more information.',
+ 'default' => 'y'
+ ),
+ 'liberty_html_pure_disable_res' => array(
+ 'label' => 'Disable All Resources',
+ 'note' => 'Disables the embedding of all resources preventing users from including pictures at all. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.DisableResources">this</a> for more information.',
+ 'default' => 'n'
+ ),
+ 'liberty_html_pure_disable_uri' => array(
+ 'label' => 'Disable all URIs',
+ 'note' => 'Disables all URIs in all forms within submitted content. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.Disable">this</a> for more information.',
+ 'default' => 'n'
+ ),
+ 'liberty_html_pure_use_redirect' => array(
+ 'label' => 'Use Redirect',
+ 'note' => 'Uses the redirect service in the Redirect URI. This can be handy to track clicks out and prevent leaks of PageRank. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#URI.Munge">this</a> for more information.',
+ 'default' => 'n'
+ ),
+ 'liberty_html_pure_strict_html' => array(
+ 'label' => 'Force Strict',
+ 'note' => 'Determines if the purification matches the Transitional or Strict rule sets. See <a href="http://htmlpurifier.org/live/configdoc/plain.html#HTML.Strict">this</a> for more information.',
+ 'default' => 'y'
+ ),
+ 'liberty_html_pure_xhtml' => array(
+ 'label' => 'Force XHTML',
+ 'note' => 'Determine if purification forces only XHTML tags or if it allows standard HTML.',
+ 'default' => 'n'
+ )
+);
+$gBitSmarty->assign( 'formLibertyHtmlPurifierFeatures', $formLibertyHtmlPurifierFeatures );
+
$formLibertyTextareaFeatures = array(
"liberty_textarea_height" => array(
'label' => 'Default Textarea Height',
@@ -99,17 +143,26 @@ $formValues = array( 'image_processor', 'liberty_attachment_link_format', 'comme
if( !empty( $_REQUEST['change_prefs'] ) ) {
$errors = array();
- $formFeatures = array_merge( $formLibertyFeatures, $formImageFeatures, $formCaptcha );
+ $formFeatures = array_merge( $formLibertyFeatures, $formImageFeatures, $formCaptcha, $formLibertyHtmlPurifierFeatures );
foreach( $formFeatures as $item => $data ) {
simple_set_toggle( $item, LIBERTY_PKG_NAME );
}
foreach( $formLibertyTextareaFeatures as $item => $data ) {
simple_set_value( $item, LIBERTY_PKG_NAME );
}
+ simple_set_value('liberty_html_purifier', LIBERTY_PKG_NAME );
simple_set_value( 'liberty_attachment_style', LIBERTY_PKG_NAME );
$gBitSystem->storeConfig('liberty_cache', $_REQUEST['liberty_cache'], LIBERTY_PKG_NAME );
$gBitSystem->storeConfig('liberty_auto_display_attachment_thumbs', $_REQUEST['liberty_auto_display_attachment_thumbs'], LIBERTY_PKG_NAME );
+ // isset() rather than !empty(): a blank field has to be stored as well,
+ // otherwise a blacklist can never be cleared once it has been saved.
+ // Non-string input (e.g. an array parameter) is ignored.
+ if( isset( $_REQUEST['blacklisted_html_tags'] ) && is_string( $_REQUEST['blacklisted_html_tags'] ) ) {
+ $tags = preg_replace( '/\s/', '', $_REQUEST['blacklisted_html_tags'] );
+ if (strlen( $tags ) > 250) {
+ $tags = substr( $tags, 0, 250 );
+ $errors['blacklist'] = 'The blacklisted tags list has been shortened. You can only have 250 characters for blacklisted tags.';
+ }
+ $gBitSystem->storeConfig('blacklisted_html_tags', $tags , LIBERTY_PKG_NAME );
+ }
if( $_REQUEST['approved_html_tags'] != DEFAULT_ACCEPTABLE_TAGS ) {
$tags = preg_replace( '/\s/', '', $_REQUEST['approved_html_tags'] );
$lastAngle = strrpos( $tags, '>' ) + 1;
diff --git a/admin/schema_inc.php b/admin/schema_inc.php
index 6af69d0..1c95086 100644
--- a/admin/schema_inc.php
+++ b/admin/schema_inc.php
@@ -249,6 +249,8 @@ $gBitInstaller->registerPreferences( LIBERTY_PKG_NAME, array(
array(LIBERTY_PKG_NAME, 'liberty_action_log', 'y'),
// array(LIBERTY_PKG_NAME, 'liberty_attachment_link_format', 'wiki') not needed anymore since we use js in the edit page now (depends on format of content)
// array(LIBERTY_PKG_NAME, 'liberty_attachment_style', 'standard'),
+ // The default for new installs is htmlpurifier old stays simple
+ array(LIBERTY_PKG_NAME, 'liberty_html_purifier', 'htmlpurifier'),
) );
$gBitInstaller->registerSchemaDefault( LIBERTY_PKG_NAME, array(
diff --git a/icons/bitweaver/htmlpurifier.png b/icons/bitweaver/htmlpurifier.png
new file mode 100644
index 0000000..119c4b9
--- /dev/null
+++ b/icons/bitweaver/htmlpurifier.png
Binary files differ
diff --git a/plugins/format.bbcode.php b/plugins/format.bbcode.php
index ba37155..17e58f9 100644
--- a/plugins/format.bbcode.php
+++ b/plugins/format.bbcode.php
@@ -1,6 +1,6 @@
<?php
/**
- * @version $Revision: 1.8 $
+ * @version $Revision: 1.9 $
* @package liberty
* @subpackage plugins_format
*/
@@ -9,7 +9,6 @@ global $gLibertySystem;
/**
* run 'pear install Text_Wiki_BBCode-alpha' to install the library,
- * you also need to enable the HTML plugin for now to due to dependency on the purge_html function
*/
require_once('PEAR.php');
@@ -35,12 +34,9 @@ $pluginParams = array (
$gLibertySystem->registerPlugin( PLUGIN_GUID_BBCODE, $pluginParams );
function bbcode_verify_data( &$pParamHash ) {
+ global $gLibertySystem;
$errorMsg = NULL;
- if( !function_exists( 'purge_html' ) && include_once( LIBERTY_PKG_PATH.'plugins/format.bithtml.php' ) ) {
- $pParamHash['content_store']['data'] = purge_html( $pParamHash['edit'] );
- } else {
- $pParamHash['content_store']['data'] = $pParamHash['edit'];
- }
+ $pParamHash['content_store']['data'] = $gLibertySystem->purifyHtml( $pParamHash['edit']);
return $errorMsg;
}
diff --git a/plugins/format.bithtml.php b/plugins/format.bithtml.php
index 1cd856b..bde0529 100644
--- a/plugins/format.bithtml.php
+++ b/plugins/format.bithtml.php
@@ -1,6 +1,6 @@
<?php
/**
- * @version $Revision: 1.13 $
+ * @version $Revision: 1.14 $
* @package liberty
* @subpackage plugins_format
*/
@@ -26,56 +26,12 @@ $pluginParams = array (
$gLibertySystem->registerPlugin( PLUGIN_GUID_BITHTML, $pluginParams );
function bithtml_verify_data( &$pParamHash ) {
+ global $gLibertySystem;
$errorMsg = NULL;
- $pParamHash['content_store']['data'] = purge_html( $pParamHash['edit'] );
+ $pParamHash['content_store']['data'] = $gLibertySystem->purifyHtml( $pParamHash['edit'] );
return $errorMsg;
}
-// This function is a menagerie of the techniques of the comments listed at
-// http://www.php.net/manual/en/function.strip-tags.php - spiderr
-function purge_html( $pText ) {
- global $gBitSystem, $gBitUser;
-
- // convert all HTML entites to catch people trying to sneak stuff by with things like &#123; etc..
- if( function_exists( 'html_entity_decode' ) ) {
- // quieten this down since it causes an error in PHP4
- // http://bugs.php.net/bug.php?id=25670
- $text = @html_entity_decode( $pText, ENT_COMPAT, 'UTF-8' );
- } else {
- $trans_tbl = get_html_translation_table(HTML_ENTITIES);
- $trans_tbl = array_flip($trans_tbl);
- $text = strtr($pText, $trans_tbl);
- }
-
- // strip_tags() appears to become nauseated at the site of a <!DOCTYPE> declaration
- $text = str_replace( '<!DOCTYPE', '<DOCTYPE', $text );
-
- // Yank style - both tag and inline attributes
- // strip_tags has doesn't recognize that css within the style tags are not document text. To fix this do something similar to the following:
- if( !$gBitUser->hasPermission( 'p_liberty_edit_html_style' ) ) {
- $text = preg_replace( "/<style[^>]*>.*<\/style>/siU", '', $text );
- }
- $text = stripslashes($text);
- if( !$gBitUser->hasPermission( 'p_liberty_edit_html_style' ) ) {
- $text = preg_replace( "/ (style|class)=[\"]?([^\"]*)[\"]?/i", '', $text);
- }
-
- // Strip all evil tags that remain
- // this comes out of gBitSystem->getConfig() set in Liberty Admin
- $acceptableTags = $gBitSystem->getConfig( 'approved_html_tags', DEFAULT_ACCEPTABLE_TAGS );
-
- // Destroy all script code "manually" - strip_tags will leave code inline as plain text
- if( !preg_match( '/\<script\>/', $acceptableTags ) ) {
- $text = preg_replace( "/(\<script)(.*?)(script\>)/si", '', $text );
- }
-
- $text = strip_tags( $text, $acceptableTags );
- $text = str_replace("<!--", "&lt;!--", $text);
- $text = preg_replace("/(\<)(.*?)(--\>)/mi", "".nl2br("\\2")."", $text);
-
- return( $text );
-}
-
function bithtml_save_data( &$pParamHash ) {
static $parser;
if( empty( $parser ) ) {
diff --git a/plugins/format.tikiwiki.php b/plugins/format.tikiwiki.php
index df591e7..e2fd12a 100644
--- a/plugins/format.tikiwiki.php
+++ b/plugins/format.tikiwiki.php
@@ -1,6 +1,6 @@
<?php
/**
- * @version $Revision: 1.93 $
+ * @version $Revision: 1.94 $
* @package liberty
*/
global $gLibertySystem;
@@ -759,7 +759,7 @@ class TikiWikiParser extends BitBase {
}
function parse_data( $pParseHash, &$pCommonObject ) {
- global $gBitSystem, $gBitUser, $page;
+ global $gBitSystem, $gLibertySystem, $gBitUser, $page;
$data = $pParseHash['data'];
$contentId = $pParseHash['content_id'];
@@ -783,17 +783,7 @@ class TikiWikiParser extends BitBase {
// disable HTML in wiki page for now - very disruptive. should be changed into a per page setting - xing
if( !empty( $contentPrefs['content_enter_html'] ) ) {
- // this is copied and pasted from format.bithtml.php - xing
- // Strip all evil tags that remain
- // this comes out of gBitSystem->getConfig() set in Liberty Admin
- $acceptableTags = $gBitSystem->getConfig( 'approved_html_tags', DEFAULT_ACCEPTABLE_TAGS );
-
- // Destroy all script code "manually" - strip_tags will leave code inline as plain text
- if( !preg_match( '/\<script\>/', $acceptableTags ) ) {
- $data = preg_replace( "/(\<script)(.*?)(script\>)/si", '', $data );
- }
-
- $data = strip_tags( $data, $acceptableTags );
+ $data = $gLibertySystem->purifyHtml($data);
} elseif( !$gBitSystem->isFeatureActive( 'content_allow_html' ) ) {
// convert HTML to chars
$data = htmlspecialchars( $data, ENT_NOQUOTES, 'UTF-8' );
diff --git a/templates/admin_liberty.tpl b/templates/admin_liberty.tpl
index 93ad3fc..46c6ec3 100644
--- a/templates/admin_liberty.tpl
+++ b/templates/admin_liberty.tpl
@@ -1,5 +1,5 @@
{strip}
-{form}
+{form}
{legend legend="General Settings"}
{foreach from=$formLibertyFeatures key=item item=output}
<div class="row">
@@ -38,21 +38,56 @@
</div>
<div class="row">
- {formlabel label="Acceptable HTML tags" for="approved_html_tags"}
- {formfeedback warning=$errors.warning}
+ {formlabel label="Liberty Cache" for="liberty_cache"}
{forminput}
- <input type="text" id="approved_html_tags" name="approved_html_tags" size="50" maxlength="250" value="{$approved_html_tags|escape}" />
- {formhelp note="List of allowed HTML tags. All other tags will be stripped when users save content. This will affect all format plugins."}
+ {html_options name=liberty_cache id=liberty_cache values=$cacheTimes options=$cacheTimes selected=$gBitSystem->getConfig('liberty_cache')}
+ {formhelp note='Cache all parsed content. This will dramatically reduce load on the server if pages are called frequently.' page=''}
{/forminput}
</div>
+ {/legend}
+ {legend legend="HTML Cleanup"}
<div class="row">
- {formlabel label="Liberty Cache" for="liberty_cache"}
+ {formlabel label="Purification System"}
{forminput}
- {html_options name=liberty_cache id=liberty_cache values=$cacheTimes options=$cacheTimes selected=$gBitSystem->getConfig('liberty_cache')}
- {formhelp note='Cache all parsed content. This will dramatically reduce load on the server if pages are called frequently.' page=''}
+ {html_options name=liberty_html_purifier options=$gLibertySystem->purifyHtmlMethods() selected=$gBitSystem->getConfig('liberty_html_purifier', 'simple')}
+ {formhelp note="Which system should be used to purify incoming HTML. The simple algorithm is faster but <strong>far less</strong> robust and secure than <a href=http://htmlpurifier.org>HTML Purifier</a> which has a much richer feature set. HTMLPurifier is recommended to protect against the most XSS attacks. The Simple system is known to <strong>fail XSS smoke tests</strong> and is therefore not recommended."}
{/forminput}
</div>
+
+
+ {legend legend="Simple Purifier Features"}
+ <div class="row">
+ {formlabel label="Acceptable HTML tags" for="approved_html_tags"}
+ {formfeedback warning=$errors.warning}
+ {forminput}
+ <input type="text" id="approved_html_tags" name="approved_html_tags" size="50" maxlength="250" value="{$approved_html_tags|escape}" />
+ {formhelp note="List of allowed HTML tags. All other tags will be stripped when users save content. This will affect all format plugins and all purification systems."}
+ {/forminput}
+ </div>
+ {/legend}
+
+ {legend legend="HTMLPurifier Features"}
+ <div class="row">
+ {formlabel label="Blacklisted HTML tags" for="blacklisted_html_tags"}
+ {formfeedback warning=$errors.blacklist}
+ {forminput}
+ <input type="text" id="blacklisted_html_tags" name="blacklisted_html_tags" size="50" maxlength="250" value="{$gBitSystem->getConfig('blacklisted_html_tags')|escape}" />
+ {formhelp note="A comma seperated list of tags that should NOT be allowed in any content."}
+ {/forminput}
+ </div>
+
+ {foreach from=$formLibertyHtmlPurifierFeatures key=item item=output}
+ <div class="row">
+ {formlabel label=`$output.label` for=$item}
+ {forminput}
+ {html_checkboxes name="$item" values="y" checked=$gBitSystem->getConfig($item, $output.default) labels=false id=$item}
+ {formhelp note=`$output.note` page=`$output.page`}
+ {/forminput}
+ </div>
+ {/foreach}
+ {/legend}
+
{/legend}
{legend legend="Captcha Settings"}