summaryrefslogtreecommitdiff
path: root/core/htmlparser.js
diff options
context:
space:
mode:
Diffstat (limited to 'core/htmlparser.js')
-rw-r--r--core/htmlparser.js207
1 files changed, 207 insertions, 0 deletions
diff --git a/core/htmlparser.js b/core/htmlparser.js
new file mode 100644
index 0000000..76bd2d0
--- /dev/null
+++ b/core/htmlparser.js
@@ -0,0 +1,207 @@
+/**
+ * @license Copyright (c) 2003-2013, CKSource - Frederico Knabben. All rights reserved.
+ * For licensing, see LICENSE.html or http://ckeditor.com/license
+ */
+
+/**
+ * Provides an "event like" system to parse strings of HTML data.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * parser.onTagOpen = function( tagName, attributes, selfClosing ) {
+ * alert( tagName );
+ * };
+ * parser.parse( '<p>Some <b>text</b>.</p>' ); // Alerts 'p', 'b'.
+ *
+ * @class
+ * @constructor Creates a htmlParser class instance.
+ */
+CKEDITOR.htmlParser = function() {
+ this._ = {
+ htmlPartsRegex: new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:"[^"]*")|(?:\'[^\']*\')|[^"\'>])*)\\/?>))', 'g' )
+ };
+};
+
+(function() {
+ // Matches one attribute inside the raw attribute text of an opening tag.
+ // Group 1 is the attribute name. When a value is present it is captured in
+ // group 2 (double-quoted), group 3 (single-quoted) or group 4 (unquoted).
+ // A name without a value matches only when followed by whitespace or the
+ // end of the string. The regex has the "g" flag and is shared by all
+ // parser instances through this closure.
+ var attribsRegex = /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
+ // Attribute names that, when found with no value, are reported with their
+ // own name as the value (e.g. checked -> "checked").
+ emptyAttribs = { checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1 };
+
+ CKEDITOR.htmlParser.prototype = {
+ /**
+ * Handler called by `parse()` for every opening tag found. The default
+ * implementation does nothing; assign your own function to this property
+ * to receive the notifications.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * parser.onTagOpen = function( tagName, attributes, selfClosing ) {
+ * alert( tagName ); // e.g. 'b'
+ * };
+ * parser.parse( '<!-- Example --><b>Hello</b>' );
+ *
+ * @param {String} tagName The tag name. The name is guaranteed to be lowercased.
+ * @param {Object} attributes An object containing all tag attributes. Each
+ * property in this object represents an attribute (the name is lowercased) and
+ * its value is the attribute value. Known boolean attributes written without a
+ * value (e.g. `checked`) get their own name as the value.
+ * @param {Boolean} selfClosing `true` if the tag closes itself (e.g. `<br />`),
+ * `false` if it doesn't.
+ */
+ onTagOpen: function() {},
+
+ /**
+ * Handler called by `parse()` for every closing tag found. The default
+ * implementation does nothing; assign your own function to this property
+ * to receive the notifications.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * parser.onTagClose = function( tagName ) {
+ * alert( tagName ); // 'b'
+ * };
+ * parser.parse( '<!-- Example --><b>Hello</b>' );
+ *
+ * @param {String} tagName The tag name. The name is guaranteed to be lowercased.
+ */
+ onTagClose: function() {},
+
+ /**
+ * Handler called by `parse()` for every run of text found between tags and
+ * comments, and for any text left after the last one. Text collected while
+ * inside a CDATA-type element is reported through `onCDATA` instead.
+ * The default implementation does nothing; assign your own function to this
+ * property to receive the notifications.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * parser.onText = function( text ) {
+ * alert( text ); // 'Hello'
+ * };
+ * parser.parse( '<!-- Example --><b>Hello</b>' );
+ *
+ * @param {String} text The text found.
+ */
+ onText: function() {},
+
+ /**
+ * Handler called by `parse()` when a CDATA section ends, i.e. when the
+ * closing tag of an element listed in `CKEDITOR.dtd.$cdata` is found after
+ * such an element was opened. Everything found in between is passed as one
+ * unparsed string. The default implementation does nothing; assign your own
+ * function to this property to receive the notifications.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * parser.onCDATA = function( cdata ) {
+ * alert( cdata ); // 'var hello;'
+ * };
+ * parser.parse( '<script>var hello;</script>' );
+ *
+ * @param {String} cdata The CDATA that has been found.
+ */
+ onCDATA: function() {},
+
+ /**
+ * Handler called by `parse()` for every non-empty comment found outside
+ * CDATA sections. The default implementation does nothing; assign your own
+ * function to this property to receive the notifications.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * parser.onComment = function( comment ) {
+ * alert( comment ); // ' Example '
+ * };
+ * parser.parse( '<!-- Example --><b>Hello</b>' );
+ *
+ * @param {String} comment The comment text, without the `<!--` and `-->` delimiters.
+ */
+ onComment: function() {},
+
+ /**
+ * Parses text, looking for HTML tokens, like tag openers or closers,
+ * or comments. This function fires the onTagOpen, onTagClose, onText
+ * and onComment function during its execution.
+ *
+ * var parser = new CKEDITOR.htmlParser();
+ * // The onTagOpen, onTagClose, onText and onComment should be overriden
+ * // at this point.
+ * parser.parse( '<!-- Example --><b>Hello</b>' );
+ *
+ * @param {String} html The HTML to be parsed.
+ */
+ parse: function( html ) {
+ var parts, tagName,
+ nextIndex = 0,
+ cdata; // The collected data inside a CDATA section.
+
+ while ( ( parts = this._.htmlPartsRegex.exec( html ) ) ) {
+ var tagIndex = parts.index;
+ if ( tagIndex > nextIndex ) {
+ var text = html.substring( nextIndex, tagIndex );
+
+ if ( cdata )
+ cdata.push( text );
+ else
+ this.onText( text );
+ }
+
+ nextIndex = this._.htmlPartsRegex.lastIndex;
+
+ /*
+ "parts" is an array with the following items:
+ 0 : The entire match for opening/closing tags and comments.
+ 1 : Group filled with the tag name for closing tags.
+ 2 : Group filled with the comment text.
+ 3 : Group filled with the tag name for opening tags.
+ 4 : Group filled with the attributes part of opening tags.
+ */
+
+ // Closing tag
+ if ( ( tagName = parts[ 1 ] ) ) {
+ tagName = tagName.toLowerCase();
+
+ if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] ) {
+ // Send the CDATA data.
+ this.onCDATA( cdata.join( '' ) );
+ cdata = null;
+ }
+
+ if ( !cdata ) {
+ this.onTagClose( tagName );
+ continue;
+ }
+ }
+
+ // If CDATA is enabled, just save the raw match.
+ if ( cdata ) {
+ cdata.push( parts[ 0 ] );
+ continue;
+ }
+
+ // Opening tag
+ if ( ( tagName = parts[ 3 ] ) ) {
+ tagName = tagName.toLowerCase();
+
+ // There are some tag names that can break things, so let's
+ // simply ignore them when parsing. (#5224)
+ if ( /="/.test( tagName ) )
+ continue;
+
+ var attribs = {},
+ attribMatch,
+ attribsPart = parts[ 4 ],
+ selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );
+
+ if ( attribsPart ) {
+ while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) ) {
+ var attName = attribMatch[ 1 ].toLowerCase(),
+ attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';
+
+ if ( !attValue && emptyAttribs[ attName ] )
+ attribs[ attName ] = attName;
+ else
+ attribs[ attName ] = attValue;
+ }
+ }
+
+ this.onTagOpen( tagName, attribs, selfClosing );
+
+ // Open CDATA mode when finding the appropriate tags.
+ if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
+ cdata = [];
+
+ continue;
+ }
+
+ // Comment
+ if ( ( tagName = parts[ 2 ] ) )
+ this.onComment( tagName );
+ }
+
+ if ( html.length > nextIndex )
+ this.onText( html.substring( nextIndex, html.length ) );
+ }
+ };
+})();