bfa5cc57925c31bcd800e086e33f45a154e07f5c
[ckeditor.git] / _source / core / htmlparser / fragment.js
1 /*
2 Copyright (c) 2003-2011, CKSource - Frederico Knabben. All rights reserved.
3 For licensing, see LICENSE.html or http://ckeditor.com/license
4 */
5
6 /**
7 * A lightweight representation of an HTML DOM structure.
8 * @constructor
9 * @example
10 */
CKEDITOR.htmlParser.fragment = function()
{
	// Internal flags: a fragment is treated as block-like, and no
	// inline content has been added to it yet.
	var internals =
	{
		isBlockLike : true,
		hasInlineStarted : false
	};

	/**
	 * The nodes contained in the root of this fragment.
	 * @type Array
	 * @example
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
	 * alert( fragment.children.length );  "2"
	 */
	this.children = [];

	/**
	 * The parent of the fragment. A fragment is a root, so this is
	 * initialized to null.
	 * @type Object
	 * @default null
	 */
	this.parent = null;

	/** @private */
	this._ = internals;
};
37
38 (function()
39 {
var dtd = CKEDITOR.dtd,
	env = CKEDITOR.env;

// Block-level elements whose internal structure should be respected during
// parser fixing: tables and lists, plus every name listed in their DTD entries.
var nonBreakingBlocks = CKEDITOR.tools.extend( { table:1,ul:1,ol:1,dl:1 }, dtd.table, dtd.ul, dtd.ol, dtd.dl );

// IE < 8 don't output the close tag on definition list items. (#6975)
// Every other environment gets an empty map.
var optionalCloseTags = {};
if ( env.ie && env.version < 8 )
	optionalCloseTags = { dd : 1, dt :1 };

// The two list containers that may end up (wrongly) nested directly in each other.
var listBlocks = { ol:1, ul:1 };

// Dtd of the fragment element, basically it accepts anything except for
// intermediate structure, e.g. orphan <li>.
var rootDtd = CKEDITOR.tools.extend( {}, { html: 1 }, dtd.html, dtd.body, dtd.head, { style:1,script:1 } );
51
52 /**
53 * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.
54 * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.
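55 * @param {String|Boolean} [fixForBody=false] Name of the element used to wrap content that cannot sit directly in the body, or false to skip that fix.
56 * @param {CKEDITOR.htmlParser.element} [contextNode] Parse the html as the content of this element.
57 * @returns {CKEDITOR.htmlParser.fragment|CKEDITOR.htmlParser.element} The fragment created, or contextNode when one was passed.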
58 * @example
59 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
60 * alert( fragment.children[0].name ); "b"
61 * alert( fragment.children[1].value ); " Text"
62 */
63 CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody, contextNode )
64 {
65 var parser = new CKEDITOR.htmlParser(),
66 fragment = contextNode || new CKEDITOR.htmlParser.fragment(),
67 pendingInline = [],
68 pendingBRs = [],
69 currentNode = fragment,
70 // Indicate we're inside a <pre> element, spaces should be touched differently.
71 inPre = false;
72
function checkPending( newTagName )
{
	// Re-opens queued inline elements around the current insertion point.
	// A queued element is re-opened when the current node's DTD accepts it
	// (or the current node has no DTD entry) and, if a tag name is given,
	// when that tag may live inside the queued element (or either side is
	// unknown to the DTD). Elements that do not qualify stay in the queue.
	var brsFlushed = false,
		index = 0;

	while ( index < pendingInline.length )
	{
		var queued = pendingInline[ index ],
			queuedName = queued.name,
			queuedDtd = CKEDITOR.dtd[ queuedName ],
			hostDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

		var fitsInHost = !hostDtd || hostDtd[ queuedName ],
			acceptsNewTag = !newTagName || !queuedDtd || queuedDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ];

		if ( !fitsInHost || !acceptsNewTag )
		{
			// Not applicable here; leave it queued and look at the next one.
			index++;
			continue;
		}

		// Pending line breaks go in once, before the first re-opened element.
		if ( !brsFlushed )
		{
			sendPendingBRs();
			brsFlushed = true;
		}

		// Work on a clone, hang it under the current node and make it the
		// current one, so whatever comes next lands inside of it.
		var reopened = queued.clone();
		reopened.parent = currentNode;
		currentNode = reopened;

		// Removing the entry shifts the following ones down, so the index
		// already points at the next candidate.
		pendingInline.splice( index, 1 );
	}
}
110
function sendPendingBRs()
{
	// Drains the queue of pending <br> elements into the current node,
	// oldest first.
	for ( ; pendingBRs.length > 0 ; )
	{
		var oldest = pendingBRs.shift();
		currentNode.add( oldest );
	}
}
116
117 /*
118 * Besides simply appending the specified element to the target, this function also takes
119 * care of other dirty work like forcing a block in body, trimming spaces at
120 * the block boundaries etc.
121 *
122 * @param {Element} element The element to be added as the last child of {@link target}.
123 * @param {Element} target The parent element to receive the new node.
124 * @param {Boolean} [moveCurrent=false] When true, "currentNode" moves onto {@link target} after the addition;
125 * when false it is restored to its value on entry. A return point specified on the element overrides both.
126 */
function addElement( element, target, moveCurrent )
{
	// A node that has been added already carries a "previous" link
	// (an object or null), so there is nothing left to do for it.
	if ( element.previous !== undefined )
		return;

	target = target || currentNode || fragment;

	// The body fix below may move the current node around; remember where
	// it was so it can be put back afterwards.
	var currentOnEntry = currentNode;

	// Body fixing applies only when adding straight into the fragment
	// (which has no type) or into <body>.
	if ( fixForBody && ( !target.type || target.name == 'body' ) )
	{
		// Placeholder elements declare the element they stand for.
		var declaredType = element.attributes && element.attributes[ 'data-cke-real-element-type' ],
			effectiveName = declaredType || element.name;

		var needsWrapper = effectiveName
			&& !( effectiveName in CKEDITOR.dtd.$body )
			&& effectiveName != 'body'
			&& !element.isOrphan;

		if ( needsWrapper )
		{
			// Open the wrapping block inside the target...
			currentNode = target;
			parser.onTagOpen( fixForBody, {} );

			// ...which becomes both the new target and the place to
			// continue from once the element is in.
			target = currentNode;
			element.returnPoint = target;
		}
	}

	// Rtrim empty spaces on block end boundary. (#3585)
	if ( element._.isBlockLike && element.name != 'pre' )
	{
		var kids = element.children,
			tail = kids[ kids.length - 1 ];

		if ( tail && tail.type == CKEDITOR.NODE_TEXT )
		{
			var trimmed = CKEDITOR.tools.rtrim( tail.value );

			if ( trimmed )
				tail.value = trimmed;
			else
				kids.pop();		// Nothing but spaces: drop the text node.
		}
	}

	target.add( element );

	var resumeAt = element.returnPoint;
	if ( resumeAt )
	{
		delete element.returnPoint;
		currentNode = resumeAt;
	}
	else if ( moveCurrent )
		currentNode = target;
	else
		currentNode = currentOnEntry;
}
189
parser.onTagOpen = function( tagName, attributes, selfClosing, optionalClose )
{
	// Handles an opening tag: builds the element, then walks the current
	// position up or sideways until the element is allowed there, and
	// finally either adds it (empty elements) or makes it the current node.
	var element = new CKEDITOR.htmlParser.element( tagName, attributes );

	// "isEmpty" will be always "false" for unknown elements, so it is
	// forced when the parser reports a self-closing tag.
	if ( element.isUnknown && selfClosing )
		element.isEmpty = true;

	// Optionally closed elements: browser quirks and manually opened blocks.
	element.isOptionalClose = tagName in optionalCloseTags || optionalClose;

	// Elements that are removed when empty are only queued for now.
	if ( CKEDITOR.dtd.$removeEmpty[ tagName ] )
	{
		pendingInline.push( element );
		return;
	}

	if ( tagName == 'pre' )
		inPre = true;
	else if ( tagName == 'br' )
	{
		// Inside <pre> a line break becomes a newline character right
		// away; elsewhere it is queued.
		if ( inPre )
			currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
		else
			pendingBRs.push( element );

		return;
	}

	for ( ;; )
	{
		var hostName = currentNode.name,
			hostDtd;

		if ( !hostName )
			hostDtd = rootDtd;
		else
		{
			hostDtd = CKEDITOR.dtd[ hostName ]
				|| ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span );
		}

		// Unknown elements are never rejected; known ones must be allowed
		// by the DTD of the current node.
		if ( element.isUnknown || currentNode.isUnknown || hostDtd[ tagName ] )
			break;

		// From here on the element cannot be a child of the current node.

		if ( currentNode.isOptionalClose )
		{
			// Current node doesn't have a close tag, time for a close
			// as this element doesn't fit in. (#7497)
			parser.onTagClose( hostName );
		}
		else if ( tagName in listBlocks && hostName in listBlocks )
		{
			// Fixing malformed nested lists by moving them into the
			// previous list item. (#3828)
			var siblings = currentNode.children,
				hostItem = siblings[ siblings.length - 1 ];

			// Establish the list item if it doesn't exist.
			if ( !hostItem || hostItem.name != 'li' )
			{
				hostItem = new CKEDITOR.htmlParser.element( 'li' );
				addElement( hostItem, currentNode );
			}

			if ( !element.returnPoint )
				element.returnPoint = currentNode;

			currentNode = hostItem;
		}
		else if ( tagName in CKEDITOR.dtd.$listItem && hostName != tagName )
		{
			// Establish new list root for orphan list items.
			parser.onTagOpen( tagName == 'li' ? 'ul' : 'dl', {}, 0, 1 );
		}
		else if ( hostName in nonBreakingBlocks && hostName != tagName )
		{
			// Inside a structural block (table, list) and the incoming
			// element is of another type: it is placed before the block,
			// and parsing comes back here once it has been added, e.g.
			// <table><tr><td>td1</td><p>p1</p><td>td2</td></tr></table>
			if ( !element.returnPoint )
				element.returnPoint = currentNode;

			currentNode = currentNode.parent;
		}
		else
		{
			// An inline current node has to continue after being closed,
			// so it goes to the front of the pending list.
			if ( hostName in CKEDITOR.dtd.$inline )
				pendingInline.unshift( currentNode );

			if ( !currentNode.parent )
			{
				// No parent left to try: tolerate the element as an orphan.
				element.isOrphan = 1;
				break;
			}

			// The most common case: close the current node and retry
			// against its parent.
			addElement( currentNode, currentNode.parent, 1 );
		}
	}

	checkPending( tagName );
	sendPendingBRs();

	element.parent = currentNode;

	if ( element.isEmpty )
		addElement( element );
	else
		currentNode = element;
};
299
parser.onTagClose = function( tagName )
{
	// A close tag for an element that is still pending simply cancels the
	// most recently queued entry with that name.
	var slot = pendingInline.length;
	while ( slot-- )
	{
		if ( pendingInline[ slot ].name == tagName )
		{
			pendingInline.splice( slot, 1 );
			return;
		}
	}

	var crossedNodes = [],
		inlineToContinue = [],
		candidate = currentNode;

	// Climb from the current node looking for the element being closed.
	while ( candidate != fragment && candidate.name != tagName )
	{
		// Inline elements crossed on the way are remembered; if the close
		// tag turns out to be real they continue after it.
		if ( !candidate._.isBlockLike )
			inlineToContinue.unshift( candidate );

		// Crossed nodes are added to their parents only if the close tag
		// really matches something, so they are just cached for now.
		crossedNodes.push( candidate );

		// Make sure return point is properly restored.
		candidate = candidate.returnPoint || candidate.parent;
	}

	// Reaching the fragment means the close tag matched nothing open.
	if ( candidate != fragment )
	{
		for ( var n = 0 ; n < crossedNodes.length ; n++ )
			addElement( crossedNodes[ n ], crossedNodes[ n ].parent );

		currentNode = candidate;

		if ( candidate.name == 'pre' )
			inPre = false;

		if ( candidate._.isBlockLike )
			sendPendingBRs();

		addElement( candidate, candidate.parent );

		// The parent should start receiving new nodes now, except if
		// addElement changed the currentNode.
		if ( candidate == currentNode )
			currentNode = currentNode.parent;

		pendingInline = pendingInline.concat( inlineToContinue );
	}

	if ( tagName == 'body' )
		fixForBody = false;
};
364
parser.onText = function( text )
{
	// Outside <pre>, leading spaces are dropped when no inline content has
	// started in the current node yet or when line breaks are pending.
	if ( !inPre && ( !currentNode._.hasInlineStarted || pendingBRs.length ) )
	{
		text = CKEDITOR.tools.ltrim( text );

		// Nothing but spaces: ignore the text altogether.
		if ( !text.length )
			return;
	}

	sendPendingBRs();
	checkPending();

	// Non-blank text sitting directly in the fragment or in <body> gets
	// wrapped by the body-fixing element first.
	var directlyInBody = !currentNode.type || currentNode.name == 'body';
	if ( fixForBody && directlyInBody && CKEDITOR.tools.trim( text ) )
		this.onTagOpen( fixForBody, {}, 0, 1 );

	// Shrinking consequential spaces into one single for all elements
	// text contents.
	if ( !inPre )
		text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );

	var textNode = new CKEDITOR.htmlParser.text( text );
	currentNode.add( textNode );
};
393
parser.onCDATA = function( cdata )
{
	// CDATA goes straight into the current node; pending inline elements
	// and line breaks are left untouched.
	var cdataNode = new CKEDITOR.htmlParser.cdata( cdata );
	currentNode.add( cdataNode );
};
398
parser.onComment = function( comment )
{
	// Flush queued line breaks and re-open pending inline elements first,
	// so the comment lands at the right position.
	sendPendingBRs();
	checkPending();

	var commentNode = new CKEDITOR.htmlParser.comment( comment );
	currentNode.add( commentNode );
};
405
406 // Parse it.
407 parser.parse( fragmentHtml );
408
409 // Send all pending BRs except one, which we consider a unwanted bogus. (#5293)
410 sendPendingBRs( !CKEDITOR.env.ie && 1 );
411
412 // Close all pending nodes, make sure return point is properly restored.
413 while ( currentNode != fragment )
414 addElement( currentNode, currentNode.parent, 1 );
415
416 return fragment;
417 };
418
419 CKEDITOR.htmlParser.fragment.prototype =
420 {
421 /**
422 * Adds a node to this fragment.
423 * @param {Object} node The node to be added. It can be any of the
424 * following types: {@link CKEDITOR.htmlParser.element},
425 * {@link CKEDITOR.htmlParser.text} and
426 * {@link CKEDITOR.htmlParser.comment}.
427 * @param {Number} [index] From where the insertion happens.
428 * @example
429 */
430 add : function( node, index )
431 {
432 isNaN( index ) && ( index = this.children.length );
433
434 var previous = index > 0 ? this.children[ index - 1 ] : null;
435 if ( previous )
436 {
437 // If the block to be appended is following text, trim spaces at
438 // the right of it.
439 if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )
440 {
441 previous.value = CKEDITOR.tools.rtrim( previous.value );
442
443 // If we have completely cleared the previous node.
444 if ( previous.value.length === 0 )
445 {
446 // Remove it from the list and add the node again.
447 this.children.pop();
448 this.add( node );
449 return;
450 }
451 }
452
453 previous.next = node;
454 }
455
456 node.previous = previous;
457 node.parent = this;
458
459 this.children.splice( index, 0, node );
460
461 this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );
462 },
463
464 /**
465 * Writes the fragment HTML to a CKEDITOR.htmlWriter.
466 * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
467 * @example
468 * var writer = new CKEDITOR.htmlWriter();
469 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '&lt;P&gt;&lt;B&gt;Example' );
470 * fragment.writeHtml( writer )
471 * alert( writer.getHtml() ); "&lt;p&gt;&lt;b&gt;Example&lt;/b&gt;&lt;/p&gt;"
472 */
473 writeHtml : function( writer, filter )
474 {
475 var isChildrenFiltered;
476 this.filterChildren = function()
477 {
478 var writer = new CKEDITOR.htmlParser.basicWriter();
479 this.writeChildrenHtml.call( this, writer, filter, true );
480 var html = writer.getHtml();
481 this.children = new CKEDITOR.htmlParser.fragment.fromHtml( html ).children;
482 isChildrenFiltered = 1;
483 };
484
485 // Filtering the root fragment before anything else.
486 !this.name && filter && filter.onFragment( this );
487
488 this.writeChildrenHtml( writer, isChildrenFiltered ? null : filter );
489 },
490
491 writeChildrenHtml : function( writer, filter )
492 {
493 for ( var i = 0 ; i < this.children.length ; i++ )
494 this.children[i].writeHtml( writer, filter );
495 }
496 };
497 })();