2eb948dab82c44128aaf54e19ff49485f7cd361c
[ckeditor.git] / _source / plugins / pastefromword / filter / default.js
1 /*
2 Copyright (c) 2003-2011, CKSource - Frederico Knabben. All rights reserved.
3 For licensing, see LICENSE.html or http://ckeditor.com/license
4 */
5
6 (function()
7 {
8 var fragmentPrototype = CKEDITOR.htmlParser.fragment.prototype,
9 elementPrototype = CKEDITOR.htmlParser.element.prototype;
10
/**
 * Returns the single child of this node, or null when the node has no
 * children or more than one child.
 */
fragmentPrototype.onlyChild = elementPrototype.onlyChild = function()
{
	var nodes = this.children;

	if ( nodes.length != 1 )
		return null;

	return nodes[ 0 ] || null;
};
18
/**
 * Detaches every direct child element named tagName and, recursively,
 * every such element found below any named child (including inside the
 * children that were just detached).
 * @param {String} tagName Element name to look for.
 * @returns {Array} The detached elements, in document order.
 */
elementPrototype.removeAnyChildWithName = function( tagName )
{
	var nodes = this.children,
		removed = [],
		index = 0,
		node;

	while ( index < nodes.length )
	{
		node = nodes[ index ];

		// Nodes without a name are left alone.
		if ( node.name )
		{
			if ( node.name == tagName )
			{
				removed.push( node );
				nodes.splice( index, 1 );
				// Stay on the same slot: the next sibling has shifted into it.
				index--;
			}
			removed = removed.concat( node.removeAnyChildWithName( tagName ) );
		}

		index++;
	}

	return removed;
};
40
/**
 * Walks up the parent chain and returns the closest ancestor whose name
 * matches tagNameRegex (passed to String.prototype.match, so a plain
 * string is treated as a pattern too). Returns the falsy end of the
 * parent chain when nothing matches.
 */
elementPrototype.getAncestor = function( tagNameRegex )
{
	var node;

	for ( node = this.parent; node; node = node.parent )
	{
		if ( node.name && node.name.match( tagNameRegex ) )
			break;
	}

	return node;
};
48
/**
 * Depth-first search for the first descendant accepted by evaluator.
 * A child is tested before its own descendants; only named children
 * are descended into.
 * @param {Function} evaluator Called with each node; truthy means accept.
 * @returns The first accepted node, or null.
 */
fragmentPrototype.firstChild = elementPrototype.firstChild = function( evaluator )
{
	for ( var index = 0; index < this.children.length; index++ )
	{
		var node = this.children[ index ];

		if ( evaluator( node ) )
			return node;

		if ( !node.name )
			continue;

		var nested = node.firstChild( evaluator );
		if ( nested )
			return nested;
	}

	return null;
};
68
/**
 * Adds one or more style declarations to this element's 'style' attribute,
 * creating the attributes object when it is missing.
 *
 * Call forms:
 *   addStyle( name, value [, isPrepend] )  - a single name/value pair.
 *   addStyle( stylesObject [, isPrepend] ) - own properties become declarations.
 *   addStyle( styleText [, isPrepend] )    - raw style text, used as-is.
 *
 * @param {String|Object} name Style name, style map or raw style text.
 * @param {String|Boolean} [value] Style value; when it is not a string it is
 * taken as the isPrepend flag.
 * @param {Boolean} [isPrepend] Put the new styles before the existing ones.
 */
elementPrototype.addStyle = function( name, value, isPrepend )
{
	var styleText, addingStyleText = '';
	// name/value pair.
	if ( typeof value == 'string' )
		addingStyleText += name + ':' + value + ';';
	else
	{
		// style literal.
		if ( typeof name == 'object' )
		{
			for ( var style in name )
			{
				if ( name.hasOwnProperty( style ) )
					addingStyleText += style + ':' + name[ style ] + ';';
			}
		}
		// raw style text form.
		else
			addingStyleText += name;

		// Second argument was not a value, so it holds the prepend flag.
		isPrepend = value;
	}

	if ( !this.attributes )
		this.attributes = {};

	styleText = this.attributes.style || '';

	styleText = ( isPrepend ?
				  [ addingStyleText, styleText ]
				  : [ styleText, addingStyleText ] ).join( ';' );

	// Joining with ';' can yield empty declarations (';;') or a leading ';',
	// and the incoming text may already contain some. Clean all of them,
	// not only the first occurrence.
	this.attributes.style = styleText.replace( /;{2,}/g, ';' ).replace( /^;/, '' );
};
105
/**
 * Collects the names of the DTD entries that list the given tag as an
 * allowed child. Keys containing '$' are not considered.
 * @param {String} tagName
 * @returns {Object} Map of parent tag name to 1.
 */
CKEDITOR.dtd.parentOf = function( tagName )
{
	var parents = {},
		candidate;

	for ( candidate in this )
	{
		if ( candidate.indexOf( '$' ) != -1 )
			continue;

		if ( this[ candidate ][ tagName ] )
			parents[ candidate ] = 1;
	}

	return parents;
};
120
/**
 * Final clean-up of an assembled list element:
 * 1. drops 'value' attributes that merely repeat the item's position
 *    (value == index + 1);
 * 2. when every item that declares a 'list-style-type' declares the same
 *    one, removes it from the items and sets it once on the list.
 *
 * Nothing is done when the list itself already carries a list-style-type.
 * The 'value' clean-up covers every item even when the item styles turn
 * out to be inconsistent.
 *
 * @param list The list element; its direct children are expected to be
 * the list items.
 */
function postProcessList( list )
{
	var children = list.children,
		count = children.length,
		styleTypeRegexp = /list-style-type:(.*?)(?:;|$)/,
		stylesFilter = CKEDITOR.plugins.pastefromword.filters.stylesFilter,
		mergeStyle,
		conflict = false,
		attrs,
		match,
		i;

	if ( styleTypeRegexp.exec( list.attributes.style ) )
		return;

	for ( i = 0; i < count; i++ )
	{
		attrs = children[ i ].attributes;

		if ( attrs.value && Number( attrs.value ) == i + 1 )
			delete attrs.value;

		// Once two items disagree there is nothing left to merge, but the
		// numbering clean-up above must still visit the remaining items.
		if ( conflict )
			continue;

		match = styleTypeRegexp.exec( attrs.style );

		if ( match )
		{
			if ( match[ 1 ] == mergeStyle || !mergeStyle )
				mergeStyle = match[ 1 ];
			else
			{
				mergeStyle = null;
				conflict = true;
			}
		}
	}

	if ( mergeStyle )
	{
		// One filter instance is enough: it only strips 'list-style-type'.
		var dropListStyleType = stylesFilter( [ [ 'list-style-type'] ] );

		for ( i = 0; i < count; i++ )
		{
			attrs = children[ i ].attributes;
			attrs.style && ( attrs.style = dropListStyleType( attrs.style ) || '' );
		}

		list.addStyle( 'list-style-type', mergeStyle );
	}
}
170
// Optional numeric part followed by a unit, anchored at the start of the value.
// The numeric part is a single quantified class: nesting it in another
// quantifier ("([.\d]*)+") matches the same strings but backtracks
// exponentially on a long run of digits that is not followed by a unit.
var cssLengthRelativeUnit = /^([.\d]*)(em|ex|px|gd|rem|vw|vh|vm|ch|mm|cm|in|pt|pc|deg|rad|ms|s|hz|khz)/i;
var emptyMarginRegex = /^(?:\b0[^\s]*\s*){1,4}$/;						// e.g. 0px 0pt 0px
// Roman numeral shape; the upper-case variant is derived from the same pattern.
var romanLiternalPattern = '^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$',
	lowerRomanLiteralRegex = new RegExp( romanLiternalPattern ),
	upperRomanLiteralRegex = new RegExp( romanLiternalPattern.toUpperCase() );

// Bullet text patterns per list type, keyed by the CSS list-style-type they map to.
var orderedPatterns = { 'decimal' : /\d+/, 'lower-roman': lowerRomanLiteralRegex, 'upper-roman': upperRomanLiteralRegex, 'lower-alpha' : /^[a-z]+$/, 'upper-alpha': /^[A-Z]+$/ },
	unorderedPatterns = { 'disc' : /[l\u00B7\u2002]/, 'circle' : /[\u006F\u00D8]/,'square' : /[\u006E\u25C6]/},
	listMarkerPatterns = { 'ol' : orderedPatterns, 'ul' : unorderedPatterns },
	// Roman symbols in descending value order, as consumed by fromRoman().
	romans = [ [1000, 'M'], [900, 'CM'], [500, 'D'], [400, 'CD'], [100, 'C'], [90, 'XC'], [50, 'L'], [40, 'XL'], [10, 'X'], [9, 'IX'], [5, 'V'], [4, 'IV'], [1, 'I'] ],
	alpahbets = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
182
// Convert a roman numeral (any letter case) back to decimal by repeatedly
// stripping the largest symbol of the 'romans' table that prefixes the text.
function fromRoman( str )
{
	var remaining = str.toUpperCase(),
		symbolCount = romans.length,
		total = 0,
		pair,
		symbol,
		width;

	for ( var index = 0; index < symbolCount; ++index )
	{
		pair = romans[ index ];
		symbol = pair[ 1 ];
		width = symbol.length;

		while ( remaining.slice( 0, width ) == symbol )
		{
			total += pair[ 0 ];
			remaining = remaining.slice( width );
		}
	}

	return total;
}
195
// Convert alphabet numbering (any letter case) back to decimal.
// Markers are read as bijective base-26 numbers, i.e. the inverse of how
// an alphabetic list counter is rendered: a=1 ... z=26, aa=27, ab=28, ...
// Single letters keep the values they always had; multi-letter markers no
// longer collapse onto small numbers ('aa' used to yield 1).
// Returns 0 for an empty string.
function fromAlphabet( str )
{
	str = str.toUpperCase();
	var base = alpahbets.length, retVal = 0;
	for ( var i = 0; i < str.length; i++ )
		retVal = retVal * base + alpahbets.indexOf( str.charAt( i ) ) + 1;
	return retVal;
}
208
// State shared between consecutive list item resolutions; written by
// resolveList() and flattenList(), cleared at the end of assembleList().
// Margin increment that stands for one indent level (0/null = not known yet).
var listBaseIndent = 0;
// Margin (in px) of the previously resolved list item.
var previousListItemMargin = null;
// List id taken from the previous item's "mso-list" style.
var previousListId;
212
213 var plugin = ( CKEDITOR.plugins.pastefromword =
214 {
215 utils :
216 {
217 // Create a <cke:listbullet> which indicate an list item type.
218 createListBulletMarker : function ( bullet, bulletText )
219 {
220 var marker = new CKEDITOR.htmlParser.element( 'cke:listbullet' );
221 marker.attributes = { 'cke:listsymbol' : bullet[ 0 ] };
222 marker.add( new CKEDITOR.htmlParser.text( bulletText ) );
223 return marker;
224 },
225
226 isListBulletIndicator : function( element )
227 {
228 var styleText = element.attributes && element.attributes.style;
229 if ( /mso-list\s*:\s*Ignore/i.test( styleText ) )
230 return true;
231 },
232
233 isContainingOnlySpaces : function( element )
234 {
235 var text;
236 return ( ( text = element.onlyChild() )
237 && ( /^(:?\s|&nbsp;)+$/ ).test( text.value ) );
238 },
239
// Decide whether 'element' (a paragraph or heading in the visible callers) is a
// pseudo list item and, if so, turn it into a 'cke:li' carrying a 'cke:indent'
// level. Returns true when the element was converted, false otherwise.
// Reads and updates the shared listBaseIndent / previousListItemMargin /
// previousListId state; all three are reset to null when the element is not
// a list item.
240 resolveList : function( element )
241 {
242 // <cke:listbullet> indicate a list item.
243 var attrs = element.attributes,
244 listMarker;
245
// All <cke:listbullet> descendants are detached; only the first one is used.
246 if ( ( listMarker = element.removeAnyChildWithName( 'cke:listbullet' ) )
247 && listMarker.length
248 && ( listMarker = listMarker[ 0 ] ) )
249 {
250 element.name = 'cke:li';
251
252 if ( attrs.style )
253 {
// The callbacks below return nothing, so stylesFilter drops the matched
// declarations; they are used for their side effects on 'attrs'.
254 attrs.style = plugin.filters.stylesFilter(
255 [
256 // Text-indent is not representing list item level any more.
257 [ 'text-indent' ],
258 [ 'line-height' ],
259 // First attempt is to resolve indent level from on a constant margin increment.
// NOTE(review): "(:?-left)" looks like a typo for "(?:-left)"; as written it
// also accepts an optional literal ':' - confirm before changing.
260 [ ( /^margin(:?-left)?$/ ), null, function( margin )
261 {
262 // Deal with component/short-hand form.
// Fourth component of a four-value shorthand, else the second, else the only value.
263 var values = margin.split( ' ' );
264 margin = CKEDITOR.tools.convertToPx( values[ 3 ] || values[ 1 ] || values [ 0 ] );
265
266 // Figure out the indent unit by checking the first time of incrementation.
267 if ( !listBaseIndent && previousListItemMargin !== null && margin > previousListItemMargin )
268 listBaseIndent = margin - previousListItemMargin;
269
270 previousListItemMargin = margin;
271
// Falls back to level 1 while the indent unit is still unknown.
272 attrs[ 'cke:indent' ] = listBaseIndent && ( Math.ceil( margin / listBaseIndent ) + 1 ) || 1;
273 } ],
274 // The best situation: "mso-list:l0 level1 lfo2" tells the belonged list root, list item indentation, etc.
// NOTE(review): val[ 1 ] is undefined when the value has a single token, in
// which case the .match() call throws - verify such input cannot occur.
275 [ ( /^mso-list$/ ), null, function( val )
276 {
277 val = val.split( ' ' );
278 var listId = Number( val[ 0 ].match( /\d+/ ) ),
279 indent = Number( val[ 1 ].match( /\d+/ ) );
280
// A first-level item with a different list id flags the start of a new list.
281 if ( indent == 1 )
282 {
283 listId !== previousListId && ( attrs[ 'cke:reset' ] = 1 );
284 previousListId = listId;
285 }
286 attrs[ 'cke:indent' ] = indent;
287 } ]
288 ] )( attrs.style, element ) || '';
289 }
290
291 // First level list item might be presented without a margin.
292
293
294 // In case all above doesn't apply.
295 if ( !attrs[ 'cke:indent' ] )
296 {
297 previousListItemMargin = 0;
298 attrs[ 'cke:indent' ] = 1;
299 }
300
301 // Inherit attributes from bullet.
302 CKEDITOR.tools.extend( attrs, listMarker.attributes );
303 return true;
304 }
305 // Current list disconnected.
306 else
307 previousListId = previousListItemMargin = listBaseIndent = null;
308
309 return false;
310 },
311
312 // Providing a shorthand style then retrieve one or more style component values.
313 getStyleComponents : ( function()
314 {
315 var calculator = CKEDITOR.dom.element.createFromHtml(
316 '<div style="position:absolute;left:-9999px;top:-9999px;"></div>',
317 CKEDITOR.document );
318 CKEDITOR.document.getBody().append( calculator );
319
320 return function( name, styleValue, fetchList )
321 {
322 calculator.setStyle( name, styleValue );
323 var styles = {},
324 count = fetchList.length;
325 for ( var i = 0; i < count; i++ )
326 styles[ fetchList[ i ] ] = calculator.getStyle( fetchList[ i ] );
327
328 return styles;
329 };
330 } )(),
331
// Map of the DTD tag names that accept an <ol> child, computed once when
// this object literal is evaluated (see CKEDITOR.dtd.parentOf).
332 listDtdParents : CKEDITOR.dtd.parentOf( 'ol' )
333 },
334
335 filters :
336 {
337 // Transform a normal list into flat list items only presentation.
338 // E.g. <ul><li>level1<ol><li>level2</li></ol></li> =>
339 // <cke:li cke:listtype="ul" cke:indent="1">level1</cke:li>
340 // <cke:li cke:listtype="ol" cke:indent="2">level2</cke:li>
// 'level' is the nesting depth assigned to the direct items of 'element';
// anything that is not a number is treated as 1. The function recurses
// into nested lists with level + 1 and finally removes the list's own
// name, leaving a 'cke:list' attribute as a marker.
341 flattenList : function( element, level )
342 {
343 level = typeof level == 'number' ? level : 1;
344
345 var attrs = element.attributes,
346 listStyleType;
347
348 // All list items are of the same type.
// Only the 'a' and '1' values of the 'type' attribute are mapped; for any
// other value listStyleType stays undefined.
349 switch ( attrs.type )
350 {
351 case 'a' :
352 listStyleType = 'lower-alpha';
353 break;
354 case '1' :
355 listStyleType = 'decimal';
356 break;
357 // TODO: Support more list style type from MS-Word.
358 }
359
360 var children = element.children,
361 child;
362
363 for ( var i = 0; i < children.length; i++ )
364 {
365 child = children[ i ];
366
367 if ( child.name in CKEDITOR.dtd.$listItem )
368 {
369 var attributes = child.attributes,
370 listItemChildren = child.children,
371 count = listItemChildren.length,
372 last = listItemChildren[ count - 1 ];
373
374 // Move out nested list.
// NOTE(review): 'last' is undefined for a list item without children, which
// makes 'last.name' throw - confirm empty items cannot reach this point.
375 if ( last.name in CKEDITOR.dtd.$list )
376 {
377 element.add( last, i + 1 );
378
379 // Remove the parent list item if it's just a holder.
// Decrementing 'length' truncates the nested list off the item's children;
// a result of 0 means the item held nothing but that list.
380 if ( !--listItemChildren.length )
381 children.splice( i--, 1 );
382 }
383
384 child.name = 'cke:li';
385
386 // Inherit numbering from list root on the first list item.
387 attrs.start && !i && ( attributes.value = attrs.start );
388
// The filter result is discarded: the callbacks only update the shared
// previousListItemMargin / previousListId state and the 'cke:reset' flag.
389 plugin.filters.stylesFilter(
390 [
391 [ 'tab-stops', null, function( val )
392 {
// NOTE(review): assumes the value has at least two space-separated tokens;
// otherwise the .match() call throws - confirm.
393 var margin = val.split( ' ' )[ 1 ].match( cssLengthRelativeUnit );
394 margin && ( previousListItemMargin = CKEDITOR.tools.convertToPx( margin[ 0 ] ) );
395 } ],
396 ( level == 1 ? [ 'mso-list', null, function( val )
397 {
398 val = val.split( ' ' );
399 var listId = Number( val[ 0 ].match( /\d+/ ) );
400 listId !== previousListId && ( attributes[ 'cke:reset' ] = 1 );
401 previousListId = listId;
402 } ] : null )
403 ] )( attributes.style );
404
405 attributes[ 'cke:indent' ] = level;
406 attributes[ 'cke:listtype' ] = element.name;
407 attributes[ 'cke:list-style-type' ] = listStyleType;
408 }
409 // Flatten sub list.
410 else if ( child.name in CKEDITOR.dtd.$list )
411 {
412 // Absorb sub list children.
// Recursion goes through arguments.callee, which is unavailable in strict mode.
413 arguments.callee.apply( this, [ child, level + 1 ] );
// Replace the nested list by its (already flattened) children, then re-add
// everything through element.add().
414 children = children.slice( 0, i ).concat( child.children ).concat( children.slice( i + 1 ) );
415 element.children = [];
416 for ( var j = 0, num = children.length; j < num ; j++ )
417 element.add( children[ j ] );
418 }
419 }
420
421 delete element.name;
422
423 // We're loosing tag name here, signalize this element as a list.
424 attrs[ 'cke:list' ] = 1;
425 },
426
427 /**
428 * Try to collect all list items among the children and establish one
429 * or more HTML list structures for them.
430 * @param element
431 */
// Walks the direct children of 'element': every run of consecutive 'cke:li'
// children is folded into <ol>/<ul> elements nested according to each item's
// 'cke:indent'. Any other child ends the current run. Every list created is
// passed to postProcessList() at the end, and the shared list state
// (previousListId, previousListItemMargin, listBaseIndent) is cleared.
432 assembleList : function( element )
433 {
434 var children = element.children, child,
435 listItem, // The current processing cke:li element.
436 listItemAttrs,
437 listItemIndent, // Indent level of current list item.
438 lastIndent,
439 lastListItem, // The previous one just been added to the list.
440 list, // Current staging list and it's parent list if any.
441 openedLists = [],
442 previousListStyleType,
443 previousListType;
444
445 // Properties of the list item are to be resolved from the list bullet.
446 var bullet,
447 listType,
448 listStyleType,
449 itemNumeric;
450
451 for ( var i = 0; i < children.length; i++ )
452 {
453 child = children[ i ];
454
455 if ( 'cke:li' == child.name )
456 {
457 child.name = 'li';
458 listItem = child;
459 listItemAttrs = listItem.attributes;
460 bullet = listItemAttrs[ 'cke:listsymbol' ];
// Split the symbol into its core (group 1) and an optional trailing '.' or
// ')' (group 2); an optional leading '(' is skipped.
461 bullet = bullet && bullet.match( /^(?:[(]?)([^\s]+?)([.)]?)$/ );
462 listType = listStyleType = itemNumeric = null;
463
// Items flagged as ignored are dropped from the output altogether.
464 if ( listItemAttrs[ 'cke:ignored' ] )
465 {
466 children.splice( i--, 1 );
467 continue;
468 }
469
470
471 // This's from a new list root.
472 listItemAttrs[ 'cke:reset' ] && ( list = lastIndent = lastListItem = null );
473
474 // List item indent level might come from a real list indentation or
475 // been resolved from a pseudo list item's margin value, even get
476 // no indentation at all.
477 listItemIndent = Number( listItemAttrs[ 'cke:indent' ] );
478
479 // We're moving out of the current list, cleaning up.
480 if ( listItemIndent != lastIndent )
481 previousListType = previousListStyleType = null;
482
483 // List type and item style are already resolved.
484 if ( !bullet )
485 {
486 listType = listItemAttrs[ 'cke:listtype' ] || 'ol';
487 listStyleType = listItemAttrs[ 'cke:list-style-type' ];
488 }
489 else
490 {
491 // Probably share the same list style type with previous list item,
492 // give it priority to avoid ambiguous between C(Alpha) and C.(Roman).
493 if ( previousListType && listMarkerPatterns[ previousListType ] [ previousListStyleType ].test( bullet[ 1 ] ) )
494 {
495 listType = previousListType;
496 listStyleType = previousListStyleType;
497 }
498 else
499 {
// Try every known marker pattern. The 'break' below only leaves the inner
// loop, so a later list type can still override an earlier match.
500 for ( var type in listMarkerPatterns )
501 {
502 for ( var style in listMarkerPatterns[ type ] )
503 {
504 if ( listMarkerPatterns[ type ][ style ].test( bullet[ 1 ] ) )
505 {
506 // Small numbering has higher priority, when dealing with ambiguous
507 // between C(Alpha) and C.(Roman).
508 if ( type == 'ol' && ( /alpha|roman/ ).test( style ) )
509 {
510 var num = /roman/.test( style ) ? fromRoman( bullet[ 1 ] ) : fromAlphabet( bullet[ 1 ] );
511 if ( !itemNumeric || num < itemNumeric )
512 {
513 itemNumeric = num;
514 listType = type;
515 listStyleType = style;
516 }
517 }
518 else
519 {
520 listType = type;
521 listStyleType = style;
522 break;
523 }
524 }
525 }
526 }
527 }
528
529 // Simply use decimal/disc for the rest forms of unrepresentable
530 // numerals, e.g. Chinese..., but as long as there a second part
531 // included, it has a bigger chance of being a order list ;)
532 !listType && ( listType = bullet[ 2 ] ? 'ol' : 'ul' );
533 }
534
535 previousListType = listType;
536 previousListStyleType = listStyleType || ( listType == 'ol' ? 'decimal' : 'disc' );
// Only a non-default style type (not decimal for <ol>, not disc for <ul>) is
// written to the item.
537 if ( listStyleType && listStyleType != ( listType == 'ol' ? 'decimal' : 'disc' ) )
538 listItem.addStyle( 'list-style-type', listStyleType );
539
540 // Figure out start numbering.
541 if ( listType == 'ol' && bullet )
542 {
543 switch ( listStyleType )
544 {
545 case 'decimal' :
546 itemNumeric = Number( bullet[ 1 ] );
547 break;
548 case 'lower-roman':
549 case 'upper-roman':
550 itemNumeric = fromRoman( bullet[ 1 ] );
551 break;
552 case 'lower-alpha':
553 case 'upper-alpha':
554 itemNumeric = fromAlphabet( bullet[ 1 ] );
555 break;
556 }
557
558 // Always create the numbering, swipe out unnecessary ones later.
559 listItem.attributes.value = itemNumeric;
560 }
561
562 // Start the list construction.
563 if ( !list )
564 {
// The new root list takes the item's slot in the children array.
565 openedLists.push( list = new CKEDITOR.htmlParser.element( listType ) );
566 list.add( listItem );
567 children[ i ] = list;
568 }
569 else
570 {
// Deeper indent: open a sub list inside the previously added item.
571 if ( listItemIndent > lastIndent )
572 {
573 openedLists.push( list = new CKEDITOR.htmlParser.element( listType ) );
574 list.add( listItem );
575 lastListItem.add( list );
576 }
577 else if ( listItemIndent < lastIndent )
578 {
579 // There might be a negative gap between two list levels. (#4944)
// Climb one list level per indent step (list -> parent item -> parent list),
// stopping early when there is no parent left.
580 var diff = lastIndent - listItemIndent,
581 parent;
582 while ( diff-- && ( parent = list.parent ) )
583 list = parent.parent;
584
585 list.add( listItem );
586 }
587 else
588 list.add( listItem );
589
// The item now lives inside a list, so remove it from the flat children.
590 children.splice( i--, 1 );
591 }
592
593 lastListItem = listItem;
594 lastIndent = listItemIndent;
595 }
// Any non list item child closes the list being built.
596 else if ( list )
597 list = lastIndent = lastListItem = null;
598 }
599
600 for ( i = 0; i < openedLists.length; i++ )
601 postProcessList( openedLists[ i ] );
602
603 list = lastIndent = lastListItem = previousListId = previousListItemMargin = listBaseIndent = null;
604 },
605
606 /**
607 * A simple filter which always rejecting.
608 */
609 falsyFilter : function( value )
610 {
611 return false;
612 },
613
614 /**
615 * A filter dedicated on the 'style' attribute filtering, e.g. dropping/replacing style properties.
616 * @param styles {Array} in form of [ styleNameRegexp, styleValueRegexp,
617 * newStyleValue/newStyleGenerator, newStyleName ] where only the first
618 * parameter is mandatory.
619 * @param whitelist {Boolean} Whether the {@param styles} will be considered as a white-list.
620 */
621 stylesFilter : function( styles, whitelist )
622 {
623 return function( styleText, element )
624 {
625 var rules = [];
626 // html-encoded quote might be introduced by 'font-family'
627 // from MS-Word which confused the following regexp. e.g.
628 //'font-family: &quot;Lucida, Console&quot;'
629 ( styleText || '' )
630 .replace( /&quot;/g, '"' )
631 .replace( /\s*([^ :;]+)\s*:\s*([^;]+)\s*(?=;|$)/g,
632 function( match, name, value )
633 {
634 name = name.toLowerCase();
635 name == 'font-family' && ( value = value.replace( /["']/g, '' ) );
636
637 var namePattern,
638 valuePattern,
639 newValue,
640 newName;
641 for ( var i = 0 ; i < styles.length; i++ )
642 {
643 if ( styles[ i ] )
644 {
645 namePattern = styles[ i ][ 0 ];
646 valuePattern = styles[ i ][ 1 ];
647 newValue = styles[ i ][ 2 ];
648 newName = styles[ i ][ 3 ];
649
650 if ( name.match( namePattern )
651 && ( !valuePattern || value.match( valuePattern ) ) )
652 {
653 name = newName || name;
654 whitelist && ( newValue = newValue || value );
655
656 if ( typeof newValue == 'function' )
657 newValue = newValue( value, element, name );
658
659 // Return an couple indicate both name and value
660 // changed.
661 if ( newValue && newValue.push )
662 name = newValue[ 0 ], newValue = newValue[ 1 ];
663
664 if ( typeof newValue == 'string' )
665 rules.push( [ name, newValue ] );
666 return;
667 }
668 }
669 }
670
671 !whitelist && rules.push( [ name, value ] );
672
673 });
674
675 for ( var i = 0 ; i < rules.length ; i++ )
676 rules[ i ] = rules[ i ].join( ':' );
677 return rules.length ?
678 ( rules.join( ';' ) + ';' ) : false;
679 };
680 },
681
682 /**
683 * Migrate the element by decorate styles on it.
684 * @param styleDefiniton
685 * @param variables
686 */
687 elementMigrateFilter : function ( styleDefiniton, variables )
688 {
689 return function( element )
690 {
691 var styleDef =
692 variables ?
693 new CKEDITOR.style( styleDefiniton, variables )._.definition
694 : styleDefiniton;
695 element.name = styleDef.element;
696 CKEDITOR.tools.extend( element.attributes, CKEDITOR.tools.clone( styleDef.attributes ) );
697 element.addStyle( CKEDITOR.style.getStyleText( styleDef ) );
698 };
699 },
700
701 /**
702 * Migrate styles by creating a new nested stylish element.
703 * @param styleDefinition
704 */
705 styleMigrateFilter : function( styleDefinition, variableName )
706 {
707
708 var elementMigrateFilter = this.elementMigrateFilter;
709 return function( value, element )
710 {
711 // Build an stylish element first.
712 var styleElement = new CKEDITOR.htmlParser.element( null ),
713 variables = {};
714
715 variables[ variableName ] = value;
716 elementMigrateFilter( styleDefinition, variables )( styleElement );
717 // Place the new element inside the existing span.
718 styleElement.children = element.children;
719 element.children = [ styleElement ];
720 };
721 },
722
723 /**
724 * A filter which remove cke-namespaced-attribute on
725 * all none-cke-namespaced elements.
726 * @param value
727 * @param element
728 */
729 bogusAttrFilter : function( value, element )
730 {
731 if ( element.name.indexOf( 'cke:' ) == -1 )
732 return false;
733 },
734
735 /**
736 * A filter which will be used to apply inline css style according the stylesheet
737 * definition rules, is generated lazily when filtering.
738 */
739 applyStyleFilter : null
740
741 },
742
743 getRules : function( editor )
744 {
745 var dtd = CKEDITOR.dtd,
746 blockLike = CKEDITOR.tools.extend( {}, dtd.$block, dtd.$listItem, dtd.$tableContent ),
747 config = editor.config,
748 filters = this.filters,
749 falsyFilter = filters.falsyFilter,
750 stylesFilter = filters.stylesFilter,
751 elementMigrateFilter = filters.elementMigrateFilter,
752 styleMigrateFilter = CKEDITOR.tools.bind( this.filters.styleMigrateFilter, this.filters ),
753 createListBulletMarker = this.utils.createListBulletMarker,
754 flattenList = filters.flattenList,
755 assembleList = filters.assembleList,
756 isListBulletIndicator = this.utils.isListBulletIndicator,
757 containsNothingButSpaces = this.utils.isContainingOnlySpaces,
758 resolveListItem = this.utils.resolveList,
759 convertToPx = function( value )
760 {
761 value = CKEDITOR.tools.convertToPx( value );
762 return isNaN( value ) ? value : value + 'px';
763 },
764 getStyleComponents = this.utils.getStyleComponents,
765 listDtdParents = this.utils.listDtdParents,
766 removeFontStyles = config.pasteFromWordRemoveFontStyles !== false,
767 removeStyles = config.pasteFromWordRemoveStyles !== false;
768
769 return {
770
771 elementNames :
772 [
773 // Remove script, meta and link elements.
774 [ ( /meta|link|script/ ), '' ]
775 ],
776
// Root rule: filter all children first, then assemble the flat 'cke:li'
// items found directly under the root into real lists.
777 root : function( element )
778 {
779 element.filterChildren();
780 assembleList( element );
781 },
782
783 elements :
784 {
785 '^' : function( element )
786 {
787 // Transform CSS style declaration to inline style.
788 var applyStyleFilter;
789 if ( CKEDITOR.env.gecko && ( applyStyleFilter = filters.applyStyleFilter ) )
790 applyStyleFilter( element );
791 },
792
// Generic element rule. In order: normalizes width/height on block-like
// elements, then handles headings, inline elements or foreign-namespaced
// elements (mutually exclusive branches), and finally assembles list items
// inside elements that may contain a list. 'tagName' is captured before any
// renaming, so the last check uses the original name.
793 $ : function( element )
794 {
795 var tagName = element.name || '',
796 attrs = element.attributes;
797
798 // Convert length unit of width/height on blocks to
799 // a more editor-friendly way (px).
// NOTE(review): "(:?width|height)" looks like a typo for "(?:width|height)".
800 if ( tagName in blockLike
801 && attrs.style )
802 {
803 attrs.style = stylesFilter(
804 [ [ ( /^(:?width|height)$/ ), null, convertToPx ] ] )( attrs.style ) || '';
805 }
806
807 // Processing headings.
// The pattern is not anchored: any name containing 'h' followed by a digit matches.
808 if ( tagName.match( /h\d/ ) )
809 {
810 element.filterChildren();
811 // Is the heading actually a list item?
812 if ( resolveListItem( element ) )
813 return;
814
815 // Adapt heading styles to editor's convention.
816 elementMigrateFilter( config[ 'format_' + tagName ] )( element );
817 }
818 // Remove inline elements which contain only empty spaces.
// Deleting the name drops the element itself; its children are left in place.
819 else if ( tagName in dtd.$inline )
820 {
821 element.filterChildren();
822 if ( containsNothingButSpaces( element ) )
823 delete element.name;
824 }
825 // Remove element with ms-office namespace,
826 // with it's content preserved, e.g. 'o:p'.
827 else if ( tagName.indexOf( ':' ) != -1
828 && tagName.indexOf( 'cke' ) == -1 )
829 {
830 element.filterChildren();
831
832 // Restore image real link from vml.
833 if ( tagName == 'v:imagedata' )
834 {
835 var href = element.attributes[ 'o:href' ];
836 if ( href )
837 element.attributes.src = href;
838 element.name = 'img';
839 return;
840 }
841 delete element.name;
842 }
843
844 // Assembling list items into a whole list.
845 if ( tagName in listDtdParents )
846 {
847 element.filterChildren();
848 assembleList( element );
849 }
850 },
851
852 // We'll drop any style sheet, but Firefox conclude
853 // certain styles in a single style element, which are
854 // required to be changed into inline ones.
855 'style' : function( element )
856 {
857 if ( CKEDITOR.env.gecko )
858 {
859 // Grab only the style definition section.
860 var styleDefSection = element.onlyChild().value.match( /\/\* Style Definitions \*\/([\s\S]*?)\/\*/ ),
861 styleDefText = styleDefSection && styleDefSection[ 1 ],
862 rules = {}; // Storing the parsed result.
863
864 if ( styleDefText )
865 {
866 styleDefText
867 // Remove line-breaks.
868 .replace(/[\n\r]/g,'')
869 // Extract selectors and style properties.
870 .replace( /(.+?)\{(.+?)\}/g,
871 function( rule, selectors, styleBlock )
872 {
873 selectors = selectors.split( ',' );
874 var length = selectors.length, selector;
875 for ( var i = 0; i < length; i++ )
876 {
877 // Assume MS-Word mostly generate only simple
878 // selector( [Type selector][Class selector]).
879 CKEDITOR.tools.trim( selectors[ i ] )
880 .replace( /^(\w+)(\.[\w-]+)?$/g,
881 function( match, tagName, className )
882 {
883 tagName = tagName || '*';
884 className = className.substring( 1, className.length );
885
886 // Reject MS-Word Normal styles.
887 if ( className.match( /MsoNormal/ ) )
888 return;
889
890 if ( !rules[ tagName ] )
891 rules[ tagName ] = {};
892 if ( className )
893 rules[ tagName ][ className ] = styleBlock;
894 else
895 rules[ tagName ] = styleBlock;
896 } );
897 }
898 });
899
900 filters.applyStyleFilter = function( element )
901 {
902 var name = rules[ '*' ] ? '*' : element.name,
903 className = element.attributes && element.attributes[ 'class' ],
904 style;
905 if ( name in rules )
906 {
907 style = rules[ name ];
908 if ( typeof style == 'object' )
909 style = style[ className ];
910 // Maintain style rules priorities.
911 style && element.addStyle( style, true );
912 }
913 };
914 }
915 }
916 return false;
917 },
918
919 'p' : function( element )
920 {
921 // This's a fall-back approach to recognize list item in FF3.6,
922 // as it's not perfect as not all list style (e.g. "heading list") is shipped
923 // with this pattern. (#6662)
924 if ( /MsoListParagraph/.exec( element.attributes[ 'class' ] ) )
925 {
926 var bulletText = element.firstChild( function( node )
927 {
928 return node.type == CKEDITOR.NODE_TEXT && !containsNothingButSpaces( node.parent );
929 });
930 var bullet = bulletText && bulletText.parent,
931 bulletAttrs = bullet && bullet.attributes;
932 bulletAttrs && !bulletAttrs.style && ( bulletAttrs.style = 'mso-list: Ignore;' );
933 }
934
935 element.filterChildren();
936
937 // Is the paragraph actually a list item?
938 if ( resolveListItem( element ) )
939 return;
940
941 // Adapt paragraph formatting to editor's convention
942 // according to enter-mode.
943 if ( config.enterMode == CKEDITOR.ENTER_BR )
944 {
945 // We suffer from attribute/style lost in this situation.
946 delete element.name;
947 element.add( new CKEDITOR.htmlParser.element( 'br' ) );
948 }
949 else
950 elementMigrateFilter( config[ 'format_' + ( config.enterMode == CKEDITOR.ENTER_P ? 'p' : 'div' ) ] )( element );
951 },
952
953 'div' : function( element )
954 {
955 // Aligned table with no text surrounded is represented by a wrapper div, from which
956 // table cells inherit as text-align styles, which is wrong.
957 // Instead we use a clear-float div after the table to properly achieve the same layout.
958 var singleChild = element.onlyChild();
959 if ( singleChild && singleChild.name == 'table' )
960 {
961 var attrs = element.attributes;
962 singleChild.attributes = CKEDITOR.tools.extend( singleChild.attributes, attrs );
963 attrs.style && singleChild.addStyle( attrs.style );
964
965 var clearFloatDiv = new CKEDITOR.htmlParser.element( 'div' );
966 clearFloatDiv.addStyle( 'clear' ,'both' );
967 element.add( clearFloatDiv );
968 delete element.name;
969 }
970 },
971
972 'td' : function ( element )
973 {
974 // 'td' in 'thead' is actually <th>.
975 if ( element.getAncestor( 'thead') )
976 element.name = 'th';
977 },
978
979 // MS-Word sometimes present list as a mixing of normal list
980 // and pseudo-list, normalize the previous ones into pseudo form.
981 'ol' : flattenList,
982 'ul' : flattenList,
983 'dl' : flattenList,
984
985 'font' : function( element )
986 {
987 // Drop the font tag if it comes from list bullet text.
988 if ( isListBulletIndicator( element.parent ) )
989 {
990 delete element.name;
991 return;
992 }
993
994 element.filterChildren();
995
996 var attrs = element.attributes,
997 styleText = attrs.style,
998 parent = element.parent;
999
1000 if ( 'font' == parent.name ) // Merge nested <font> tags.
1001 {
1002 CKEDITOR.tools.extend( parent.attributes,
1003 element.attributes );
1004 styleText && parent.addStyle( styleText );
1005 delete element.name;
1006 }
1007 // Convert the merged into a span with all attributes preserved.
1008 else
1009 {
1010 styleText = styleText || '';
1011 // IE's having those deprecated attributes, normalize them.
1012 if ( attrs.color )
1013 {
1014 attrs.color != '#000000' && ( styleText += 'color:' + attrs.color + ';' );
1015 delete attrs.color;
1016 }
1017 if ( attrs.face )
1018 {
1019 styleText += 'font-family:' + attrs.face + ';';
1020 delete attrs.face;
1021 }
1022 // TODO: Mapping size in ranges of xx-small,
1023 // x-small, small, medium, large, x-large, xx-large.
1024 if ( attrs.size )
1025 {
1026 styleText += 'font-size:' +
1027 ( attrs.size > 3 ? 'large'
1028 : ( attrs.size < 3 ? 'small' : 'medium' ) ) + ';';
1029 delete attrs.size;
1030 }
1031
1032 element.name = 'span';
1033 element.addStyle( styleText );
1034 }
1035 },
1036
1037 'span' : function( element )
1038 {
1039 // Remove the span if it comes from list bullet text.
1040 if ( isListBulletIndicator( element.parent ) )
1041 return false;
1042
1043 element.filterChildren();
1044 if ( containsNothingButSpaces( element ) )
1045 {
1046 delete element.name;
1047 return null;
1048 }
1049
1050 // List item bullet type is supposed to be indicated by
1051 // the text of a span with style 'mso-list : Ignore' or an image.
1052 if ( isListBulletIndicator( element ) )
1053 {
1054 var listSymbolNode = element.firstChild( function( node )
1055 {
1056 return node.value || node.name == 'img';
1057 });
1058
1059 var listSymbol = listSymbolNode && ( listSymbolNode.value || 'l.' ),
1060 listType = listSymbol && listSymbol.match( /^(?:[(]?)([^\s]+?)([.)]?)$/ );
1061
1062 if ( listType )
1063 {
1064 var marker = createListBulletMarker( listType, listSymbol );
1065 // Some non-existed list items might be carried by an inconsequential list, indicate by "mso-hide:all/display:none",
1066 // those are to be removed later, now mark it with "cke:ignored".
1067 var ancestor = element.getAncestor( 'span' );
1068 if ( ancestor && (/ mso-hide:\s*all|display:\s*none /).test( ancestor.attributes.style ) )
1069 marker.attributes[ 'cke:ignored' ] = 1;
1070 return marker;
1071 }
1072 }
1073
1074 // Update the src attribute of image element with href.
1075 var children = element.children,
1076 attrs = element.attributes,
1077 styleText = attrs && attrs.style,
1078 firstChild = children && children[ 0 ];
1079
1080 // Assume MS-Word mostly carry font related styles on <span>,
1081 // adapting them to editor's convention.
1082 if ( styleText )
1083 {
1084 attrs.style = stylesFilter(
1085 [
1086 // Drop 'inline-height' style which make lines overlapping.
1087 [ 'line-height' ],
1088 [ ( /^font-family$/ ), null, !removeFontStyles ? styleMigrateFilter( config[ 'font_style' ], 'family' ) : null ] ,
1089 [ ( /^font-size$/ ), null, !removeFontStyles ? styleMigrateFilter( config[ 'fontSize_style' ], 'size' ) : null ] ,
1090 [ ( /^color$/ ), null, !removeFontStyles ? styleMigrateFilter( config[ 'colorButton_foreStyle' ], 'color' ) : null ] ,
1091 [ ( /^background-color$/ ), null, !removeFontStyles ? styleMigrateFilter( config[ 'colorButton_backStyle' ], 'color' ) : null ]
1092 ] )( styleText, element ) || '';
1093 }
1094
1095 return null;
1096 },
1097
1098 // Migrate basic style formats to editor configured ones.
1099 'b' : elementMigrateFilter( config[ 'coreStyles_bold' ] ),
1100 'i' : elementMigrateFilter( config[ 'coreStyles_italic' ] ),
1101 'u' : elementMigrateFilter( config[ 'coreStyles_underline' ] ),
1102 's' : elementMigrateFilter( config[ 'coreStyles_strike' ] ),
1103 'sup' : elementMigrateFilter( config[ 'coreStyles_superscript' ] ),
1104 'sub' : elementMigrateFilter( config[ 'coreStyles_subscript' ] ),
1105 // Editor doesn't support anchor with content currently (#3582),
1106 // drop such anchors with content preserved.
1107 'a' : function( element )
1108 {
1109 var attrs = element.attributes;
1110 if ( attrs && !attrs.href && attrs.name )
1111 delete element.name;
1112 else if ( CKEDITOR.env.webkit && attrs.href && attrs.href.match( /file:\/\/\/[\S]+#/i ) )
1113 attrs.href = attrs.href.replace( /file:\/\/\/[^#]+/i,'' );
1114 },
1115 'cke:listbullet' : function( element )
1116 {
1117 if ( element.getAncestor( /h\d/ ) && !config.pasteFromWordNumberedHeadingToList )
1118 delete element.name;
1119 }
1120 },
1121
1122 attributeNames :
1123 [
1124 // Remove onmouseover and onmouseout events (from MS Word comments effect)
1125 [ ( /^onmouse(:?out|over)/ ), '' ],
1126 // Onload on image element.
1127 [ ( /^onload$/ ), '' ],
1128 // Remove office and vml attribute from elements.
1129 [ ( /(?:v|o):\w+/ ), '' ],
1130 // Remove lang/language attributes.
1131 [ ( /^lang/ ), '' ]
1132 ],
1133
1134 attributes :
1135 {
1136 'style' : stylesFilter(
1137 removeStyles ?
1138 // Provide a white-list of styles that we preserve, those should
1139 // be the ones that could later be altered with editor tools.
1140 [
1141 // Leave list-style-type
1142 [ ( /^list-style-type$/ ), null ],
1143
1144 // Preserve margin-left/right which used as default indent style in the editor.
1145 [ ( /^margin$|^margin-(?!bottom|top)/ ), null, function( value, element, name )
1146 {
1147 if ( element.name in { p : 1, div : 1 } )
1148 {
1149 var indentStyleName = config.contentsLangDirection == 'ltr' ?
1150 'margin-left' : 'margin-right';
1151
1152 // Extract component value from 'margin' shorthand.
1153 if ( name == 'margin' )
1154 {
1155 value = getStyleComponents( name, value,
1156 [ indentStyleName ] )[ indentStyleName ];
1157 }
1158 else if ( name != indentStyleName )
1159 return null;
1160
1161 if ( value && !emptyMarginRegex.test( value ) )
1162 return [ indentStyleName, value ];
1163 }
1164
1165 return null;
1166 } ],
1167
1168 // Preserve clear float style.
1169 [ ( /^clear$/ ) ],
1170
1171 [ ( /^border.*|margin.*|vertical-align|float$/ ), null,
1172 function( value, element )
1173 {
1174 if ( element.name == 'img' )
1175 return value;
1176 } ],
1177
1178 [ (/^width|height$/ ), null,
1179 function( value, element )
1180 {
1181 if ( element.name in { table : 1, td : 1, th : 1, img : 1 } )
1182 return value;
1183 } ]
1184 ] :
1185 // Otherwise provide a black-list of styles that we remove.
1186 [
1187 [ ( /^mso-/ ) ],
1188 // Fixing color values.
1189 [ ( /-color$/ ), null, function( value )
1190 {
1191 if ( value == 'transparent' )
1192 return false;
1193 if ( CKEDITOR.env.gecko )
1194 return value.replace( /-moz-use-text-color/g, 'transparent' );
1195 } ],
1196 // Remove empty margin values, e.g. 0.00001pt 0em 0pt
1197 [ ( /^margin$/ ), emptyMarginRegex ],
1198 [ 'text-indent', '0cm' ],
1199 [ 'page-break-before' ],
1200 [ 'tab-stops' ],
1201 [ 'display', 'none' ],
1202 removeFontStyles ? [ ( /font-?/ ) ] : null
1203 ], removeStyles ),
1204
1205 // Prefer width styles over 'width' attributes.
1206 'width' : function( value, element )
1207 {
1208 if ( element.name in dtd.$tableContent )
1209 return false;
1210 },
1211 // Prefer border styles over table 'border' attributes.
1212 'border' : function( value, element )
1213 {
1214 if ( element.name in dtd.$tableContent )
1215 return false;
1216 },
1217
1218 // Only Firefox carry style sheet from MS-Word, which
1219 // will be applied by us manually. For other browsers
1220 // the css className is useless.
1221 'class' : falsyFilter,
1222
1223 // MS-Word always generate 'background-color' along with 'bgcolor',
1224 // simply drop the deprecated attributes.
1225 'bgcolor' : falsyFilter,
1226
1227 // Deprecate 'valign' attribute in favor of 'vertical-align'.
1228 'valign' : removeStyles ? falsyFilter : function( value, element )
1229 {
1230 element.addStyle( 'vertical-align', value );
1231 return false;
1232 }
1233 },
1234
// For non-IE browsers, some useful data might be buried inside IE conditional
// comments. Regular expressions are used to dig it out of the comment text,
// and the result is turned into an element node which holds the desired data.
// On IE the rule is simply falsyFilter.
comment :
	!CKEDITOR.env.ie ?
		function( value, node )
		{
			var imageInfo = value.match( /<img.*?>/ ),
				listInfo = value.match( /^\[if !supportLists\]([\s\S]*?)\[endif\]$/ );

			// Seek for list bullet indicator.
			if ( listInfo )
			{
				// Bullet symbol could be either text or an image ('l.' stands in for an image).
				// NOTE(review): 'listType' is null whenever the symbol is not wrapped
				// in '>...<' - confirm that createListBulletMarker accepts that.
				var listSymbol = listInfo[ 1 ] || ( imageInfo && 'l.' ),
					listType = listSymbol && listSymbol.match( />(?:[(]?)([^\s]+?)([.)]?)</ );
				return createListBulletMarker( listType, listSymbol );
			}

			// Reveal the <img> element in conditional comments for Firefox.
			if ( CKEDITOR.env.gecko && imageInfo )
			{
				var img = CKEDITOR.htmlParser.fragment.fromHtml( imageInfo[ 0 ] ).children[ 0 ],
					previousComment = node.previous,
					// The previous sibling is not necessarily a comment or text node,
					// only nodes carrying a string 'value' can be searched.
					previousText = previousComment && typeof previousComment.value == 'string' ?
						previousComment.value : '',
					// Try to dig the real image link from the vml markup in the previous comment text.
					imgSrcInfo = previousText.match( /<v:imagedata[^>]*o:href=['"](.*?)['"]/ ),
					imgSrc = imgSrcInfo && imgSrcInfo[ 1 ];

				// Is there a real 'src' url to be used?
				imgSrc && ( img.attributes.src = imgSrc );
				return img;
			}

			return false;
		}
	: falsyFilter
1271 };
1272 }
1273 });
1274
// A cut-down stand-in for the html data processor, used for pasted data only:
// it owns a single data filter and knows how to turn input into filtered html.
var pasteProcessor = function()
{
	var filter = new CKEDITOR.htmlParser.filter();
	this.dataFilter = filter;
};

pasteProcessor.prototype =
{
	// Parses 'data' into a fragment, writes it out through this processor's
	// data filter and returns the html collected by the writer.
	toHtml : function( data )
	{
		var parser = CKEDITOR.htmlParser;

		var parsed = parser.fragment.fromHtml( data, false );
		var output = new parser.basicWriter();

		parsed.writeHtml( output, this.dataFilter );

		return output.getHtml( true );
	}
};
1292
/**
 * Cleans up html pasted from MS Word by running it through a paste
 * processor loaded with the paste-from-word filter rules, followed by a
 * few string based fix-ups that the filter rules cannot deliver.
 *
 * Fires 'beforeCleanWord' on the editor with { filter : dataFilter } so
 * that the filter rules can be extended. When filtering throws, the
 * editor's 'pastefromword.error' message is alerted and the data goes on
 * to the string fix-ups unfiltered.
 *
 * @param {String} data The pasted html.
 * @param {CKEDITOR.editor} editor The editor receiving the paste.
 * @returns {String} The cleaned up html.
 */
CKEDITOR.cleanWord = function( data, editor )
{
	// Firefox will be confused by conditional comments written as
	// <!--[if ...]-->...<!--[endif]-->, so each of them is merged into one
	// single comment first: <!--[if ...]...[endif]-->.
	if ( CKEDITOR.env.gecko )
		data = data.replace( /(<!--\[if[^<]*?\])-->([\S\s]*?)<!--(\[endif\]-->)/gi, '$1$2$3' );

	var dataProcessor = new pasteProcessor(),
		dataFilter = dataProcessor.dataFilter;

	// These rules will have higher priorities than default ones.
	dataFilter.addRules( CKEDITOR.plugins.pastefromword.getRules( editor ) );

	// Allow extending data filter rules.
	editor.fire( 'beforeCleanWord', { filter : dataFilter } );

	try
	{
		data = dataProcessor.toHtml( data );
	}
	catch ( e )
	{
		alert( editor.lang.pastefromword.error );
	}

	/* Below is the post processing of things that cannot be delivered by filter rules. */

	// Remove 'cke' namespaced attributes used in filter rules as markers.
	data = data.replace( /cke:.*?".*?"/g, '' );

	// Remove empty style attributes.
	data = data.replace( /style=""/g, '' );

	// Remove the dummy spans ( having no inline style ) together with their
	// own closing tags. Open spans are tracked on a stack so that each
	// '</span>' is paired with the right opener. Dropping only the opener
	// would leave a stray closer that ends an enclosing styled span too early.
	var openSpans = [];
	data = data.replace( /<span(?:\s[^>]*)?>|<\/span>/g, function( tag )
		{
			if ( tag == '</span>' )
				return openSpans.pop() ? '' : tag;

			var isDummy = ( tag == '<span>' );
			openSpans.push( isDummy );
			return isDummy ? '' : tag;
		});

	return data;
};
1332 })();
1333
1334 /**
1335 * Whether to ignore all font related formatting styles, including:
1336 * <ul> <li>font size;</li>
1337 * <li>font family;</li>
1338 * <li>font foreground/background color.</li></ul>
1339 * @name CKEDITOR.config.pasteFromWordRemoveFontStyles
1340 * @since 3.1
1341 * @type Boolean
1342 * @default true
1343 * @example
1344 * config.pasteFromWordRemoveFontStyles = false;
1345 */
1346
1347 /**
1348 * Whether to transform MS Word outline numbered headings into lists.
1349 * @name CKEDITOR.config.pasteFromWordNumberedHeadingToList
1350 * @since 3.1
1351 * @type Boolean
1352 * @default false
1353 * @example
1354 * config.pasteFromWordNumberedHeadingToList = true;
1355 */
1356
1357 /**
1358 * Whether to remove element styles that can't be managed with the editor. Note
1359 * that this doesn't handle the font-specific styles, which depend on the
1360 * {@link CKEDITOR.config.pasteFromWordRemoveFontStyles} setting instead.
1361 * @name CKEDITOR.config.pasteFromWordRemoveStyles
1362 * @since 3.1
1363 * @type Boolean
1364 * @default true
1365 * @example
1366 * config.pasteFromWordRemoveStyles = false;
1367 */