User:Cacycle/diff.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
This user script seems to have a documentation page at User:Cacycle/diff. |
// <syntaxhighlight lang="JavaScript">
// ==UserScript==
// @name wikEd diff
// @version 1.2.4
// @date October 23, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
// @source https://en.wikipedia.org/wiki/User:Cacycle/diff.js
// @author Cacycle (https://en.wikipedia.org/wiki/User:Cacycle)
// @license released into the public domain
// ==/UserScript==
/**
* wikEd diff: inline-style difference engine with block move support
*
* Improved JavaScript diff library that returns html/css-formatted new text version with
* highlighted deletions, insertions, and block moves. It is compatible with all browsers and is
* not dependent on external libraries.
*
* WikEdDiff.php and the JavaScript library wikEd diff are synced one-to-one ports. Changes and
* fixes are to be applied to both versions.
*
* JavaScript library (mirror): https://en.wikipedia.org/wiki/User:Cacycle/diff
* JavaScript online tool: http://cacycle.altervista.org/wikEd-diff-tool.html
* MediaWiki extension: https://www.mediawiki.org/wiki/Extension:wikEdDiff
*
* This difference engine applies a word-based algorithm that uses unique words as anchor points
* to identify matching text and moved blocks (Paul Heckel: A technique for isolating differences
* between files. Communications of the ACM 21(4):264 (1978)).
*
* Additional features:
*
* - Visual inline style, changes are shown in a single output text
* - Block move detection and highlighting
* - Resolution down to characters level
* - Unicode and multilingual support
* - Stepwise split (paragraphs, lines, sentences, words, characters)
* - Recursive diff
* - Optimized code for resolving unmatched sequences
* - Minimization of length of moved blocks
* - Alignment of ambiguous unmatched sequences to next line break or word border
* - Clipping of unchanged irrelevant parts from the output (optional)
* - Fully customizable
* - Text split optimized for MediaWiki source texts
* - Well commented and documented code
*
* Datastructures (abbreviations from publication):
*
* class WikEdDiffText: diff text object (new or old version)
* .text text of version
* .words[] word count table
* .first index of first token in tokens list
* .last index of last token in tokens list
*
* .tokens[]: token list for new or old string (doubly-linked list) (N and O)
* .prev previous list item
* .next next list item
* .token token string
* .link index of corresponding token in new or old text (OA and NA)
* .number list enumeration number
* .unique token is unique word in text
*
* class WikEdDiff: diff object
* .config[]: configuration settings, see top of code for customization options
* .regExp[]: all regular expressions
* .split regular expressions used for splitting text into tokens
* .htmlCode HTML code fragments used for creating the output
* .msg output messages
* .newText new text
* .oldText old text
* .maxWords word count of longest linked block
* .html diff html
* .error flag: result has not passed unit tests
* .bordersDown[] linked region borders downwards, [new index, old index]
* .bordersUp[] linked region borders upwards, [new index, old index]
* .symbols: symbols table for whole text at all refinement levels
* .token[] hash table of parsed tokens for passes 1 - 3, points to symbol[i]
* .symbol[]: array of objects that hold token counters and pointers:
* .newCount new text token counter (NC)
* .oldCount old text token counter (OC)
* .newToken token index in text.newText.tokens
* .oldToken token index in text.oldText.tokens
* .linked flag: at least one unique token pair has been linked
*
* .blocks[]: array, block data (consecutive text tokens) in new text order
* .oldBlock number of block in old text order
* .newBlock number of block in new text order
* .oldNumber old text token number of first token
* .newNumber new text token number of first token
* .oldStart old text token index of first token
* .count number of tokens
* .unique contains unique linked token
* .words word count
* .chars char length
* .type '=', '-', '+', '|' (same, deletion, insertion, mark)
* .section section number
* .group group number of block
* .fixed belongs to a fixed (not moved) group
* .moved moved block group number corresponding with mark block
* .text text of block tokens
*
* .sections[]: array, block sections with no block move crosses outside a section
* .blockStart first block in section
* .blockEnd last block in section
* .groups[]: array, section blocks that are consecutive in old text order
* .oldNumber first block oldNumber
* .blockStart first block index
* .blockEnd last block index
* .unique contains unique linked token
* .maxWords word count of longest linked block
* .words word count
* .chars char count
* .fixed not moved from original position
* .movedFrom group position this group has been moved from
* .color color number of moved group
*
* .fragments[]: diff fragment list ready for markup, abstraction layer for customization
* .text block or mark text
* .color moved block or mark color number
* .type '=', '-', '+' same, deletion, insertion
* '<', '>' mark left, mark right
* '(<', '(>', ')' block start and end
* '~', ' ~', '~ ' omission indicators
* '[', ']', ',' fragment start and end, fragment separator
* '{', '}' container start and end
*
*/
// JSHint options
/* jshint -W004, -W100, newcap: true, browser: true, jquery: true, sub: true, bitwise: true,
curly: true, evil: true, forin: true, freeze: true, globalstrict: true, immed: true,
latedef: true, loopfunc: true, quotmark: single, strict: true, undef: true */
/* global console */
// Turn on ECMAScript 5 strict mode
'use strict';
/** Define global objects. */
var wikEdDiffConfig;
var WED;
/**
* wikEd diff main class.
*
* @class WikEdDiff
*/
var WikEdDiff = function () {
/** @var array config Configuration and customization settings. */
this.config = {
/** Core diff settings (with default values). */
/**
* @var bool config.fullDiff
* Show complete un-clipped diff text (false)
*/
'fullDiff': false,
/**
* @var bool config.showBlockMoves
* Enable block move layout with highlighted blocks and marks at the original positions (true)
*/
'showBlockMoves': true,
/**
* @var bool config.charDiff
* Enable character-refined diff (true)
*/
'charDiff': true,
/**
* @var bool config.repeatedDiff
* Enable repeated diff to resolve problematic sequences (true)
*/
'repeatedDiff': true,
/**
* @var bool config.recursiveDiff
* Enable recursive diff to resolve problematic sequences (true)
*/
'recursiveDiff': true,
/**
* @var int config.recursionMax
* Maximum recursion depth (10)
*/
'recursionMax': 10,
/**
* @var bool config.unlinkBlocks
* Reject blocks if they are too short and their words are not unique,
* prevents fragmentated diffs for very different versions (true)
*/
'unlinkBlocks': true,
/**
* @var int config.unlinkMax
* Maximum number of rejection cycles (5)
*/
'unlinkMax': 5,
/**
* @var int config.blockMinLength
* Reject blocks if shorter than this number of real words (3)
*/
'blockMinLength': 3,
/**
* @var bool config.coloredBlocks
* Display blocks in differing colors (rainbow color scheme) (false)
*/
'coloredBlocks': false,
/**
* @var bool config.coloredBlocks
* Do not use UniCode block move marks (legacy browsers) (false)
*/
'noUnicodeSymbols': false,
/**
* @var bool config.stripTrailingNewline
* Strip trailing newline off of texts (true in .js, false in .php)
*/
'stripTrailingNewline': true,
/**
* @var bool config.debug
* Show debug infos and stats (block, group, and fragment data) in debug console (false)
*/
'debug': false,
/**
* @var bool config.timer
* Show timing results in debug console (false)
*/
'timer': false,
/**
* @var bool config.unitTesting
* Run unit tests to prove correct working, display results in debug console (false)
*/
'unitTesting': false,
/** RegExp character classes. */
// UniCode letter support for regexps
// From http://xregexp.com/addons/unicode/unicode-base.js v1.0.0
'regExpLetters':
'a-zA-Z0-9' + (
'00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-' +
'037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA' +
'05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-' +
'07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D' +
'09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE' +
'09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A39' +
'0A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE0' +
'0AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B83' +
'0B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C' +
'0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-' +
'0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-' +
'0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E87' +
'0E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC4' +
'0EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066' +
'106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D' +
'1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-1315' +
'1318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-' +
'17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C' +
'1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA0' +
'1BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F15' +
'1F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC4' +
'1FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-2113' +
'21152119-211D212421262128212A-212D212F-2139213C-213F2145-2149214E218321842C00-2C2E2C30-' +
'2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE' +
'2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-' +
'3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCC' +
'A000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-' +
'A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3' +
'A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-' +
'AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-' +
'AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9' +
'FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3D' +
'FD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-' +
'FFCFFFD2-FFD7FFDA-FFDC'
).replace( /(\w{4})/g, '\\u$1' ),
// New line characters without and with \n and \r
'regExpNewLines': '\\u0085\\u2028',
'regExpNewLinesAll': '\\n\\r\\u0085\\u2028',
// Breaking white space characters without \n, \r, and \f
'regExpBlanks': ' \\t\\x0b\\u2000-\\u200b\\u202f\\u205f\\u3000',
// Full stops without '.'
'regExpFullStops':
'\\u0589\\u06D4\\u0701\\u0702\\u0964\\u0DF4\\u1362\\u166E\\u1803\\u1809' +
'\\u2CF9\\u2CFE\\u2E3C\\u3002\\uA4FF\\uA60E\\uA6F3\\uFE52\\uFF0E\\uFF61',
// New paragraph characters without \n and \r
'regExpNewParagraph': '\\f\\u2029',
// Exclamation marks without '!'
'regExpExclamationMarks':
'\\u01C3\\u01C3\\u01C3\\u055C\\u055C\\u07F9\\u1944\\u1944' +
'\\u203C\\u203C\\u2048\\u2048\\uFE15\\uFE57\\uFF01',
// Question marks without '?'
'regExpQuestionMarks':
'\\u037E\\u055E\\u061F\\u1367\\u1945\\u2047\\u2049' +
'\\u2CFA\\u2CFB\\u2E2E\\uA60F\\uA6F7\\uFE56\\uFF1F',
/** Clip settings. */
// Find clip position: characters from right
'clipHeadingLeft': 1500,
'clipParagraphLeftMax': 1500,
'clipParagraphLeftMin': 500,
'clipLineLeftMax': 1000,
'clipLineLeftMin': 500,
'clipBlankLeftMax': 1000,
'clipBlankLeftMin': 500,
'clipCharsLeft': 500,
// Find clip position: characters from right
'clipHeadingRight': 1500,
'clipParagraphRightMax': 1500,
'clipParagraphRightMin': 500,
'clipLineRightMax': 1000,
'clipLineRightMin': 500,
'clipBlankRightMax': 1000,
'clipBlankRightMin': 500,
'clipCharsRight': 500,
// Maximum number of lines to search for clip position
'clipLinesRightMax': 10,
'clipLinesLeftMax': 10,
// Skip clipping if ranges are too close
'clipSkipLines': 5,
'clipSkipChars': 1000,
// Css stylesheet
'cssMarkLeft': '◀',
'cssMarkRight': '▶',
'stylesheet':
// Insert
'.wikEdDiffInsert {' +
'font-weight: bold; background-color: #bbddff; ' +
'color: #222; border-radius: 0.25em; padding: 0.2em 1px; ' +
'} ' +
'.wikEdDiffInsertBlank { background-color: #66bbff; } ' +
'.wikEdDiffFragment:hover .wikEdDiffInsertBlank { background-color: #bbddff; } ' +
// Delete
'.wikEdDiffDelete {' +
'font-weight: bold; background-color: #ffe49c; ' +
'color: #222; border-radius: 0.25em; padding: 0.2em 1px; ' +
'} ' +
'.wikEdDiffDeleteBlank { background-color: #ffd064; } ' +
'.wikEdDiffFragment:hover .wikEdDiffDeleteBlank { background-color: #ffe49c; } ' +
// Block
'.wikEdDiffBlock {' +
'font-weight: bold; background-color: #e8e8e8; ' +
'border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; ' +
'} ' +
'.wikEdDiffBlock { color: #000; } ' +
'.wikEdDiffBlock0 { background-color: #ffff80; } ' +
'.wikEdDiffBlock1 { background-color: #d0ff80; } ' +
'.wikEdDiffBlock2 { background-color: #ffd8f0; } ' +
'.wikEdDiffBlock3 { background-color: #c0ffff; } ' +
'.wikEdDiffBlock4 { background-color: #fff888; } ' +
'.wikEdDiffBlock5 { background-color: #bbccff; } ' +
'.wikEdDiffBlock6 { background-color: #e8c8ff; } ' +
'.wikEdDiffBlock7 { background-color: #ffbbbb; } ' +
'.wikEdDiffBlock8 { background-color: #a0e8a0; } ' +
'.wikEdDiffBlockHighlight {' +
'background-color: #777; color: #fff; ' +
'border: solid #777; border-width: 1px 0; ' +
'} ' +
// Mark
'.wikEdDiffMarkLeft, .wikEdDiffMarkRight {' +
'font-weight: bold; background-color: #ffe49c; ' +
'color: #666; border-radius: 0.25em; padding: 0.2em; margin: 0 1px; ' +
'} ' +
'.wikEdDiffMarkLeft:before { content: "{cssMarkLeft}"; } ' +
'.wikEdDiffMarkRight:before { content: "{cssMarkRight}"; } ' +
'.wikEdDiffMarkLeft.wikEdDiffNoUnicode:before { content: "<"; } ' +
'.wikEdDiffMarkRight.wikEdDiffNoUnicode:before { content: ">"; } ' +
'.wikEdDiffMark { background-color: #e8e8e8; color: #666; } ' +
'.wikEdDiffMark0 { background-color: #ffff60; } ' +
'.wikEdDiffMark1 { background-color: #c8f880; } ' +
'.wikEdDiffMark2 { background-color: #ffd0f0; } ' +
'.wikEdDiffMark3 { background-color: #a0ffff; } ' +
'.wikEdDiffMark4 { background-color: #fff860; } ' +
'.wikEdDiffMark5 { background-color: #b0c0ff; } ' +
'.wikEdDiffMark6 { background-color: #e0c0ff; } ' +
'.wikEdDiffMark7 { background-color: #ffa8a8; } ' +
'.wikEdDiffMark8 { background-color: #98e898; } ' +
'.wikEdDiffMarkHighlight { background-color: #777; color: #fff; } ' +
// Wrappers
'.wikEdDiffContainer { } ' +
'.wikEdDiffFragment {' +
'white-space: pre-wrap; background-color: var(--background-color-base, #fff); border: #bbb solid; ' +
'border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; ' +
'font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0; ' +
'} ' +
'.wikEdDiffNoChange { background: #f0f0f0; border: 1px #bbb solid; border-radius: 0.5em; ' +
'line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em; margin: 1em 0; ' +
'text-align: center; ' +
'} ' +
'.wikEdDiffSeparator { margin-bottom: 1em; } ' +
'.wikEdDiffOmittedChars { } ' +
// Newline
'.wikEdDiffNewline:before { content: "¶"; color: transparent; } ' +
'.wikEdDiffBlock:hover .wikEdDiffNewline:before { color: #aaa; } ' +
'.wikEdDiffBlockHighlight .wikEdDiffNewline:before { color: transparent; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffNewline:before { color: #ccc; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffInsert .wikEdDiffNewline:before, ' +
'.wikEdDiffInsert:hover .wikEdDiffNewline:before' +
'{ color: #999; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffDelete .wikEdDiffNewline:before, ' +
'.wikEdDiffDelete:hover .wikEdDiffNewline:before' +
'{ color: #aaa; } ' +
// Tab
'.wikEdDiffTab { position: relative; } ' +
'.wikEdDiffTabSymbol { position: absolute; top: -0.2em; } ' +
'.wikEdDiffTabSymbol:before { content: "→"; font-size: smaller; color: #ccc; } ' +
'.wikEdDiffBlock .wikEdDiffTabSymbol:before { color: #aaa; } ' +
'.wikEdDiffBlockHighlight .wikEdDiffTabSymbol:before { color: #aaa; } ' +
'.wikEdDiffInsert .wikEdDiffTabSymbol:before { color: #aaa; } ' +
'.wikEdDiffDelete .wikEdDiffTabSymbol:before { color: #bbb; } ' +
// Space
'.wikEdDiffSpace { position: relative; } ' +
'.wikEdDiffSpaceSymbol { position: absolute; top: -0.2em; left: -0.05em; } ' +
'.wikEdDiffSpaceSymbol:before { content: "·"; color: transparent; } ' +
'.wikEdDiffBlock:hover .wikEdDiffSpaceSymbol:before { color: #999; } ' +
'.wikEdDiffBlockHighlight .wikEdDiffSpaceSymbol:before { color: transparent; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffSpaceSymbol:before { color: #ddd; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffInsert .wikEdDiffSpaceSymbol:before,' +
'.wikEdDiffInsert:hover .wikEdDiffSpaceSymbol:before ' +
'{ color: #888; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffDelete .wikEdDiffSpaceSymbol:before,' +
'.wikEdDiffDelete:hover .wikEdDiffSpaceSymbol:before ' +
'{ color: #999; } ' +
// Error
'.wikEdDiffError .wikEdDiffFragment,' +
'.wikEdDiffError .wikEdDiffNoChange' +
'{ background: #faa; }'
};
/** Add regular expressions to configuration settings. */
this.config.regExp = {
// RegExps for splitting text
'split': {
// Split into paragraphs, after double newlines
'paragraph': new RegExp(
'(\\r\\n|\\n|\\r){2,}|[' +
this.config.regExpNewParagraph +
']',
'g'
),
// Split into lines
'line': new RegExp(
'\\r\\n|\\n|\\r|[' +
this.config.regExpNewLinesAll +
']',
'g'
),
// Split into sentences /[^ ].*?[.!?:;]+(?= |$)/
'sentence': new RegExp(
'[^' +
this.config.regExpBlanks +
'].*?[.!?:;' +
this.config.regExpFullStops +
this.config.regExpExclamationMarks +
this.config.regExpQuestionMarks +
']+(?=[' +
this.config.regExpBlanks +
']|$)',
'g'
),
// Split into inline chunks
'chunk': new RegExp(
'\\[\\[[^\\[\\]\\n]+\\]\\]|' + // [[wiki link]]
'\\{\\{[^\\{\\}\\n]+\\}\\}|' + // {{template}}
'\\[[^\\[\\]\\n]+\\]|' + // [ext. link]
'<\\/?[^<>\\[\\]\\{\\}\\n]+>|' + // <html>
'\\[\\[[^\\[\\]\\|\\n]+\\]\\]\\||' + // [[wiki link|
'\\{\\{[^\\{\\}\\|\\n]+\\||' + // {{template|
'\\b((https?:|)\\/\\/)[^\\x00-\\x20\\s"\\[\\]\\x7f]+', // link
'g'
),
// Split into words, multi-char markup, and chars
// regExpLetters speed-up: \\w+
'word': new RegExp(
'(\\w+|[_' +
this.config.regExpLetters +
'])+([\'’][_' +
this.config.regExpLetters +
']*)*|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.',
'g'
),
// Split into chars
'character': /./g
},
// RegExp to detect blank tokens
'blankOnlyToken': new RegExp(
'[^' +
this.config.regExpBlanks +
this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']'
),
// RegExps for sliding gaps: newlines and space/word breaks
'slideStop': new RegExp(
'[' +
this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']$'
),
'slideBorder': new RegExp(
'[' +
this.config.regExpBlanks +
']$'
),
// RegExps for counting words
'countWords': new RegExp(
'(\\w+|[_' +
this.config.regExpLetters +
'])+([\'’][_' +
this.config.regExpLetters +
']*)*',
'g'
),
'countChunks': new RegExp(
'\\[\\[[^\\[\\]\\n]+\\]\\]|' + // [[wiki link]]
'\\{\\{[^\\{\\}\\n]+\\}\\}|' + // {{template}}
'\\[[^\\[\\]\\n]+\\]|' + // [ext. link]
'<\\/?[^<>\\[\\]\\{\\}\\n]+>|' + // <html>
'\\[\\[[^\\[\\]\\|\\n]+\\]\\]\\||' + // [[wiki link|
'\\{\\{[^\\{\\}\\|\\n]+\\||' + // {{template|
'\\b((https?:|)\\/\\/)[^\\x00-\\x20\\s"\\[\\]\\x7f]+', // link
'g'
),
// RegExp detecting blank-only and single-char blocks
'blankBlock': /^([^\t\S]+|[^\t])$/,
// RegExps for clipping
'clipLine': new RegExp(
'[' + this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']+',
'g'
),
'clipHeading': new RegExp(
'( ^|\\n)(==+.+?==+|\\{\\||\\|\\}).*?(?=\\n|$)', 'g' ),
'clipParagraph': new RegExp(
'( (\\r\\n|\\n|\\r){2,}|[' +
this.config.regExpNewParagraph +
'])+',
'g'
),
'clipBlank': new RegExp(
'[' +
this.config.regExpBlanks + ']+',
'g'
),
'clipTrimNewLinesLeft': new RegExp(
'[' +
this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']+$',
'g'
),
'clipTrimNewLinesRight': new RegExp(
'^[' +
this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']+',
'g'
),
'clipTrimBlanksLeft': new RegExp(
'[' +
this.config.regExpBlanks +
this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']+$',
'g'
),
'clipTrimBlanksRight': new RegExp(
'^[' +
this.config.regExpBlanks +
this.config.regExpNewLinesAll +
this.config.regExpNewParagraph +
']+',
'g'
)
};
/** Add messages to configuration settings. */
this.config.msg = {
'wiked-diff-empty': '(No difference)',
'wiked-diff-same': '=',
'wiked-diff-ins': '+',
'wiked-diff-del': '-',
'wiked-diff-block-left': '◀',
'wiked-diff-block-right': '▶',
'wiked-diff-block-left-nounicode': '<',
'wiked-diff-block-right-nounicode': '>',
'wiked-diff-error': 'Error: diff not consistent with versions!'
};
/**
* Add output html fragments to configuration settings.
* Dynamic replacements:
* {number}: class/color/block/mark/id number
* {title}: title attribute (popup)
* {nounicode}: noUnicodeSymbols fallback
*/
this.config.htmlCode = {
'noChangeStart':
'<div class="wikEdDiffNoChange" title="' +
this.config.msg['wiked-diff-same'] +
'">',
'noChangeEnd': '</div>',
'containerStart': '<div class="wikEdDiffContainer" id="wikEdDiffContainer">',
'containerEnd': '</div>',
'fragmentStart': '<pre class="wikEdDiffFragment" style="white-space: pre-wrap;">',
'fragmentEnd': '</pre>',
'separator': '<div class="wikEdDiffSeparator"></div>',
'insertStart':
'<span class="wikEdDiffInsert" title="' +
this.config.msg['wiked-diff-ins'] +
'">',
'insertStartBlank':
'<span class="wikEdDiffInsert wikEdDiffInsertBlank" title="' +
this.config.msg['wiked-diff-ins'] +
'">',
'insertEnd': '</span>',
'deleteStart':
'<span class="wikEdDiffDelete" title="' +
this.config.msg['wiked-diff-del'] +
'">',
'deleteStartBlank':
'<span class="wikEdDiffDelete wikEdDiffDeleteBlank" title="' +
this.config.msg['wiked-diff-del'] +
'">',
'deleteEnd': '</span>',
'blockStart':
'<span class="wikEdDiffBlock"' +
'title="{title}" id="wikEdDiffBlock{number}"' +
'onmouseover="wikEdDiffBlockHandler(undefined, this, \'mouseover\');">',
'blockColoredStart':
'<span class="wikEdDiffBlock wikEdDiffBlock wikEdDiffBlock{number}"' +
'title="{title}" id="wikEdDiffBlock{number}"' +
'onmouseover="wikEdDiffBlockHandler(undefined, this, \'mouseover\');">',
'blockEnd': '</span>',
'markLeft':
'<span class="wikEdDiffMarkLeft{nounicode}"' +
'title="{title}" id="wikEdDiffMark{number}"' +
'onmouseover="wikEdDiffBlockHandler(undefined, this, \'mouseover\');"></span>',
'markLeftColored':
'<span class="wikEdDiffMarkLeft{nounicode} wikEdDiffMark wikEdDiffMark{number}"' +
'title="{title}" id="wikEdDiffMark{number}"' +
'onmouseover="wikEdDiffBlockHandler(undefined, this, \'mouseover\');"></span>',
'markRight':
'<span class="wikEdDiffMarkRight{nounicode}"' +
'title="{title}" id="wikEdDiffMark{number}"' +
'onmouseover="wikEdDiffBlockHandler(undefined, this, \'mouseover\');"></span>',
'markRightColored':
'<span class="wikEdDiffMarkRight{nounicode} wikEdDiffMark wikEdDiffMark{number}"' +
'title="{title}" id="wikEdDiffMark{number}"' +
'onmouseover="wikEdDiffBlockHandler(undefined, this, \'mouseover\');"></span>',
'newline': '<span class="wikEdDiffNewline">\n</span>',
'tab': '<span class="wikEdDiffTab"><span class="wikEdDiffTabSymbol"></span>\t</span>',
'space': '<span class="wikEdDiffSpace"><span class="wikEdDiffSpaceSymbol"></span> </span>',
'omittedChars': '<span class="wikEdDiffOmittedChars">…</span>',
'errorStart': '<div class="wikEdDiffError" title="Error: diff not consistent with versions!">',
'errorEnd': '</div>'
};
/*
* Add JavaScript event handler function to configuration settings
* Highlights corresponding block and mark elements on hover and jumps between them on click
* Code for use in non-jQuery environments and legacy browsers (at least IE 8 compatible)
*
* @option Event|undefined event Browser event if available
* @option element Node DOM node
* @option type string Event type
*/
this.config.blockHandler = function ( event, element, type ) {
// IE compatibility
if ( event === undefined && window.event !== undefined ) {
event = window.event;
}
// Get mark/block elements
var number = element.id.replace( /\D/g, '' );
var block = document.getElementById( 'wikEdDiffBlock' + number );
var mark = document.getElementById( 'wikEdDiffMark' + number );
if ( block === null || mark === null ) {
return;
}
// Highlight corresponding mark/block pairs
if ( type === 'mouseover' ) {
element.onmouseover = null;
element.onmouseout = function ( event ) {
window.wikEdDiffBlockHandler( event, element, 'mouseout' );
};
element.onclick = function ( event ) {
window.wikEdDiffBlockHandler( event, element, 'click' );
};
block.className += ' wikEdDiffBlockHighlight';
mark.className += ' wikEdDiffMarkHighlight';
}
// Remove mark/block highlighting
if ( type === 'mouseout' || type === 'click' ) {
element.onmouseout = null;
element.onmouseover = function ( event ) {
window.wikEdDiffBlockHandler( event, element, 'mouseover' );
};
// Reset, allow outside container (e.g. legend)
if ( type !== 'click' ) {
block.className = block.className.replace( / wikEdDiffBlockHighlight/g, '' );
mark.className = mark.className.replace( / wikEdDiffMarkHighlight/g, '' );
// GetElementsByClassName
var container = document.getElementById( 'wikEdDiffContainer' );
if ( container !== null ) {
var spans = container.getElementsByTagName( 'span' );
var spansLength = spans.length;
for ( var i = 0; i < spansLength; i ++ ) {
if ( spans[i] !== block && spans[i] !== mark ) {
if ( spans[i].className.indexOf( ' wikEdDiffBlockHighlight' ) !== -1 ) {
spans[i].className = spans[i].className.replace( / wikEdDiffBlockHighlight/g, '' );
}
else if ( spans[i].className.indexOf( ' wikEdDiffMarkHighlight') !== -1 ) {
spans[i].className = spans[i].className.replace( / wikEdDiffMarkHighlight/g, '' );
}
}
}
}
}
}
// Scroll to corresponding mark/block element
if ( type === 'click' ) {
// Get corresponding element
var corrElement;
if ( element === block ) {
corrElement = mark;
}
else {
corrElement = block;
}
// Get element height (getOffsetTop)
var corrElementPos = 0;
var node = corrElement;
do {
corrElementPos += node.offsetTop;
} while ( ( node = node.offsetParent ) !== null );
// Get scroll height
var top;
if ( window.pageYOffset !== undefined ) {
top = window.pageYOffset;
}
else {
top = document.documentElement.scrollTop;
}
// Get cursor pos
var cursor;
if ( event.pageY !== undefined ) {
cursor = event.pageY;
}
else if ( event.clientY !== undefined ) {
cursor = event.clientY + top;
}
// Get line height
var line = 12;
if ( window.getComputedStyle !== undefined ) {
line = parseInt( window.getComputedStyle( corrElement ).getPropertyValue( 'line-height' ) );
}
// Scroll element under mouse cursor
window.scroll( 0, corrElementPos + top - cursor + line / 2 );
}
return;
};
/** Internal data structures. */
/** @var WikEdDiffText newText New text version object with text and token list */
this.newText = null;
/** @var WikEdDiffText oldText Old text version object with text and token list */
this.oldText = null;
/** @var object symbols Symbols table for whole text at all refinement levels */
this.symbols = {
token: [],
hashTable: {},
linked: false
};
/** @var array bordersDown Matched region borders downwards */
this.bordersDown = [];
/** @var array bordersUp Matched region borders upwards */
this.bordersUp = [];
/** @var array blocks Block data (consecutive text tokens) in new text order */
this.blocks = [];
/** @var int maxWords Maximal detected word count of all linked blocks */
this.maxWords = 0;
/** @var array groups Section blocks that are consecutive in old text order */
this.groups = [];
/** @var array sections Block sections with no block move crosses outside a section */
this.sections = [];
/** @var object timer Debug timer array: string 'label' => float milliseconds. */
this.timer = {};
/** @var array recursionTimer Count time spent in recursion level in milliseconds. */
this.recursionTimer = [];
/** Output data. */
/** @var bool error Unit tests have detected a diff error */
this.error = false;
/** @var array fragments Diff fragment list for markup, abstraction layer for customization */
this.fragments = [];
/** @var string html Html code of diff */
this.html = '';
/**
* Constructor, initialize settings, load js and css.
*
* @param[in] object wikEdDiffConfig Custom customization settings
* @param[out] object config Settings
*/
this.init = function () {
// Import customizations from wikEdDiffConfig{}
if ( typeof wikEdDiffConfig === 'object' ) {
this.deepCopy( wikEdDiffConfig, this.config );
}
// Add CSS stylescheet
this.addStyleSheet( this.config.stylesheet );
// Load block handler script
if ( this.config.showBlockMoves === true ) {
// Add block handler to head if running under Greasemonkey
if ( typeof GM_info === 'object' ) {
var script = 'var wikEdDiffBlockHandler = ' + this.config.blockHandler.toString() + ';';
this.addScript( script );
}
else {
window.wikEdDiffBlockHandler = this.config.blockHandler;
}
}
return;
};
/**
* Main diff method.
*
* @param string oldString Old text version
* @param string newString New text version
* @param[out] array fragment
* Diff fragment list ready for markup, abstraction layer for customized diffs
* @param[out] string html Html code of diff
* @return string Html code of diff
*/
this.diff = function ( oldString, newString ) {
// Start total timer
if ( this.config.timer === true ) {
this.time( 'total' );
}
// Start diff timer
if ( this.config.timer === true ) {
this.time( 'diff' );
}
// Reset error flag
this.error = false;
// Strip trailing newline (.js only)
if ( this.config.stripTrailingNewline === true ) {
if ( newString.substr( -1 ) === '\n' && oldString.substr( -1 === '\n' ) ) {
newString = newString.substr( 0, newString.length - 1 );
oldString = oldString.substr( 0, oldString.length - 1 );
}
}
// Load version strings into WikEdDiffText objects
this.newText = new WikEdDiff.WikEdDiffText( newString, this );
this.oldText = new WikEdDiff.WikEdDiffText( oldString, this );
// Trap trivial changes: no change
if ( this.newText.text === this.oldText.text ) {
this.html =
this.config.htmlCode.containerStart +
this.config.htmlCode.noChangeStart +
this.htmlEscape( this.config.msg['wiked-diff-empty'] ) +
this.config.htmlCode.noChangeEnd +
this.config.htmlCode.containerEnd;
return this.html;
}
// Trap trivial changes: old text deleted
if (
this.oldText.text === '' || (
this.oldText.text === '\n' &&
( this.newText.text.charAt( this.newText.text.length - 1 ) === '\n' )
)
) {
this.html =
this.config.htmlCode.containerStart +
this.config.htmlCode.fragmentStart +
this.config.htmlCode.insertStart +
this.htmlEscape( this.newText.text ) +
this.config.htmlCode.insertEnd +
this.config.htmlCode.fragmentEnd +
this.config.htmlCode.containerEnd;
return this.html;
}
// Trap trivial changes: new text deleted
if (
this.newText.text === '' || (
this.newText.text === '\n' &&
( this.oldText.text.charAt( this.oldText.text.length - 1 ) === '\n' )
)
) {
this.html =
this.config.htmlCode.containerStart +
this.config.htmlCode.fragmentStart +
this.config.htmlCode.deleteStart +
this.htmlEscape( this.oldText.text ) +
this.config.htmlCode.deleteEnd +
this.config.htmlCode.fragmentEnd +
this.config.htmlCode.containerEnd;
return this.html;
}
// Split new and old text into paragraps
if ( this.config.timer === true ) {
this.time( 'paragraph split' );
}
this.newText.splitText( 'paragraph' );
this.oldText.splitText( 'paragraph' );
if ( this.config.timer === true ) {
this.timeEnd( 'paragraph split' );
}
// Calculate diff
this.calculateDiff( 'line' );
// Refine different paragraphs into lines
if ( this.config.timer === true ) {
this.time( 'line split' );
}
this.newText.splitRefine( 'line' );
this.oldText.splitRefine( 'line' );
if ( this.config.timer === true ) {
this.timeEnd( 'line split' );
}
// Calculate refined diff
this.calculateDiff( 'line' );
// Refine different lines into sentences
if ( this.config.timer === true ) {
this.time( 'sentence split' );
}
this.newText.splitRefine( 'sentence' );
this.oldText.splitRefine( 'sentence' );
if ( this.config.timer === true ) {
this.timeEnd( 'sentence split' );
}
// Calculate refined diff
this.calculateDiff( 'sentence' );
// Refine different sentences into chunks
if ( this.config.timer === true ) {
this.time( 'chunk split' );
}
this.newText.splitRefine( 'chunk' );
this.oldText.splitRefine( 'chunk' );
if ( this.config.timer === true ) {
this.timeEnd( 'chunk split' );
}
// Calculate refined diff
this.calculateDiff( 'chunk' );
// Refine different chunks into words
if ( this.config.timer === true ) {
this.time( 'word split' );
}
this.newText.splitRefine( 'word' );
this.oldText.splitRefine( 'word' );
if ( this.config.timer === true ) {
this.timeEnd( 'word split' );
}
// Calculate refined diff information with recursion for unresolved gaps
this.calculateDiff( 'word', true );
// Slide gaps
if ( this.config.timer === true ) {
this.time( 'word slide' );
}
this.slideGaps( this.newText, this.oldText );
this.slideGaps( this.oldText, this.newText );
if ( this.config.timer === true ) {
this.timeEnd( 'word slide' );
}
// Split tokens into chars
if ( this.config.charDiff === true ) {
// Split tokens into chars in selected unresolved gaps
if ( this.config.timer === true ) {
this.time( 'character split' );
}
this.splitRefineChars();
if ( this.config.timer === true ) {
this.timeEnd( 'character split' );
}
// Calculate refined diff information with recursion for unresolved gaps
this.calculateDiff( 'character', true );
// Slide gaps
if ( this.config.timer === true ) {
this.time( 'character slide' );
}
this.slideGaps( this.newText, this.oldText );
this.slideGaps( this.oldText, this.newText );
if ( this.config.timer === true ) {
this.timeEnd( 'character slide' );
}
}
// Free memory
this.symbols = undefined;
this.bordersDown = undefined;
this.bordersUp = undefined;
this.newText.words = undefined;
this.oldText.words = undefined;
// Enumerate token lists
this.newText.enumerateTokens();
this.oldText.enumerateTokens();
// Detect moved blocks
if ( this.config.timer === true ) {
this.time( 'blocks' );
}
this.detectBlocks();
if ( this.config.timer === true ) {
this.timeEnd( 'blocks' );
}
// Free memory
this.newText.tokens = undefined;
this.oldText.tokens = undefined;
// Assemble blocks into fragment table
this.getDiffFragments();
// Free memory
this.blocks = undefined;
this.groups = undefined;
this.sections = undefined;
// Stop diff timer
if ( this.config.timer === true ) {
this.timeEnd( 'diff' );
}
// Unit tests
if ( this.config.unitTesting === true ) {
// Test diff to test consistency between input and output
if ( this.config.timer === true ) {
this.time( 'unit tests' );
}
this.unitTests();
if ( this.config.timer === true ) {
this.timeEnd( 'unit tests' );
}
}
// Clipping
if ( this.config.fullDiff === false ) {
// Clipping unchanged sections from unmoved block text
if ( this.config.timer === true ) {
this.time( 'clip' );
}
this.clipDiffFragments();
if ( this.config.timer === true ) {
this.timeEnd( 'clip' );
}
}
// Create html formatted diff code from diff fragments
if ( this.config.timer === true ) {
this.time( 'html' );
}
this.getDiffHtml();
if ( this.config.timer === true ) {
this.timeEnd( 'html' );
}
// No change
if ( this.html === '' ) {
this.html =
this.config.htmlCode.containerStart +
this.config.htmlCode.noChangeStart +
this.htmlEscape( this.config.msg['wiked-diff-empty'] ) +
this.config.htmlCode.noChangeEnd +
this.config.htmlCode.containerEnd;
}
// Add error indicator
if ( this.error === true ) {
this.html = this.config.htmlCode.errorStart + this.html + this.config.htmlCode.errorEnd;
}
// Stop total timer
if ( this.config.timer === true ) {
this.timeEnd( 'total' );
}
return this.html;
};
/**
* Split tokens into chars in the following unresolved regions (gaps):
* - One token became connected or separated by space or dash (or any token)
* - Same number of tokens in gap and strong similarity of all tokens:
* - Addition or deletion of flanking strings in tokens
* - Addition or deletion of internal string in tokens
* - Same length and at least 50 % identity
* - Same start or end, same text longer than different text
* Identical tokens including space separators will be linked,
* resulting in word-wise char-level diffs
*
* @param[in/out] WikEdDiffText newText, oldText Text object tokens list
*/
this.splitRefineChars = function () {
/** Find corresponding gaps. */
// Cycle through new text tokens list
var gaps = [];
var gap = null;
var i = this.newText.first;
var j = this.oldText.first;
while ( i !== null ) {
// Get token links
var newLink = this.newText.tokens[i].link;
var oldLink = null;
if ( j !== null ) {
oldLink = this.oldText.tokens[j].link;
}
// Start of gap in new and old
if ( gap === null && newLink === null && oldLink === null ) {
gap = gaps.length;
gaps.push( {
newFirst: i,
newLast: i,
newTokens: 1,
oldFirst: j,
oldLast: j,
oldTokens: null,
charSplit: null
} );
}
// Count chars and tokens in gap
else if ( gap !== null && newLink === null ) {
gaps[gap].newLast = i;
gaps[gap].newTokens ++;
}
// Gap ended
else if ( gap !== null && newLink !== null ) {
gap = null;
}
// Next list elements
if ( newLink !== null ) {
j = this.oldText.tokens[newLink].next;
}
i = this.newText.tokens[i].next;
}
// Cycle through gaps and add old text gap data
var gapsLength = gaps.length;
for ( var gap = 0; gap < gapsLength; gap ++ ) {
// Cycle through old text tokens list
var j = gaps[gap].oldFirst;
while (
j !== null &&
this.oldText.tokens[j] !== null &&
this.oldText.tokens[j].link === null
) {
// Count old chars and tokens in gap
gaps[gap].oldLast = j;
gaps[gap].oldTokens ++;
j = this.oldText.tokens[j].next;
}
}
/** Select gaps of identical token number and strong similarity of all tokens. */
var gapsLength = gaps.length;
for ( var gap = 0; gap < gapsLength; gap ++ ) {
var charSplit = true;
// Not same gap length
if ( gaps[gap].newTokens !== gaps[gap].oldTokens ) {
// One word became separated by space, dash, or any string
if ( gaps[gap].newTokens === 1 && gaps[gap].oldTokens === 3 ) {
var token = this.newText.tokens[ gaps[gap].newFirst ].token;
var tokenFirst = this.oldText.tokens[ gaps[gap].oldFirst ].token;
var tokenLast = this.oldText.tokens[ gaps[gap].oldLast ].token;
if (
token.indexOf( tokenFirst ) !== 0 ||
token.indexOf( tokenLast ) !== token.length - tokenLast.length
) {
continue;
}
}
else if ( gaps[gap].oldTokens === 1 && gaps[gap].newTokens === 3 ) {
var token = this.oldText.tokens[ gaps[gap].oldFirst ].token;
var tokenFirst = this.newText.tokens[ gaps[gap].newFirst ].token;
var tokenLast = this.newText.tokens[ gaps[gap].newLast ].token;
if (
token.indexOf( tokenFirst ) !== 0 ||
token.indexOf( tokenLast ) !== token.length - tokenLast.length
) {
continue;
}
}
else {
continue;
}
gaps[gap].charSplit = true;
}
// Cycle through new text tokens list and set charSplit
else {
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
while ( i !== null ) {
var newToken = this.newText.tokens[i].token;
var oldToken = this.oldText.tokens[j].token;
// Get shorter and longer token
var shorterToken;
var longerToken;
if ( newToken.length < oldToken.length ) {
shorterToken = newToken;
longerToken = oldToken;
}
else {
shorterToken = oldToken;
longerToken = newToken;
}
// Not same token length
if ( newToken.length !== oldToken.length ) {
// Test for addition or deletion of internal string in tokens
// Find number of identical chars from left
var left = 0;
while ( left < shorterToken.length ) {
if ( newToken.charAt( left ) !== oldToken.charAt( left ) ) {
break;
}
left ++;
}
// Find number of identical chars from right
var right = 0;
while ( right < shorterToken.length ) {
if (
newToken.charAt( newToken.length - 1 - right ) !==
oldToken.charAt( oldToken.length - 1 - right )
) {
break;
}
right ++;
}
// No simple insertion or deletion of internal string
if ( left + right !== shorterToken.length ) {
// Not addition or deletion of flanking strings in tokens
// Smaller token not part of larger token
if ( longerToken.indexOf( shorterToken ) === -1 ) {
// Same text at start or end shorter than different text
if ( left < shorterToken.length / 2 && (right < shorterToken.length / 2) ) {
// Do not split into chars in this gap
charSplit = false;
break;
}
}
}
}
// Same token length
else if ( newToken !== oldToken ) {
// Tokens less than 50 % identical
var ident = 0;
var tokenLength = shorterToken.length;
for ( var pos = 0; pos < tokenLength; pos ++ ) {
if ( shorterToken.charAt( pos ) === longerToken.charAt( pos ) ) {
ident ++;
}
}
if ( ident / shorterToken.length < 0.49 ) {
// Do not split into chars this gap
charSplit = false;
break;
}
}
// Next list elements
if ( i === gaps[gap].newLast ) {
break;
}
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
gaps[gap].charSplit = charSplit;
}
}
/** Refine words into chars in selected gaps. */
var gapsLength = gaps.length;
for ( var gap = 0; gap < gapsLength; gap ++ ) {
if ( gaps[gap].charSplit === true ) {
// Cycle through new text tokens list, link spaces, and split into chars
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
var newGapLength = i - gaps[gap].newLast;
var oldGapLength = j - gaps[gap].oldLast;
while ( i !== null || j !== null ) {
// Link identical tokens (spaces) to keep char refinement to words
if (
newGapLength === oldGapLength &&
this.newText.tokens[i].token === this.oldText.tokens[j].token
) {
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
}
// Refine words into chars
else {
if ( i !== null ) {
this.newText.splitText( 'character', i );
}
if ( j !== null ) {
this.oldText.splitText( 'character', j );
}
}
// Next list elements
if ( i === gaps[gap].newLast ) {
i = null;
}
if ( j === gaps[gap].oldLast ) {
j = null;
}
if ( i !== null ) {
i = this.newText.tokens[i].next;
}
if ( j !== null ) {
j = this.oldText.tokens[j].next;
}
}
}
}
return;
};
/**
* Move gaps with ambiguous identical fronts to last newline border or otherwise last word border.
*
* @param[in/out] wikEdDiffText text, textLinked These two are newText and oldText
*/
this.slideGaps = function ( text, textLinked ) {
var regExpSlideBorder = this.config.regExp.slideBorder;
var regExpSlideStop = this.config.regExp.slideStop;
// Cycle through tokens list
var i = text.first;
var gapStart = null;
while ( i !== null ) {
// Remember gap start
if ( gapStart === null && text.tokens[i].link === null ) {
gapStart = i;
}
// Find gap end
else if ( gapStart !== null && text.tokens[i].link !== null ) {
var gapFront = gapStart;
var gapBack = text.tokens[i].prev;
// Slide down as deep as possible
var front = gapFront;
var back = text.tokens[gapBack].next;
if (
front !== null &&
back !== null &&
text.tokens[front].link === null &&
text.tokens[back].link !== null &&
text.tokens[front].token === text.tokens[back].token
) {
text.tokens[front].link = text.tokens[back].link;
textLinked.tokens[ text.tokens[front].link ].link = front;
text.tokens[back].link = null;
gapFront = text.tokens[gapFront].next;
gapBack = text.tokens[gapBack].next;
front = text.tokens[front].next;
back = text.tokens[back].next;
}
// Test slide up, remember last line break or word border
var front = text.tokens[gapFront].prev;
var back = gapBack;
var gapFrontBlankTest = regExpSlideBorder.test( text.tokens[gapFront].token );
var frontStop = front;
if ( text.tokens[back].link === null ) {
while (
front !== null &&
back !== null &&
text.tokens[front].link !== null &&
text.tokens[front].token === text.tokens[back].token
) {
if ( front !== null ) {
// Stop at line break
if ( regExpSlideStop.test( text.tokens[front].token ) === true ) {
frontStop = front;
break;
}
// Stop at first word border (blank/word or word/blank)
if (
regExpSlideBorder.test( text.tokens[front].token ) !== gapFrontBlankTest ) {
frontStop = front;
}
}
front = text.tokens[front].prev;
back = text.tokens[back].prev;
}
}
// Actually slide up to stop
var front = text.tokens[gapFront].prev;
var back = gapBack;
while (
front !== null &&
back !== null &&
front !== frontStop &&
text.tokens[front].link !== null &&
text.tokens[back].link === null &&
text.tokens[front].token === text.tokens[back].token
) {
text.tokens[back].link = text.tokens[front].link;
textLinked.tokens[ text.tokens[back].link ].link = back;
text.tokens[front].link = null;
front = text.tokens[front].prev;
back = text.tokens[back].prev;
}
gapStart = null;
}
i = text.tokens[i].next;
}
return;
};
/**
* Calculate diff information, can be called repeatedly during refining.
* Links corresponding tokens from old and new text.
* Steps:
* Pass 1: parse new text into symbol table
* Pass 2: parse old text into symbol table
* Pass 3: connect unique matching tokens
* Pass 4: connect adjacent identical tokens downwards
* Pass 5: connect adjacent identical tokens upwards
* Repeat with empty symbol table (against crossed-over gaps)
* Recursively diff still unresolved regions downwards with empty symbol table
* Recursively diff still unresolved regions upwards with empty symbol table
*
* @param array symbols Symbol table object
* @param string level Split level: 'paragraph', 'line', 'sentence', 'chunk', 'word', 'character'
*
* Optionally for recursive or repeated calls:
* @param bool repeating Currently repeating with empty symbol table
* @param bool recurse Enable recursion
* @param int newStart, newEnd, oldStart, oldEnd Text object tokens indices
* @param int recursionLevel Recursion level
* @param[in/out] WikEdDiffText newText, oldText Text object, tokens list link property
*/
this.calculateDiff = function (
level,
recurse,
repeating,
newStart,
oldStart,
up,
recursionLevel
) {
// Set defaults
if ( repeating === undefined ) { repeating = false; }
if ( recurse === undefined ) { recurse = false; }
if ( newStart === undefined ) { newStart = this.newText.first; }
if ( oldStart === undefined ) { oldStart = this.oldText.first; }
if ( up === undefined ) { up = false; }
if ( recursionLevel === undefined ) { recursionLevel = 0; }
// Start timers
if ( this.config.timer === true && repeating === false && recursionLevel === 0 ) {
this.time( level );
}
if ( this.config.timer === true && repeating === false ) {
this.time( level + recursionLevel );
}
// Get object symbols table and linked region borders
var symbols;
var bordersDown;
var bordersUp;
if ( recursionLevel === 0 && repeating === false ) {
symbols = this.symbols;
bordersDown = this.bordersDown;
bordersUp = this.bordersUp;
}
// Create empty local symbols table and linked region borders arrays
else {
symbols = {
token: [],
hashTable: {},
linked: false
};
bordersDown = [];
bordersUp = [];
}
// Updated versions of linked region borders
var bordersUpNext = [];
var bordersDownNext = [];
/**
* Pass 1: parse new text into symbol table.
*/
// Cycle through new text tokens list
var i = newStart;
while ( i !== null ) {
if ( this.newText.tokens[i].link === null ) {
// Add new entry to symbol table
var token = this.newText.tokens[i].token;
if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) {
symbols.hashTable[token] = symbols.token.length;
symbols.token.push( {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
} );
}
// Or update existing entry
else {
// Increment token counter for new text
var hashToArray = symbols.hashTable[token];
symbols.token[hashToArray].newCount ++;
}
}
// Stop after gap if recursing
else if ( recursionLevel > 0 ) {
break;
}
// Get next token
if ( up === false ) {
i = this.newText.tokens[i].next;
}
else {
i = this.newText.tokens[i].prev;
}
}
/**
* Pass 2: parse old text into symbol table.
*/
// Cycle through old text tokens list
var j = oldStart;
while ( j !== null ) {
if ( this.oldText.tokens[j].link === null ) {
// Add new entry to symbol table
var token = this.oldText.tokens[j].token;
if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) {
symbols.hashTable[token] = symbols.token.length;
symbols.token.push( {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
} );
}
// Or update existing entry
else {
// Increment token counter for old text
var hashToArray = symbols.hashTable[token];
symbols.token[hashToArray].oldCount ++;
// Add token number for old text
symbols.token[hashToArray].oldToken = j;
}
}
// Stop after gap if recursing
else if ( recursionLevel > 0 ) {
break;
}
// Get next token
if ( up === false ) {
j = this.oldText.tokens[j].next;
}
else {
j = this.oldText.tokens[j].prev;
}
}
/**
* Pass 3: connect unique tokens.
*/
// Cycle through symbol array
var symbolsLength = symbols.token.length;
for ( var i = 0; i < symbolsLength; i ++ ) {
// Find tokens in the symbol table that occur only once in both versions
if ( symbols.token[i].newCount === 1 && symbols.token[i].oldCount === 1 ) {
var newToken = symbols.token[i].newToken;
var oldToken = symbols.token[i].oldToken;
var newTokenObj = this.newText.tokens[newToken];
var oldTokenObj = this.oldText.tokens[oldToken];
// Connect from new to old and from old to new
if ( newTokenObj.link === null ) {
// Do not use spaces as unique markers
if (
this.config.regExp.blankOnlyToken.test( newTokenObj.token ) === true
) {
// Link new and old tokens
newTokenObj.link = oldToken;
oldTokenObj.link = newToken;
symbols.linked = true;
// Save linked region borders
bordersDown.push( [newToken, oldToken] );
bordersUp.push( [newToken, oldToken] );
// Check if token contains unique word
if ( recursionLevel === 0 ) {
var unique = false;
if ( level === 'character' ) {
unique = true;
}
else {
var token = newTokenObj.token;
var words =
( token.match( this.config.regExp.countWords ) || [] ).concat(
( token.match( this.config.regExp.countChunks ) || [] )
);
// Unique if longer than min block length
var wordsLength = words.length;
if ( wordsLength >= this.config.blockMinLength ) {
unique = true;
}
// Unique if it contains at least one unique word
else {
for ( var i = 0;i < wordsLength; i ++ ) {
var word = words[i];
if (
this.oldText.words[word] === 1 &&
this.newText.words[word] === 1 &&
Object.prototype.hasOwnProperty.call( this.oldText.words, word ) === true &&
Object.prototype.hasOwnProperty.call( this.newText.words, word ) === true
) {
unique = true;
break;
}
}
}
}
// Set unique
if ( unique === true ) {
newTokenObj.unique = true;
oldTokenObj.unique = true;
}
}
}
}
}
}
// Continue passes only if unique tokens have been linked previously
if ( symbols.linked === true ) {
/**
* Pass 4: connect adjacent identical tokens downwards.
*/
// Cycle through list of linked new text tokens
var bordersLength = bordersDown.length;
for ( var match = 0; match < bordersLength; match ++ ) {
var i = bordersDown[match][0];
var j = bordersDown[match][1];
// Next down
var iMatch = i;
var jMatch = j;
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
// Cycle through new text list gap region downwards
while (
i !== null &&
j !== null &&
this.newText.tokens[i].link === null &&
this.oldText.tokens[j].link === null
) {
// Connect if same token
if ( this.newText.tokens[i].token === this.oldText.tokens[j].token ) {
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
}
// Not a match yet, maybe in next refinement level
else {
bordersDownNext.push( [iMatch, jMatch] );
break;
}
// Next token down
iMatch = i;
jMatch = j;
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
}
/**
* Pass 5: connect adjacent identical tokens upwards.
*/
// Cycle through list of connected new text tokens
var bordersLength = bordersUp.length;
for ( var match = 0; match < bordersLength; match ++ ) {
var i = bordersUp[match][0];
var j = bordersUp[match][1];
// Next up
var iMatch = i;
var jMatch = j;
i = this.newText.tokens[i].prev;
j = this.oldText.tokens[j].prev;
// Cycle through new text gap region upwards
while (
i !== null &&
j !== null &&
this.newText.tokens[i].link === null &&
this.oldText.tokens[j].link === null
) {
// Connect if same token
if ( this.newText.tokens[i].token === this.oldText.tokens[j].token ) {
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
}
// Not a match yet, maybe in next refinement level
else {
bordersUpNext.push( [iMatch, jMatch] );
break;
}
// Next token up
iMatch = i;
jMatch = j;
i = this.newText.tokens[i].prev;
j = this.oldText.tokens[j].prev;
}
}
/**
* Connect adjacent identical tokens downwards from text start.
* Treat boundary as connected, stop after first connected token.
*/
// Only for full text diff
if ( recursionLevel === 0 && repeating === false ) {
// From start
var i = this.newText.first;
var j = this.oldText.first;
var iMatch = null;
var jMatch = null;
// Cycle through old text tokens down
// Connect identical tokens, stop after first connected token
while (
i !== null &&
j !== null &&
this.newText.tokens[i].link === null &&
this.oldText.tokens[j].link === null &&
this.newText.tokens[i].token === this.oldText.tokens[j].token
) {
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
iMatch = i;
jMatch = j;
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
if ( iMatch !== null ) {
bordersDownNext.push( [iMatch, jMatch] );
}
// From end
i = this.newText.last;
j = this.oldText.last;
iMatch = null;
jMatch = null;
// Cycle through old text tokens up
// Connect identical tokens, stop after first connected token
while (
i !== null &&
j !== null &&
this.newText.tokens[i].link === null &&
this.oldText.tokens[j].link === null &&
this.newText.tokens[i].token === this.oldText.tokens[j].token
) {
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
iMatch = i;
jMatch = j;
i = this.newText.tokens[i].prev;
j = this.oldText.tokens[j].prev;
}
if ( iMatch !== null ) {
bordersUpNext.push( [iMatch, jMatch] );
}
}
// Save updated linked region borders to object
if ( recursionLevel === 0 && repeating === false ) {
this.bordersDown = bordersDownNext;
this.bordersUp = bordersUpNext;
}
// Merge local updated linked region borders into object
else {
this.bordersDown = this.bordersDown.concat( bordersDownNext );
this.bordersUp = this.bordersUp.concat( bordersUpNext );
}
/**
* Repeat once with empty symbol table to link hidden unresolved common tokens in cross-overs.
* ("and" in "and this a and b that" -> "and this a and b that")
*/
if ( repeating === false && this.config.repeatedDiff === true ) {
var repeat = true;
this.calculateDiff( level, recurse, repeat, newStart, oldStart, up, recursionLevel );
}
/**
* Refine by recursively diffing not linked regions with new symbol table.
* At word and character level only.
* Helps against gaps caused by addition of common tokens around sequences of common tokens.
*/
if (
recurse === true &&
this.config['recursiveDiff'] === true &&
recursionLevel < this.config.recursionMax
) {
/**
* Recursively diff gap downwards.
*/
// Cycle through list of linked region borders
var bordersLength = bordersDownNext.length;
for ( match = 0; match < bordersLength; match ++ ) {
var i = bordersDownNext[match][0];
var j = bordersDownNext[match][1];
// Next token down
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
// Start recursion at first gap token pair
if (
i !== null &&
j !== null &&
this.newText.tokens[i].link === null &&
this.oldText.tokens[j].link === null
) {
var repeat = false;
var dirUp = false;
this.calculateDiff( level, recurse, repeat, i, j, dirUp, recursionLevel + 1 );
}
}
/**
* Recursively diff gap upwards.
*/
// Cycle through list of linked region borders
var bordersLength = bordersUpNext.length;
for ( match = 0; match < bordersLength; match ++ ) {
var i = bordersUpNext[match][0];
var j = bordersUpNext[match][1];
// Next token up
i = this.newText.tokens[i].prev;
j = this.oldText.tokens[j].prev;
// Start recursion at first gap token pair
if (
i !== null &&
j !== null &&
this.newText.tokens[i].link === null &&
this.oldText.tokens[j].link === null
) {
var repeat = false;
var dirUp = true;
this.calculateDiff( level, recurse, repeat, i, j, dirUp, recursionLevel + 1 );
}
}
}
}
// Stop timers
if ( this.config.timer === true && repeating === false ) {
if ( this.recursionTimer[recursionLevel] === undefined ) {
this.recursionTimer[recursionLevel] = 0;
}
this.recursionTimer[recursionLevel] += this.timeEnd( level + recursionLevel, true );
}
if ( this.config.timer === true && repeating === false && recursionLevel === 0 ) {
this.timeRecursionEnd( level );
this.timeEnd( level );
}
return;
};
/**
* Main method for processing raw diff data, extracting deleted, inserted, and moved blocks.
*
* Scheme of blocks, sections, and groups (old block numbers):
* Old: 1 2 3D4 5E6 7 8 9 10 11
* | ‾/-/_ X | >|< |
* New: 1 I 3D4 2 E6 5 N 7 10 9 8 11
* Section: 0 0 0 1 1 2 2 2
* Group: 0 10 111 2 33 4 11 5 6 7 8 9
* Fixed: . +++ - ++ - . . - - +
* Type: = . =-= = -= = . = = = = =
*
* @param[out] array groups Groups table object
* @param[out] array blocks Blocks table object
* @param[in/out] WikEdDiffText newText, oldText Text object tokens list
*/
this.detectBlocks = function () {
// Debug log
if ( this.config.debug === true ) {
this.oldText.debugText( 'Old text' );
this.newText.debugText( 'New text' );
}
// Collect identical corresponding ('=') blocks from old text and sort by new text
this.getSameBlocks();
// Collect independent block sections with no block move crosses outside a section
this.getSections();
// Find groups of continuous old text blocks
this.getGroups();
// Set longest sequence of increasing groups in sections as fixed (not moved)
this.setFixed();
// Convert groups to insertions/deletions if maximum block length is too short
// Only for more complex texts that actually have blocks of minimum block length
var unlinkCount = 0;
if (
this.config.unlinkBlocks === true &&
this.config.blockMinLength > 0 &&
this.maxWords >= this.config.blockMinLength
) {
if ( this.config.timer === true ) {
this.time( 'total unlinking' );
}
// Repeat as long as unlinking is possible
var unlinked = true;
while ( unlinked === true && unlinkCount < this.config.unlinkMax ) {
// Convert '=' to '+'/'-' pairs
unlinked = this.unlinkBlocks();
// Start over after conversion
if ( unlinked === true ) {
unlinkCount ++;
this.slideGaps( this.newText, this.oldText );
this.slideGaps( this.oldText, this.newText );
// Repeat block detection from start
this.maxWords = 0;
this.getSameBlocks();
this.getSections();
this.getGroups();
this.setFixed();
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'total unlinking' );
}
}
// Collect deletion ('-') blocks from old text
this.getDelBlocks();
// Position '-' blocks into new text order
this.positionDelBlocks();
// Collect insertion ('+') blocks from new text
this.getInsBlocks();
// Set group numbers of '+' blocks
this.setInsGroups();
// Mark original positions of moved groups
this.insertMarks();
// Debug log
if ( this.config.timer === true || this.config.debug === true ) {
console.log( 'Unlink count: ', unlinkCount );
}
if ( this.config.debug === true ) {
this.debugGroups( 'Groups' );
this.debugBlocks( 'Blocks' );
}
return;
};
/**
* Collect identical corresponding matching ('=') blocks from old text and sort by new text.
*
* @param[in] WikEdDiffText newText, oldText Text objects
* @param[in/out] array blocks Blocks table object
*/
this.getSameBlocks = function () {
if ( this.config.timer === true ) {
this.time( 'getSameBlocks' );
}
var blocks = this.blocks;
// Clear blocks array
blocks.splice( 0 );
// Cycle through old text to find connected (linked, matched) blocks
var j = this.oldText.first;
var i = null;
while ( j !== null ) {
// Skip '-' blocks
while ( j !== null && this.oldText.tokens[j].link === null ) {
j = this.oldText.tokens[j].next;
}
// Get '=' block
if ( j !== null ) {
i = this.oldText.tokens[j].link;
var iStart = i;
var jStart = j;
// Detect matching blocks ('=')
var count = 0;
var unique = false;
var text = '';
while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) {
text += this.oldText.tokens[j].token;
count ++;
if ( this.newText.tokens[i].unique === true ) {
unique = true;
}
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
// Save old text '=' block
blocks.push( {
oldBlock: blocks.length,
newBlock: null,
oldNumber: this.oldText.tokens[jStart].number,
newNumber: this.newText.tokens[iStart].number,
oldStart: jStart,
count: count,
unique: unique,
words: this.wordCount( text ),
chars: text.length,
type: '=',
section: null,
group: null,
fixed: null,
moved: null,
text: text
} );
}
}
// Sort blocks by new text token number
blocks.sort( function( a, b ) {
return a.newNumber - b.newNumber;
} );
// Number blocks in new text order
var blocksLength = blocks.length;
for ( var block = 0; block < blocksLength; block ++ ) {
blocks[block].newBlock = block;
}
if ( this.config.timer === true ) {
this.timeEnd( 'getSameBlocks' );
}
return;
};
/**
* Collect independent block sections with no block move crosses
* outside a section for per-section determination of non-moving fixed groups.
*
* @param[out] array sections Sections table object
* @param[in/out] array blocks Blocks table object, section property
*/
this.getSections = function () {
if ( this.config.timer === true ) {
this.time( 'getSections' );
}
var blocks = this.blocks;
var sections = this.sections;
// Clear sections array
sections.splice( 0 );
// Cycle through blocks
var blocksLength = blocks.length;
for ( var block = 0; block < blocksLength; block ++ ) {
var sectionStart = block;
var sectionEnd = block;
var oldMax = blocks[sectionStart].oldNumber;
var sectionOldMax = oldMax;
// Check right
for ( var j = sectionStart + 1; j < blocksLength; j ++ ) {
// Check for crossing over to the left
if ( blocks[j].oldNumber > oldMax ) {
oldMax = blocks[j].oldNumber;
}
else if ( blocks[j].oldNumber < sectionOldMax ) {
sectionEnd = j;
sectionOldMax = oldMax;
}
}
// Save crossing sections
if ( sectionEnd > sectionStart ) {
// Save section to block
for ( var i = sectionStart; i <= sectionEnd; i ++ ) {
blocks[i].section = sections.length;
}
// Save section
sections.push( {
blockStart: sectionStart,
blockEnd: sectionEnd
} );
block = sectionEnd;
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getSections' );
}
return;
};
/**
* Find groups of continuous old text blocks.
*
* @param[out] array groups Groups table object
* @param[in/out] array blocks Blocks table object, group property
*/
this.getGroups = function () {
if ( this.config.timer === true ) {
this.time( 'getGroups' );
}
var blocks = this.blocks;
var groups = this.groups;
// Clear groups array
groups.splice( 0 );
// Cycle through blocks
var blocksLength = blocks.length;
for ( var block = 0; block < blocksLength; block ++ ) {
var groupStart = block;
var groupEnd = block;
var oldBlock = blocks[groupStart].oldBlock;
// Get word and char count of block
var words = this.wordCount( blocks[block].text );
var maxWords = words;
var unique = blocks[block].unique;
var chars = blocks[block].chars;
// Check right
for ( var i = groupEnd + 1; i < blocksLength; i ++ ) {
// Check for crossing over to the left
if ( blocks[i].oldBlock !== oldBlock + 1 ) {
break;
}
oldBlock = blocks[i].oldBlock;
// Get word and char count of block
if ( blocks[i].words > maxWords ) {
maxWords = blocks[i].words;
}
if ( blocks[i].unique === true ) {
unique = true;
}
words += blocks[i].words;
chars += blocks[i].chars;
groupEnd = i;
}
// Save crossing group
if ( groupEnd >= groupStart ) {
// Set groups outside sections as fixed
var fixed = false;
if ( blocks[groupStart].section === null ) {
fixed = true;
}
// Save group to block
for ( var i = groupStart; i <= groupEnd; i ++ ) {
blocks[i].group = groups.length;
blocks[i].fixed = fixed;
}
// Save group
groups.push( {
oldNumber: blocks[groupStart].oldNumber,
blockStart: groupStart,
blockEnd: groupEnd,
unique: unique,
maxWords: maxWords,
words: words,
chars: chars,
fixed: fixed,
movedFrom: null,
color: null
} );
block = groupEnd;
// Set global word count of longest linked block
if ( maxWords > this.maxWords ) {
this.maxWords = maxWords;
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getGroups' );
}
return;
};
/**
* Set longest sequence of increasing groups in sections as fixed (not moved).
*
* @param[in] array sections Sections table object
* @param[in/out] array groups Groups table object, fixed property
* @param[in/out] array blocks Blocks table object, fixed property
*/
this.setFixed = function () {
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
var blocks = this.blocks;
var groups = this.groups;
var sections = this.sections;
// Cycle through sections
var sectionsLength = sections.length;
for ( var section = 0; section < sectionsLength; section ++ ) {
var blockStart = sections[section].blockStart;
var blockEnd = sections[section].blockEnd;
var groupStart = blocks[blockStart].group;
var groupEnd = blocks[blockEnd].group;
// Recusively find path of groups in increasing old group order with longest char length
var cache = [];
var maxChars = 0;
var maxPath = null;
// Start at each group of section
for ( var i = groupStart; i <= groupEnd; i ++ ) {
var pathObj = this.findMaxPath( i, groupEnd, cache );
if ( pathObj.chars > maxChars ) {
maxPath = pathObj.path;
maxChars = pathObj.chars;
}
}
// Mark fixed groups
var maxPathLength = maxPath.length;
for ( var i = 0; i < maxPathLength; i ++ ) {
var group = maxPath[i];
groups[group].fixed = true;
// Mark fixed blocks
for ( var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++ ) {
blocks[block].fixed = true;
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'setFixed' );
}
return;
};
/**
* Recusively find path of groups in increasing old group order with longest char length.
*
* @param int start Path start group
* @param int groupEnd Path last group
* @param array cache Cache object, contains returnObj for start
* @return array returnObj Contains path and char length
*/
this.findMaxPath = function ( start, groupEnd, cache ) {
var groups = this.groups;
// Find longest sub-path
var maxChars = 0;
var oldNumber = groups[start].oldNumber;
var returnObj = { path: [], chars: 0};
for ( var i = start + 1; i <= groupEnd; i ++ ) {
// Only in increasing old group order
if ( groups[i].oldNumber < oldNumber ) {
continue;
}
// Get longest sub-path from cache (deep copy)
var pathObj;
if ( cache[i] !== undefined ) {
pathObj = { path: cache[i].path.slice(), chars: cache[i].chars };
}
// Get longest sub-path by recursion
else {
pathObj = this.findMaxPath( i, groupEnd, cache );
}
// Select longest sub-path
if ( pathObj.chars > maxChars ) {
maxChars = pathObj.chars;
returnObj = pathObj;
}
}
// Add current start to path
returnObj.path.unshift( start );
returnObj.chars += groups[start].chars;
// Save path to cache (deep copy)
if ( cache[start] === undefined ) {
cache[start] = { path: returnObj.path.slice(), chars: returnObj.chars };
}
return returnObj;
};
/**
* Convert matching '=' blocks in groups into insertion/deletion ('+'/'-') pairs
* if too short and too common.
* Prevents fragmentated diffs for very different versions.
*
* @param[in] array blocks Blocks table object
* @param[in/out] WikEdDiffText newText, oldText Text object, linked property
* @param[in/out] array groups Groups table object
* @return bool True if text tokens were unlinked
*/
this.unlinkBlocks = function () {
var blocks = this.blocks;
var groups = this.groups;
// Cycle through groups
var unlinked = false;
var groupsLength = groups.length;
for ( var group = 0; group < groupsLength; group ++ ) {
var blockStart = groups[group].blockStart;
var blockEnd = groups[group].blockEnd;
// Unlink whole group if no block is at least blockMinLength words long and unique
if ( groups[group].maxWords < this.config.blockMinLength && groups[group].unique === false ) {
for ( var block = blockStart; block <= blockEnd; block ++ ) {
if ( blocks[block].type === '=' ) {
this.unlinkSingleBlock( blocks[block] );
unlinked = true;
}
}
}
// Otherwise unlink block flanks
else {
// Unlink blocks from start
for ( var block = blockStart; block <= blockEnd; block ++ ) {
if ( blocks[block].type === '=' ) {
// Stop unlinking if more than one word or a unique word
if ( blocks[block].words > 1 || blocks[block].unique === true ) {
break;
}
this.unlinkSingleBlock( blocks[block] );
unlinked = true;
blockStart = block;
}
}
// Unlink blocks from end
for ( var block = blockEnd; block > blockStart; block -- ) {
if ( blocks[block].type === '=' ) {
// Stop unlinking if more than one word or a unique word
if (
blocks[block].words > 1 ||
( blocks[block].words === 1 && blocks[block].unique === true )
) {
break;
}
this.unlinkSingleBlock( blocks[block] );
unlinked = true;
}
}
}
}
return unlinked;
};
/**
* Unlink text tokens of single block, convert them into into insertion/deletion ('+'/'-') pairs.
*
* @param[in] array blocks Blocks table object
* @param[out] WikEdDiffText newText, oldText Text objects, link property
*/
this.unlinkSingleBlock = function ( block ) {
// Cycle through old text
var j = block.oldStart;
for ( var count = 0; count < block.count; count ++ ) {
// Unlink tokens
this.newText.tokens[ this.oldText.tokens[j].link ].link = null;
this.oldText.tokens[j].link = null;
j = this.oldText.tokens[j].next;
}
return;
};
/**
* Collect deletion ('-') blocks from old text.
*
* @param[in] WikEdDiffText oldText Old Text object
* @param[out] array blocks Blocks table object
*/
this.getDelBlocks = function () {
if ( this.config.timer === true ) {
this.time( 'getDelBlocks' );
}
var blocks = this.blocks;
// Cycle through old text to find connected (linked, matched) blocks
var j = this.oldText.first;
var i = null;
while ( j !== null ) {
// Collect '-' blocks
var oldStart = j;
var count = 0;
var text = '';
while ( j !== null && this.oldText.tokens[j].link === null ) {
count ++;
text += this.oldText.tokens[j].token;
j = this.oldText.tokens[j].next;
}
// Save old text '-' block
if ( count !== 0 ) {
blocks.push( {
oldBlock: null,
newBlock: null,
oldNumber: this.oldText.tokens[oldStart].number,
newNumber: null,
oldStart: oldStart,
count: count,
unique: false,
words: null,
chars: text.length,
type: '-',
section: null,
group: null,
fixed: null,
moved: null,
text: text
} );
}
// Skip '=' blocks
if ( j !== null ) {
i = this.oldText.tokens[j].link;
while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) {
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getDelBlocks' );
}
return;
};
/**
* Position deletion '-' blocks into new text order.
* Deletion blocks move with fixed reference:
* Old: 1 D 2 1 D 2
* / \ / \ \
* New: 1 D 2 1 D 2
* Fixed: * *
* newNumber: 1 1 2 2
*
* Marks '|' and deletions '-' get newNumber of reference block
* and are sorted around it by old text number.
*
* @param[in/out] array blocks Blocks table, newNumber, section, group, and fixed properties
*
*/
this.positionDelBlocks = function () {
if ( this.config.timer === true ) {
this.time( 'positionDelBlocks' );
}
var blocks = this.blocks;
var groups = this.groups;
// Sort shallow copy of blocks by oldNumber
var blocksOld = blocks.slice();
blocksOld.sort( function( a, b ) {
return a.oldNumber - b.oldNumber;
} );
// Cycle through blocks in old text order
var blocksOldLength = blocksOld.length;
for ( var block = 0; block < blocksOldLength; block ++ ) {
var delBlock = blocksOld[block];
// '-' block only
if ( delBlock.type !== '-' ) {
continue;
}
// Find fixed '=' reference block from original block position to position '-' block
// Similar to position marks '|' code
// Get old text prev block
var prevBlockNumber = null;
var prevBlock = null;
if ( block > 0 ) {
prevBlockNumber = blocksOld[block - 1].newBlock;
prevBlock = blocks[prevBlockNumber];
}
// Get old text next block
var nextBlockNumber = null;
var nextBlock = null;
if ( block < blocksOld.length - 1 ) {
nextBlockNumber = blocksOld[block + 1].newBlock;
nextBlock = blocks[nextBlockNumber];
}
// Move after prev block if fixed
var refBlock = null;
if ( prevBlock !== null && prevBlock.type === '=' && prevBlock.fixed === true ) {
refBlock = prevBlock;
}
// Move before next block if fixed
else if ( nextBlock !== null && nextBlock.type === '=' && nextBlock.fixed === true ) {
refBlock = nextBlock;
}
// Move after prev block if not start of group
else if (
prevBlock !== null &&
prevBlock.type === '=' &&
prevBlockNumber !== groups[ prevBlock.group ].blockEnd
) {
refBlock = prevBlock;
}
// Move before next block if not start of group
else if (
nextBlock !== null &&
nextBlock.type === '=' &&
nextBlockNumber !== groups[ nextBlock.group ].blockStart
) {
refBlock = nextBlock;
}
// Move after closest previous fixed block
else {
for ( var fixed = block; fixed >= 0; fixed -- ) {
if ( blocksOld[fixed].type === '=' && blocksOld[fixed].fixed === true ) {
refBlock = blocksOld[fixed];
break;
}
}
}
// Move before first block
if ( refBlock === null ) {
delBlock.newNumber = -1;
}
// Update '-' block data
else {
delBlock.newNumber = refBlock.newNumber;
delBlock.section = refBlock.section;
delBlock.group = refBlock.group;
delBlock.fixed = refBlock.fixed;
}
}
// Sort '-' blocks in and update groups
this.sortBlocks();
if ( this.config.timer === true ) {
this.timeEnd( 'positionDelBlocks' );
}
return;
};
/**
* Collect insertion ('+') blocks from new text.
*
* @param[in] WikEdDiffText newText New Text object
* @param[out] array blocks Blocks table object
*/
this.getInsBlocks = function () {
if ( this.config.timer === true ) {
this.time( 'getInsBlocks' );
}
var blocks = this.blocks;
// Cycle through new text to find insertion blocks
var i = this.newText.first;
while ( i !== null ) {
// Jump over linked (matched) block
while ( i !== null && this.newText.tokens[i].link !== null ) {
i = this.newText.tokens[i].next;
}
// Detect insertion blocks ('+')
if ( i !== null ) {
var iStart = i;
var count = 0;
var text = '';
while ( i !== null && this.newText.tokens[i].link === null ) {
count ++;
text += this.newText.tokens[i].token;
i = this.newText.tokens[i].next;
}
// Save new text '+' block
blocks.push( {
oldBlock: null,
newBlock: null,
oldNumber: null,
newNumber: this.newText.tokens[iStart].number,
oldStart: null,
count: count,
unique: false,
words: null,
chars: text.length,
type: '+',
section: null,
group: null,
fixed: null,
moved: null,
text: text
} );
}
}
// Sort '+' blocks in and update groups
this.sortBlocks();
if ( this.config.timer === true ) {
this.timeEnd( 'getInsBlocks' );
}
return;
};
/**
* Sort blocks by new text token number and update groups.
*
* @param[in/out] array groups Groups table object
* @param[in/out] array blocks Blocks table object
*/
this.sortBlocks = function () {
var blocks = this.blocks;
var groups = this.groups;
// Sort by newNumber, then by old number
blocks.sort( function( a, b ) {
var comp = a.newNumber - b.newNumber;
if ( comp === 0 ) {
comp = a.oldNumber - b.oldNumber;
}
return comp;
} );
// Cycle through blocks and update groups with new block numbers
var group = null;
var blocksLength = blocks.length;
for ( var block = 0; block < blocksLength; block ++ ) {
var blockGroup = blocks[block].group;
if ( blockGroup !== null ) {
if ( blockGroup !== group ) {
group = blocks[block].group;
groups[group].blockStart = block;
groups[group].oldNumber = blocks[block].oldNumber;
}
groups[blockGroup].blockEnd = block;
}
}
return;
};
/**
* Set group numbers of insertion '+' blocks.
*
* @param[in/out] array groups Groups table object
* @param[in/out] array blocks Blocks table object, fixed and group properties
*/
this.setInsGroups = function () {
if ( this.config.timer === true ) {
this.time( 'setInsGroups' );
}
var blocks = this.blocks;
var groups = this.groups;
// Set group numbers of '+' blocks inside existing groups
var groupsLength = groups.length;
for ( var group = 0; group < groupsLength; group ++ ) {
var fixed = groups[group].fixed;
for ( var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++ ) {
if ( blocks[block].group === null ) {
blocks[block].group = group;
blocks[block].fixed = fixed;
}
}
}
// Add remaining '+' blocks to new groups
// Cycle through blocks
var blocksLength = blocks.length;
for ( var block = 0; block < blocksLength; block ++ ) {
// Skip existing groups
if ( blocks[block].group === null ) {
blocks[block].group = groups.length;
// Save new single-block group
groups.push( {
oldNumber: blocks[block].oldNumber,
blockStart: block,
blockEnd: block,
unique: blocks[block].unique,
maxWords: blocks[block].words,
words: blocks[block].words,
chars: blocks[block].chars,
fixed: blocks[block].fixed,
movedFrom: null,
color: null
} );
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'setInsGroups' );
}
return;
};
/**
* Mark original positions of moved groups.
* Scheme: moved block marks at original positions relative to fixed groups:
* Groups: 3 7
* 1 <| | (no next smaller fixed)
* 5 |< |
* |> 5 |
* | 5 <|
* | >| 5
* | |> 9 (no next larger fixed)
* Fixed: * *
*
* Mark direction: groups.movedGroup.blockStart < groups.group.blockStart
* Group side: groups.movedGroup.oldNumber < groups.group.oldNumber
*
* Marks '|' and deletions '-' get newNumber of reference block
* and are sorted around it by old text number.
*
* @param[in/out] array groups Groups table object, movedFrom property
* @param[in/out] array blocks Blocks table object
*/
this.insertMarks = function () {
if ( this.config.timer === true ) {
this.time( 'insertMarks' );
}
var blocks = this.blocks;
var groups = this.groups;
var moved = [];
var color = 1;
// Make shallow copy of blocks
var blocksOld = blocks.slice();
// Enumerate copy
var blocksOldLength = blocksOld.length;
for ( var i = 0; i < blocksOldLength; i ++ ) {
blocksOld[i].number = i;
}
// Sort copy by oldNumber
blocksOld.sort( function( a, b ) {
var comp = a.oldNumber - b.oldNumber;
if ( comp === 0 ) {
comp = a.newNumber - b.newNumber;
}
return comp;
} );
// Create lookup table: original to sorted
var lookupSorted = [];
for ( var i = 0; i < blocksOldLength; i ++ ) {
lookupSorted[ blocksOld[i].number ] = i;
}
// Cycle through groups (moved group)
var groupsLength = groups.length;
for ( var moved = 0; moved < groupsLength; moved ++ ) {
var movedGroup = groups[moved];
if ( movedGroup.fixed !== false ) {
continue;
}
var movedOldNumber = movedGroup.oldNumber;
// Find fixed '=' reference block from original block position to position '|' block
// Similar to position deletions '-' code
// Get old text prev block
var prevBlock = null;
var block = lookupSorted[ movedGroup.blockStart ];
if ( block > 0 ) {
prevBlock = blocksOld[block - 1];
}
// Get old text next block
var nextBlock = null;
var block = lookupSorted[ movedGroup.blockEnd ];
if ( block < blocksOld.length - 1 ) {
nextBlock = blocksOld[block + 1];
}
// Move after prev block if fixed
var refBlock = null;
if ( prevBlock !== null && prevBlock.type === '=' && prevBlock.fixed === true ) {
refBlock = prevBlock;
}
// Move before next block if fixed
else if ( nextBlock !== null && nextBlock.type === '=' && nextBlock.fixed === true ) {
refBlock = nextBlock;
}
// Find closest fixed block to the left
else {
for ( var fixed = lookupSorted[ movedGroup.blockStart ] - 1; fixed >= 0; fixed -- ) {
if ( blocksOld[fixed].type === '=' && blocksOld[fixed].fixed === true ) {
refBlock = blocksOld[fixed];
break;
}
}
}
// Get position of new mark block
var newNumber;
var markGroup;
// No smaller fixed block, moved right from before first block
if ( refBlock === null ) {
newNumber = -1;
markGroup = groups.length;
// Save new single-mark-block group
groups.push( {
oldNumber: 0,
blockStart: blocks.length,
blockEnd: blocks.length,
unique: false,
maxWords: null,
words: null,
chars: 0,
fixed: null,
movedFrom: null,
color: null
} );
}
else {
newNumber = refBlock.newNumber;
markGroup = refBlock.group;
}
// Insert '|' block
blocks.push( {
oldBlock: null,
newBlock: null,
oldNumber: movedOldNumber,
newNumber: newNumber,
oldStart: null,
count: null,
unique: null,
words: null,
chars: 0,
type: '|',
section: null,
group: markGroup,
fixed: true,
moved: moved,
text: ''
} );
// Set group color
movedGroup.color = color;
movedGroup.movedFrom = markGroup;
color ++;
}
// Sort '|' blocks in and update groups
this.sortBlocks();
if ( this.config.timer === true ) {
this.timeEnd( 'insertMarks' );
}
return;
};
/**
* Collect diff fragment list for markup, create abstraction layer for customized diffs.
* Adds the following fagment types:
* '=', '-', '+' same, deletion, insertion
* '<', '>' mark left, mark right
* '(<', '(>', ')' block start and end
* '[', ']' fragment start and end
* '{', '}' container start and end
*
* @param[in] array groups Groups table object
* @param[in] array blocks Blocks table object
* @param[out] array fragments Fragments array, abstraction layer for diff code
*/
this.getDiffFragments = function () {
var blocks = this.blocks;
var groups = this.groups;
var fragments = this.fragments;
// Make shallow copy of groups and sort by blockStart
var groupsSort = groups.slice();
groupsSort.sort( function( a, b ) {
return a.blockStart - b.blockStart;
} );
// Cycle through groups
var groupsSortLength = groupsSort.length;
for ( var group = 0; group < groupsSortLength; group ++ ) {
var blockStart = groupsSort[group].blockStart;
var blockEnd = groupsSort[group].blockEnd;
// Add moved block start
var color = groupsSort[group].color;
if ( color !== null ) {
var type;
if ( groupsSort[group].movedFrom < blocks[ blockStart ].group ) {
type = '(<';
}
else {
type = '(>';
}
fragments.push( {
text: '',
type: type,
color: color
} );
}
// Cycle through blocks
for ( var block = blockStart; block <= blockEnd; block ++ ) {
var type = blocks[block].type;
// Add '=' unchanged text and moved block
if ( type === '=' || type === '-' || type === '+' ) {
fragments.push( {
text: blocks[block].text,
type: type,
color: color
} );
}
// Add '<' and '>' marks
else if ( type === '|' ) {
var movedGroup = groups[ blocks[block].moved ];
// Get mark text
var markText = '';
for (
var movedBlock = movedGroup.blockStart;
movedBlock <= movedGroup.blockEnd;
movedBlock ++
) {
if ( blocks[movedBlock].type === '=' || blocks[movedBlock].type === '-' ) {
markText += blocks[movedBlock].text;
}
}
// Get mark direction
var markType;
if ( movedGroup.blockStart < blockStart ) {
markType = '<';
}
else {
markType = '>';
}
// Add mark
fragments.push( {
text: markText,
type: markType,
color: movedGroup.color
} );
}
}
// Add moved block end
if ( color !== null ) {
fragments.push( {
text: '',
type: ' )',
color: color
} );
}
}
// Cycle through fragments, join consecutive fragments of same type (i.e. '-' blocks)
var fragmentsLength = fragments.length;
for ( var fragment = 1; fragment < fragmentsLength; fragment ++ ) {
// Check if joinable
if (
fragments[fragment].type === fragments[fragment - 1].type &&
fragments[fragment].color === fragments[fragment - 1].color &&
fragments[fragment].text !== '' && fragments[fragment - 1].text !== ''
) {
// Join and splice
fragments[fragment - 1].text += fragments[fragment].text;
fragments.splice( fragment, 1 );
fragment --;
}
}
// Enclose in containers
fragments.unshift( { text: '', type: '{', color: null }, { text: '', type: '[', color: null } );
fragments.push( { text: '', type: ']', color: null }, { text: '', type: '}', color: null } );
return;
};
/**
* Clip unchanged sections from unmoved block text.
* Adds the following fagment types:
* '~', ' ~', '~ ' omission indicators
* '[', ']', ',' fragment start and end, fragment separator
*
* @param[in/out] array fragments Fragments array, abstraction layer for diff code
*/
this.clipDiffFragments = function () {
var fragments = this.fragments;
// Skip if only one fragment in containers, no change
if ( fragments.length === 5 ) {
return;
}
// Min length for clipping right
var minRight = this.config.clipHeadingRight;
if ( this.config.clipParagraphRightMin < minRight ) {
minRight = this.config.clipParagraphRightMin;
}
if ( this.config.clipLineRightMin < minRight ) {
minRight = this.config.clipLineRightMin;
}
if ( this.config.clipBlankRightMin < minRight ) {
minRight = this.config.clipBlankRightMin;
}
if ( this.config.clipCharsRight < minRight ) {
minRight = this.config.clipCharsRight;
}
// Min length for clipping left
var minLeft = this.config.clipHeadingLeft;
if ( this.config.clipParagraphLeftMin < minLeft ) {
minLeft = this.config.clipParagraphLeftMin;
}
if ( this.config.clipLineLeftMin < minLeft ) {
minLeft = this.config.clipLineLeftMin;
}
if ( this.config.clipBlankLeftMin < minLeft ) {
minLeft = this.config.clipBlankLeftMin;
}
if ( this.config.clipCharsLeft < minLeft ) {
minLeft = this.config.clipCharsLeft;
}
// Cycle through fragments
var fragmentsLength = fragments.length;
for ( var fragment = 0; fragment < fragmentsLength; fragment ++ ) {
// Skip if not an unmoved and unchanged block
var type = fragments[fragment].type;
var color = fragments[fragment].color;
if ( type !== '=' || color !== null ) {
continue;
}
// Skip if too short for clipping
var text = fragments[fragment].text;
var textLength = text.length;
if ( textLength < minRight && textLength < minLeft ) {
continue;
}
// Get line positions including start and end
var lines = [];
var lastIndex = null;
var regExpMatch;
while ( ( regExpMatch = this.config.regExp.clipLine.exec( text ) ) !== null ) {
lines.push( regExpMatch.index );
lastIndex = this.config.regExp.clipLine.lastIndex;
}
if ( lines[0] !== 0 ) {
lines.unshift( 0 );
}
if ( lastIndex !== textLength ) {
lines.push( textLength );
}
// Get heading positions
var headings = [];
var headingsEnd = [];
while ( ( regExpMatch = this.config.regExp.clipHeading.exec( text ) ) !== null ) {
headings.push( regExpMatch.index );
headingsEnd.push( regExpMatch.index + regExpMatch[0].length );
}
// Get paragraph positions including start and end
var paragraphs = [];
var lastIndex = null;
while ( ( regExpMatch = this.config.regExp.clipParagraph.exec( text ) ) !== null ) {
paragraphs.push( regExpMatch.index );
lastIndex = this.config.regExp.clipParagraph.lastIndex;
}
if ( paragraphs[0] !== 0 ) {
paragraphs.unshift( 0 );
}
if ( lastIndex !== textLength ) {
paragraphs.push( textLength );
}
// Determine ranges to keep on left and right side
var rangeRight = null;
var rangeLeft = null;
var rangeRightType = '';
var rangeLeftType = '';
// Find clip pos from left, skip for first non-container block
if ( fragment !== 2 ) {
// Maximum lines to search from left
var rangeLeftMax = textLength;
if ( this.config.clipLinesLeftMax < lines.length ) {
rangeLeftMax = lines[this.config.clipLinesLeftMax];
}
// Find first heading from left
if ( rangeLeft === null ) {
var headingsLength = headingsEnd.length;
for ( var j = 0; j < headingsLength; j ++ ) {
if ( headingsEnd[j] > this.config.clipHeadingLeft || headingsEnd[j] > rangeLeftMax ) {
break;
}
rangeLeft = headingsEnd[j];
rangeLeftType = 'heading';
break;
}
}
// Find first paragraph from left
if ( rangeLeft === null ) {
var paragraphsLength = paragraphs.length;
for ( var j = 0; j < paragraphsLength; j ++ ) {
if (
paragraphs[j] > this.config.clipParagraphLeftMax ||
paragraphs[j] > rangeLeftMax
) {
break;
}
if ( paragraphs[j] > this.config.clipParagraphLeftMin ) {
rangeLeft = paragraphs[j];
rangeLeftType = 'paragraph';
break;
}
}
}
// Find first line break from left
if ( rangeLeft === null ) {
var linesLength = lines.length;
for ( var j = 0; j < linesLength; j ++ ) {
if ( lines[j] > this.config.clipLineLeftMax || lines[j] > rangeLeftMax ) {
break;
}
if ( lines[j] > this.config.clipLineLeftMin ) {
rangeLeft = lines[j];
rangeLeftType = 'line';
break;
}
}
}
// Find first blank from left
if ( rangeLeft === null ) {
this.config.regExp.clipBlank.lastIndex = this.config.clipBlankLeftMin;
if ( ( regExpMatch = this.config.regExp.clipBlank.exec( text ) ) !== null ) {
if (
regExpMatch.index < this.config.clipBlankLeftMax &&
regExpMatch.index < rangeLeftMax
) {
rangeLeft = regExpMatch.index;
rangeLeftType = 'blank';
}
}
}
// Fixed number of chars from left
if ( rangeLeft === null ) {
if ( this.config.clipCharsLeft < rangeLeftMax ) {
rangeLeft = this.config.clipCharsLeft;
rangeLeftType = 'chars';
}
}
// Fixed number of lines from left
if ( rangeLeft === null ) {
rangeLeft = rangeLeftMax;
rangeLeftType = 'fixed';
}
}
// Find clip pos from right, skip for last non-container block
if ( fragment !== fragments.length - 3 ) {
// Maximum lines to search from right
var rangeRightMin = 0;
if ( lines.length >= this.config.clipLinesRightMax ) {
rangeRightMin = lines[lines.length - this.config.clipLinesRightMax];
}
// Find last heading from right
if ( rangeRight === null ) {
for ( var j = headings.length - 1; j >= 0; j -- ) {
if (
headings[j] < textLength - this.config.clipHeadingRight ||
headings[j] < rangeRightMin
) {
break;
}
rangeRight = headings[j];
rangeRightType = 'heading';
break;
}
}
// Find last paragraph from right
if ( rangeRight === null ) {
for ( var j = paragraphs.length - 1; j >= 0 ; j -- ) {
if (
paragraphs[j] < textLength - this.config.clipParagraphRightMax ||
paragraphs[j] < rangeRightMin
) {
break;
}
if ( paragraphs[j] < textLength - this.config.clipParagraphRightMin ) {
rangeRight = paragraphs[j];
rangeRightType = 'paragraph';
break;
}
}
}
// Find last line break from right
if ( rangeRight === null ) {
for ( var j = lines.length - 1; j >= 0; j -- ) {
if (
lines[j] < textLength - this.config.clipLineRightMax ||
lines[j] < rangeRightMin
) {
break;
}
if ( lines[j] < textLength - this.config.clipLineRightMin ) {
rangeRight = lines[j];
rangeRightType = 'line';
break;
}
}
}
// Find last blank from right
if ( rangeRight === null ) {
var startPos = textLength - this.config.clipBlankRightMax;
if ( startPos < rangeRightMin ) {
startPos = rangeRightMin;
}
this.config.regExp.clipBlank.lastIndex = startPos;
var lastPos = null;
while ( ( regExpMatch = this.config.regExp.clipBlank.exec( text ) ) !== null ) {
if ( regExpMatch.index > textLength - this.config.clipBlankRightMin ) {
if ( lastPos !== null ) {
rangeRight = lastPos;
rangeRightType = 'blank';
}
break;
}
lastPos = regExpMatch.index;
}
}
// Fixed number of chars from right
if ( rangeRight === null ) {
if ( textLength - this.config.clipCharsRight > rangeRightMin ) {
rangeRight = textLength - this.config.clipCharsRight;
rangeRightType = 'chars';
}
}
// Fixed number of lines from right
if ( rangeRight === null ) {
rangeRight = rangeRightMin;
rangeRightType = 'fixed';
}
}
// Check if we skip clipping if ranges are close together
if ( rangeLeft !== null && rangeRight !== null ) {
// Skip if overlapping ranges
if ( rangeLeft > rangeRight ) {
continue;
}
// Skip if chars too close
var skipChars = rangeRight - rangeLeft;
if ( skipChars < this.config.clipSkipChars ) {
continue;
}
// Skip if lines too close
var skipLines = 0;
var linesLength = lines.length;
for ( var j = 0; j < linesLength; j ++ ) {
if ( lines[j] > rangeRight || skipLines > this.config.clipSkipLines ) {
break;
}
if ( lines[j] > rangeLeft ) {
skipLines ++;
}
}
if ( skipLines < this.config.clipSkipLines ) {
continue;
}
}
// Skip if nothing to clip
if ( rangeLeft === null && rangeRight === null ) {
continue;
}
// Split left text
var textLeft = null;
var omittedLeft = null;
if ( rangeLeft !== null ) {
textLeft = text.slice( 0, rangeLeft );
// Remove trailing empty lines
textLeft = textLeft.replace( this.config.regExp.clipTrimNewLinesLeft, '' );
// Get omission indicators, remove trailing blanks
if ( rangeLeftType === 'chars' ) {
omittedLeft = '~';
textLeft = textLeft.replace( this.config.regExp.clipTrimBlanksLeft, '' );
}
else if ( rangeLeftType === 'blank' ) {
omittedLeft = ' ~';
textLeft = textLeft.replace( this.config.regExp.clipTrimBlanksLeft, '' );
}
}
// Split right text
var textRight = null;
var omittedRight = null;
if ( rangeRight !== null ) {
textRight = text.slice( rangeRight );
// Remove leading empty lines
textRight = textRight.replace( this.config.regExp.clipTrimNewLinesRight, '' );
// Get omission indicators, remove leading blanks
if ( rangeRightType === 'chars' ) {
omittedRight = '~';
textRight = textRight.replace( this.config.regExp.clipTrimBlanksRight, '' );
}
else if ( rangeRightType === 'blank' ) {
omittedRight = '~ ';
textRight = textRight.replace( this.config.regExp.clipTrimBlanksRight, '' );
}
}
// Remove split element
fragments.splice( fragment, 1 );
fragmentsLength --;
// Add left text to fragments list
if ( rangeLeft !== null ) {
fragments.splice( fragment ++, 0, { text: textLeft, type: '=', color: null } );
fragmentsLength ++;
if ( omittedLeft !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: omittedLeft, color: null } );
fragmentsLength ++;
}
}
// Add fragment container and separator to list
if ( rangeLeft !== null && rangeRight !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: ']', color: null } );
fragments.splice( fragment ++, 0, { text: '', type: ',', color: null } );
fragments.splice( fragment ++, 0, { text: '', type: '[', color: null } );
fragmentsLength += 3;
}
// Add right text to fragments list
if ( rangeRight !== null ) {
if ( omittedRight !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: omittedRight, color: null } );
fragmentsLength ++;
}
fragments.splice( fragment ++, 0, { text: textRight, type: '=', color: null } );
fragmentsLength ++;
}
}
// Debug log
if ( this.config.debug === true ) {
this.debugFragments( 'Fragments' );
}
return;
};
/**
* Create html formatted diff code from diff fragments.
*
* @param[in] array fragments Fragments array, abstraction layer for diff code
* @param string|undefined version
* Output version: 'new' or 'old': only text from new or old version, used for unit tests
* @param[out] string html Html code of diff
*/
this.getDiffHtml = function ( version ) {
var fragments = this.fragments;
// No change, only one unchanged block in containers
if ( fragments.length === 5 && fragments[2].type === '=' ) {
this.html = '';
return;
}
// Cycle through fragments
var htmlFragments = [];
var fragmentsLength = fragments.length;
for ( var fragment = 0; fragment < fragmentsLength; fragment ++ ) {
var text = fragments[fragment].text;
var type = fragments[fragment].type;
var color = fragments[fragment].color;
var html = '';
// Test if text is blanks-only or a single character
var blank = false;
if ( text !== '' ) {
blank = this.config.regExp.blankBlock.test( text );
}
// Add container start markup
if ( type === '{' ) {
html = this.config.htmlCode.containerStart;
}
// Add container end markup
else if ( type === '}' ) {
html = this.config.htmlCode.containerEnd;
}
// Add fragment start markup
if ( type === '[' ) {
html = this.config.htmlCode.fragmentStart;
}
// Add fragment end markup
else if ( type === ']' ) {
html = this.config.htmlCode.fragmentEnd;
}
// Add fragment separator markup
else if ( type === ',' ) {
html = this.config.htmlCode.separator;
}
// Add omission markup
if ( type === '~' ) {
html = this.config.htmlCode.omittedChars;
}
// Add omission markup
if ( type === ' ~' ) {
html = ' ' + this.config.htmlCode.omittedChars;
}
// Add omission markup
if ( type === '~ ' ) {
html = this.config.htmlCode.omittedChars + ' ';
}
// Add colored left-pointing block start markup
else if ( type === '(<' ) {
if ( version !== 'old' ) {
// Get title
var title;
if ( this.config.noUnicodeSymbols === true ) {
title = this.config.msg['wiked-diff-block-left-nounicode'];
}
else {
title = this.config.msg['wiked-diff-block-left'];
}
// Get html
if ( this.config.coloredBlocks === true ) {
html = this.config.htmlCode.blockColoredStart;
}
else {
html = this.config.htmlCode.blockStart;
}
html = this.htmlCustomize( html, color, title );
}
}
// Add colored right-pointing block start markup
else if ( type === '(>' ) {
if ( version !== 'old' ) {
// Get title
var title;
if ( this.config.noUnicodeSymbols === true ) {
title = this.config.msg['wiked-diff-block-right-nounicode'];
}
else {
title = this.config.msg['wiked-diff-block-right'];
}
// Get html
if ( this.config.coloredBlocks === true ) {
html = this.config.htmlCode.blockColoredStart;
}
else {
html = this.config.htmlCode.blockStart;
}
html = this.htmlCustomize( html, color, title );
}
}
// Add colored block end markup
else if ( type === ' )' ) {
if ( version !== 'old' ) {
html = this.config.htmlCode.blockEnd;
}
}
// Add '=' (unchanged) text and moved block
if ( type === '=' ) {
text = this.htmlEscape( text );
if ( color !== null ) {
if ( version !== 'old' ) {
html = this.markupBlanks( text, true );
}
}
else {
html = this.markupBlanks( text );
}
}
// Add '-' text
else if ( type === '-' ) {
if ( version !== 'new' ) {
// For old version skip '-' inside moved group
if ( version !== 'old' || color === null ) {
text = this.htmlEscape( text );
text = this.markupBlanks( text, true );
if ( blank === true ) {
html = this.config.htmlCode.deleteStartBlank;
}
else {
html = this.config.htmlCode.deleteStart;
}
html += text + this.config.htmlCode.deleteEnd;
}
}
}
// Add '+' text
else if ( type === '+' ) {
if ( version !== 'old' ) {
text = this.htmlEscape( text );
text = this.markupBlanks( text, true );
if ( blank === true ) {
html = this.config.htmlCode.insertStartBlank;
}
else {
html = this.config.htmlCode.insertStart;
}
html += text + this.config.htmlCode.insertEnd;
}
}
// Add '<' and '>' code
else if ( type === '<' || type === '>' ) {
if ( version !== 'new' ) {
// Display as deletion at original position
if ( this.config.showBlockMoves === false || version === 'old' ) {
text = this.htmlEscape( text );
text = this.markupBlanks( text, true );
if ( version === 'old' ) {
if ( this.config.coloredBlocks === true ) {
html =
this.htmlCustomize( this.config.htmlCode.blockColoredStart, color ) +
text +
this.config.htmlCode.blockEnd;
}
else {
html =
this.htmlCustomize( this.config.htmlCode.blockStart, color ) +
text +
this.config.htmlCode.blockEnd;
}
}
else {
if ( blank === true ) {
html =
this.config.htmlCode.deleteStartBlank +
text +
this.config.htmlCode.deleteEnd;
}
else {
html = this.config.htmlCode.deleteStart + text + this.config.htmlCode.deleteEnd;
}
}
}
// Display as mark
else {
if ( type === '<' ) {
if ( this.config.coloredBlocks === true ) {
html = this.htmlCustomize( this.config.htmlCode.markLeftColored, color, text );
}
else {
html = this.htmlCustomize( this.config.htmlCode.markLeft, color, text );
}
}
else {
if ( this.config.coloredBlocks === true ) {
html = this.htmlCustomize( this.config.htmlCode.markRightColored, color, text );
}
else {
html = this.htmlCustomize( this.config.htmlCode.markRight, color, text );
}
}
}
}
}
htmlFragments.push( html );
}
// Join fragments
this.html = htmlFragments.join( '' );
return;
};
/**
* Customize html code fragments.
* Replaces:
* {number}: class/color/block/mark/id number
* {title}: title attribute (popup)
* {nounicode}: noUnicodeSymbols fallback
* input: html, number: block number, title: title attribute (popup) text
*
* @param string html Html code to be customized
* @return string Customized html code
*/
this.htmlCustomize = function ( html, number, title ) {
// Replace {number} with class/color/block/mark/id number
html = html.replace( /\{number\}/g, number);
// Replace {nounicode} with wikEdDiffNoUnicode class name
if ( this.config.noUnicodeSymbols === true ) {
html = html.replace( /\{nounicode\}/g, ' wikEdDiffNoUnicode');
}
else {
html = html.replace( /\{nounicode\}/g, '');
}
// Shorten title text, replace {title}
if ( title !== undefined ) {
var max = 512;
var end = 128;
var gapMark = ' [...] ';
if ( title.length > max ) {
title =
title.substr( 0, max - gapMark.length - end ) +
gapMark +
title.substr( title.length - end );
}
title = this.htmlEscape( title );
title = title.replace( /\t/g, ' ');
title = title.replace( / /g, ' ');
html = html.replace( /\{title\}/, title);
}
return html;
};
/**
* Replace html-sensitive characters in output text with character entities.
*
* @param string html Html code to be escaped
* @return string Escaped html code
*/
this.htmlEscape = function ( html ) {
html = html.replace( /&/g, '&');
html = html.replace( /</g, '<');
html = html.replace( />/g, '>');
html = html.replace( /"/g, '"');
return html;
};
/**
* Markup tabs, newlines, and spaces in diff fragment text.
*
* @param bool highlight Highlight newlines and spaces in addition to tabs
* @param string html Text code to be marked-up
* @return string Marked-up text
*/
this.markupBlanks = function ( html, highlight ) {
if ( highlight === true ) {
html = html.replace( / /g, this.config.htmlCode.space);
html = html.replace( /\n/g, this.config.htmlCode.newline);
}
html = html.replace( /\t/g, this.config.htmlCode.tab);
return html;
};
/**
* Count real words in text.
*
* @param string text Text for word counting
* @return int Number of words in text
*/
this.wordCount = function ( text ) {
return ( text.match( this.config.regExp.countWords ) || [] ).length;
};
/**
* Test diff code for consistency with input versions.
* Prints results to debug console.
*
* @param[in] WikEdDiffText newText, oldText Text objects
*/
this.unitTests = function () {
// Check if output is consistent with new text
this.getDiffHtml( 'new' );
var diff = this.html.replace( /<[^>]*>/g, '');
var text = this.htmlEscape( this.newText.text );
if ( diff !== text ) {
console.log(
'Error: wikEdDiff unit test failure: diff not consistent with new text version!'
);
this.error = true;
console.log( 'new text:\n', text );
console.log( 'new diff:\n', diff );
}
else {
console.log( 'OK: wikEdDiff unit test passed: diff consistent with new text.' );
}
// Check if output is consistent with old text
this.getDiffHtml( 'old' );
var diff = this.html.replace( /<[^>]*>/g, '');
var text = this.htmlEscape( this.oldText.text );
if ( diff !== text ) {
console.log(
'Error: wikEdDiff unit test failure: diff not consistent with old text version!'
);
this.error = true;
console.log( 'old text:\n', text );
console.log( 'old diff:\n', diff );
}
else {
console.log( 'OK: wikEdDiff unit test passed: diff consistent with old text.' );
}
return;
};
/**
* Dump blocks object to browser console.
*
* @param string name Block name
* @param[in] array blocks Blocks table object
*/
this.debugBlocks = function ( name, blocks ) {
if ( blocks === undefined ) {
blocks = this.blocks;
}
var dump =
'\ni \toldBl \tnewBl \toldNm \tnewNm \toldSt \tcount \tuniq' +
'\twords \tchars \ttype \tsect \tgroup \tfixed \tmoved \ttext\n';
var blocksLength = blocks.length;
for ( var i = 0; i < blocksLength; i ++ ) {
dump +=
i + ' \t' + blocks[i].oldBlock + ' \t' + blocks[i].newBlock + ' \t' +
blocks[i].oldNumber + ' \t' + blocks[i].newNumber + ' \t' + blocks[i].oldStart + ' \t' +
blocks[i].count + ' \t' + blocks[i].unique + ' \t' + blocks[i].words + ' \t' +
blocks[i].chars + ' \t' + blocks[i].type + ' \t' + blocks[i].section + ' \t' +
blocks[i].group + ' \t' + blocks[i].fixed + ' \t' + blocks[i].moved + ' \t' +
this.debugShortenText( blocks[i].text ) + '\n';
}
console.log( name + ':\n' + dump );
};
/**
* Dump groups object to browser console.
*
* @param string name Group name
* @param[in] array groups Groups table object
*/
this.debugGroups = function ( name, groups ) {
if ( groups === undefined ) {
groups = this.groups;
}
var dump =
'\ni \toldNm \tblSta \tblEnd \tuniq \tmaxWo' +
'\twords \tchars \tfixed \toldNm \tmFrom \tcolor\n';
var groupsLength = groupsLength;
for ( var i = 0; i < groups.length; i ++ ) {
dump +=
i + ' \t' + groups[i].oldNumber + ' \t' + groups[i].blockStart + ' \t' +
groups[i].blockEnd + ' \t' + groups[i].unique + ' \t' + groups[i].maxWords + ' \t' +
groups[i].words + ' \t' + groups[i].chars + ' \t' + groups[i].fixed + ' \t' +
groups[i].oldNumber + ' \t' + groups[i].movedFrom + ' \t' + groups[i].color + '\n';
}
console.log( name + ':\n' + dump );
};
/**
* Dump fragments array to browser console.
*
* @param string name Fragments name
* @param[in] array fragments Fragments array
*/
this.debugFragments = function ( name ) {
var fragments = this.fragments;
var dump = '\ni \ttype \tcolor \ttext\n';
var fragmentsLength = fragments.length;
for ( var i = 0; i < fragmentsLength; i ++ ) {
dump +=
i + ' \t"' + fragments[i].type + '" \t' + fragments[i].color + ' \t' +
this.debugShortenText( fragments[i].text, 120, 40 ) + '\n';
}
console.log( name + ':\n' + dump );
};
/**
* Dump borders array to browser console.
*
* @param string name Arrays name
* @param[in] array border Match border array
*/
this.debugBorders = function ( name, borders ) {
var dump = '\ni \t[ new \told ]\n';
var bordersLength = borders.length;
for ( var i = 0; i < bordersLength; i ++ ) {
dump += i + ' \t[ ' + borders[i][0] + ' \t' + borders[i][1] + ' ]\n';
}
console.log( name, dump );
};
/**
* Shorten text for dumping.
*
* @param string text Text to be shortened
* @param int max Max length of (shortened) text
* @param int end Length of trailing fragment of shortened text
* @return string Shortened text
*/
this.debugShortenText = function ( text, max, end ) {
if ( typeof text !== 'string' ) {
text = text.toString();
}
text = text.replace( /\n/g, '\\n');
text = text.replace( /\t/g, ' ');
if ( max === undefined ) {
max = 50;
}
if ( end === undefined ) {
end = 15;
}
if ( text.length > max ) {
text = text.substr( 0, max - 1 - end ) + '…' + text.substr( text.length - end );
}
return '"' + text + '"';
};
/**
* Start timer 'label', analogous to JavaScript console timer.
* Usage: this.time( 'label' );
*
* @param string label Timer label
* @param[out] array timer Current time in milliseconds (float)
*/
this.time = function ( label ) {
this.timer[label] = new Date().getTime();
return;
};
/**
* Stop timer 'label', analogous to JavaScript console timer.
* Logs time in milliseconds since start to browser console.
* Usage: this.timeEnd( 'label' );
*
* @param string label Timer label
* @param bool noLog Do not log result
* @return float Time in milliseconds
*/
this.timeEnd = function ( label, noLog ) {
var diff = 0;
if ( this.timer[label] !== undefined ) {
var start = this.timer[label];
var stop = new Date().getTime();
diff = stop - start;
this.timer[label] = undefined;
if ( noLog !== true ) {
console.log( label + ': ' + diff.toFixed( 2 ) + ' ms' );
}
}
return diff;
};
/**
* Log recursion timer results to browser console.
* Usage: this.timeRecursionEnd();
*
* @param string text Text label for output
* @param[in] array recursionTimer Accumulated recursion times
*/
this.timeRecursionEnd = function ( text ) {
if ( this.recursionTimer.length > 1 ) {
// Subtract times spent in deeper recursions
var timerEnd = this.recursionTimer.length - 1;
for ( var i = 0; i < timerEnd; i ++ ) {
this.recursionTimer[i] -= this.recursionTimer[i + 1];
}
// Log recursion times
var timerLength = this.recursionTimer.length;
for ( var i = 0; i < timerLength; i ++ ) {
console.log( text + ' recursion ' + i + ': ' + this.recursionTimer[i].toFixed( 2 ) + ' ms' );
}
}
this.recursionTimer = [];
return;
};
/**
* Log variable values to debug console.
* Usage: this.debug( 'var', var );
*
* @param string name Object identifier
* @param mixed|undefined name Object to be logged
*/
this.debug = function ( name, object ) {
if ( object === undefined ) {
console.log( name );
}
else {
console.log( name + ': ' + object );
}
return;
};
/**
* Add script to document head.
*
* @param string code JavaScript code
*/
this.addScript = function ( code ) {
if ( document.getElementById( 'wikEdDiffBlockHandler' ) === null ) {
var script = document.createElement( 'script' );
script.id = 'wikEdDiffBlockHandler';
if ( script.innerText !== undefined ) {
script.innerText = code;
}
else {
script.textContent = code;
}
document.getElementsByTagName( 'head' )[0].appendChild( script );
}
return;
};
/**
* Add stylesheet to document head, cross-browser >= IE6.
*
* @param string css CSS code
*/
this.addStyleSheet = function ( css ) {
if ( document.getElementById( 'wikEdDiffStyles' ) === null ) {
// Replace mark symbols
css = css.replace( /\{cssMarkLeft\}/g, this.config.cssMarkLeft);
css = css.replace( /\{cssMarkRight\}/g, this.config.cssMarkRight);
var style = document.createElement( 'style' );
style.id = 'wikEdDiffStyles';
style.type = 'text/css';
if ( style.styleSheet !== undefined ) {
style.styleSheet.cssText = css;
}
else {
style.appendChild( document.createTextNode( css ) );
}
document.getElementsByTagName( 'head' )[0].appendChild( style );
}
return;
};
/**
* Recursive deep copy from target over source for customization import.
*
* @param object source Source object
* @param object target Target object
*/
this.deepCopy = function ( source, target ) {
for ( var key in source ) {
if ( Object.prototype.hasOwnProperty.call( source, key ) === true ) {
if ( typeof source[key] === 'object' ) {
this.deepCopy( source[key], target[key] );
}
else {
target[key] = source[key];
}
}
}
return;
};
// Initialze WikEdDiff object
this.init();
};
/**
* Data and methods for single text version (old or new one).
*
* @class WikEdDiffText
*/
WikEdDiff.WikEdDiffText = function ( text, parent ) {
/** @var WikEdDiff parent Parent object for configuration settings and debugging methods */
this.parent = parent;
/** @var string text Text of this version */
this.text = null;
/** @var array tokens Tokens list */
this.tokens = [];
/** @var int first, last First and last index of tokens list */
this.first = null;
this.last = null;
/** @var array words Word counts for version text */
this.words = {};
/**
* Constructor, initialize text object.
*
* @param string text Text of version
* @param WikEdDiff parent Parent, for configuration settings and debugging methods
*/
this.init = function () {
if ( typeof text !== 'string' ) {
text = text.toString();
}
// IE / Mac fix
this.text = text.replace( /\r\n?/g, '\n');
// Parse and count words and chunks for identification of unique real words
if ( this.parent.config.timer === true ) {
this.parent.time( 'wordParse' );
}
this.wordParse( this.parent.config.regExp.countWords );
this.wordParse( this.parent.config.regExp.countChunks );
if ( this.parent.config.timer === true ) {
this.parent.timeEnd( 'wordParse' );
}
return;
};
/**
* Parse and count words and chunks for identification of unique words.
*
* @param string regExp Regular expression for counting words
* @param[in] string text Text of version
* @param[out] array words Number of word occurrences
*/
this.wordParse = function ( regExp ) {
var regExpMatch = this.text.match( regExp );
if ( regExpMatch !== null ) {
var matchLength = regExpMatch.length;
for (var i = 0; i < matchLength; i ++) {
var word = regExpMatch[i];
if ( Object.prototype.hasOwnProperty.call( this.words, word ) === false ) {
this.words[word] = 1;
}
else {
this.words[word] ++;
}
}
}
return;
};
/**
* Split text into paragraph, line, sentence, chunk, word, or character tokens.
*
* @param string level Level of splitting: paragraph, line, sentence, chunk, word, or character
* @param int|null token Index of token to be split, otherwise uses full text
* @param[in] string text Full text to be split
* @param[out] array tokens Tokens list
* @param[out] int first, last First and last index of tokens list
*/
this.splitText = function ( level, token ) {
var prev = null;
var next = null;
var current = this.tokens.length;
var first = current;
var text = '';
// Split full text or specified token
if ( token === undefined ) {
text = this.text;
}
else {
prev = this.tokens[token].prev;
next = this.tokens[token].next;
text = this.tokens[token].token;
}
// Split text into tokens, regExp match as separator
var number = 0;
var split = [];
var regExpMatch;
var lastIndex = 0;
var regExp = this.parent.config.regExp.split[level];
while ( ( regExpMatch = regExp.exec( text ) ) !== null ) {
if ( regExpMatch.index > lastIndex ) {
split.push( text.substring( lastIndex, regExpMatch.index ) );
}
split.push( regExpMatch[0] );
lastIndex = regExp.lastIndex;
}
if ( lastIndex < text.length ) {
split.push( text.substring( lastIndex ) );
}
// Cycle through new tokens
var splitLength = split.length;
for ( var i = 0; i < splitLength; i ++ ) {
// Insert current item, link to previous
this.tokens.push( {
token: split[i],
prev: prev,
next: null,
link: null,
number: null,
unique: false
} );
number ++;
// Link previous item to current
if ( prev !== null ) {
this.tokens[prev].next = current;
}
prev = current;
current ++;
}
// Connect last new item and existing next item
if ( number > 0 && token !== undefined ) {
if ( prev !== null ) {
this.tokens[prev].next = next;
}
if ( next !== null ) {
this.tokens[next].prev = prev;
}
}
// Set text first and last token index
if ( number > 0 ) {
// Initial text split
if ( token === undefined ) {
this.first = 0;
this.last = prev;
}
// First or last token has been split
else {
if ( token === this.first ) {
this.first = first;
}
if ( token === this.last ) {
this.last = prev;
}
}
}
return;
};
/**
* Split unique unmatched tokens into smaller tokens.
*
* @param string level Level of splitting: line, sentence, chunk, or word
* @param[in] array tokens Tokens list
*/
this.splitRefine = function ( regExp ) {
// Cycle through tokens list
var i = this.first;
while ( i !== null ) {
// Refine unique unmatched tokens into smaller tokens
if ( this.tokens[i].link === null ) {
this.splitText( regExp, i );
}
i = this.tokens[i].next;
}
return;
};
/**
* Enumerate text token list before detecting blocks.
*
* @param[out] array tokens Tokens list
*/
this.enumerateTokens = function () {
// Enumerate tokens list
var number = 0;
var i = this.first;
while ( i !== null ) {
this.tokens[i].number = number;
number ++;
i = this.tokens[i].next;
}
return;
};
/**
* Dump tokens object to browser console.
*
* @param string name Text name
* @param[in] int first, last First and last index of tokens list
* @param[in] array tokens Tokens list
*/
this.debugText = function ( name ) {
var tokens = this.tokens;
var dump = 'first: ' + this.first + '\tlast: ' + this.last + '\n';
dump += '\ni \tlink \t(prev \tnext) \tuniq \t#num \t"token"\n';
var i = this.first;
while ( i !== null ) {
dump +=
i + ' \t' + tokens[i].link + ' \t(' + tokens[i].prev + ' \t' + tokens[i].next + ') \t' +
tokens[i].unique + ' \t#' + tokens[i].number + ' \t' +
parent.debugShortenText( tokens[i].token ) + '\n';
i = tokens[i].next;
}
console.log( name + ':\n' + dump );
return;
};
// Initialize WikEdDiffText object
this.init();
};
// </syntaxhighlight>