Nenhuma descrição

diff.js 19KB


  1. /* See LICENSE file for terms of use */
  2. /*
  3. * Text diff implementation.
  4. *
  5. * This library supports the following APIs:
  6. * JsDiff.diffChars: Character by character diff
  7. * JsDiff.diffWords: Word (as defined by \b regex) diff which ignores whitespace
  8. * JsDiff.diffLines: Line based diff
  9. *
  10. * JsDiff.diffCss: Diff targeted at CSS content
  11. *
  12. * These methods are based on the implementation proposed in
  13. * "An O(ND) Difference Algorithm and its Variations" (Myers, 1986).
  14. * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927
  15. */
  16. (function(global, undefined) {
  17. var JsDiff = (function() {
  18. /*jshint maxparams: 5*/
  19. /*istanbul ignore next*/
  /**
   * Array-like mapping helper that works with or without a native
   * Array.prototype.map.
   *
   * @param {Array|string} arr - Array-like value to iterate over.
   * @param {Function} mapper - Called as mapper(item, index, arr).
   * @param {*} [that] - Value used as `this` inside mapper.
   * @returns {Array} New array holding the mapper results.
   */
  function map(arr, mapper, that) {
    var nativeMap = Array.prototype.map;
    if (nativeMap) {
      return nativeMap.call(arr, mapper, that);
    }

    // Manual fallback for engines that lack Array.prototype.map.
    var total = arr.length,
        result = new Array(total);
    for (var index = 0; index < total; index++) {
      result[index] = mapper.call(that, arr[index], index, arr);
    }
    return result;
  }
  /**
   * Copies a search path. The components array is duplicated shallowly, so
   * the component objects themselves remain shared with the source path.
   *
   * @param {{newPos: number, components: Array}} path - Path to copy.
   * @returns {{newPos: number, components: Array}} Independent path record.
   */
  function clonePath(path) {
    var copy = {};
    copy.newPos = path.newPos;
    copy.components = path.components.slice(0);
    return copy;
  }
  /**
   * Returns a new array containing only the truthy entries of `array`,
   * in their original order.
   *
   * @param {Array} array - Input list (typically the output of String#split).
   * @returns {Array} Filtered copy; the input is not modified.
   */
  function removeEmpty(array) {
    var kept = [];
    for (var index = 0; index < array.length; index++) {
      var entry = array[index];
      if (!entry) {
        continue;
      }
      kept.push(entry);
    }
    return kept;
  }
  /**
   * Escapes the characters &, <, > and " so the text can be embedded in
   * HTML/XML output.
   *
   * @param {string} s - Raw text.
   * @returns {string} Text with the four characters replaced by entities.
   */
  function escapeHTML(s) {
    // The ampersand is handled first so the entities produced by the later
    // replacements are not escaped a second time.
    return s
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');
  }
  /**
   * Fills in the `value` text of every component by walking the two token
   * lists in step with the component counts.
   *
   * @param {Array<Object>} components - Records with `count` and optional
   *   `added` / `removed` flags; each one is mutated to receive `value`.
   * @param {Array<string>} newString - Tokens of the new text.
   * @param {Array<string>} oldString - Tokens of the old text.
   * @param {boolean} [useLongestToken] - For unchanged runs, keep whichever
   *   of the old/new token is longer instead of always the new one.
   * @returns {Array<Object>} The same `components` array.
   */
  function buildValues(components, newString, oldString, useLongestToken) {
    var newPos = 0,
        oldPos = 0;

    for (var index = 0, total = components.length; index < total; index++) {
      var component = components[index],
          count = component.count;

      if (component.removed) {
        // Removed text only exists in the old token list.
        component.value = oldString.slice(oldPos, oldPos + count).join('');
        oldPos += count;
        continue;
      }

      var tokens = newString.slice(newPos, newPos + count);
      if (!component.added && useLongestToken) {
        tokens = map(tokens, function(token, offset) {
          var oldToken = oldString[oldPos + offset];
          return oldToken.length > token.length ? oldToken : token;
        });
      }
      component.value = tokens.join('');
      newPos += count;

      // Unchanged runs consume tokens from both sides.
      if (!component.added) {
        oldPos += count;
      }
    }

    return components;
  }
  /**
   * Generic differ. Instances compare two strings token by token; the
   * `tokenize` and `equals` methods can be overridden per instance to change
   * the granularity and the comparison rule.
   *
   * @param {boolean} [ignoreWhitespace] - When truthy, any two tokens that
   *   contain no non-whitespace character are treated as equal.
   */
  var Diff = function(ignoreWhitespace) {
    this.ignoreWhitespace = ignoreWhitespace;
  };

  Diff.prototype = {
    /**
     * Computes the list of change components turning oldString into newString.
     *
     * @param {string} oldString - Original text.
     * @param {string} newString - Updated text.
     * @param {Function} [callback] - When given, the work is spread over
     *   setTimeout ticks and the result is delivered as callback(undefined,
     *   components); the method itself then returns true or undefined.
     * @returns {Array<Object>|boolean|undefined} The components in synchronous
     *   mode.
     */
    diff: function(oldString, newString, callback) {
      var self = this;

      // Hands the result to the caller: by return value in synchronous mode,
      // through the callback (on a later tick) in asynchronous mode, where
      // `true` is returned as the "finished" marker.
      function done(value) {
        if (!callback) {
          return value;
        }
        setTimeout(function() { callback(undefined, value); }, 0);
        return true;
      }

      // Identity case (this is the unrolled editLength == 0 step).
      if (newString === oldString) {
        return done([{ value: newString }]);
      }
      // One side empty: the whole other side is a single removal / addition.
      if (!newString) {
        return done([{ value: oldString, removed: true }]);
      }
      if (!oldString) {
        return done([{ value: newString, added: true }]);
      }

      newString = this.tokenize(newString);
      oldString = this.tokenize(oldString);

      var newLen = newString.length,
          oldLen = oldString.length,
          maxEditLength = newLen + oldLen,
          editLength = 1,
          bestPath = [{ newPos: -1, components: [] }];

      // Seed editLength = 0, i.e. consume whatever both inputs start with.
      var oldPos = this.extractCommon(bestPath[0], newString, oldString, 0);
      if (bestPath[0].newPos + 1 >= newLen && oldPos + 1 >= oldLen) {
        // Equal according to the tokenizer and the equality rule.
        return done([{ value: newString.join('') }]);
      }

      // Tries every diagonal reachable with the current edit length. Returns
      // the value produced by done() once both inputs are exhausted, and
      // otherwise bumps editLength and returns undefined.
      function execEditLength() {
        for (var diagonalPath = -editLength; diagonalPath <= editLength; diagonalPath += 2) {
          var addPath = bestPath[diagonalPath - 1],
              removePath = bestPath[diagonalPath + 1],
              removeOldPos = (removePath ? removePath.newPos : 0) - diagonalPath;

          if (addPath) {
            // Nobody else will read this slot again, release it.
            bestPath[diagonalPath - 1] = undefined;
          }

          var canAdd = addPath && addPath.newPos + 1 < newLen,
              canRemove = removePath && 0 <= removeOldPos && removeOldPos < oldLen;
          if (!canAdd && !canRemove) {
            // Dead end: nothing can be extended onto this diagonal.
            bestPath[diagonalPath] = undefined;
            continue;
          }

          // Branch from the neighbour that has advanced furthest through the
          // new tokens without leaving the bounds of the edit graph.
          var basePath;
          if (canAdd && (!canRemove || addPath.newPos >= removePath.newPos)) {
            basePath = addPath; // Already detached from bestPath, no clone needed.
            basePath.newPos++;
            self.pushComponent(basePath.components, true, undefined);
          } else {
            basePath = clonePath(removePath);
            self.pushComponent(basePath.components, undefined, true);
          }

          var reachedOldPos = self.extractCommon(basePath, newString, oldString, diagonalPath);

          // Both token lists consumed: this path is the answer.
          if (basePath.newPos + 1 >= newLen && reachedOldPos + 1 >= oldLen) {
            return done(buildValues(basePath.components, newString, oldString, self.useLongestToken));
          }

          // Otherwise remember it as the best candidate for this diagonal.
          bestPath[diagonalPath] = basePath;
        }

        editLength++;
      }

      // Drive execEditLength until it yields a value: in a plain loop for
      // synchronous callers, one setTimeout tick per edit length otherwise.
      if (!callback) {
        while (editLength <= maxEditLength) {
          var ret = execEditLength();
          if (ret) {
            return ret;
          }
        }
        return;
      }

      (function exec() {
        setTimeout(function() {
          // This should not happen, but we want to be safe.
          /*istanbul ignore next */
          if (editLength > maxEditLength) {
            return callback();
          }

          if (!execEditLength()) {
            exec();
          }
        }, 0);
      })();
    },

    /**
     * Records one more added/removed token at the end of `components`,
     * extending the last component when it has the same flags.
     *
     * @param {Array<Object>} components - Component list to update in place.
     * @param {boolean|undefined} added - Flag for the new token.
     * @param {boolean|undefined} removed - Flag for the new token.
     */
    pushComponent: function(components, added, removed) {
      var lastIndex = components.length - 1,
          last = components[lastIndex];

      if (!last || last.added !== added || last.removed !== removed) {
        components.push({count: 1, added: added, removed: removed });
        return;
      }

      // Replace rather than mutate: cloned paths copy only the array, so the
      // component object may still be shared with another path.
      components[lastIndex] = {count: last.count + 1, added: added, removed: removed };
    },

    /**
     * Advances `basePath` along its diagonal for as long as the next tokens
     * on both sides are equal, appending an unchanged component if anything
     * was consumed.
     *
     * @param {Object} basePath - Path record; `newPos` and `components` are updated.
     * @param {Array<string>} newString - Tokens of the new text.
     * @param {Array<string>} oldString - Tokens of the old text.
     * @param {number} diagonalPath - Diagonal index (newPos minus oldPos).
     * @returns {number} Position reached in the old token list.
     */
    extractCommon: function(basePath, newString, oldString, diagonalPath) {
      var newLen = newString.length,
          oldLen = oldString.length,
          startPos = basePath.newPos,
          newPos = startPos,
          oldPos = newPos - diagonalPath;

      for (;;) {
        var canStep = newPos + 1 < newLen && oldPos + 1 < oldLen &&
            this.equals(newString[newPos + 1], oldString[oldPos + 1]);
        if (!canStep) {
          break;
        }
        newPos++;
        oldPos++;
      }

      var commonCount = newPos - startPos;
      if (commonCount) {
        basePath.components.push({count: commonCount});
      }

      basePath.newPos = newPos;
      return oldPos;
    },

    /**
     * Token comparison rule.
     *
     * @param {string} left - Token from one side.
     * @param {string} right - Token from the other side.
     * @returns {*} true for identical tokens; otherwise the (possibly falsy,
     *   possibly undefined) result of the ignore-whitespace check.
     */
    equals: function(left, right) {
      if (left === right) {
        return true;
      }
      var hasNonSpace = /\S/;
      return this.ignoreWhitespace && !hasNonSpace.test(left) && !hasNonSpace.test(right);
    },

    /**
     * Default tokenizer: one token per UTF-16 code unit.
     *
     * @param {string} value - Text to split.
     * @returns {Array<string>} Tokens.
     */
    tokenize: function(value) {
      var separator = '';
      return value.split(separator);
    }
  };
  // Character differ: relies on the default one-character tokenizer.
  var CharDiff = new Diff();

  // Word differs. WordDiff is created with the ignore-whitespace flag set;
  // WordWithSpaceDiff is not. Both share one tokenizer function.
  var WordDiff = new Diff(true);
  var WordWithSpaceDiff = new Diff();

  // Splits on whitespace runs and word boundaries, keeping the whitespace
  // runs as tokens and dropping the empty strings produced by the split.
  WordWithSpaceDiff.tokenize = function(value) {
    var pieces = value.split(/(\s+|\b)/);
    return removeEmpty(pieces);
  };
  WordDiff.tokenize = WordWithSpaceDiff.tokenize;
  // CSS differ, created with the ignore-whitespace flag set. Tokens are the
  // punctuation characters { } : ; , and whitespace runs, plus the text
  // between them; empty strings produced by the split are dropped.
  var CssDiff = new Diff(true);
  CssDiff.tokenize = function(value) {
    var pieces = value.split(/([{}:;,]|\s+)/);
    return removeEmpty(pieces);
  };
  // Line differ: every token is one line including its terminator.
  var LineDiff = new Diff();
  LineDiff.tokenize = function(value) {
    var pieces = value.split(/^/m),
        retLines = [];

    for (var index = 0; index < pieces.length; index++) {
      var piece = pieces[index],
          previous = pieces[index - 1];

      // The multiline split breaks "\r\n" into "...\r" and "\n"; glue the
      // lone "\n" back onto the line that ended with "\r".
      var completesCrLf = piece === '\n' && previous &&
          previous.charAt(previous.length - 1) === '\r';

      if (completesCrLf) {
        retLines[retLines.length - 1] += '\n';
      } else if (piece) {
        retLines.push(piece);
      }
    }

    return retLines;
  };
  // Sentence differ: a sentence token starts at a non-whitespace character
  // and runs lazily to the first '.', '!' or '?' that is followed by
  // whitespace or the end of the input. Text between sentences is kept as
  // its own token; empty strings produced by the split are dropped.
  var SentenceDiff = new Diff();
  SentenceDiff.tokenize = function(value) {
    var pieces = value.split(/(\S.+?[.!?])(?=\s+|$)/);
    return removeEmpty(pieces);
  };
  // JSON differ: works line by line on pretty-printed JSON text.
  var JsonDiff = new Diff();

  // Two lines that differ only by a dangling comma are considered equal, and
  // the longer token (the one that has the comma) is kept in the output,
  // which turns out to give the nicest result.
  JsonDiff.useLongestToken = true;
  JsonDiff.tokenize = LineDiff.tokenize;
  JsonDiff.equals = function(left, right) {
    // Drop a comma that sits directly before a line break on both sides, then
    // defer to the line differ's comparison.
    var trailingComma = /,([\r\n])/g,
        leftBare = left.replace(trailingComma, '$1'),
        rightBare = right.replace(trailingComma, '$1');
    return LineDiff.equals(leftBare, rightBare);
  };
  var objectPrototypeToString = Object.prototype.toString;

  /**
   * Builds a copy of `obj` in which every plain object has its own
   * enumerable keys inserted in sorted order, so that two structurally equal
   * values serialize to identical JSON text.
   *
   * Mirrors what JSON.stringify looks at: a value with a `toJSON` method
   * (for example a Date) is replaced by the result of calling it, and only
   * own properties are copied; inherited enumerable properties are skipped.
   *
   * Circular references are handled by bailing out when an object is already
   * on the "stack" of items being processed: the copy that is being built for
   * that object is reused.
   *
   * @param {*} obj - Value to canonicalize.
   * @param {Array} [stack] - Objects currently being processed (internal).
   * @param {Array} [replacementStack] - Copies matching `stack` entry by entry (internal).
   * @returns {*} Canonical copy for arrays and objects; other values are returned as is.
   */
  function canonicalize(obj, stack, replacementStack) {
    stack = stack || [];
    replacementStack = replacementStack || [];

    var i,
        key;

    for (i = 0; i < stack.length; i += 1) {
      if (stack[i] === obj) {
        return replacementStack[i];
      }
    }

    // Same substitution JSON.stringify performs before serializing a value.
    if (obj && typeof obj.toJSON === 'function') {
      obj = obj.toJSON();
    }

    var canonicalizedObj;

    if ('[object Array]' === objectPrototypeToString.call(obj)) {
      stack.push(obj);
      canonicalizedObj = new Array(obj.length);
      replacementStack.push(canonicalizedObj);
      for (i = 0; i < obj.length; i += 1) {
        canonicalizedObj[i] = canonicalize(obj[i], stack, replacementStack);
      }
      stack.pop();
      replacementStack.pop();
    } else if (typeof obj === 'object' && obj !== null) {
      stack.push(obj);
      canonicalizedObj = {};
      replacementStack.push(canonicalizedObj);

      var sortedKeys = [];
      for (key in obj) {
        // Skip inherited properties; JSON.stringify would not emit them.
        if (Object.prototype.hasOwnProperty.call(obj, key)) {
          sortedKeys.push(key);
        }
      }
      sortedKeys.sort();

      for (i = 0; i < sortedKeys.length; i += 1) {
        key = sortedKeys[i];
        canonicalizedObj[key] = canonicalize(obj[key], stack, replacementStack);
      }
      stack.pop();
      replacementStack.pop();
    } else {
      canonicalizedObj = obj;
    }

    return canonicalizedObj;
  }
  296. return {
  297. Diff: Diff,
  298. diffChars: function(oldStr, newStr, callback) { return CharDiff.diff(oldStr, newStr, callback); },
  299. diffWords: function(oldStr, newStr, callback) { return WordDiff.diff(oldStr, newStr, callback); },
  300. diffWordsWithSpace: function(oldStr, newStr, callback) { return WordWithSpaceDiff.diff(oldStr, newStr, callback); },
  301. diffLines: function(oldStr, newStr, callback) { return LineDiff.diff(oldStr, newStr, callback); },
  302. diffSentences: function(oldStr, newStr, callback) { return SentenceDiff.diff(oldStr, newStr, callback); },
  303. diffCss: function(oldStr, newStr, callback) { return CssDiff.diff(oldStr, newStr, callback); },
  304. diffJson: function(oldObj, newObj, callback) {
  305. return JsonDiff.diff(
  306. typeof oldObj === 'string' ? oldObj : JSON.stringify(canonicalize(oldObj), undefined, ' '),
  307. typeof newObj === 'string' ? newObj : JSON.stringify(canonicalize(newObj), undefined, ' '),
  308. callback
  309. );
  310. },
  311. createPatch: function(fileName, oldStr, newStr, oldHeader, newHeader) {
  312. var ret = [];
  313. ret.push('Index: ' + fileName);
  314. ret.push('===================================================================');
  315. ret.push('--- ' + fileName + (typeof oldHeader === 'undefined' ? '' : '\t' + oldHeader));
  316. ret.push('+++ ' + fileName + (typeof newHeader === 'undefined' ? '' : '\t' + newHeader));
  317. var diff = LineDiff.diff(oldStr, newStr);
  318. if (!diff[diff.length-1].value) {
  319. diff.pop(); // Remove trailing newline add
  320. }
  321. diff.push({value: '', lines: []}); // Append an empty value to make cleanup easier
  322. function contextLines(lines) {
  323. return map(lines, function(entry) { return ' ' + entry; });
  324. }
  325. function eofNL(curRange, i, current) {
  326. var last = diff[diff.length-2],
  327. isLast = i === diff.length-2,
  328. isLastOfType = i === diff.length-3 && (current.added !== last.added || current.removed !== last.removed);
  329. // Figure out if this is the last line for the given file and missing NL
  330. if (!/\n$/.test(current.value) && (isLast || isLastOfType)) {
  331. curRange.push('\\ No newline at end of file');
  332. }
  333. }
  334. var oldRangeStart = 0, newRangeStart = 0, curRange = [],
  335. oldLine = 1, newLine = 1;
  336. for (var i = 0; i < diff.length; i++) {
  337. var current = diff[i],
  338. lines = current.lines || current.value.replace(/\n$/, '').split('\n');
  339. current.lines = lines;
  340. if (current.added || current.removed) {
  341. if (!oldRangeStart) {
  342. var prev = diff[i-1];
  343. oldRangeStart = oldLine;
  344. newRangeStart = newLine;
  345. if (prev) {
  346. curRange = contextLines(prev.lines.slice(-4));
  347. oldRangeStart -= curRange.length;
  348. newRangeStart -= curRange.length;
  349. }
  350. }
  351. curRange.push.apply(curRange, map(lines, function(entry) { return (current.added?'+':'-') + entry; }));
  352. eofNL(curRange, i, current);
  353. if (current.added) {
  354. newLine += lines.length;
  355. } else {
  356. oldLine += lines.length;
  357. }
  358. } else {
  359. if (oldRangeStart) {
  360. // Close out any changes that have been output (or join overlapping)
  361. if (lines.length <= 8 && i < diff.length-2) {
  362. // Overlapping
  363. curRange.push.apply(curRange, contextLines(lines));
  364. } else {
  365. // end the range and output
  366. var contextSize = Math.min(lines.length, 4);
  367. ret.push(
  368. '@@ -' + oldRangeStart + ',' + (oldLine-oldRangeStart+contextSize)
  369. + ' +' + newRangeStart + ',' + (newLine-newRangeStart+contextSize)
  370. + ' @@');
  371. ret.push.apply(ret, curRange);
  372. ret.push.apply(ret, contextLines(lines.slice(0, contextSize)));
  373. if (lines.length <= 4) {
  374. eofNL(ret, i, current);
  375. }
  376. oldRangeStart = 0; newRangeStart = 0; curRange = [];
  377. }
  378. }
  379. oldLine += lines.length;
  380. newLine += lines.length;
  381. }
  382. }
  383. return ret.join('\n') + '\n';
  384. },
  385. applyPatch: function(oldStr, uniDiff) {
  386. var diffstr = uniDiff.split('\n');
  387. var diff = [];
  388. var remEOFNL = false,
  389. addEOFNL = false;
  390. for (var i = (diffstr[0][0]==='I'?4:0); i < diffstr.length; i++) {
  391. if(diffstr[i][0] === '@') {
  392. var meh = diffstr[i].split(/@@ -(\d+),(\d+) \+(\d+),(\d+) @@/);
  393. diff.unshift({
  394. start:meh[3],
  395. oldlength:meh[2],
  396. oldlines:[],
  397. newlength:meh[4],
  398. newlines:[]
  399. });
  400. } else if(diffstr[i][0] === '+') {
  401. diff[0].newlines.push(diffstr[i].substr(1));
  402. } else if(diffstr[i][0] === '-') {
  403. diff[0].oldlines.push(diffstr[i].substr(1));
  404. } else if(diffstr[i][0] === ' ') {
  405. diff[0].newlines.push(diffstr[i].substr(1));
  406. diff[0].oldlines.push(diffstr[i].substr(1));
  407. } else if(diffstr[i][0] === '\\') {
  408. if (diffstr[i-1][0] === '+') {
  409. remEOFNL = true;
  410. } else if(diffstr[i-1][0] === '-') {
  411. addEOFNL = true;
  412. }
  413. }
  414. }
  415. var str = oldStr.split('\n');
  416. for (var i = diff.length - 1; i >= 0; i--) {
  417. var d = diff[i];
  418. for (var j = 0; j < d.oldlength; j++) {
  419. if(str[d.start-1+j] !== d.oldlines[j]) {
  420. return false;
  421. }
  422. }
  423. Array.prototype.splice.apply(str,[d.start-1,+d.oldlength].concat(d.newlines));
  424. }
  425. if (remEOFNL) {
  426. while (!str[str.length-1]) {
  427. str.pop();
  428. }
  429. } else if (addEOFNL) {
  430. str.push('');
  431. }
  432. return str.join('\n');
  433. },
  434. convertChangesToXML: function(changes){
  435. var ret = [];
  436. for ( var i = 0; i < changes.length; i++) {
  437. var change = changes[i];
  438. if (change.added) {
  439. ret.push('<ins>');
  440. } else if (change.removed) {
  441. ret.push('<del>');
  442. }
  443. ret.push(escapeHTML(change.value));
  444. if (change.added) {
  445. ret.push('</ins>');
  446. } else if (change.removed) {
  447. ret.push('</del>');
  448. }
  449. }
  450. return ret.join('');
  451. },
  452. // See: http://code.google.com/p/google-diff-match-patch/wiki/API
  453. convertChangesToDMP: function(changes){
  454. var ret = [], change;
  455. for ( var i = 0; i < changes.length; i++) {
  456. change = changes[i];
  457. ret.push([(change.added ? 1 : change.removed ? -1 : 0), change.value]);
  458. }
  459. return ret;
  460. },
  461. canonicalize: canonicalize
  462. };
  463. })();
  // Publish JsDiff: CommonJS when module.exports is available, otherwise AMD
  // when define.amd is available, otherwise a property on the global object
  // (only if that name is still free).
  /*istanbul ignore next */
  if (typeof module === 'undefined' || !module.exports) {
    if (typeof define === 'function' && define.amd) {
      /*global define */
      define([], function() { return JsDiff; });
    } else if (typeof global.JsDiff === 'undefined') {
      global.JsDiff = JsDiff;
    }
  } else {
    module.exports = JsDiff;
  }
  475. })(this);