From 5e2808445a547318c0be154a5c7211e2674bb867 Mon Sep 17 00:00:00 2001 From: khiemnd777 Date: Fri, 5 Oct 2018 11:49:32 +0700 Subject: [PATCH] [add] item match serves object hash --- Src/JsonDiffPatchDotNet.UnitTests/DiffUnitTests.cs | 22 ++- Src/JsonDiffPatchDotNet/ArrayDiffMode.cs | 2 +- Src/JsonDiffPatchDotNet/DefaultItemMatch.cs | 17 ++ Src/JsonDiffPatchDotNet/ItemMatch.cs | 49 ++++++ Src/JsonDiffPatchDotNet/JsonDiffPatch.cs | 174 ++++++++++++--------- Src/JsonDiffPatchDotNet/Lcs.cs | 78 +++++---- Src/JsonDiffPatchDotNet/Options.cs | 61 +++++--- 7 files changed, 262 insertions(+), 141 deletions(-) create mode 100644 Src/JsonDiffPatchDotNet/DefaultItemMatch.cs create mode 100644 Src/JsonDiffPatchDotNet/ItemMatch.cs diff --git a/Src/JsonDiffPatchDotNet.UnitTests/DiffUnitTests.cs b/Src/JsonDiffPatchDotNet.UnitTests/DiffUnitTests.cs index 5b517c2..9e7abb7 100644 --- a/Src/JsonDiffPatchDotNet.UnitTests/DiffUnitTests.cs +++ b/Src/JsonDiffPatchDotNet.UnitTests/DiffUnitTests.cs @@ -224,9 +224,9 @@ namespace JsonDiffPatchDotNet.UnitTests [Test] public void Diff_EfficientArrayDiffSameLengthNested_ValidDiff() { - var jdp = new JsonDiffPatch(new Options { ArrayDiff = ArrayDiffMode.Efficient }); - var left = JToken.Parse(@"[1,2,{""p"":false},4]"); - var right = JToken.Parse(@"[1,2,{""p"":true},4]"); + var jdp = new JsonDiffPatch(new Options { ArrayDiff = ArrayDiffMode.Efficient, ObjectHash = (jObj) => jObj["Id"].Value() }); + var left = JToken.Parse(@"[1,2,{""Id"" : ""F12B21EF-F57D-4958-ADDC-A3F52EC25EC8"", ""p"":false},4]"); + var right = JToken.Parse(@"[1,2,{""Id"" : ""F12B21EF-F57D-4958-ADDC-A3F52EC25EC8"", ""p"":true},4]"); JObject diff = jdp.Diff(left, right) as JObject; @@ -235,7 +235,21 @@ namespace JsonDiffPatchDotNet.UnitTests Assert.IsNotNull(diff["2"]); } - [Test] + [Test] + public void Diff_EfficientArrayDiffWithComplexObject_ValidDiff() + { + var jdp = new JsonDiffPatch(new Options { ArrayDiff = ArrayDiffMode.Efficient, ObjectHash = (jObj) => jObj["Id"].Value() 
}); + //var jdp = new JsonDiffPatch(new Options { ArrayDiff = ArrayDiffMode.Efficient }); + var left = JToken.Parse(@"[{""Id"" : ""F12B21EF-F57D-4958-ADDC-A3F52EC25EC8"", ""p"":false}, {""Id"" : ""F12B21EF-F57D-4958-ADDC-A3F52EC25EC9"", ""p"":true}]"); + var right = JToken.Parse(@"[{""Id"" : ""F12B21EF-F57D-4958-ADDC-A3F52EC25EC8"", ""p"":true}, {""Id"" : ""F12B21EF-F57D-4958-ADDC-A3F52EC25EC10"", ""p"":false}]"); + + JObject diff = jdp.Diff(left, right) as JObject; + + Assert.IsNotNull(diff); + Assert.AreEqual(4, diff.Properties().Count()); + } + + [Test] public void Diff_EfficientArrayDiffSameWithObject_NoDiff() { var jdp = new JsonDiffPatch(new Options { ArrayDiff = ArrayDiffMode.Efficient }); diff --git a/Src/JsonDiffPatchDotNet/ArrayDiffMode.cs b/Src/JsonDiffPatchDotNet/ArrayDiffMode.cs index c53f106..5b50844 100644 --- a/Src/JsonDiffPatchDotNet/ArrayDiffMode.cs +++ b/Src/JsonDiffPatchDotNet/ArrayDiffMode.cs @@ -16,6 +16,6 @@ /// the entire left and entire right arrays are added to the patch document as a simple /// JSON token replace. If they are the same, then token is skipped in the patch document. 
/// - Simple, + Simple } } diff --git a/Src/JsonDiffPatchDotNet/DefaultItemMatch.cs b/Src/JsonDiffPatchDotNet/DefaultItemMatch.cs new file mode 100644 index 0000000..a8da2d3 --- /dev/null +++ b/Src/JsonDiffPatchDotNet/DefaultItemMatch.cs @@ -0,0 +1,17 @@ +using Newtonsoft.Json.Linq; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace JsonDiffPatchDotNet +{ + public class DefaultItemMatch : ItemMatch + { + public DefaultItemMatch(Func objectHash):base(objectHash) + { + + } + } +} diff --git a/Src/JsonDiffPatchDotNet/ItemMatch.cs b/Src/JsonDiffPatchDotNet/ItemMatch.cs new file mode 100644 index 0000000..4010c28 --- /dev/null +++ b/Src/JsonDiffPatchDotNet/ItemMatch.cs @@ -0,0 +1,49 @@ +using Newtonsoft.Json.Linq; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace JsonDiffPatchDotNet +{ + public abstract class ItemMatch + { + internal Func ObjectHash; + + protected ItemMatch() + { + + } + + protected ItemMatch(Func objectHash) + { + ObjectHash = objectHash; + } + + public virtual bool Match(JToken object1, JToken object2) + { + return Match(object1, object2, ObjectHash); + } + + public virtual bool Match(JToken object1, JToken object2, Func objectHash) + { + if(objectHash == null || object1.Type != JTokenType.Object) + { + return JToken.DeepEquals(object1, object2); + } + + var hash1 = objectHash.Invoke(object1); + if(hash1 == null) + { + return false; + } + var hash2 = objectHash.Invoke(object2); + if(hash2 == null) + { + return false; + } + return hash1.Equals(hash2); + } + } +} diff --git a/Src/JsonDiffPatchDotNet/JsonDiffPatch.cs b/Src/JsonDiffPatchDotNet/JsonDiffPatch.cs index 585da83..80838d3 100644 --- a/Src/JsonDiffPatchDotNet/JsonDiffPatch.cs +++ b/Src/JsonDiffPatchDotNet/JsonDiffPatch.cs @@ -37,6 +37,9 @@ namespace JsonDiffPatchDotNet /// JSON Patch Document public JToken Diff(JToken left, JToken 
right) { + var objectHash = this._options.ObjectHash; + var itemMatch = new DefaultItemMatch(objectHash); + if (left == null) left = new JValue(""); if (right == null) @@ -66,7 +69,7 @@ namespace JsonDiffPatchDotNet : null; } - if (!JToken.DeepEquals(left, right)) + if (!itemMatch.Match(left, right)) return new JArray(left, right); return null; @@ -356,92 +359,117 @@ namespace JsonDiffPatchDotNet return null; } - private JObject ArrayDiff(JArray left, JArray right) - { - var result = JObject.Parse(@"{ ""_t"": ""a"" }"); + private JObject ArrayDiff(JArray left, JArray right) + { + var objectHash = this._options.ObjectHash; + var itemMatch = new DefaultItemMatch(objectHash); + var result = JObject.Parse(@"{ ""_t"": ""a"" }"); - int commonHead = 0; - int commonTail = 0; + int commonHead = 0; + int commonTail = 0; - if (JToken.DeepEquals(left, right)) - return null; + if (itemMatch.Match(left, right)) + return null; - // Find common head - while (commonHead < left.Count - && commonHead < right.Count - && JToken.DeepEquals(left[commonHead], right[commonHead])) - { - commonHead++; - } + var childContext = new List(); - // Find common tail - while (commonTail + commonHead < left.Count - && commonTail + commonHead < right.Count - && JToken.DeepEquals(left[left.Count - 1 - commonTail], right[right.Count - 1 - commonTail])) - { - commonTail++; - } + // Find common head + while (commonHead < left.Count + && commonHead < right.Count + && itemMatch.Match(left[commonHead], right[commonHead])) + { + var index = commonHead; + var child = Diff(left[index], right[index]); + if(child != null) + { + result[$"{index}"] = child; + } + commonHead++; + } - if (commonHead + commonTail == left.Count) - { - // Trivial case, a block (1 or more consecutive items) was added - for (int index = commonHead; index < right.Count - commonTail; ++index) - { - result[$"{index}"] = new JArray(right[index]); - } + // Find common tail + while (commonTail + commonHead < left.Count + && commonTail + 
commonHead < right.Count + && itemMatch.Match(left[left.Count - 1 - commonTail], right[right.Count - 1 - commonTail])) + { + var index1 = left.Count - 1 - commonTail; + var index2 = right.Count - 1 - commonTail; + var child = Diff(left[index1], right[index2]); + if(child != null) + { + result[$"{index2}"] = child; + } + commonTail++; + } - return result; - } - if (commonHead + commonTail == right.Count) - { - // Trivial case, a block (1 or more consecutive items) was removed - for (int index = commonHead; index < left.Count - commonTail; ++index) - { - result[$"_{index}"] = new JArray(left[index], 0, (int)DiffOperation.Deleted); - } + if (commonHead + commonTail == left.Count) + { + // Trivial case, a block (1 or more consecutive items) was added + for (int index = commonHead; index < right.Count - commonTail; ++index) + { + result[$"{index}"] = new JArray(right[index]); + } - return result; - } + return result; + } + if (commonHead + commonTail == right.Count) + { + // Trivial case, a block (1 or more consecutive items) was removed + for (int index = commonHead; index < left.Count - commonTail; ++index) + { + if (result.ContainsKey(index.ToString())) + { + result.Remove(index.ToString()); + } + result[$"_{index}"] = new JArray(left[index], 0, (int)DiffOperation.Deleted); + } - // Complex Diff, find the LCS (Longest Common Subsequence) - List trimmedLeft = left.ToList().GetRange(commonHead, left.Count - commonTail - commonHead); - List trimmedRight = right.ToList().GetRange(commonHead, right.Count - commonTail - commonHead); - Lcs lcs = Lcs.Get(trimmedLeft, trimmedRight); + return result; + } - for (int index = commonHead; index < left.Count - commonTail; ++index) - { - if (lcs.Indices1.IndexOf(index - commonHead) < 0) - { - // Removed - result[$"_{index}"] = new JArray(left[index], 0, (int)DiffOperation.Deleted); - } - } + // Complex Diff, find the LCS (Longest Common Subsequence) + List trimmedLeft = left.ToList().GetRange(commonHead, left.Count - commonTail - 
commonHead); + List trimmedRight = right.ToList().GetRange(commonHead, right.Count - commonTail - commonHead); + Lcs lcs = Lcs.Get(trimmedLeft, trimmedRight, itemMatch); - for (int index = commonHead; index < right.Count - commonTail; index++) - { - int indexRight = lcs.Indices2.IndexOf(index - commonHead); + for (int index = commonHead; index < left.Count - commonTail; ++index) + { + if (lcs.Indices1.IndexOf(index - commonHead) < 0) + { + // Removed + if (result.ContainsKey(index.ToString())) + { + result.Remove(index.ToString()); + } + result[$"_{index}"] = new JArray(left[index], 0, (int)DiffOperation.Deleted); + } + } - if (indexRight < 0) - { - // Added - result[$"{index}"] = new JArray(right[index]); - } - else - { - int li = lcs.Indices1[indexRight] + commonHead; - int ri = lcs.Indices2[indexRight] + commonHead; + for (int index = commonHead; index < right.Count - commonTail; index++) + { + int indexRight = lcs.Indices2.IndexOf(index - commonHead); - JToken diff = Diff(left[li], right[ri]); + if (indexRight < 0) + { + // Added + result[$"{index}"] = new JArray(right[index]); + } + else + { + int li = lcs.Indices1[indexRight] + commonHead; + int ri = lcs.Indices2[indexRight] + commonHead; - if (diff != null) - { - result[$"{index}"] = diff; - } - } - } + JToken diff = Diff(left[li], right[ri]); - return result; - } + if (diff != null) + { + result[$"{index}"] = diff; + } + } + } + + return result; + } private JObject ObjectPatch(JObject obj, JObject patch) { diff --git a/Src/JsonDiffPatchDotNet/Lcs.cs b/Src/JsonDiffPatchDotNet/Lcs.cs index 04ed310..26e103b 100644 --- a/Src/JsonDiffPatchDotNet/Lcs.cs +++ b/Src/JsonDiffPatchDotNet/Lcs.cs @@ -4,51 +4,51 @@ using Newtonsoft.Json.Linq; namespace JsonDiffPatchDotNet { - internal class Lcs - { - internal List Sequence { get; set; } + internal class Lcs + { + internal List Sequence { get; set; } - internal List Indices1 { get; set; } + internal List Indices1 { get; set; } - internal List Indices2 { get; set; } + 
internal List Indices2 { get; set; } - private Lcs() - { - Sequence = new List(); - Indices1 = new List(); - Indices2 = new List(); - } + private Lcs() + { + Sequence = new List(); + Indices1 = new List(); + Indices2 = new List(); + } - internal static Lcs Get(List left, List right) - { - var matrix = LcsInternal(left, right); - var result = Backtrack(matrix, left, right, left.Count, right.Count); - return result; - } + internal static Lcs Get(List left, List right, ItemMatch match) + { + var matrix = LcsInternal(left, right, match); + var result = Backtrack(matrix, left, right, left.Count, right.Count, match); + return result; + } - private static int[,] LcsInternal(List left, List right) - { - var arr = new int[left.Count + 1, right.Count + 1]; + private static int[,] LcsInternal(List left, List right, ItemMatch match) + { + var arr = new int[left.Count + 1, right.Count + 1]; - for (int i = 1; i <= left.Count; i++) - { - for (int j = 1; j <= right.Count; j++) - { - if (JToken.DeepEquals(left[i - 1], right[j - 1])) - { - arr[i, j] = arr[i - 1, j - 1] + 1; - } - else - { - arr[i, j] = Math.Max(arr[i - 1, j], arr[i, j - 1]); - } - } - } + for (int i = 1; i <= left.Count; i++) + { + for (int j = 1; j <= right.Count; j++) + { + if (match.Match(left[i - 1], right[j - 1])) + { + arr[i, j] = arr[i - 1, j - 1] + 1; + } + else + { + arr[i, j] = Math.Max(arr[i - 1, j], arr[i, j - 1]); + } + } + } - return arr; - } + return arr; + } - private static Lcs Backtrack(int[,] matrix, List left, List right, int li, int ri) + private static Lcs Backtrack(int[,] matrix, List left, List right, int li, int ri, ItemMatch match) { var result = new Lcs(); for (int i = 1, j = 1; i <= li && j <= ri;) @@ -56,9 +56,7 @@ namespace JsonDiffPatchDotNet // If the JSON tokens at the same position are both Objects or both Arrays, we just say they // are the same even if they are not, because we can package smaller deltas than an entire // object or array replacement by doing object to object or 
array to array diff. - if (JToken.DeepEquals(left[i - 1], right[j - 1]) - || left[i - 1].Type == JTokenType.Object && right[j - 1].Type == JTokenType.Object - || left[i - 1].Type == JTokenType.Array && right[j - 1].Type == JTokenType.Array) + if (match.Match(left[i - 1], right[j - 1])) { result.Sequence.Add(left[i - 1]); result.Indices1.Add(i - 1); diff --git a/Src/JsonDiffPatchDotNet/Options.cs b/Src/JsonDiffPatchDotNet/Options.cs index 04a74b7..7bc9121 100644 --- a/Src/JsonDiffPatchDotNet/Options.cs +++ b/Src/JsonDiffPatchDotNet/Options.cs @@ -1,28 +1,43 @@ -namespace JsonDiffPatchDotNet +using System; +using Newtonsoft.Json.Linq; + +namespace JsonDiffPatchDotNet { - public sealed class Options - { - public Options() - { - ArrayDiff = ArrayDiffMode.Efficient; - TextDiff = TextDiffMode.Efficient; - MinEfficientTextDiffLength = 50; - } + public sealed class Options + { + public Options() + { + ArrayDiff = ArrayDiffMode.Efficient; + TextDiff = TextDiffMode.Efficient; + MinEfficientTextDiffLength = 50; + } - /// - /// Specifies how arrays are diffed. The default is Simple. - /// - public ArrayDiffMode ArrayDiff { get; set; } + /// + /// Specifies how arrays are diffed. The default is Simple. + /// + public ArrayDiffMode ArrayDiff { get; set; } - /// - /// Specifies how string values are diffed. The default is Efficient. - /// - public TextDiffMode TextDiff { get; set; } + /// + /// Specifies how string values are diffed. The default is Efficient. + /// + public TextDiffMode TextDiff { get; set; } - /// - /// The minimum string length required to use Efficient text diff. If the minimum - /// length is not met, simple text diff will be used. The default length is 50 characters. - /// - public long MinEfficientTextDiffLength { get; set; } - } + /// + /// The minimum string length required to use Efficient text diff. If the minimum + /// length is not met, simple text diff will be used. The default length is 50 characters. 
+ /// + public long MinEfficientTextDiffLength { get; set; } + + /// + /// For LCS to work, it needs a way to match items between previous/original (or left/right) arrays. In traditional text diff tools this is trivial, as two lines of text are compared char by char. + /// When no matches by reference or value are found, array diffing falls back to a dumb behavior: matching items by position. + /// Matching by position is not the most efficient option (e.g. if an item is added at the first position, all the items below will be considered modified), but it produces expected results + /// in most trivial cases. This is good enough as long as movements/insertions/deletions only happen near the bottom of the array. + /// This is because if 2 objects are not equal by reference (i.e. the same object), both objects are considered different values, as there is no trivial solution to compare two arbitrary objects + /// in JavaScript. + /// To improve the results by leveraging the power of LCS (and position move detection), you need to provide a way to compare 2 objects. + /// + public Func ObjectHash { get; set; } + } }