1 module dshould.stringcmp;
2 
3 import std.algorithm : map;
4 import std.format : format;
5 import std.range;
6 import std.typecons;
7 import dshould.ShouldType;
8 import dshould.basic;
9 
10 /**
11  * Given string types, this version of the word `.equal` implements color-coded diffs.
12  * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically.
13  */
14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__)
15 if (isInstanceOf!(ShouldType, Should) && is(T == string))
16 {
17     auto got = should.got();
18 
19     should.allowOnlyWords!().before!"equal (string)";
20     should.terminateChain;
21 
22     if (got != expected)
23     {
24         stringCmpError(got, expected, Yes.quote, file, line);
25     }
26 }
27 
28 package void stringCmpError(string got, string expected, Flag!"quote" quote, string file, size_t line) pure @safe
29 {
30     import std.algorithm : canFind;
31 
32     string original;
33     string diff;
34     const bool multiline = got.canFind("\n");
35 
36     if (multiline)
37     {
38         auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n"));
39 
40         original = diffPair.original.join("\n");
41         diff = diffPair.target.join("\n");
42     }
43     else
44     {
45         auto diffPair = oneLineDiff(expected, got);
46 
47         original = diffPair.original;
48         diff = diffPair.target;
49     }
50 
51     const string fmt = (multiline ? "\n" : "") ~ (quote ? "'%s'" : "%s");
52 
53     throw new FluentError(
54         format(fmt, original),
55         format(fmt, diff),
56         file, line
57     );
58 }
59 
60 public auto oneLineDiff(string expected, string text) pure @safe
61 {
62     alias removePred = text => red(text);
63     alias addPred = text => green(text);
64     alias keepPred = text => text;
65     alias ditchPred = lines => string.init;
66 
67     auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text);
68     auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text);
69 
70     alias DiffPair = Tuple!(string, "original", string, "target");
71 
72     if (originalColored.isNull)
73     {
74         return DiffPair(expected, text);
75     }
76     else
77     {
78         return DiffPair(originalColored.get, targetColored.get);
79     }
80 }
81 
82 @("collates successive replacements")
83 @safe unittest
84 {
85     const expectedOriginal = "Hello W" ~ red("or") ~ "ld";
86     const expectedTarget = "Hello W" ~ green("ey") ~ "ld";
87     const diff = "Hello World".oneLineDiff("Hello Weyld");
88 
89     (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget);
90 }
91 
92 @("does not colorize diff view for strings that are too different")
93 @safe unittest
94 {
95     const diff = "Hello World".oneLineDiff("Goodbye Universe");
96 
97     (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe");
98 }
99 
100 @("tries to not change diff mode too often")
101 @safe unittest
102 {
103     const cleanDiff = `method="` ~ green(`multiply`) ~ `"`;
104 
105     `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff);
106 }
107 
108 @safe unittest
109 {
110     const originalText = "test";
111     const targetText = "test, but";
112 
113     with (originalText.oneLineDiff(targetText))
114     {
115         (original ~ target).should.equal(originalText ~ originalText ~ green(", but"));
116     }
117 }
118 
119 public auto multiLineDiff(string[] expected, string[] text) @safe
120 {
121     alias removePred = lines => lines.map!(line => red("-" ~ line));
122     alias addPred = lines => lines.map!(line => green("+" ~ line));
123     alias keepPred = lines => lines.map!(line => " " ~ line);
124     alias ditchPred = lines => (string[]).init;
125 
126     auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text);
127     auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text);
128 
129     alias DiffPair = Tuple!(string[], "original", string[], "target");
130 
131     if (originalColored.isNull)
132     {
133         return DiffPair(expected, text);
134     }
135     else
136     {
137         return DiffPair(originalColored.get, targetColored.get);
138     }
139 }
140 
141 @("supports multiline diff")
142 @safe unittest
143 {
144     import std.string : join, split;
145 
146     // given
147     const originalText = `{
148         "int": 1,
149         "array": ["XX"],
150         "timecodeBegin": "2003-02-01T12:00:00Z",
151         "timecodeEnd": "2003-02-01T14:00:00Z",
152         "enum": "ENUM",
153         "lastEntry": "goodbye"
154     }`;
155     const modifiedText = `{
156         "int": 1,
157         "array": ["XX"],
158         "enum": "ENUM",
159         "somethingElse": "goes here",
160         "lastEntry": "goodbye"
161     }`;
162 
163     // then
164     const patchTextOriginal = ` {
165          "int": 1,
166          "array": ["XX"],
167 ` ~ red(`-        "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ `
168 ` ~ red(`-        "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ `
169          "enum": "ENUM",
170          "lastEntry": "goodbye"
171      }`;
172 
173     const patchTextTarget = ` {
174          "int": 1,
175          "array": ["XX"],
176          "enum": "ENUM",
177 ` ~ green(`+        "somethingElse": "goes here",`) ~ `
178          "lastEntry": "goodbye"
179      }`;
180 
181     const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n"));
182 
183     (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget);
184 }
185 
186 @("supports comparison of large strings")
187 @safe unittest
188 {
189     import std.string : join, split;
190 
191     // given
192     const repetitions = 500/"Hello World".length;
193     const originalText = `Hello World`.repeat(repetitions).join ~ `AAA` ~ `Hello World`.repeat(repetitions).join;
194     const modifiedText = `Hello World`.repeat(repetitions).join ~ `BBB` ~ `Hello World`.repeat(repetitions).join;
195 
196     originalText.oneLineDiff(modifiedText); // should terminate in an acceptable timespan
197 }
198 
199 // TODO bracket crossing cost
200 public Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)(
201     T expected, T text, Flag!"forceDiff" forceDiff = No.forceDiff) pure @trusted
202 {
203     import std.algorithm : count;
204     import std.array : Appender, empty;
205     import std.range : dropOne, front;
206 
207     Appender!T diff;
208     diff.reserve(text.length);
209 
210     // preds are called with continuous runs
211     Appender!(ElementType!T[]) addBuffer;
212     Appender!(ElementType!T[]) removeBuffer;
213 
214     auto levenshtein = Levenshtein!T(expected, text);
215     const path = levenshtein.reconstructPath;
216 
217     if (path.count!(a => a != levenshtein.EditOp.Type.keep) > text.length && !forceDiff)
218     {
219         return Nullable!T.init; // no diff view, too different
220     }
221 
222     void flushAdd() @safe
223     {
224         if (!addBuffer.data.empty)
225         {
226             diff ~= addPred(addBuffer.data);
227             addBuffer.clear;
228         }
229     }
230 
231     void flushRemove() @safe
232     {
233         if (!removeBuffer.data.empty)
234         {
235             diff ~= removePred(removeBuffer.data);
236             removeBuffer.clear;
237         }
238     }
239 
240     void add(ElementType!T element) @safe
241     {
242         flushRemove;
243         addBuffer ~= element;
244     }
245 
246     void remove(ElementType!T element) @safe
247     {
248         flushAdd;
249         removeBuffer ~= element;
250     }
251 
252     void flush() @safe
253     {
254         flushRemove;
255         flushAdd;
256     }
257 
258     void same(ElementType!T element) @safe
259     {
260         flush;
261         diff ~= keepPred([element]);
262     }
263 
264     foreach (editOp; path)
265     {
266         final switch (editOp)
267         {
268             case levenshtein.EditOp.Type.keep:
269                 same(text.front);
270                 text = text.dropOne;
271                 expected = expected.dropOne;
272                 break;
273             case levenshtein.EditOp.Type.insert:
274                 add(text.front);
275                 text = text.dropOne;
276                 break;
277             case levenshtein.EditOp.Type.remove:
278                 remove(expected.front);
279                 expected = expected.dropOne;
280                 break;
281         }
282     }
283 
284     assert(text.empty, format!`leftover %s`(text));
285     assert(expected.empty);
286 
287     flush;
288 
289     return diff.data.nullable;
290 }
291 
292 package auto red(T)(T text)
293 {
294     return RED_CODE ~ text ~ CLEAR_CODE;
295 }
296 
297 package auto green(T)(T text)
298 {
299     return GREEN_CODE ~ text ~ CLEAR_CODE;
300 }
301 
302 private enum RED_CODE = "\x1b[31m";
303 private enum GREEN_CODE = "\x1b[32m";
304 private enum CLEAR_CODE = "\x1b[39m";
305 
306 /**
307  * Modified Levenshtein distance from from std.algorithm
308  * Given two ranges, returns a sequence of insertions and deletions
309  * that turn the first range into the second.
310  * This is equivalent to diff computation, though it's
311  * comparatively inefficient at O(n^2) memory and runtime.
312  *
313  * This version adds customizable path cost, used to implement orphan avoidance.
314  */
315 private struct Levenshtein(Range)
316 {
317     @disable this();
318 
319     public this(Range s, Range t) @safe
320     {
321         import std.algorithm : min;
322 
323         const slen = walkLength(s.save);
324         const tlen = walkLength(t.save);
325 
326         this.rows = slen + 1;
327         this.cols = tlen + 1;
328         this.matrixData = new Cell[this.rows * this.cols];
329         initMatrix;
330 
331         foreach (i; 1 .. this.rows)
332         {
333             const sFront = s.front;
334             auto tCurrent = t.save;
335 
336             foreach (j; 1 .. this.cols)
337             {
338                 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement
339                     + pathCost(EditOp.insert(1), i, j);
340                 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement
341                     + pathCost(EditOp.remove(1), i, j);
342                 auto costNone = (sFront != tCurrent.front)
343                     ? float.infinity
344                     : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.keep(1), i, j));
345 
346                 tCurrent.popFront();
347 
348                 Cell cell;
349 
350                 if (costNone <= costDeletion)
351                 {
352                     if (costNone <= costInsertion)
353                     {
354                         cell = Cell(costNone, EditOp.keep(matrix(i - 1, j - 1).op));
355                     }
356                     else
357                     {
358                         cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op));
359                     }
360                 }
361                 else
362                 {
363                     if (costDeletion <= costInsertion)
364                     {
365                         cell = Cell(costDeletion, EditOp.remove(matrix(i - 1, j).op));
366                     }
367                     else
368                     {
369                         cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op));
370                     }
371                 }
372 
373                 matrix(i, j) = cell;
374             }
375             s.popFront();
376         }
377     }
378 
379     private float pathCost(EditOp proposedOp, size_t i, size_t j) @nogc
380     {
381         import std.algorithm : startsWith;
382 
383         enum Path
384         {
385             insert,
386             remove,
387         }
388 
389         struct PathRange(Path path)
390         {
391             Levenshtein* self;
392 
393             size_t i;
394             size_t j;
395 
396             EditOp op;
397 
398             @property EditOp.Type front() const
399             in
400             {
401                 final switch (path)
402                 {
403                     case Path.insert:
404                         assert(op.type != EditOp.Type.remove);
405                         break;
406                     case Path.remove:
407                         assert(op.type != EditOp.Type.insert);
408                         break;
409                 }
410             }
411             do
412             {
413 
414                 return this.op.type;
415             }
416 
417             bool empty()
418             {
419                 if (this.op.type == EditOp.Type.keep && this.op.count == 0)
420                 {
421                     assert(this.i == 0 && this.j == 0);
422 
423                     return true;
424                 }
425 
426                 assert(this.op.count > 0);
427 
428                 return false;
429             }
430 
431             // advance until front is again on the path
432             void advance()
433             {
434                 while (true)
435                 {
436                     final switch (path)
437                     {
438                         case Path.insert:
439                             if (this.op.type != EditOp.Type.remove)
440                             {
441                                 return;
442                             }
443                             break;
444                         case Path.remove:
445                             if (this.op.type != EditOp.Type.insert)
446                             {
447                                 return;
448                             }
449                             break;
450                     }
451 
452                     this.self.skipByOp(this.op, this.i, this.j);
453                     this.op = this.self.matrix(this.i, this.j).op;
454                 }
455             }
456 
457             void popFront()
458             {
459                 this.self.moveByOp(this.op, this.i, this.j);
460                 this.op = this.self.matrix(this.i, this.j).op;
461 
462                 advance;
463             }
464         }
465 
466         static assert(isInputRange!(PathRange!(Path.insert)));
467         static assert(isInputRange!(PathRange!(Path.remove)));
468 
469         auto traceInsert = PathRange!(Path.insert)(&this, i, j, proposedOp);
470         auto traceRemove = PathRange!(Path.remove)(&this, i, j, proposedOp);
471 
472         // prepare for use
473         traceInsert.advance;
474         traceRemove.advance;
475 
476         // significantly penalize orphaned "no change" lines
477         alias orphan = op => only(op, EditOp.Type.keep, op);
478         const orphanPathCost =
479             traceInsert.startsWith(orphan(EditOp.Type.insert)) ? orphanCost : 0 +
480             traceRemove.startsWith(orphan(EditOp.Type.remove)) ? orphanCost : 0;
481 
482         // slightly penalize mode changes, to avoid pathologies arising from distant matches
483         const pathChangesModeCost =
484             traceInsert.startsWith(only(EditOp.Type.keep, EditOp.Type.insert)) ? modeChangeCost : 0 +
485             traceRemove.startsWith(only(EditOp.Type.keep, EditOp.Type.remove)) ? modeChangeCost : 0;
486 
487         return orphanPathCost + pathChangesModeCost;
488     }
489 
490     public EditOp.Type[] reconstructPath()
491     {
492         import std.algorithm.mutation : reverse;
493 
494         EditOp.Type[] result;
495         size_t i = this.rows - 1;
496         size_t j = this.cols - 1;
497 
498         while (i > 0 || j > 0)
499         {
500             const op = matrix(i, j).op;
501 
502             assert(op.count > 0);
503 
504             result ~= op.type.repeat(op.count).array;
505 
506             skipByOp(op, i, j);
507         }
508         reverse(result);
509         return result;
510     }
511 
512     private void moveByOp(EditOp op, ref size_t i, ref size_t j)
513     in
514     {
515         assert(i > 0 || j > 0);
516         assert(op.count > 0);
517     }
518     do
519     {
520         final switch (op.type)
521         {
522             case EditOp.Type.keep:
523                 i--;
524                 j--;
525                 break;
526             case EditOp.Type.insert:
527                 j--;
528                 break;
529             case EditOp.Type.remove:
530                 i--;
531                 break;
532         }
533     }
534 
535     private void skipByOp(EditOp op, ref size_t i, ref size_t j)
536     {
537         final switch (op.type)
538         {
539             case EditOp.Type.keep:
540                 assert(i >= op.count && j >= op.count);
541 
542                 i -= op.count;
543                 j -= op.count;
544                 break;
545             case EditOp.Type.insert:
546                 assert(j >= op.count);
547 
548                 j -= op.count;
549                 break;
550             case EditOp.Type.remove:
551                 assert(i >= op.count);
552 
553                 i -= op.count;
554                 break;
555         }
556     }
557 
558     struct EditOp
559     {
560         enum Type
561         {
562             insert,
563             remove,
564             keep,
565         }
566 
567         template constructByType(Type type)
568         {
569             static EditOp constructByType(size_t count)
570             {
571                 return EditOp(type, count);
572             }
573 
574             static EditOp constructByType(EditOp previousOp)
575             {
576                 return EditOp(type, (type == previousOp.type) ? (previousOp.count + 1) : 1);
577             }
578         }
579 
580         alias insert = constructByType!(Type.insert);
581         alias remove = constructByType!(Type.remove);
582         alias keep = constructByType!(Type.keep);
583 
584         Type type;
585 
586         size_t count; // number of times this op is repeated on the best path
587     }
588 
589     private enum deletionIncrement = 1;
590     private enum insertionIncrement = 1;
591     private enum orphanCost = 2.5;
592     private enum modeChangeCost = 0.05;
593 
594     private alias Cell = Tuple!(float, "cost", EditOp, "op");
595     private Cell[] matrixData = null;
596     private size_t rows = 0;
597     private size_t cols = 0;
598 
599     invariant
600     {
601         assert(matrixData.length == rows * cols);
602     }
603 
604     // Treat _matrix as a rectangular array
605     private ref Cell matrix(size_t row, size_t col)
606     in
607     {
608         assert(row >= 0 && row < this.rows);
609         assert(col >= 0 && col < this.cols);
610     }
611     do
612     {
613         return this.matrixData[row * this.cols + col];
614     }
615 
616     private void initMatrix()
617     {
618         this.matrixData[] = Cell(0, EditOp.keep(0));
619 
620         foreach (r; 1 .. rows)
621         {
622             this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove(r));
623         }
624 
625         foreach (c; 1 .. cols)
626         {
627             this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert(c));
628         }
629     }
630 }