1 module dshould.stringcmp; 2 3 import std.algorithm : map; 4 import std.format : format; 5 import std.range; 6 import std.typecons; 7 import dshould.ShouldType; 8 import dshould.basic; 9 10 /** 11 * Given string types, this version of the word `.equal` implements color-coded diffs. 12 * <b>There is no need to call this directly</b> - simply `import dshould;` the right function will be called automatically. 13 */ 14 public void equal(Should, T)(Should should, T expected, Fence _ = Fence(), string file = __FILE__, size_t line = __LINE__) 15 if (isInstanceOf!(ShouldType, Should) && is(T == string)) 16 { 17 auto got = should.got(); 18 19 should.allowOnlyWords!().before!"equal (string)"; 20 should.terminateChain; 21 22 if (got != expected) 23 { 24 stringCmpError(got, expected, Yes.quote, file, line); 25 } 26 } 27 28 package void stringCmpError(string got, string expected, Flag!"quote" quote, string file, size_t line) pure @safe 29 { 30 import std.algorithm : canFind; 31 32 string original; 33 string diff; 34 const bool multiline = got.canFind("\n"); 35 36 if (multiline) 37 { 38 auto diffPair = multiLineDiff(expected.split("\n"), got.split("\n")); 39 40 original = diffPair.original.join("\n"); 41 diff = diffPair.target.join("\n"); 42 } 43 else 44 { 45 auto diffPair = oneLineDiff(expected, got); 46 47 original = diffPair.original; 48 diff = diffPair.target; 49 } 50 51 const string fmt = (multiline ? "\n" : "") ~ (quote ? "'%s'" : "%s"); 52 53 throw new FluentError( 54 format(fmt, original), 55 format(fmt, diff), 56 file, line 57 ); 58 } 59 60 public auto oneLineDiff(string expected, string text) pure @safe 61 { 62 alias removePred = text => red(text); 63 alias addPred = text => green(text); 64 alias keepPred = text => text; 65 alias ditchPred = lines => string.init; 66 67 auto originalColored = colorizedDiff!(string, removePred, ditchPred, keepPred)(expected, text); 68 auto targetColored = colorizedDiff!(string, ditchPred, addPred, keepPred)(expected, text); 69 70 alias DiffPair = Tuple!(string, "original", string, "target"); 71 72 if (originalColored.isNull) 73 { 74 return DiffPair(expected, text); 75 } 76 else 77 { 78 return DiffPair(originalColored.get, targetColored.get); 79 } 80 } 81 82 @("collates successive replacements") 83 @safe unittest 84 { 85 const expectedOriginal = "Hello W" ~ red("or") ~ "ld"; 86 const expectedTarget = "Hello W" ~ green("ey") ~ "ld"; 87 const diff = "Hello World".oneLineDiff("Hello Weyld"); 88 89 (diff.original ~ diff.target).should.equal(expectedOriginal ~ expectedTarget); 90 } 91 92 @("does not colorize diff view for strings that are too different") 93 @safe unittest 94 { 95 const diff = "Hello World".oneLineDiff("Goodbye Universe"); 96 97 (diff.original ~ diff.target).should.equal("Hello WorldGoodbye Universe"); 98 } 99 100 @("tries to not change diff mode too often") 101 @safe unittest 102 { 103 const cleanDiff = `method="` ~ green(`multiply`) ~ `"`; 104 105 `method="update"`.oneLineDiff(`method="multiply"`).target.should.equal(cleanDiff); 106 } 107 108 @safe unittest 109 { 110 const originalText = "test"; 111 const targetText = "test, but"; 112 113 with (originalText.oneLineDiff(targetText)) 114 { 115 (original ~ target).should.equal(originalText ~ originalText ~ green(", but")); 116 } 117 } 118 119 public auto multiLineDiff(string[] expected, string[] text) @safe 120 { 121 alias removePred = lines => lines.map!(line => red("-" ~ line)); 122 alias addPred = lines => lines.map!(line => green("+" ~ line)); 123 alias keepPred = lines => lines.map!(line => " " ~ line); 124 alias ditchPred = lines => (string[]).init; 125 126 auto originalColored = colorizedDiff!(string[], removePred, ditchPred, keepPred)(expected, text); 127 auto targetColored = colorizedDiff!(string[], ditchPred, addPred, keepPred)(expected, text); 128 129 alias DiffPair = Tuple!(string[], "original", string[], "target"); 130 131 if (originalColored.isNull) 132 { 133 return DiffPair(expected, text); 134 } 135 else 136 { 137 return DiffPair(originalColored.get, targetColored.get); 138 } 139 } 140 141 @("supports multiline diff") 142 @safe unittest 143 { 144 import std.string : join, split; 145 146 // given 147 const originalText = `{ 148 "int": 1, 149 "array": ["XX"], 150 "timecodeBegin": "2003-02-01T12:00:00Z", 151 "timecodeEnd": "2003-02-01T14:00:00Z", 152 "enum": "ENUM", 153 "lastEntry": "goodbye" 154 }`; 155 const modifiedText = `{ 156 "int": 1, 157 "array": ["XX"], 158 "enum": "ENUM", 159 "somethingElse": "goes here", 160 "lastEntry": "goodbye" 161 }`; 162 163 // then 164 const patchTextOriginal = ` { 165 "int": 1, 166 "array": ["XX"], 167 ` ~ red(`- "timecodeBegin": "2003-02-01T12:00:00Z",`) ~ ` 168 ` ~ red(`- "timecodeEnd": "2003-02-01T14:00:00Z",`) ~ ` 169 "enum": "ENUM", 170 "lastEntry": "goodbye" 171 }`; 172 173 const patchTextTarget = ` { 174 "int": 1, 175 "array": ["XX"], 176 "enum": "ENUM", 177 ` ~ green(`+ "somethingElse": "goes here",`) ~ ` 178 "lastEntry": "goodbye" 179 }`; 180 181 const diff = originalText.split("\n").multiLineDiff(modifiedText.split("\n")); 182 183 (diff.original.join("\n") ~ diff.target.join("\n")).should.equal(patchTextOriginal ~ patchTextTarget); 184 } 185 186 @("supports comparison of large strings") 187 @safe unittest 188 { 189 import std.string : join, split; 190 191 // given 192 const repetitions = 500/"Hello World".length; 193 const originalText = `Hello World`.repeat(repetitions).join ~ `AAA` ~ `Hello World`.repeat(repetitions).join; 194 const modifiedText = `Hello World`.repeat(repetitions).join ~ `BBB` ~ `Hello World`.repeat(repetitions).join; 195 196 originalText.oneLineDiff(modifiedText); // should terminate in an acceptable timespan 197 } 198 199 // TODO bracket crossing cost 200 public Nullable!T colorizedDiff(T, alias removePred, alias addPred, alias keepPred)( 201 T expected, T text, Flag!"forceDiff" forceDiff = No.forceDiff) pure @trusted 202 { 203 import std.algorithm : count; 204 import std.array : Appender, empty; 205 import std.range : dropOne, front; 206 207 Appender!T diff; 208 diff.reserve(text.length); 209 210 // preds are called with continuous runs 211 Appender!(ElementType!T[]) addBuffer; 212 Appender!(ElementType!T[]) removeBuffer; 213 214 auto levenshtein = Levenshtein!T(expected, text); 215 const path = levenshtein.reconstructPath; 216 217 if (path.count!(a => a != levenshtein.EditOp.Type.keep) > text.length && !forceDiff) 218 { 219 return Nullable!T.init; // no diff view, too different 220 } 221 222 void flushAdd() @safe 223 { 224 if (!addBuffer.data.empty) 225 { 226 diff ~= addPred(addBuffer.data); 227 addBuffer.clear; 228 } 229 } 230 231 void flushRemove() @safe 232 { 233 if (!removeBuffer.data.empty) 234 { 235 diff ~= removePred(removeBuffer.data); 236 removeBuffer.clear; 237 } 238 } 239 240 void add(ElementType!T element) @safe 241 { 242 flushRemove; 243 addBuffer ~= element; 244 } 245 246 void remove(ElementType!T element) @safe 247 { 248 flushAdd; 249 removeBuffer ~= element; 250 } 251 252 void flush() @safe 253 { 254 flushRemove; 255 flushAdd; 256 } 257 258 void same(ElementType!T element) @safe 259 { 260 flush; 261 diff ~= keepPred([element]); 262 } 263 264 foreach (editOp; path) 265 { 266 final switch (editOp) 267 { 268 case levenshtein.EditOp.Type.keep: 269 same(text.front); 270 text = text.dropOne; 271 expected = expected.dropOne; 272 break; 273 case levenshtein.EditOp.Type.insert: 274 add(text.front); 275 text = text.dropOne; 276 break; 277 case levenshtein.EditOp.Type.remove: 278 remove(expected.front); 279 expected = expected.dropOne; 280 break; 281 } 282 } 283 284 assert(text.empty, format!`leftover %s`(text)); 285 assert(expected.empty); 286 287 flush; 288 289 return diff.data.nullable; 290 } 291 292 package auto red(T)(T text) 293 { 294 return RED_CODE ~ text ~ CLEAR_CODE; 295 } 296 297 package auto green(T)(T text) 298 { 299 return GREEN_CODE ~ text ~ CLEAR_CODE; 300 } 301 302 private enum RED_CODE = "\x1b[31m"; 303 private enum GREEN_CODE = "\x1b[32m"; 304 private enum CLEAR_CODE = "\x1b[39m"; 305 306 /** 307 * Modified Levenshtein distance from from std.algorithm 308 * Given two ranges, returns a sequence of insertions and deletions 309 * that turn the first range into the second. 310 * This is equivalent to diff computation, though it's 311 * comparatively inefficient at O(n^2) memory and runtime. 312 * 313 * This version adds customizable path cost, used to implement orphan avoidance. 314 */ 315 private struct Levenshtein(Range) 316 { 317 @disable this(); 318 319 public this(Range s, Range t) @safe 320 { 321 import std.algorithm : min; 322 323 const slen = walkLength(s.save); 324 const tlen = walkLength(t.save); 325 326 this.rows = slen + 1; 327 this.cols = tlen + 1; 328 this.matrixData = new Cell[this.rows * this.cols]; 329 initMatrix; 330 331 foreach (i; 1 .. this.rows) 332 { 333 const sFront = s.front; 334 auto tCurrent = t.save; 335 336 foreach (j; 1 .. this.cols) 337 { 338 auto costInsertion = this.matrix(i, j - 1).cost + insertionIncrement 339 + pathCost(EditOp.insert(1), i, j); 340 auto costDeletion = this.matrix(i - 1, j).cost + deletionIncrement 341 + pathCost(EditOp.remove(1), i, j); 342 auto costNone = (sFront != tCurrent.front) 343 ? float.infinity 344 : (this.matrix(i - 1, j - 1).cost + pathCost(EditOp.keep(1), i, j)); 345 346 tCurrent.popFront(); 347 348 Cell cell; 349 350 if (costNone <= costDeletion) 351 { 352 if (costNone <= costInsertion) 353 { 354 cell = Cell(costNone, EditOp.keep(matrix(i - 1, j - 1).op)); 355 } 356 else 357 { 358 cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op)); 359 } 360 } 361 else 362 { 363 if (costDeletion <= costInsertion) 364 { 365 cell = Cell(costDeletion, EditOp.remove(matrix(i - 1, j).op)); 366 } 367 else 368 { 369 cell = Cell(costInsertion, EditOp.insert(matrix(i, j - 1).op)); 370 } 371 } 372 373 matrix(i, j) = cell; 374 } 375 s.popFront(); 376 } 377 } 378 379 private float pathCost(EditOp proposedOp, size_t i, size_t j) @nogc 380 { 381 import std.algorithm : startsWith; 382 383 enum Path 384 { 385 insert, 386 remove, 387 } 388 389 struct PathRange(Path path) 390 { 391 Levenshtein* self; 392 393 size_t i; 394 size_t j; 395 396 EditOp op; 397 398 @property EditOp.Type front() const 399 in 400 { 401 final switch (path) 402 { 403 case Path.insert: 404 assert(op.type != EditOp.Type.remove); 405 break; 406 case Path.remove: 407 assert(op.type != EditOp.Type.insert); 408 break; 409 } 410 } 411 do 412 { 413 414 return this.op.type; 415 } 416 417 bool empty() 418 { 419 if (this.op.type == EditOp.Type.keep && this.op.count == 0) 420 { 421 assert(this.i == 0 && this.j == 0); 422 423 return true; 424 } 425 426 assert(this.op.count > 0); 427 428 return false; 429 } 430 431 // advance until front is again on the path 432 void advance() 433 { 434 while (true) 435 { 436 final switch (path) 437 { 438 case Path.insert: 439 if (this.op.type != EditOp.Type.remove) 440 { 441 return; 442 } 443 break; 444 case Path.remove: 445 if (this.op.type != EditOp.Type.insert) 446 { 447 return; 448 } 449 break; 450 } 451 452 this.self.skipByOp(this.op, this.i, this.j); 453 this.op = this.self.matrix(this.i, this.j).op; 454 } 455 } 456 457 void popFront() 458 { 459 this.self.moveByOp(this.op, this.i, this.j); 460 this.op = this.self.matrix(this.i, this.j).op; 461 462 advance; 463 } 464 } 465 466 static assert(isInputRange!(PathRange!(Path.insert))); 467 static assert(isInputRange!(PathRange!(Path.remove))); 468 469 auto traceInsert = PathRange!(Path.insert)(&this, i, j, proposedOp); 470 auto traceRemove = PathRange!(Path.remove)(&this, i, j, proposedOp); 471 472 // prepare for use 473 traceInsert.advance; 474 traceRemove.advance; 475 476 // significantly penalize orphaned "no change" lines 477 alias orphan = op => only(op, EditOp.Type.keep, op); 478 const orphanPathCost = 479 traceInsert.startsWith(orphan(EditOp.Type.insert)) ? orphanCost : 0 + 480 traceRemove.startsWith(orphan(EditOp.Type.remove)) ? orphanCost : 0; 481 482 // slightly penalize mode changes, to avoid pathologies arising from distant matches 483 const pathChangesModeCost = 484 traceInsert.startsWith(only(EditOp.Type.keep, EditOp.Type.insert)) ? modeChangeCost : 0 + 485 traceRemove.startsWith(only(EditOp.Type.keep, EditOp.Type.remove)) ? modeChangeCost : 0; 486 487 return orphanPathCost + pathChangesModeCost; 488 } 489 490 public EditOp.Type[] reconstructPath() 491 { 492 import std.algorithm.mutation : reverse; 493 494 EditOp.Type[] result; 495 size_t i = this.rows - 1; 496 size_t j = this.cols - 1; 497 498 while (i > 0 || j > 0) 499 { 500 const op = matrix(i, j).op; 501 502 assert(op.count > 0); 503 504 result ~= op.type.repeat(op.count).array; 505 506 skipByOp(op, i, j); 507 } 508 reverse(result); 509 return result; 510 } 511 512 private void moveByOp(EditOp op, ref size_t i, ref size_t j) 513 in 514 { 515 assert(i > 0 || j > 0); 516 assert(op.count > 0); 517 } 518 do 519 { 520 final switch (op.type) 521 { 522 case EditOp.Type.keep: 523 i--; 524 j--; 525 break; 526 case EditOp.Type.insert: 527 j--; 528 break; 529 case EditOp.Type.remove: 530 i--; 531 break; 532 } 533 } 534 535 private void skipByOp(EditOp op, ref size_t i, ref size_t j) 536 { 537 final switch (op.type) 538 { 539 case EditOp.Type.keep: 540 assert(i >= op.count && j >= op.count); 541 542 i -= op.count; 543 j -= op.count; 544 break; 545 case EditOp.Type.insert: 546 assert(j >= op.count); 547 548 j -= op.count; 549 break; 550 case EditOp.Type.remove: 551 assert(i >= op.count); 552 553 i -= op.count; 554 break; 555 } 556 } 557 558 struct EditOp 559 { 560 enum Type 561 { 562 insert, 563 remove, 564 keep, 565 } 566 567 template constructByType(Type type) 568 { 569 static EditOp constructByType(size_t count) 570 { 571 return EditOp(type, count); 572 } 573 574 static EditOp constructByType(EditOp previousOp) 575 { 576 return EditOp(type, (type == previousOp.type) ? (previousOp.count + 1) : 1); 577 } 578 } 579 580 alias insert = constructByType!(Type.insert); 581 alias remove = constructByType!(Type.remove); 582 alias keep = constructByType!(Type.keep); 583 584 Type type; 585 586 size_t count; // number of times this op is repeated on the best path 587 } 588 589 private enum deletionIncrement = 1; 590 private enum insertionIncrement = 1; 591 private enum orphanCost = 2.5; 592 private enum modeChangeCost = 0.05; 593 594 private alias Cell = Tuple!(float, "cost", EditOp, "op"); 595 private Cell[] matrixData = null; 596 private size_t rows = 0; 597 private size_t cols = 0; 598 599 invariant 600 { 601 assert(matrixData.length == rows * cols); 602 } 603 604 // Treat _matrix as a rectangular array 605 private ref Cell matrix(size_t row, size_t col) 606 in 607 { 608 assert(row >= 0 && row < this.rows); 609 assert(col >= 0 && col < this.cols); 610 } 611 do 612 { 613 return this.matrixData[row * this.cols + col]; 614 } 615 616 private void initMatrix() 617 { 618 this.matrixData[] = Cell(0, EditOp.keep(0)); 619 620 foreach (r; 1 .. rows) 621 { 622 this.matrix(r, 0) = Cell(r * deletionIncrement, EditOp.remove(r)); 623 } 624 625 foreach (c; 1 .. cols) 626 { 627 this.matrix(0, c) = Cell(c * insertionIncrement, EditOp.insert(c)); 628 } 629 } 630 }