// SemiTwist Library
// Written in the D programming language.

module semitwist.util.text;

import std.algorithm;
import std.array;
import std.compiler;
import std.conv;
import std.digest.md;
import std.stdio;
import std.traits;
import std.stream;
import std.string;
import std.system;
import std.uni;
import std.utf;

public import std.stream: BOM;

import semitwist.util.all;

private alias semitwist.util.ctfe.ctfe_strip ctfe_strip;

/**
Generates the source of a function template `name(T)()` that returns `data`
as a char, wchar or dchar string literal depending on T.

Notes:
Anything in "data" must be doubly escaped.

For instance, if you want the generated function to return newline (ie, "\n"),
then "data" must be ['\\', 'n'], and thus the mixin call would look like this:

----
mixin(multiTypeString("unixNewline", "\\n"));
// Or
mixin(multiTypeString("unixNewline", r"\n"));
----

Or, if you want the generated function to return the escape sequence
for newline (ie, r"\n", or "\\n", or ['\\', 'n']), then "data" must
be ['\\', '\\', 'n'], and thus the mixin call would look like this:

----
mixin(multiTypeString("unixNewlineEscSequence", "\\\\n"));
// Or
mixin(multiTypeString("unixNewlineEscSequence", r"\\n"));
----

(This requirement could be changed if there is a way to automatically
escape a string at compile-time.)
*/
// NOTE(review): the generated function is declared as returning `T[]` but
// returns an immutable literal; confirm whether `immutable(T)[]` was intended.
template multiTypeString(string name, string data, string access="public")
{
	enum multiTypeString =
		access~" T[] "~name~"(T)()"~
		"{"~
		" static if(is(T == char)) { return \""~data~"\"c; }"~
		" else static if(is(T == wchar)) { return \""~data~"\"w; }"~
		" else static if(is(T == dchar)) { return \""~data~"\"d; }"~
		// A bare string is a truthy condition, so the assert must be given
		// an explicit `false` in order to ever fire.
		" else static assert(false, \"T must be char, wchar, or dchar\");"~
		"}";
}

/// Warning: This is missing some unicode whitespace chars
mixin(multiTypeString!("whitespaceChars", r" \n\r\t\v\f"));

/// Unix EOL: "\n"
/// Converts all Windows and Mac9 line endings in str to Unix, in place.
void toUnixEOL(T)(ref T[] str)
{
	// Windows must be handled first so "\r\n" doesn't become two newlines.
	str = replace(str, to!(T[])(nlStr_Windows), to!(T[])(nlStr_Linux)); // Win  -> Unix
	str = replace(str, to!(T[])(nlStr_Mac9),    to!(T[])(nlStr_Linux)); // Mac9 -> Unix
}

/// Mac9 EOL: "\r"
/// Converts all Windows and Unix line endings in str to Mac9, in place.
void toMac9EOL(T)(ref T[] str)
{
	str = replace(str, to!(T[])(nlStr_Windows), to!(T[])(nlStr_Mac9)); // Win  -> Mac9
	str = replace(str, to!(T[])(nlStr_Linux),   to!(T[])(nlStr_Mac9)); // Unix -> Mac9
}

/// Win EOL: "\r\n"
/// Converts all line endings in str to Windows, in place.
void toWinEOL(T)(ref T[] str)
{
	toUnixEOL(str); // All -> Unix
	str = replace(str, to!(T[])(nlStr_Linux), to!(T[])(nlStr_Windows)); // Unix -> Win
}

/// Returns str with every line ending converted to the platform's native one
/// ("\r\n" on Windows, "\n" everywhere else).
T[] toNativeEOL(T)(T[] str)
{
	version(Windows) toWinEOL(str);
	else             toUnixEOL(str);
	return str;
}

/// Like toNativeEOL, but skips the work when Unix EOLs are already native.
T[] toNativeEOLFromUnix(T)(T[] str)
{
	version(Windows) return str.toNativeEOL();
	else             return str;
}

/// Like toNativeEOL, but skips the work when Windows EOLs are already native.
T[] toNativeEOLFromWin(T)(T[] str)
{
	version(Windows) return str;
	else             return str.toNativeEOL();
}

/// Like toNativeEOL; Mac9 EOLs are never native, so conversion always runs.
T[] toNativeEOLFromMac9(T)(T[] str)
{
	return str.toNativeEOL();
}

enum EscapeSequence
{
	DDQS, // D Double Quote String, ex: `"foo\t"` <--> `foo	`
	HTML, // ex: `&amp;` <--> `&`

	//TODO: Implement these
	//URI,  // ex: `%20` <--> ` `
	//SQL,  //TODO: Include different types of SQL escaping (SQL's about as standardized as BASIC)
}

/++
Note:
For the escape and unescape functions, chaining one with the other
(ex: "unescape(escape(str))") will result in a string that is
semantically equivalent to the original, but it is *not* necessarily
guaranteed to be exactly identical to the original string.

For example:
	string str;
	str = `"\x41\t"`;        // 0x41 is ASCII and UTF-8 for A
	str = unescapeDDQS(str); // == `A	` (That's an actual tab character)
	str = escapeDDQS(str);   // == `"A\t"c`

	Note that "\x41\t" and "A\t"c are equivalent, but not identical.
+/
T escape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));

	switch(type)
	{
	case EscapeSequence.DDQS:
		return escapeDDQS(str);

	case EscapeSequence.HTML:
		return escapeHTML(str);

	default:
		throw new Exception("Unsupported EscapeSequence");
	}
}

/// Inverse of escape(): dispatches to the unescape routine selected by type.
/// Throws: Exception for an unsupported EscapeSequence.
T unescape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));

	switch(type)
	{
	case EscapeSequence.DDQS:
		return unescapeDDQS(str);

	case EscapeSequence.HTML:
		return unescapeHTML(str);

	default:
		throw new Exception("Unsupported EscapeSequence");
	}
}

/// Replaces every occurrence of escapeSequence in str with the final
/// code unit of escapeSequence (ex: `\"` becomes `"`).
T unescapeChar(T)(T str, T escapeSequence) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));

	T ret = str.dup;
	ret = substitute(ret, escapeSequence, escapeSequence[$-1..$]);
	return ret;
}

/// Warning: This doesn't unescape all escape sequences yet.
/// Converts the source form of a D double-quoted string literal (including
/// the surrounding quotes and an optional c/w/d postfix) into its content.
///
/// Supported escapes: \\ \" \' \r \n \t \? \a \b \f \v. Any other
/// backslash sequence is copied through unchanged.
///
/// Throws: Exception if str is not wrapped in double quotes.
T unescapeDDQS(T)(T str) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));
	enum errStr = "str doesn't contain a valid D Double Quote String";

	if(str.length < 2 || str[0] != '"')
		throw new Exception(errStr);

	// Find the closing quote, allowing for one string-postfix character.
	size_t contentEnd;
	if(str[$-1] == '"')
		contentEnd = str.length - 1;
	else if(
		str.length >= 3 && str[$-2] == '"' &&
		(str[$-1] == 'c' || str[$-1] == 'w' || str[$-1] == 'd')
	)
		contentEnd = str.length - 2;
	else
		throw new Exception(errStr);

	auto content = str[1..contentEnd];

	// Single left-to-right pass, so that `\\n` yields backslash + 'n'
	// rather than being re-interpreted as a newline escape.
	// All handled escapes are ASCII, so working on code units is safe.
	T ret;
	size_t i = 0;
	while(i < content.length)
	{
		auto ch = content[i];
		if(ch != '\\' || i+1 == content.length)
		{
			ret ~= ch;
			i++;
			continue;
		}

		auto next = content[i+1];
		switch(next)
		{
		case '\\': ret ~= '\\'; break;
		case '"':  ret ~= '"';  break;
		case '\'': ret ~= '\''; break;
		case 'r':  ret ~= '\r'; break;
		case 'n':  ret ~= '\n'; break;
		case 't':  ret ~= '\t'; break;
		case '?':  ret ~= '\?'; break;
		case 'a':  ret ~= '\a'; break;
		case 'b':  ret ~= '\b'; break;
		case 'f':  ret ~= '\f'; break;
		case 'v':  ret ~= '\v'; break;
		//TODO: All the others
		default:
			// Unsupported sequence: keep it verbatim
			ret ~= ch;
			ret ~= next;
			break;
		}
		i += 2;
	}

	return ret;
}

/// Converts str into the source form of a D double-quoted string literal:
/// escapes backslash, double quote, CR, LF and tab, then wraps in quotes.
T escapeDDQS(T)(T str) if(isSomeString!T)
{
	T ret = str;

	// Backslash must go first so the escapes added below aren't re-escaped.
	ret = ctfe_substitute!(T)(ret, `\`, `\\`);
	ret = ctfe_substitute!(T)(ret, `"`, `\"`);
	ret = ctfe_substitute!(T)(ret, "\r", `\r`); // To prevent accidential conversions to platform-specific EOL
	ret = ctfe_substitute!(T)(ret, "\n", `\n`); // To prevent accidential conversions to platform-specific EOL
	ret = ctfe_substitute!(T)(ret, "\t", `\t`); // To prevent possible problems with automatic tab->space conversion
	// The rest don't need to be escaped

	return `"`~ret~`"`;
}

/+
enum doubleQuoteTestStr = `"They said \"10 \\ 5 = 2\""`;

pragma(msg, "orig: "~doubleQuoteTestStr);
pragma(msg, "unesc: "~unescapeDDQS(doubleQuoteTestStr));
pragma(msg, "esc: "~escapeDDQS(doubleQuoteTestStr));
pragma(msg, "esc(unesc): "~escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
pragma(msg, "unesc(esc): "~unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));

pragma(msg, "unesc: "~unescape(doubleQuoteTestStr, EscapeSequence.DDQS));
pragma(msg, "unesc: "~doubleQuoteTestStr.unescape(EscapeSequence.DDQS));

mixin(unittestSemiTwistDLib("Outputting some things", q{
	enum wstring ctEscW = escapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
	enum dstring ctEscD = escapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
	enum wstring ctUnescW = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
	enum dstring ctUnescD = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
	writefln("%s%s", "ctEscW: ", ctEscW);
	writefln("%s%s", "ctEscD: ", ctEscD);
	writefln("%s%s", "ctUnescW: ", ctUnescW);
	writefln("%s%s", "ctUnescD: ", ctUnescD);

	writefln("%s%s", "unesc wchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
	writefln("%s%s", "unesc dchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
	writefln("%s%s", "esc wchar: ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
	writefln("%s%s", "esc dchar: ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
//	writefln("%s%s", "int: ", unescapeDDQS([cast(int)1,2,3]));

	writefln("%s%s", "orig: ", doubleQuoteTestStr);
	writefln("%s%s", "unesc: ", unescapeDDQS(doubleQuoteTestStr));
	writefln("%s%s", "esc: ", escapeDDQS(doubleQuoteTestStr));
	writefln("%s%s", "esc(unesc): ", escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
	writefln("%s%s", "unesc(esc): ", unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
}));
+/

/// Warning: This doesn't unescape all escape sequences yet.
/// Converts the HTML entities &lt; &gt; and &amp; back into < > and &.
T unescapeHTML(T)(T str) if(isSomeString!T)
{
	auto ret = str;

	// "&amp;" goes last so "&amp;lt;" correctly becomes "&lt;", not "<".
	ret = ctfe_substitute!(T)(ret, "&lt;",  "<");
	ret = ctfe_substitute!(T)(ret, "&gt;",  ">");
	ret = ctfe_substitute!(T)(ret, "&amp;", "&");

	return ret;
}

/// Replaces & < and > with the HTML entities &amp; &lt; and &gt;.
T escapeHTML(T)(T str) if(isSomeString!T)
{
	auto ret = str;

	// "&" goes first so the ampersands of the other entities aren't re-escaped.
	ret = ctfe_substitute!(T)(ret, "&", "&amp;");
	ret = ctfe_substitute!(T)(ret, "<", "&lt;");
	ret = ctfe_substitute!(T)(ret, ">", "&gt;");

	return ret;
}

/// Like std.string.indexOf, but with an optional 'start' parameter,
/// and returns s.length when not found (instead of -1).
/// A 'start' beyond the end of s is treated as s.length.
//TODO*: Unittest these
size_t locate(Char)(in Char[] s, dchar c, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
	// Clamp so the slice below can't go out of range
	if(start > s.length)
		start = s.length;

	auto index = std.string.indexOf(s[start..$], c, cs);
	return (index == -1)? s.length : index + start;
}

/// ditto
size_t locatePrior(Char)(in Char[] s, dchar c, size_t start=int.max, CaseSensitive cs = CaseSensitive.yes)
{
	if(start > s.length)
		start = s.length;

	auto index = std.string.lastIndexOf(s[0..start], c, cs);
	return (index == -1)? s.length : index;
}

/// ditto
size_t locate(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
	// Clamp so the slice below can't go out of range
	if(start > s.length)
		start = s.length;

	auto index = std.string.indexOf(s[start..$], sub, cs);
	return (index == -1)? s.length : index + start;
}

/// ditto
size_t locatePrior(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=int.max, CaseSensitive cs = CaseSensitive.yes)
{
	if(start > s.length)
		start = s.length;

	auto index = std.string.lastIndexOf(s[0..start], sub, cs);
	return (index == -1)? s.length : index;
}

/// Suggested usage:
///   "Hello %s!".formatln("World");
string formatln(T...)(T args)
{
	return format(args)~"\n";
}

//TODO*: Fix stripNonPrintable
/// Currently a placeholder: returns str unchanged.
T stripNonPrintable(T)(T str) if(isSomeString!T)
{
	//T ret = str.dup;
	//auto numRemaining = ret.removeIf( (T c){return !isPrintable(c);} );
	//return ret[0..numRemaining];
	return str;
}

/// Return value is number of code units
size_t nextCodePointSize(T)(T str) if(is(T==string) || is(T==wstring))
{
	// decode() advances the index past the first code point
	size_t ret;
	str.decode(ret);
	return ret;
}

/// Indents every line with indentStr
T indent(T)(T str, T indentStr="\t") if(isSomeString!T)
{
	if(str == "")
		return indentStr;

	// The final code unit is kept out of the replace so that a trailing
	// newline isn't followed by a dangling indent.
	return
		indentStr ~
		str[0..$-1].replace("\n", "\n"~indentStr) ~
		str[$-1];
}

/// ditto
T[] indent(T)(T[] lines, T indentStr="\t") if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(i, line; lines)
		lines[i] = indentStr ~ line;

	return lines;
}

/// Unindents the lines of text as much as possible while preserving
/// all relative indentation.
///
/// Inconsistent indentation (on lines that contain non-whitespace) is an error
/// and throws an exception at runtime, or asserts when executed at compile-time.
T unindent(T)(T str) if(isSomeString!T)
{
	if(str == "")
		return "";

	T[] lines;
	if(__ctfe)
		lines = str.ctfe_split("\n");
	else
		lines = str.split("\n");

	lines = unindentImpl(lines, str);

	if(__ctfe)
		return lines.ctfe_join("\n");
	else
		return lines.join("\n");
}

/// ditto
T[] unindent(T)(T[] lines) if(isSomeString!T)
{
	return unindentImpl(lines);
}

/// Shared implementation of unindent().
///
/// Removes the shortest leading-whitespace run found on any non-blank line
/// from every line; whitespace-only lines become "". origStr is non-null
/// only when called from the single-string overload.
private T[] unindentImpl(T)(T[] lines, T origStr=null) if(isSomeString!T)
{
	if(lines == [])
		return [];

	bool isNonWhite(dchar ch)
	{
		if(__ctfe)
			return !ctfe_iswhite(ch);
		else
			return !isWhite(ch);
	}
	T leadingWhiteOf(T str)
		{ return str[ 0 .. $-find!(isNonWhite)(str).length ]; }

	// Apply leadingWhiteOf, but emit null instead for whitespace-only lines
	T[] indents;
	if(__ctfe)
		indents = semitwist.util.functional.map( lines,
			(T str){ return str.ctfe_strip()==""? null : leadingWhiteOf(str);}
		);
	else
	{
		// Must return T (not string) so wstring/dstring instantiate too
		T mapPredicate(T str){ return str.strip()==""? null : leadingWhiteOf(str);}
		indents = array( std.algorithm.map!(
			mapPredicate//(T str){ return str.strip()==""? null : leadingWhiteOf(str);}
		)(lines) );
	}

	T shorterAndNonNull(T a, T b) {
		if(a is null) return b;
		if(b is null) return a;

		return (a.length < b.length)? a : b;
	}
	auto shortestIndent = std.algorithm.reduce!(shorterAndNonNull)(indents);

	// Nothing to remove: either every line is blank, or some line has no indent
	if(shortestIndent is null || shortestIndent == "")
	{
		if(origStr is null)
			return stripLinesLeft(lines);
		else
			return [origStr.stripLeft()];
	}

	foreach(i; 0..lines.length)
	{
		if(indents[i] is null)
			lines[i] = "";
		// Each line's own indent must begin with the common indent
		else if(indents[i].startsWith(shortestIndent))
			lines[i] = lines[i][shortestIndent.length..$];
		else
		{
			if(__ctfe)
				assert(false, "Inconsistent indentation");
			else
				throw new Exception("Inconsistent indentation");
		}
	}

	return lines;
}

/// Removes leading whitespace-only lines.
T stripLinesTop(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, true, false, false, false)(str);
}
/// Removes trailing whitespace-only lines.
T stripLinesBottom(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, false, true, false, false)(str);
}
/// Removes leading and trailing whitespace-only lines.
T stripLinesTopBottom(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, true, true, false, false)(str);
}

/// Strips leading whitespace from each line.
T stripLinesLeft(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, false, false, true, false)(str);
}
/// Strips trailing whitespace from each line.
T stripLinesRight(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, false, false, false, true)(str);
}
/// Strips leading and trailing whitespace from each line.
T stripLinesLeftRight(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, false, false, true, true)(str);
}

/// Applies all four of the top/bottom/left/right strips.
T stripLinesBox(T)(T str) if(isSomeString!T)
{
	return stripLinesBox_StrImpl!(T, true, true, true, true)(str);
}

// Splits str on "\n", applies the selected strips, and re-joins.
private T stripLinesBox_StrImpl
	(T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
	(T str)
	if(isSomeString!T)
{
	if(str == "")
		return "";

	T[] lines;
	if(__ctfe)
		lines = str.ctfe_split("\n");
	else
		lines = str.split("\n");

	lines = stripLinesBox_LineImpl!(T, stripTop, stripBottom, stripLeft, stripRight)(lines);

	if(__ctfe)
		return lines.ctfe_join("\n");
	else
		return lines.join("\n");
}

// Applies the strips selected by the template flags to an array of lines.
private T[] stripLinesBox_LineImpl
	(T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
	(T[] lines)
	if(isSomeString!T)
{
	static if(stripTop)    lines = stripLinesTop(lines);
	static if(stripBottom) lines = stripLinesBottom(lines);

	static if(stripLeft && stripRight)
	{
		lines = stripLinesLeftRight(lines);
	}
	else
	{
		static if(stripLeft)  lines = stripLinesLeft(lines);
		static if(stripRight) lines = stripLinesRight(lines);
	}

	return lines;
}

/// ditto
T[] stripLinesBox(T)(T[] str) if(isSomeString!T)
{
	return stripLinesBox_LineImpl!(T, true, true, true, true)(str);
}

/// Drops leading whitespace-only lines. If every line is whitespace-only,
/// the last line alone is kept.
T[] stripLinesTop(T)(T[] lines) if(isSomeString!T)
{
	// Guard: lines.length-1 would wrap around on an empty array
	if(lines.length == 0)
		return lines;

	auto firstLine = lines.length-1;

	foreach(i, line; lines)
	if(line.ctfe_strip() != "")
	{
		firstLine = i;
		break;
	}

	return lines[firstLine..$];
}

/// Drops trailing whitespace-only lines. If every line is whitespace-only,
/// the first line alone is kept.
T[] stripLinesBottom(T)(T[] lines) if(isSomeString!T)
{
	// Guard: the slice below always takes at least one element
	if(lines.length == 0)
		return lines;

	size_t lastLine = 0;

	foreach_reverse(i, line; lines)
	if(line.ctfe_strip() != "")
	{
		lastLine = i;
		break;
	}

	return lines[0..lastLine+1];
}

/// ditto
T[] stripLinesTopBottom(T)(T[] lines) if(isSomeString!T)
{
	lines = stripLinesTop(lines);
	lines = stripLinesBottom(lines);
	return lines;
}

/// Strips leading whitespace from each element, in place.
T[] stripLinesLeft(T)(T[] lines) if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(i, line; lines)
		lines[i] = line.ctfe_stripl();

	return lines;
}

/// Strips trailing whitespace from each element, in place.
T[] stripLinesRight(T)(T[] lines) if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(i, line; lines)
		lines[i] = line.ctfe_stripr();

	return lines;
}

/// Strips leading and trailing whitespace from each element, in place.
T[] stripLinesLeftRight(T)(T[] lines) if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(i, line; lines)
		lines[i] = line.ctfe_strip();

	return lines;
}

/// True if std.algorithm.find locates needle within haystack.
//TODO*: Unittest this
bool contains(T1,T2)(T1 haystack, T2 needle)
{
	return std.algorithm.find(haystack, needle) != [];
}

/++
Unindents, strips whitespace-only lines from top and bottom,
and strips trailing whitespace from each line.
(Also converts Windows "\r\n" line endings to Unix "\n" line endings.)

See also the documentation for unindent().

Good for making easily-readable multi-line string literals without
leaving extra indents and whitespace in the resulting string:

Do this:
--------------------
void foo()
{
	enum codeStr = q{
		// Written in the D Programming Language
		// by John Doe

		int main()
		{
			return 0;
		}
	}.normalize();
}
--------------------

Instead of this:
--------------------
void foo()
{
	enum codeStr =
q{// Written in the D Programming Language
// by John Doe

int main()
{
	return 0;
}};
}
--------------------

The resulting string is exactly the same.
+/
T normalize(T)(T str) if(isSomeString!T)
{
	if(str == "")
		return "";

	T[] lines;
	if(__ctfe)
		lines = str.ctfe_split("\n");
	else
		lines = str.split("\n");

	lines = normalize(lines);

	if(__ctfe)
		return lines.ctfe_join("\n");
	else
		return lines.join("\n");
}

/// ditto
T[] normalize(T)(T[] lines) if(isSomeString!T)
{
	lines = stripLinesTopBottom(lines);
	lines = unindent(lines);
	lines = stripLinesRight(lines);
	return lines;
}

alias md5 = std.digest.md.md5Of;
/+string md5(string data)
{
	MD5_CTX context;
	context.start();
	context.update(data);
	ubyte digest[16];
	context.finish(digest);

	return digestToString(digest);
}+/

/// Returns the byte sequence that encodes the given byte-order mark.
immutable(ubyte)[] bomCodeOf(BOM bom)
{
	final switch(bom)
	{
	case BOM.UTF8:    return cast(immutable(ubyte)[])x"EF BB BF";
	case BOM.UTF16LE: return cast(immutable(ubyte)[])x"FF FE";
	case BOM.UTF16BE: return cast(immutable(ubyte)[])x"FE FF";
	case BOM.UTF32LE: return cast(immutable(ubyte)[])x"FF FE 00 00";
	case BOM.UTF32BE: return cast(immutable(ubyte)[])x"00 00 FE FF";
	}
}

/// Detects which BOM str starts with. Returns BOM.UTF8 if none is found.
BOM bomOf(const(ubyte)[] str)
{
	// The UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE),
	// so the 32-bit marks must be tested before the 16-bit ones.
	if(str.startsWith(bomCodeOf(BOM.UTF8   ))) return BOM.UTF8;
	if(str.startsWith(bomCodeOf(BOM.UTF32LE))) return BOM.UTF32LE;
	if(str.startsWith(bomCodeOf(BOM.UTF32BE))) return BOM.UTF32BE;
	if(str.startsWith(bomCodeOf(BOM.UTF16LE))) return BOM.UTF16LE;
	if(str.startsWith(bomCodeOf(BOM.UTF16BE))) return BOM.UTF16BE;

	return BOM.UTF8;
}

version(LittleEndian)
{
	enum BOM native16BitBOM    = BOM.UTF16LE;
	enum BOM native32BitBOM    = BOM.UTF32LE;
	enum BOM nonNative16BitBOM = BOM.UTF16BE;
	enum BOM nonNative32BitBOM = BOM.UTF32BE;
}
else
{
	enum BOM native16BitBOM    = BOM.UTF16BE;
	enum BOM native32BitBOM    = BOM.UTF32BE;
	enum BOM nonNative16BitBOM = BOM.UTF16LE;
	enum BOM nonNative32BitBOM = BOM.UTF32LE;
}

/// True for UTF-8 and for the 16/32-bit BOMs matching this machine's endianness.
bool isNativeEndian(BOM bom)
{
	return bom == native16BitBOM || bom == native32BitBOM || bom == BOM.UTF8;
}

/// Negation of isNativeEndian.
bool isNonNativeEndian(BOM bom)
{
	return !isNativeEndian(bom);
}

/// True only for BOM.UTF8.
bool is8Bit(BOM bom)
{
	return bom == BOM.UTF8;
}

/// True for either UTF-16 BOM.
bool is16Bit(BOM bom)
{
	return bom == native16BitBOM || bom == nonNative16BitBOM;
}

/// True for either UTF-32 BOM.
bool is32Bit(BOM bom)
{
	return bom == native32BitBOM || bom == nonNative32BitBOM;
}

/// Returns the byte order implied by bom; UTF-8 reports the machine's own.
Endian endianOf(BOM bom)
{
	// DMD 2.055 changed "LittleEndian" to "littleEndian", etc...
	static if(__traits(compiles, Endian.littleEndian))
	{
		final switch(bom)
		{
		case BOM.UTF8: return endian;
		case BOM.UTF16LE, BOM.UTF32LE: return Endian.littleEndian;
		case BOM.UTF16BE, BOM.UTF32BE: return Endian.bigEndian;
		}
	}
	else
	{
		final switch(bom)
		{
		case BOM.UTF8: return endian;
		case BOM.UTF16LE, BOM.UTF32LE: return Endian.LittleEndian;
		case BOM.UTF16BE, BOM.UTF32BE: return Endian.BigEndian;
		}
	}
}

/// True if T is one of the three InsensitiveT instantiations.
template isInsensitive(T)
{
	enum isInsensitive =
		is(T==InsensitiveT!string ) ||
		is(T==InsensitiveT!wstring) ||
		is(T==InsensitiveT!dstring);
}
static assert(isInsensitive!Insensitive);
static assert(isInsensitive!WInsensitive);
static assert(isInsensitive!DInsensitive);
static assert(!isInsensitive!string);

/// A string wrapper whose comparison and hashing ignore case.
/// The original text is preserved; a lower-cased copy drives opCmp/toHash.
struct InsensitiveT(T) if(isSomeString!T)
{
	private T str;
	private T foldingCase;

	this(T newStr)
	{
		str = newStr;
		updateFoldingCase();
	}

	/// Returns the original (case-preserved) text.
	T toString() const
	{
		return str;
	}

	private void updateFoldingCase()
	{
		// Phobos doesn't actually have a toFolding() right now
		foldingCase = toLower(str);
	}

	// Hash the folded text using T's own TypeInfo (typeid(string) would
	// misread the array when T is wstring or dstring).
	static if(useNoThrowSafeToHash)
	{
		nothrow @trusted hash_t toHash() const
		{
			return typeid(T).getHash(&foldingCase);
		}
	}
	else
	{
		hash_t toHash() const
		{
			return typeid(T).getHash(&foldingCase);
		}
	}

	/// Assigns from another Insensitive (of any width) or from a plain string.
	void opAssign(T2)(T2 b) if(isInsensitive!T2 || isSomeString!T2)
	{
		static if(is(T2 == InsensitiveT!T))
		{
			// Same instantiation: the folded copy can be reused as-is
			str = b.str;
			foldingCase = b.foldingCase;
		}
		else static if(isInsensitive!T2)
		{
			str = to!T(b.str);
			updateFoldingCase();
		}
		else
		{
			str = b;
			updateFoldingCase();
		}
	}

	InsensitiveT!T opBinary(string op)(const InsensitiveT!T b) if(op=="~")
	{
		return InsensitiveT!T(str ~ b.str);
	}

	InsensitiveT!T opOpAssign(string op)(const InsensitiveT!T b) if(op=="~")
	{
		str ~= b.str;
		foldingCase ~= b.foldingCase;
		return this;
	}

	//TODO: Get rid of this "static if" (but not the func) after dropping support for DMD 2.058
	static if(vendor != Vendor.digitalMars || version_minor >= 59)
	bool opEquals(const InsensitiveT!T b) const
	{
		// b is an lvalue here, so this resolves to the ref overload
		return opEquals(b);
	}

	bool opEquals(ref const InsensitiveT!T b) const
	{
		/+if (str is b.str) return true;
		if (str is null || b.str is null) return false;
		return foldingCase == b.foldingCase;+/
		return this.opCmp(b) == 0;
	}

	//TODO: Get rid of this "static if" (but not the func) after dropping support for DMD 2.058
	static if(vendor != Vendor.digitalMars || version_minor >= 59)
	int opCmp(const InsensitiveT!T b) const
	{
		// b is an lvalue here, so this resolves to the ref overload
		return opCmp(b);
	}

	int opCmp(ref const InsensitiveT!T b) const
	{
		if(str is null && b.str is null) return 0;
		if(str == b.str) return 0;
		if(str is null  ) return -1;
		if(b.str is null) return 1;
		return std.string.cmp(foldingCase, b.foldingCase);
	}

	InsensitiveT!T opSlice()
	{
		return this;
	}

	// NOTE(review): a one-argument opSlice looks like it was meant to be
	// opIndex; left as-is to keep the interface unchanged - confirm.
	auto opSlice(size_t x)
	{
		return str[x];
	}

	InsensitiveT!T opSlice(size_t x, size_t y)
	{
		return InsensitiveT!T(str[x..y]);
	}
}

alias InsensitiveT!string  Insensitive;
alias InsensitiveT!wstring WInsensitive;
alias InsensitiveT!dstring DInsensitive;

//TODO: Replace with std.digest.toHexString in DMD 2.061
/// Returns arr as upper-case hex, two digits per byte, with no separators.
string toHexString(ubyte[] arr)
{
	string str = "";
	foreach(b; arr)
		str ~= format("%.2X", b);

	return str;
}

// NOTE(review): runs of whitespace inside the string literals below could not
// be recovered from the available source; confirm them against upstream.
mixin(unittestSemiTwistDLib(q{

	// Insensitive
	mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("Test") }));
	mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("TEST") }));
	mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive("ABCD") }));
	mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive(null) }));
	mixin(deferAssert!(q{ Insensitive(null) == Insensitive(null) }));
	mixin(deferAssert!(q{ Insensitive("Test") == Insensitive("TEST") }));
	mixin(deferAssert!(q{ Insensitive("ABCD") != Insensitive("TEST") }));
	mixin(deferAssert!(q{ Insensitive(null) != Insensitive("TEST") }));

	mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("ES") }));
	mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("es") }));
	mixin(deferAssert!(q{ Insensitive("TEST")[1..3] != Insensitive("AB") }));

	mixin(deferAssert!(q{ Insensitive("TE")~Insensitive("ST") == Insensitive("TesT") }));

	Insensitive ins;
	ins = Insensitive("TEST");
	ins = "ab";
	ins ~= Insensitive("cd");

	mixin(deferAssert!(q{ ins == Insensitive("AbcD") }));

	int[Insensitive] ins_aa = [Insensitive("ABC"):1, Insensitive("DEF"):2, Insensitive("Xyz"):3];
	mixin(deferAssert!(q{ Insensitive("ABC") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("DEF") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("Xyz") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("aBc") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("dEf") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("xYZ") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("HI") !in ins_aa }));

	mixin(deferAssert!(q{ ins_aa[Insensitive("aBc")] == 1 }));
	mixin(deferAssert!(q{ ins_aa[Insensitive("dEf")] == 2 }));
	mixin(deferAssert!(q{ ins_aa[Insensitive("xYZ")] == 3 }));

	// escapeDDQS, unescapeDDQS
	mixin(deferEnsure!(q{ `hello`.escapeDDQS() }, q{ _ == `"hello"` }));
	mixin(deferEnsure!(q{ `"hello"`.unescapeDDQS() }, q{ _ == "hello" }));
	mixin(deferEnsure!(q{ `"I"`.unescapeDDQS() }, q{ _ == "I" }));

	mixin(deferEnsure!(q{ (`And...`~"\n"~`sam\nick said "Hi!".`).escapeDDQS() }, q{ _ == `"And...\nsam\\nick said \"Hi!\"."` }));
	//TODO: Make this one pass
	//mixin(deferEnsure!(q{ `"And...\nsam\\nick said \"Hi!\"."`.unescapeDDQS() }, q{ _ == `And...`~"\n"~`sam\nick said "Hi!".` }));
	mixin(deferEnsureThrows!(q{ "hello".unescapeDDQS(); }, Exception));

	// indent
	mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent(" ") }, q{ _ == " A\n \tB\n \n C" }));
	mixin(deferEnsure!(q{ "A\nB\n".indent("\t") }, q{ _ == "\tA\n\tB\n" }));
	mixin(deferEnsure!(q{ "".indent("\t") }, q{ _ == "\t" }));
	mixin(deferEnsure!(q{ "A".indent("\t") }, q{ _ == "\tA" }));
	mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent("") }, q{ _ == "A\n\tB\n\nC" }));

	// unindent
	mixin(deferEnsure!(q{ " \t A\n \t \tB\n \t C\n \t\n \t D".unindent() }, q{ _ == "A\n\tB\nC\n\nD" }));
	mixin(deferEnsure!(q{ " D\n".unindent() }, q{ _ == "D\n" }));
	mixin(deferEnsure!(q{ " D\n ".unindent() }, q{ _ == "D\n" }));
	mixin(deferEnsure!(q{ "D".unindent() }, q{ _ == "D" }));
	mixin(deferEnsure!(q{ "".unindent() }, q{ _ == "" }));
	mixin(deferEnsure!(q{ " ".unindent() }, q{ _ == "" }));
	mixin(deferEnsureThrows!(q{ " \tA\n\t B".unindent(); }, Exception));
	mixin(deferEnsureThrows!(q{ " a\n \tb".unindent(); }, Exception));

	// unindent at compile-time
	enum ctfe_unindent_dummy1 = " \t A\n \t \tB\n \t C\n \t\n \t D".unindent();
	enum ctfe_unindent_dummy2 = " D".unindent();
	enum ctfe_unindent_dummy3 = " D\n".unindent();
	enum ctfe_unindent_dummy4 = "".unindent();

	mixin(deferEnsure!(q{ ctfe_unindent_dummy1 }, q{ _ == "A\n\tB\nC\n\nD" }));
	mixin(deferEnsure!(q{ ctfe_unindent_dummy2 }, q{ _ == "D" }));
	mixin(deferEnsure!(q{ ctfe_unindent_dummy3 }, q{ _ == "D\n" }));
	mixin(deferEnsure!(q{ ctfe_unindent_dummy4 }, q{ _ == "" }));

	//enum ctfe_unindent_dummy5 = " a\n \tb".unindent(); // Should fail to compile

	// contains
	mixin(deferEnsure!(q{ contains("abcde", 'a') }, q{ _==true }));
	mixin(deferEnsure!(q{ contains("abcde", 'c') }, q{ _==true }));
	mixin(deferEnsure!(q{ contains("abcde", 'e') }, q{ _==true }));
	mixin(deferEnsure!(q{ contains("abcde", 'x') }, q{ _==false }));

	// stripLines: Top and Bottom
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTop() }, q{ _ == " ABC \n \n DEF \n \t \n\t \n" }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBottom() }, q{ _ == " \t \n\t \n ABC \n \n DEF " }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTopBottom() }, q{ _ == " ABC \n \n DEF " }));

	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTop() }, q{ _ == "ABC\n " }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBottom() }, q{ _ == "\nABC" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTopBottom() }, q{ _ == "ABC" }));

	mixin(deferEnsure!(q{ "\n".stripLinesTop() }, q{ _ == "" }));
	mixin(deferEnsure!(q{ "\n".stripLinesBottom() }, q{ _ == "" }));
	mixin(deferEnsure!(q{ "\n".stripLinesTopBottom() }, q{ _ == "" }));

	mixin(deferEnsure!(q{ "ABC".stripLinesTopBottom() }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "".stripLinesTopBottom() }, q{ _ == "" }));

	// stripLines: Left and Right
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeft() }, q{ _ == "\n\nABC \n\nDEF \n\n\n" }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesRight() }, q{ _ == "\n\n ABC\n\n DEF\n\n\n" }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight() }, q{ _ == "\n\nABC\n\nDEF\n\n\n" }));

	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeft() }, q{ _ == "\nABC\n" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesRight() }, q{ _ == "\nABC\n" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeftRight() }, q{ _ == "\nABC\n" }));

	mixin(deferEnsure!(q{ "\n".stripLinesLeft() }, q{ _ == "\n" }));
	mixin(deferEnsure!(q{ "\n".stripLinesRight() }, q{ _ == "\n" }));
	mixin(deferEnsure!(q{ "\n".stripLinesLeftRight() }, q{ _ == "\n" }));

	mixin(deferEnsure!(q{ "ABC".stripLinesLeftRight() }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "".stripLinesLeftRight() }, q{ _ == "" }));

	// stripLinesBox
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox() }, q{ _ == "ABC\n\nDEF" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBox() }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "\n".stripLinesBox() }, q{ _ == "" }));
	mixin(deferEnsure!(q{ "ABC".stripLinesBox() }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "".stripLinesBox() }, q{ _ == "" }));

	// stripLines at compile-time
	enum ctfe_stripLinesBox_dummy1 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox();
	enum ctfe_stripLinesBox_dummy2 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight();
	enum ctfe_stripLinesBox_dummy3 = "".stripLinesBox();

	mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy1 }, q{ _ == "ABC\n\nDEF" }));
	mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy2 }, q{ _ == "\n\nABC\n\nDEF\n\n\n" }));
	mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy3 }, q{ _ == "" }));

	// normalize
	mixin(deferEnsure!(q{
		q{
			// test
			void foo() {
				int x = 2;
			}
		}.normalize()
	}, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));

	enum ctfe_normalize_dummy1 = q{
		// test
		void foo() {
			int x = 2;
		}
	}.normalize();
	mixin(deferEnsure!(q{ ctfe_normalize_dummy1 }, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));

	// toHexString
	mixin(deferEnsure!(q{ toHexString([0x00, 0x12, 0x0A, 0xBC]) }, q{ _ == "00120ABC" } ));
	mixin(deferEnsure!(q{ toHexString([0xF0]) }, q{ _ == "F0" } ));
	mixin(deferEnsure!(q{ toHexString([]) }, q{ _ == "" } ));
}));