1 // SemiTwist Library
2 // Written in the D programming language.
3
4 module semitwist.util.text;
5
6 import std.algorithm;
7 import std.array;
8 import std.compiler;
9 import std.conv;
10 import std.digest.md;
11 import std.stdio;
12 import std.traits;
13 import std.stream;
14 import std..string;
15 import std.system;
16 import std.uni;
17 import std.utf;
18
19 public import std.stream: BOM;
20
21 import semitwist.util.all;
22
23 private alias semitwist.util.ctfe.ctfe_strip ctfe_strip;
24
25 /**
26 Notes:
27 Anything in "data" must be doubly escaped.
28
29 For instance, if you want the generated function to return newline (ie, "\n"),
30 then "data" must be ['\\', 'n'], and thus the mixin call would look like this:
31
----
mixin(multiTypeString!("unixNewline", "\\n"));
// Or
mixin(multiTypeString!("unixNewline", r"\n"));
----
37
38 Or, if you want the generated function to return the escape sequence
39 for newline (ie, r"\n", or "\\n", or ['\\', 'n']), then "data" must
40 be ['\\', '\\', 'n'], and thus the mixin call would look like this:
41
----
mixin(multiTypeString!("unixNewlineEscSequence", "\\\\n"));
// Or
mixin(multiTypeString!("unixNewlineEscSequence", r"\\n"));
----
47
48 (This requirement could be changed if there is a way to automatically
49 escape a string at compile-time.)
50 */
template multiTypeString(string name, string data, string access="public")
{
    // Builds the source text of a function template `T[] name(T)()` that
    // returns `data` as a char, wchar or dchar string literal.
    //
    // The final branch must be `static assert(false, msg)`: a bare
    // `static assert(msg)` tests the (always non-null) string itself and
    // therefore can never fail.
    enum multiTypeString =
        access~" T[] "~name~"(T)()"~
        "{"~
        " static if(is(T == char)) { return \""~data~"\"c; }"~
        " else static if(is(T == wchar)) { return \""~data~"\"w; }"~
        " else static if(is(T == dchar)) { return \""~data~"\"d; }"~
        " else static assert(false, \"T must be char, wchar, or dchar\");"~
        "}";
}

/// Warning: This is missing some unicode whitespace chars
mixin(multiTypeString!("whitespaceChars", r" \n\r\t\v\f"));
65
/// Unix EOL: "\n"
/// Rewrites `str` in place so that Windows and Mac9 line endings become
/// the Unix one.
void toUnixEOL(T)(ref T[] str)
{
    auto unixEOL = to!(T[])(nlStr_Linux);

    // Windows pairs are replaced first, then any remaining Mac9 endings.
    str = str
        .replace(to!(T[])(nlStr_Windows), unixEOL)  // Win -> Unix
        .replace(to!(T[])(nlStr_Mac9),    unixEOL); // Mac9 -> Unix
}
72
/// Mac9 EOL: "\r"
/// Rewrites `str` in place so that Windows and Unix line endings become
/// the Mac9 one.
void toMac9EOL(T)(ref T[] str)
{
    auto mac9EOL = to!(T[])(nlStr_Mac9);

    // Windows pairs are replaced first, then any remaining Unix endings.
    str = str
        .replace(to!(T[])(nlStr_Windows), mac9EOL)  // Win -> Mac9
        .replace(to!(T[])(nlStr_Linux),   mac9EOL); // Unix -> Mac9
}
79
/// Win EOL: "\r\n"
/// Rewrites `str` in place: everything is first brought to Unix endings,
/// then each Unix ending is expanded to the Windows one.
void toWinEOL(T)(ref T[] str)
{
    toUnixEOL(str); // All -> Unix

    auto unixEOL = to!(T[])(nlStr_Linux);
    auto winEOL  = to!(T[])(nlStr_Windows);
    str = replace(str, unixEOL, winEOL); // Unix -> Win
}
86
/// Converts every line ending in `str` to the convention of the target
/// platform: Windows endings when compiled for Windows, Unix endings for
/// every other target.
///
/// Previously only OSX and linux were handled besides Windows, so other
/// targets (e.g. FreeBSD) silently returned the input unconverted.
T[] toNativeEOL(T)(T[] str)
{
    version(Windows)
        toWinEOL(str);
    else
        toUnixEOL(str);

    return str;
}
94
/// Converts text known to use Unix line endings to the native convention.
/// Only Windows builds do any work; every other build returns `str` as-is.
T[] toNativeEOLFromUnix(T)(T[] str)
{
    version(Windows)
    {
        return toNativeEOL(str);
    }
    else
    {
        return str;
    }
}
100
/// Converts text known to use Windows line endings to the native convention.
/// Windows builds return `str` untouched; every other build delegates to
/// toNativeEOL (previously only OSX and linux did, so other targets kept
/// the Windows endings).
T[] toNativeEOLFromWin(T)(T[] str)
{
    version(Windows)
        return str;
    else
        return str.toNativeEOL();
}
107
/// Converts text known to use Mac9 line endings to the native convention.
/// Always delegates to toNativeEOL.
T[] toNativeEOLFromMac9(T)(T[] str)
{
    auto converted = toNativeEOL(str);
    return converted;
}
112
/// Selects the escaping convention used by escape() and unescape().
enum EscapeSequence
{
    /// D Double Quote String, ex: `"foo\t"` <--> `foo` followed by a tab
    DDQS,

    /// HTML entities, ex: `&amp;` <--> `&`
    HTML,

    //TODO: Implement these
    //URI, // ex: `%20` <--> ` `
    //SQL, //TODO: Include different types of SQL escaping (SQL's about as standardized as BASIC)
}
122
123 /++
124 Note:
125 For the escape and unescape functions, chaining one with the other
126 (ex: "unescape(escape(str))") will result in a string that is
127 semantically equivalent to the original, but it is *not* necessarily
128 guaranteed to be exactly identical to the original string.
129
130 For example:
131 string str;
132 str = `"\x41\t"`; // 0x41 is ASCII and UTF-8 for A
133 str = unescapeDDQS(str); // == `A ` (That's an actual tab character)
134 str = escapeDDQS(str); // == `"A\t"c`
135
136 Note that "\x41\t" and "A\t"c are equivalent, but not identical.
137 +/
T escape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
    // Dispatch to the escaper for the requested convention; any value
    // without an implementation is rejected with an Exception.
    if(type == EscapeSequence.DDQS)
        return escapeDDQS(str);

    if(type == EscapeSequence.HTML)
        return escapeHTML(str);

    throw new Exception("Unsupported EscapeSequence");
}
160
/// Reverses escape() for the given convention.
/// Throws an Exception for a convention without an implementation.
T unescape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
    if(type == EscapeSequence.DDQS)
        return unescapeDDQS(str);

    if(type == EscapeSequence.HTML)
        return unescapeHTML(str);

    throw new Exception("Unsupported EscapeSequence");
}
183
/// Replaces every occurrence of `escapeSequence` in a copy of `str` with
/// the final code unit of that sequence.
T unescapeChar(T)(T str, T escapeSequence) if(isSomeString!T)
{
    T copy = str.dup;

    // What remains of the sequence once its escape prefix is dropped
    T unescaped = escapeSequence[$-1..$];

    return substitute(copy, escapeSequence, unescaped);
}
192
/// Converts a D double-quoted string literal (surrounding quotes included,
/// with an optional `c`, `w` or `d` postfix) into the text it denotes.
///
/// Warning: This doesn't unescape all escape sequences yet. Sequences that
/// are not recognized (for example `\x41`) are copied through unchanged.
///
/// Throws: Exception if `str` doesn't start with a double quote, isn't
/// terminated by an unescaped double quote (optionally followed by a
/// postfix), or ends its content with a lone backslash.
T unescapeDDQS(T)(T str) if(isSomeString!T)
{
    enum errStr = "str doesn't contain a valid D Double Quote String";

    if(str.length < 2 || str[0] != '"')
        throw new Exception(errStr);

    // Locate the closing quote, stepping over an optional c/w/d postfix.
    size_t closing = str.length - 1;
    if(str[closing] == 'c' || str[closing] == 'w' || str[closing] == 'd')
        closing--;
    if(closing == 0 || str[closing] != '"')
        throw new Exception(errStr);

    // Single left-to-right pass. Decoding each escape exactly once avoids
    // the double-unescaping that repeated whole-string substitution causes
    // (`\\n` must yield backslash + 'n', not a newline).
    T ret;
    size_t runStart = 1; // first code unit not yet copied into ret
    size_t i = 1;
    while(i < closing)
    {
        if(str[i] != '\\')
        {
            i++;
            continue;
        }

        // A backslash needs a following code unit inside the quotes;
        // otherwise it is escaping the closing quote itself.
        if(i + 1 >= closing)
            throw new Exception(errStr);

        T replacement;
        bool known = true;
        switch(str[i+1])
        {
        case '\\': replacement = `\`;  break;
        case '"':  replacement = `"`;  break;
        case '\'': replacement = `'`;  break;
        case 'r':  replacement = "\r"; break;
        case 'n':  replacement = "\n"; break;
        case 't':  replacement = "\t"; break;
        case '?':  replacement = "?";  break;
        case 'a':  replacement = "\a"; break;
        case 'b':  replacement = "\b"; break;
        case 'f':  replacement = "\f"; break;
        case 'v':  replacement = "\v"; break;
        //TODO: All the others
        default:   known = false;      break;
        }

        if(known)
        {
            ret ~= str[runStart..i];
            ret ~= replacement;
            runStart = i + 2;
        }
        // Unknown sequences stay in the pending run and are copied verbatim.
        i += 2;
    }

    // Nothing was decoded: hand back a slice instead of a copy.
    if(runStart == 1)
        return str[1..closing];

    ret ~= str[runStart..closing];
    return ret;
}
238
/// Turns `str` into a D double-quoted string literal: backslash, double
/// quote, CR, LF and tab are written as escape sequences and the result is
/// wrapped in double quotes.
T escapeDDQS(T)(T str) if(isSomeString!T)
{
    // Backslashes go first so the backslashes introduced by the later
    // substitutions are not doubled again.
    T escaped = ctfe_substitute!(T)(str, `\`, `\\`);
    escaped = ctfe_substitute!(T)(escaped, `"`, `\"`);

    // To prevent accidential conversions to platform-specific EOL
    escaped = ctfe_substitute!(T)(escaped, "\r", `\r`);
    escaped = ctfe_substitute!(T)(escaped, "\n", `\n`);

    // To prevent possible problems with automatic tab->space conversion
    escaped = ctfe_substitute!(T)(escaped, "\t", `\t`);

    // The rest don't need to be escaped
    T quote = `"`;
    return quote ~ escaped ~ quote;
}
252
253 /+
254 enum doubleQuoteTestStr = `"They said \"10 \\ 5 = 2\""`;
255
256 pragma(msg, "orig: "~doubleQuoteTestStr);
257 pragma(msg, "unesc: "~unescapeDDQS(doubleQuoteTestStr));
258 pragma(msg, "esc: "~escapeDDQS(doubleQuoteTestStr));
259 pragma(msg, "esc(unesc): "~escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
260 pragma(msg, "unesc(esc): "~unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
261
262 pragma(msg, "unesc: "~unescape(doubleQuoteTestStr, EscapeSequence.DDQS));
263 pragma(msg, "unesc: "~doubleQuoteTestStr.unescape(EscapeSequence.DDQS));
264
265 mixin(unittestSemiTwistDLib("Outputting some things", q{
266 enum wstring ctEscW = escapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
267 enum dstring ctEscD = escapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
268 enum wstring ctUnescW = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
269 enum dstring ctUnescD = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
270 writefln("%s%s", "ctEscW: ", ctEscW);
271 writefln("%s%s", "ctEscD: ", ctEscD);
272 writefln("%s%s", "ctUnescW: ", ctUnescW);
273 writefln("%s%s", "ctUnescD: ", ctUnescD);
274
275 writefln("%s%s", "unesc wchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
276 writefln("%s%s", "unesc dchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
277 writefln("%s%s", "esc wchar: ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
278 writefln("%s%s", "esc dchar: ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
279 // writefln("%s%s", "int: ", unescapeDDQS([cast(int)1,2,3]));
280
281 writefln("%s%s", "orig: ", doubleQuoteTestStr);
282 writefln("%s%s", "unesc: ", unescapeDDQS(doubleQuoteTestStr));
283 writefln("%s%s", "esc: ", escapeDDQS(doubleQuoteTestStr));
284 writefln("%s%s", "esc(unesc): ", escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
285 writefln("%s%s", "unesc(esc): ", unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
286 }));
287 +/
288
/// Replaces the HTML entities `&lt;`, `&gt;` and `&amp;` with the
/// characters they stand for.
///
/// Warning: This doesn't unescape all escape sequences yet.
T unescapeHTML(T)(T str) if(isSomeString!T)
{
    auto ret = str;

    // `&amp;` is decoded last so that text such as `&amp;lt;` ends up as
    // the literal `&lt;` rather than being decoded twice into `<`.
    ret = ctfe_substitute!(T)(ret, "&lt;", "<");
    ret = ctfe_substitute!(T)(ret, "&gt;", ">");
    ret = ctfe_substitute!(T)(ret, "&amp;", "&");

    return ret;
}
300
/// Replaces `&`, `<` and `>` with the HTML entities `&amp;`, `&lt;` and
/// `&gt;`. Quote characters are left as they are.
T escapeHTML(T)(T str) if(isSomeString!T)
{
    auto ret = str;

    // `&` must be encoded first; otherwise the ampersands introduced by
    // the `&lt;`/`&gt;` replacements would be encoded a second time.
    ret = ctfe_substitute!(T)(ret, "&", "&amp;");
    ret = ctfe_substitute!(T)(ret, "<", "&lt;");
    ret = ctfe_substitute!(T)(ret, ">", "&gt;");

    return ret;
}
311
312 /// Like std.string.indexOf, but with an optional 'start' parameter,
313 /// and returns s.length when not found (instead of -1).
314 //TODO*: Unittest these
size_t locate(Char)(in Char[] s, dchar c, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
    // Searches s[start..$] for c and returns its index within s, or
    // s.length when c does not occur. A start at or beyond the end is
    // treated as "not found" instead of slicing out of bounds.
    if(start >= s.length)
        return s.length;

    auto index = std.string.indexOf(s[start..$], c, cs);
    return (index == -1)? s.length : index + start;
}
320
321 /// ditto
size_t locatePrior(Char)(in Char[] s, dchar c, size_t start=int.max, CaseSensitive cs = CaseSensitive.yes)
{
    // Only s[0..limit] is searched; a start past the end means "whole string".
    immutable limit = (start > s.length)? s.length : start;

    auto found = std.string.lastIndexOf(s[0..limit], c, cs);
    if(found == -1)
        return s.length; // not found

    return found;
}
330
331 /// ditto
size_t locate(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
    // Searches s[start..$] for sub and returns its index within s, or
    // s.length when sub does not occur. A start beyond the end is treated
    // as "not found" instead of slicing out of bounds.
    if(start > s.length)
        return s.length;

    auto index = std.string.indexOf(s[start..$], sub, cs);
    return (index == -1)? s.length : index + start;
}
337
338 /// ditto
size_t locatePrior(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=int.max, CaseSensitive cs = CaseSensitive.yes)
{
    // Only s[0..limit] is searched; a start past the end means "whole string".
    immutable limit = (start > s.length)? s.length : start;

    auto found = std.string.lastIndexOf(s[0..limit], sub, cs);
    if(found == -1)
        return s.length; // not found

    return found;
}
347
/// Formats `args` with format() and appends a single "\n".
///
/// Suggested usage:
/// "Hello %s!".formatln("World");
string formatln(T...)(T args)
{
    string line = format(args);
    line ~= "\n";
    return line;
}
354
//TODO*: Fix stripNonPrintable
/// Placeholder: currently hands `str` back untouched. The intended
/// filtering is kept below, commented out, until it is fixed.
T stripNonPrintable(T)(T str) if(isSomeString!T)
{
    //T ret = str.dup;
    //auto numRemaining = ret.removeIf( (T c){return !isPrintable(c);} );
    //return ret[0..numRemaining];

    T unchanged = str;
    return unchanged;
}
363
/// Decodes the first code point of `str` and reports how many code units
/// it occupies.
/// Return value is number of code units
size_t nextCodePointSize(T)(T str) if(is(T==string) || is(T==wstring))
{
    // decode() advances the index past the code point it reads, so
    // starting from 0 leaves the index equal to that code point's size.
    size_t unitsConsumed = 0;
    decode(str, unitsConsumed);
    return unitsConsumed;
}
371
/// Indents every line with indentStr
/// An empty `str` yields just `indentStr`.
T indent(T)(T str, T indentStr="\t") if(isSomeString!T)
{
    if(str.length == 0)
        return indentStr;

    // The last code unit is held back from the replacement so that a
    // trailing newline does not gain an indent after it.
    auto allButLast = str[0..$-1];
    auto lastUnit   = str[$-1];

    auto indentedBody = allButLast.replace("\n", "\n"~indentStr);
    return indentStr ~ indentedBody ~ lastUnit;
}
383
384 /// ditto
T[] indent(T)(T[] lines, T indentStr="\t") if(isSomeString!T)
{
    // Each element of `lines` is overwritten with its indented form; the
    // same array is returned.
    // foreach(ref) doesn't work right at compile time: DMD Issue #3835
    for(size_t idx = 0; idx < lines.length; idx++)
        lines[idx] = indentStr ~ lines[idx];

    return lines;
}
393
394 /// Unindents the lines of text as much as possible while preserving
395 /// all relative indentation.
396 ///
397 /// Inconsistent indentation (on lines that contain non-whitespace) is an error
398 /// and throws an exception at runtime, or asserts when executed at compile-time.
T unindent(T)(T str) if(isSomeString!T)
{
    if(str == "")
        return "";

    // Split on "\n", unindent the lines, then join again. The ctfe_*
    // helpers are used while running at compile time, the regular
    // split/join otherwise.
    if(__ctfe)
        return unindentImpl(str.ctfe_split("\n"), str).ctfe_join("\n");

    return unindentImpl(str.split("\n"), str).join("\n");
}
417
418 /// ditto
T[] unindent(T)(T[] lines) if(isSomeString!T)
{
    return unindentImpl(lines);
}

// Shared implementation of both unindent overloads.
//
// lines:   the lines to unindent (modified in place and returned).
// origStr: the unsplit text when called from the string overload, null
//          otherwise; only used when there is no common indent to remove.
//
// Throws an Exception (asserts during CTFE) when a line containing
// non-whitespace does not begin with the shortest indent found.
private T[] unindentImpl(T)(T[] lines, T origStr=null) if(isSomeString!T)
{
    if(lines == [])
        return [];

    bool isNonWhite(dchar ch)
    {
        if(__ctfe)
            return !ctfe_iswhite(ch);
        else
            return !isWhite(ch);
    }
    T leadingWhiteOf(T str)
        { return str[ 0 .. $-find!(isNonWhite)(str).length ]; }

    // Apply leadingWhiteOf, but emit null instead for whitespace-only lines
    T[] indents;
    if(__ctfe)
        indents = semitwist.util.functional.map( lines,
            (T str){ return str.ctfe_strip()==""? null : leadingWhiteOf(str);}
        );
    else
    {
        // Returns T (not string) so wstring/dstring input also compiles
        T mapPredicate(T str){ return str.strip()==""? null : leadingWhiteOf(str);}
        indents = array( std.algorithm.map!(mapPredicate)(lines) );
    }

    T shorterAndNonNull(T a, T b) {
        if(a is null) return b;
        if(b is null) return a;

        return (a.length < b.length)? a : b;
    }
    auto shortestIndent = std.algorithm.reduce!(shorterAndNonNull)(indents);

    // Nothing in common to remove
    if(shortestIndent is null || shortestIndent == "")
    {
        if(origStr == null)
            return stripLinesLeft(lines);
        else
            return [origStr.stripLeft()];
    }

    foreach(i; 0..lines.length)
    {
        if(indents[i] is null)
            lines[i] = ""; // whitespace-only line
        // Each line's own indent must begin with the shortest one. (Testing
        // `indents` as a whole only compared the first line's indent.)
        else if(indents[i].startsWith(shortestIndent))
            lines[i] = lines[i][shortestIndent.length..$];
        else
        {
            if(__ctfe)
                assert(false, "Inconsistent indentation");
            else
                throw new Exception("Inconsistent indentation");
        }
    }

    return lines;
}
486
/// Removes whitespace-only lines from the top of `str`.
T stripLinesTop(T)(T str) if(isSomeString!T)
{
    enum top = true, bottom = false, left = false, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Removes whitespace-only lines from the bottom of `str`.
T stripLinesBottom(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = true, left = false, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Removes whitespace-only lines from both the top and bottom of `str`.
T stripLinesTopBottom(T)(T str) if(isSomeString!T)
{
    enum top = true, bottom = true, left = false, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
499
/// Strips leading whitespace from every line of `str`.
T stripLinesLeft(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = false, left = true, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Strips trailing whitespace from every line of `str`.
T stripLinesRight(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = false, left = false, right = true;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Strips leading and trailing whitespace from every line of `str`.
T stripLinesLeftRight(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = false, left = true, right = true;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
512
/// Strips on all four sides: whitespace-only lines at the top and bottom,
/// plus leading and trailing whitespace on every line.
T stripLinesBox(T)(T str) if(isSomeString!T)
{
    enum top = true, bottom = true, left = true, right = true;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
517
// String front end for the stripLines* family: splits `str` on "\n",
// applies the line-array implementation with the same flags, and joins the
// result again. Uses the ctfe_* split/join helpers during CTFE.
private T stripLinesBox_StrImpl
    (T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
    (T str)
    if(isSomeString!T)
{
    if(str == "")
        return "";

    alias stripLinesBox_LineImpl!(T, stripTop, stripBottom, stripLeft, stripRight) stripImpl;

    if(__ctfe)
        return stripImpl(str.ctfe_split("\n")).ctfe_join("\n");

    return stripImpl(str.split("\n")).join("\n");
}
539
// Line-array back end for the stripLines* family. Each compile-time flag
// enables one side; top/bottom are handled before left/right.
private T[] stripLinesBox_LineImpl
    (T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
    (T[] lines)
    if(isSomeString!T)
{
    static if(stripTop)
        lines = stripLinesTop(lines);

    static if(stripBottom)
        lines = stripLinesBottom(lines);

    // Both sides requested: use the combined helper instead of two passes
    static if(stripLeft && stripRight)
        lines = stripLinesLeftRight(lines);
    else static if(stripLeft)
        lines = stripLinesLeft(lines);
    else static if(stripRight)
        lines = stripLinesRight(lines);

    return lines;
}
560
/// Line-array form of stripLinesBox: strips on all four sides.
T[] stripLinesBox(T)(T[] str) if(isSomeString!T)
{
    enum top = true, bottom = true, left = true, right = true;
    return stripLinesBox_LineImpl!(T, top, bottom, left, right)(str);
}
565
/// Drops leading whitespace-only lines and returns the remaining slice.
/// If every line is whitespace-only, the last line alone is returned.
/// An empty array is returned unchanged.
T[] stripLinesTop(T)(T[] lines) if(isSomeString!T)
{
    // Guard: `lines.length-1` would wrap around for an empty array and the
    // slice below would be out of bounds.
    if(lines.length == 0)
        return lines;

    auto firstLine = lines.length-1;

    foreach(i, line; lines)
    if(line.ctfe_strip() != "")
    {
        firstLine = i;
        break;
    }

    return lines[firstLine..$];
}
579
/// Drops trailing whitespace-only lines and returns the remaining slice.
/// If every line is whitespace-only, the first line alone is returned.
/// An empty array is returned unchanged.
T[] stripLinesBottom(T)(T[] lines) if(isSomeString!T)
{
    // Guard: the `lines[0..lastLine+1]` slice below needs at least one
    // element.
    if(lines.length == 0)
        return lines;

    size_t lastLine = 0;

    foreach_reverse(i, line; lines)
    if(line.ctfe_strip() != "")
    {
        lastLine = i;
        break;
    }

    return lines[0..lastLine+1];
}
593
/// Drops whitespace-only lines from the top, then from the bottom.
T[] stripLinesTopBottom(T)(T[] lines) if(isSomeString!T)
{
    auto withoutTop = stripLinesTop(lines);
    return stripLinesBottom(withoutTop);
}
600
/// Replaces each element of `lines` with its ctfe_stripl() result and
/// returns the same array.
T[] stripLinesLeft(T)(T[] lines) if(isSomeString!T)
{
    // foreach(ref) doesn't work right at compile time: DMD Issue #3835
    for(size_t idx = 0; idx < lines.length; idx++)
        lines[idx] = lines[idx].ctfe_stripl();

    return lines;
}

/// Replaces each element of `lines` with its ctfe_stripr() result and
/// returns the same array.
T[] stripLinesRight(T)(T[] lines) if(isSomeString!T)
{
    // foreach(ref) doesn't work right at compile time: DMD Issue #3835
    for(size_t idx = 0; idx < lines.length; idx++)
        lines[idx] = lines[idx].ctfe_stripr();

    return lines;
}

/// Replaces each element of `lines` with its ctfe_strip() result and
/// returns the same array.
T[] stripLinesLeftRight(T)(T[] lines) if(isSomeString!T)
{
    // foreach(ref) doesn't work right at compile time: DMD Issue #3835
    for(size_t idx = 0; idx < lines.length; idx++)
        lines[idx] = lines[idx].ctfe_strip();

    return lines;
}
627
//TODO*: Unittest this
/// True when `needle` occurs somewhere in `haystack`, i.e. when
/// std.algorithm.find leaves a non-empty remainder.
bool contains(T1,T2)(T1 haystack, T2 needle)
{
    auto remainder = std.algorithm.find(haystack, needle);
    return remainder.length > 0;
}
633
634 /++
Unindents, strips whitespace-only lines from top and bottom,
and strips trailing whitespace from each line.
(Also converts Windows "\r\n" line endings to Unix "\n" line endings.)
638
639 See also the documentation for unindent().
640
641 Good for making easily-readable multi-line string literals without
642 leaving extra indents and whitespace in the resulting string:
643
644 Do this:
645 --------------------
646 void foo()
647 {
648 enum codeStr = q{
649 // Written in the D Programming Langauge
650 // by John Doe
651
652 int main()
653 {
654 return 0;
655 }
656 }.normalize();
657 }
658 --------------------
659
660 Instead of this:
661 --------------------
662 void foo()
663 {
664 enum codeStr =
665 q{// Written in the D Programming Langauge
666 // by John Doe
667
668 int main()
669 {
670 return 0;
671 }};
672 }
673 --------------------
674
675 The resulting string is exactly the same.
676 +/
T normalize(T)(T str) if(isSomeString!T)
{
    if(str == "")
        return "";

    // Split on "\n", normalize the lines, then join again. The ctfe_*
    // helpers are used while running at compile time, the regular
    // split/join otherwise.
    if(__ctfe)
        return normalize(str.ctfe_split("\n")).ctfe_join("\n");

    return normalize(str.split("\n")).join("\n");
}
695
696 /// ditto
T[] normalize(T)(T[] lines) if(isSomeString!T)
{
    // Order: drop blank top/bottom lines, remove the common indent, then
    // trim trailing whitespace from each line.
    auto trimmed    = stripLinesTopBottom(lines);
    auto unindented = unindent(trimmed);
    return stripLinesRight(unindented);
}
704
705 alias md5 = std.digest.md.md5Of;
706 /+string md5(string data)
707 {
708 MD5_CTX context;
709 context.start();
710 context.update(data);
711 ubyte digest[16];
712 context.finish(digest);
713
714 return digestToString(digest);
715 }+/
716
/// Returns the byte sequence that encodes the given byte-order mark.
immutable(ubyte)[] bomCodeOf(BOM bom)
{
    static immutable ubyte[] utf8Code    = [0xEF, 0xBB, 0xBF];
    static immutable ubyte[] utf16leCode = [0xFF, 0xFE];
    static immutable ubyte[] utf16beCode = [0xFE, 0xFF];
    static immutable ubyte[] utf32leCode = [0xFF, 0xFE, 0x00, 0x00];
    static immutable ubyte[] utf32beCode = [0x00, 0x00, 0xFE, 0xFF];

    final switch(bom)
    {
    case BOM.UTF8:    return utf8Code;
    case BOM.UTF16LE: return utf16leCode;
    case BOM.UTF16BE: return utf16beCode;
    case BOM.UTF32LE: return utf32leCode;
    case BOM.UTF32BE: return utf32beCode;
    }
}
728
/// Identifies the byte-order mark at the start of `str`.
/// Returns BOM.UTF8 when no known mark is present.
BOM bomOf(const(ubyte)[] str)
{
    if(str.startsWith(bomCodeOf(BOM.UTF8   ))) return BOM.UTF8;

    // The 4-byte marks must be tested before the 2-byte ones: the UTF-32LE
    // mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE), so testing
    // UTF-16LE first would make UTF-32LE undetectable.
    if(str.startsWith(bomCodeOf(BOM.UTF32LE))) return BOM.UTF32LE;
    if(str.startsWith(bomCodeOf(BOM.UTF32BE))) return BOM.UTF32BE;
    if(str.startsWith(bomCodeOf(BOM.UTF16LE))) return BOM.UTF16LE;
    if(str.startsWith(bomCodeOf(BOM.UTF16BE))) return BOM.UTF16BE;

    return BOM.UTF8;
}
739
// BOM values matching ("native") and opposing ("nonNative") the byte
// order of the target, for the 16-bit and 32-bit encodings.
version(BigEndian)
{
    enum BOM native16BitBOM    = BOM.UTF16BE;
    enum BOM native32BitBOM    = BOM.UTF32BE;
    enum BOM nonNative16BitBOM = BOM.UTF16LE;
    enum BOM nonNative32BitBOM = BOM.UTF32LE;
}
else
{
    enum BOM native16BitBOM    = BOM.UTF16LE;
    enum BOM native32BitBOM    = BOM.UTF32LE;
    enum BOM nonNative16BitBOM = BOM.UTF16BE;
    enum BOM nonNative32BitBOM = BOM.UTF32BE;
}
754
/// True for the native 16-bit and 32-bit marks; BOM.UTF8 is always
/// reported as native.
bool isNativeEndian(BOM bom)
{
    if(bom == BOM.UTF8)
        return true;

    return bom == native16BitBOM || bom == native32BitBOM;
}

/// Exact negation of isNativeEndian.
bool isNonNativeEndian(BOM bom)
{
    return isNativeEndian(bom) == false;
}
764
/// True only for BOM.UTF8.
bool is8Bit(BOM bom)
{
    return BOM.UTF8 == bom;
}

/// True for either UTF-16 mark, regardless of byte order.
bool is16Bit(BOM bom)
{
    if(bom == native16BitBOM)
        return true;

    return bom == nonNative16BitBOM;
}

/// True for either UTF-32 mark, regardless of byte order.
bool is32Bit(BOM bom)
{
    if(bom == native32BitBOM)
        return true;

    return bom == nonNative32BitBOM;
}
779
/// Maps a BOM to the byte order it denotes. BOM.UTF8 maps to the target's
/// own byte order (`endian`).
Endian endianOf(BOM bom)
{
    // DMD 2.055 changed "LittleEndian" to "littleEndian", etc...
    static if(__traits(compiles, Endian.littleEndian))
    {
        enum little = Endian.littleEndian;
        enum big    = Endian.bigEndian;
    }
    else
    {
        enum little = Endian.LittleEndian;
        enum big    = Endian.BigEndian;
    }

    final switch(bom)
    {
    case BOM.UTF8:                 return endian;
    case BOM.UTF16LE, BOM.UTF32LE: return little;
    case BOM.UTF16BE, BOM.UTF32BE: return big;
    }
}
802
/// Evaluates to true when T is InsensitiveT instantiated with string,
/// wstring or dstring, and to false for any other type.
template isInsensitive(T)
{
    static if(is(T == InsensitiveT!string))
        enum isInsensitive = true;
    else static if(is(T == InsensitiveT!wstring))
        enum isInsensitive = true;
    else static if(is(T == InsensitiveT!dstring))
        enum isInsensitive = true;
    else
        enum isInsensitive = false;
}
static assert( isInsensitive!Insensitive);
static assert( isInsensitive!WInsensitive);
static assert( isInsensitive!DInsensitive);
static assert(!isInsensitive!string);
814
/// A string wrapper whose comparison, equality and hashing ignore case.
///
/// The original text is kept in `str`; a lower-cased copy in `foldingCase`
/// is what comparisons and hashing actually use.
struct InsensitiveT(T) if(isSomeString!T)
{
    private T str;
    private T foldingCase;

    this(T newStr)
    {
        str = newStr;
        updateFoldingCase();
    }

    /// Returns the original text, with its case preserved.
    T toString() const
    {
        return str;
    }

    private void updateFoldingCase()
    {
        // Phobos doesn't actually have a toFolding() right now
        foldingCase = toLower(str);
    }

    // The hash is taken with the TypeInfo of T itself; using
    // typeid(string) for wstring/dstring would interpret the array with
    // the wrong element size.
    static if(useNoThrowSafeToHash)
    {
        nothrow @trusted hash_t toHash() const
        {
            return typeid(T).getHash(&foldingCase);
        }
    }
    else
    {
        hash_t toHash() const
        {
            return typeid(T).getHash(&foldingCase);
        }
    }

    /// Assigns from another InsensitiveT (of any string type) or from a
    /// plain string (of any string type).
    void opAssign(T2)(T2 b) if(isInsensitive!T2 || isSomeString!T2)
    {
        static if(is(T2 == InsensitiveT!T))
        {
            // Same instantiation: reuse the already-folded copy
            str = b.str;
            foldingCase = b.foldingCase;
        }
        else static if(isInsensitive!T2)
        {
            str = to!T(b.str);
            updateFoldingCase();
        }
        else
        {
            // to!T also accepts a string type different from T
            str = to!T(b);
            updateFoldingCase();
        }
    }

    InsensitiveT!T opBinary(string op)(const InsensitiveT!T b) if(op=="~")
    {
        return InsensitiveT!T(str ~ b.str);
    }

    InsensitiveT!T opOpAssign(string op)(const InsensitiveT!T b) if(op=="~")
    {
        str ~= b.str;
        foldingCase ~= b.foldingCase;
        return this;
    }

    // Single implementation behind every opCmp/opEquals overload, so no
    // overload depends on overload resolution to avoid calling itself.
    private int compareTo(ref const InsensitiveT!T b) const
    {
        if(str is null && b.str is null) return 0;
        if(str == b.str) return 0;
        if(str is null ) return -1;
        if(b.str is null ) return 1;
        return std.string.cmp(foldingCase, b.foldingCase);
    }

    //TODO: Get rid of this "static if" (but not the func) after dropping support for DMD 2.058
    static if(vendor != Vendor.digitalMars || version_minor >= 59)
    bool opEquals(const InsensitiveT!T b) const
    {
        return compareTo(b) == 0;
    }

    bool opEquals(ref const InsensitiveT!T b) const
    {
        return compareTo(b) == 0;
    }

    //TODO: Get rid of this "static if" (but not the func) after dropping support for DMD 2.058
    static if(vendor != Vendor.digitalMars || version_minor >= 59)
    int opCmp(const InsensitiveT!T b) const
    {
        return compareTo(b);
    }

    int opCmp(ref const InsensitiveT!T b) const
    {
        return compareTo(b);
    }

    /// `a[x]`: the code unit at index x of the original text.
    auto opIndex(size_t x) const
    {
        return str[x];
    }

    InsensitiveT!T opSlice()
    {
        return this;
    }

    // Kept for callers that invoke it by name; the `a[x]` syntax is served
    // by opIndex above.
    auto opSlice(size_t x)
    {
        return str[x];
    }

    InsensitiveT!T opSlice(size_t x, size_t y)
    {
        return InsensitiveT!T(str[x..y]);
    }
}
929
930 alias InsensitiveT!string Insensitive;
931 alias InsensitiveT!wstring WInsensitive;
932 alias InsensitiveT!dstring DInsensitive;
933
//TODO: Or just replace with std.digest.toHexString in DMD 2.061
/// Renders each byte of `arr` as two upper-case hexadecimal digits, with
/// no separators. An empty array yields an empty string.
string toHexString(const(ubyte)[] arr)
{
    static immutable hexDigits = "0123456789ABCDEF";

    // Two output characters per input byte, filled in a single pass
    auto buf = new char[arr.length * 2];
    foreach(i, b; arr)
    {
        buf[2*i]     = hexDigits[b >> 4];
        buf[2*i + 1] = hexDigits[b & 0x0F];
    }

    return buf.idup;
}
954
// Unit tests for this module. The whole q{...} body below is passed to
// unittestSemiTwistDLib and mixed in; unittestSemiTwistDLib, deferAssert,
// deferEnsure and deferEnsureThrows are not defined in this file
// (presumably they come from semitwist.util.all -- confirm).
// NOTE(review): several string literals here are whitespace-sensitive
// (indent/unindent/stripLines/normalize cases); keep their spacing intact.
mixin(unittestSemiTwistDLib(q{

// Insensitive
mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("Test") }));
mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("TEST") }));
mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive("ABCD") }));
mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive(null) }));
mixin(deferAssert!(q{ Insensitive(null) == Insensitive(null) }));
mixin(deferAssert!(q{ Insensitive("Test") == Insensitive("TEST") }));
mixin(deferAssert!(q{ Insensitive("ABCD") != Insensitive("TEST") }));
mixin(deferAssert!(q{ Insensitive(null) != Insensitive("TEST") }));

mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("ES") }));
mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("es") }));
mixin(deferAssert!(q{ Insensitive("TEST")[1..3] != Insensitive("AB") }));

mixin(deferAssert!(q{ Insensitive("TE")~Insensitive("ST") == Insensitive("TesT") }));

Insensitive ins;
ins = Insensitive("TEST");
ins = "ab";
ins ~= Insensitive("cd");

mixin(deferAssert!(q{ ins == Insensitive("AbcD") }));

int[Insensitive] ins_aa = [Insensitive("ABC"):1, Insensitive("DEF"):2, Insensitive("Xyz"):3];
mixin(deferAssert!(q{ Insensitive("ABC") in ins_aa }));
mixin(deferAssert!(q{ Insensitive("DEF") in ins_aa }));
mixin(deferAssert!(q{ Insensitive("Xyz") in ins_aa }));
mixin(deferAssert!(q{ Insensitive("aBc") in ins_aa }));
mixin(deferAssert!(q{ Insensitive("dEf") in ins_aa }));
mixin(deferAssert!(q{ Insensitive("xYZ") in ins_aa }));
mixin(deferAssert!(q{ Insensitive("HI") !in ins_aa }));

mixin(deferAssert!(q{ ins_aa[Insensitive("aBc")] == 1 }));
mixin(deferAssert!(q{ ins_aa[Insensitive("dEf")] == 2 }));
mixin(deferAssert!(q{ ins_aa[Insensitive("xYZ")] == 3 }));

// escapeDDQS, unescapeDDQS
mixin(deferEnsure!(q{ `hello`.escapeDDQS() }, q{ _ == `"hello"` }));
mixin(deferEnsure!(q{ `"hello"`.unescapeDDQS() }, q{ _ == "hello" }));
mixin(deferEnsure!(q{ `"I"`.unescapeDDQS() }, q{ _ == "I" }));

mixin(deferEnsure!(q{ (`And...`~"\n"~`sam\nick said "Hi!".`).escapeDDQS() }, q{ _ == `"And...\nsam\\nick said \"Hi!\"."` }));
//TODO: Make this one pass
//mixin(deferEnsure!(q{ `"And...\nsam\\nick said \"Hi!\"."`.unescapeDDQS() }, q{ _ == `And...`~"\n"~`sam\nick said "Hi!".` }));
mixin(deferEnsureThrows!(q{ "hello".unescapeDDQS(); }, Exception));

// indent
mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent(" ") }, q{ _ == " A\n \tB\n \n C" }));
mixin(deferEnsure!(q{ "A\nB\n".indent("\t") }, q{ _ == "\tA\n\tB\n" }));
mixin(deferEnsure!(q{ "".indent("\t") }, q{ _ == "\t" }));
mixin(deferEnsure!(q{ "A".indent("\t") }, q{ _ == "\tA" }));
mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent("") }, q{ _ == "A\n\tB\n\nC" }));

// unindent
mixin(deferEnsure!(q{ " \t A\n \t \tB\n \t C\n \t\n \t D".unindent() }, q{ _ == "A\n\tB\nC\n\nD" }));
mixin(deferEnsure!(q{ " D\n".unindent() }, q{ _ == "D\n" }));
mixin(deferEnsure!(q{ " D\n ".unindent() }, q{ _ == "D\n" }));
mixin(deferEnsure!(q{ "D".unindent() }, q{ _ == "D" }));
mixin(deferEnsure!(q{ "".unindent() }, q{ _ == "" }));
mixin(deferEnsure!(q{ " ".unindent() }, q{ _ == "" }));
mixin(deferEnsureThrows!(q{ " \tA\n\t B".unindent(); }, Exception));
mixin(deferEnsureThrows!(q{ " a\n \tb".unindent(); }, Exception));

// unindent at compile-time
enum ctfe_unindent_dummy1 = " \t A\n \t \tB\n \t C\n \t\n \t D".unindent();
enum ctfe_unindent_dummy2 = " D".unindent();
enum ctfe_unindent_dummy3 = " D\n".unindent();
enum ctfe_unindent_dummy4 = "".unindent();

mixin(deferEnsure!(q{ ctfe_unindent_dummy1 }, q{ _ == "A\n\tB\nC\n\nD" }));
mixin(deferEnsure!(q{ ctfe_unindent_dummy2 }, q{ _ == "D" }));
mixin(deferEnsure!(q{ ctfe_unindent_dummy3 }, q{ _ == "D\n" }));
mixin(deferEnsure!(q{ ctfe_unindent_dummy4 }, q{ _ == "" }));

//enum ctfe_unindent_dummy5 = " a\n \tb".unindent(); // Should fail to compile

// contains
mixin(deferEnsure!(q{ contains("abcde", 'a') }, q{ _==true }));
mixin(deferEnsure!(q{ contains("abcde", 'c') }, q{ _==true }));
mixin(deferEnsure!(q{ contains("abcde", 'e') }, q{ _==true }));
mixin(deferEnsure!(q{ contains("abcde", 'x') }, q{ _==false }));

// stripLines: Top and Bottom
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTop() }, q{ _ == " ABC \n \n DEF \n \t \n\t \n" }));
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBottom() }, q{ _ == " \t \n\t \n ABC \n \n DEF " }));
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTopBottom() }, q{ _ == " ABC \n \n DEF " }));

mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTop() }, q{ _ == "ABC\n " }));
mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBottom() }, q{ _ == "\nABC" }));
mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTopBottom() }, q{ _ == "ABC" }));

mixin(deferEnsure!(q{ "\n".stripLinesTop() }, q{ _ == "" }));
mixin(deferEnsure!(q{ "\n".stripLinesBottom() }, q{ _ == "" }));
mixin(deferEnsure!(q{ "\n".stripLinesTopBottom() }, q{ _ == "" }));

mixin(deferEnsure!(q{ "ABC".stripLinesTopBottom() }, q{ _ == "ABC" }));
mixin(deferEnsure!(q{ "".stripLinesTopBottom() }, q{ _ == "" }));

// stripLines: Left and Right
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeft() }, q{ _ == "\n\nABC \n\nDEF \n\n\n" }));
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesRight() }, q{ _ == "\n\n ABC\n\n DEF\n\n\n" }));
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight() }, q{ _ == "\n\nABC\n\nDEF\n\n\n" }));

mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeft() }, q{ _ == "\nABC\n" }));
mixin(deferEnsure!(q{ "\nABC\n ".stripLinesRight() }, q{ _ == "\nABC\n" }));
mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeftRight() }, q{ _ == "\nABC\n" }));

mixin(deferEnsure!(q{ "\n".stripLinesLeft() }, q{ _ == "\n" }));
mixin(deferEnsure!(q{ "\n".stripLinesRight() }, q{ _ == "\n" }));
mixin(deferEnsure!(q{ "\n".stripLinesLeftRight() }, q{ _ == "\n" }));

mixin(deferEnsure!(q{ "ABC".stripLinesLeftRight() }, q{ _ == "ABC" }));
mixin(deferEnsure!(q{ "".stripLinesLeftRight() }, q{ _ == "" }));

// stripLinesBox
mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox() }, q{ _ == "ABC\n\nDEF" }));
mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBox() }, q{ _ == "ABC" }));
mixin(deferEnsure!(q{ "\n".stripLinesBox() }, q{ _ == "" }));
mixin(deferEnsure!(q{ "ABC".stripLinesBox() }, q{ _ == "ABC" }));
mixin(deferEnsure!(q{ "".stripLinesBox() }, q{ _ == "" }));

// stripLines at compile-time
enum ctfe_stripLinesBox_dummy1 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox();
enum ctfe_stripLinesBox_dummy2 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight();
enum ctfe_stripLinesBox_dummy3 = "".stripLinesBox();

mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy1 }, q{ _ == "ABC\n\nDEF" }));
mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy2 }, q{ _ == "\n\nABC\n\nDEF\n\n\n" }));
mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy3 }, q{ _ == "" }));

// normalize
mixin(deferEnsure!(q{
q{
// test
void foo() {
int x = 2;
}
}.normalize()
}, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));

enum ctfe_normalize_dummy1 = q{
// test
void foo() {
int x = 2;
}
}.normalize();
mixin(deferEnsure!(q{ ctfe_normalize_dummy1 }, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));

// toHexString
mixin(deferEnsure!(q{ toHexString([0x00, 0x12, 0x0A, 0xBC]) }, q{ _ == "00120ABC" } ));
mixin(deferEnsure!(q{ toHexString([0xF0]) }, q{ _ == "F0" } ));
mixin(deferEnsure!(q{ toHexString([]) }, q{ _ == "" } ));
}));