semitwist.util.text source code

1 // SemiTwist Library
2 // Written in the D programming language.
3 
4 module semitwist.util.text;
5 
import std.algorithm;
import std.array;
import std.compiler;
import std.conv;
import std.digest.md;
import std.stdio;
import std.traits;
import std.stream;
import std.string;
import std.system;
import std.uni;
import std.utf;
18 
19 public import std.stream: BOM;
20 
21 import semitwist.util.all;
22 
23 private alias semitwist.util.ctfe.ctfe_strip ctfe_strip;
24 
25 /**
26 Notes:
27 Anything in "data" must be doubly escaped.
28 
29 For instance, if you want the generated function to return newline (ie, "\n"),
30 then "data" must be ['\\', 'n'], and thus the mixin call would look like this:
31 
32 ----
mixin(multiTypeString!("unixNewline", "\\n"));
// Or
mixin(multiTypeString!("unixNewline", r"\n"));
36 ----
37 
38 Or, if you want the generated function to return the escape sequence
39 for newline (ie, r"\n", or "\\n", or ['\\', 'n']), then "data" must
40 be ['\\', '\\', 'n'], and thus the mixin call would look like this:
41 
42 ----
mixin(multiTypeString!("unixNewlineEscSequence", "\\\\n"));
// Or
mixin(multiTypeString!("unixNewlineEscSequence", r"\\n"));
46 ----
47 
48 (This requirement could be changed if there is a way to automatically
49 escape a string at compile-time.)
50 */
// Builds, as a string meant for `mixin`, the source of a function template
// `T[] name(T)()` that returns `data` as a c/w/d-postfixed string literal
// selected by T.
//
//   name   - identifier of the generated function
//   data   - text pasted verbatim between the quotes of the generated
//            literals, so it must already be escaped for a D string literal
//   access - protection attribute prepended to the generated declaration
template multiTypeString(string name, string data, string access="public")
{
	enum multiTypeString = 
	access~" T[] "~name~"(T)()"~
	"{"~
	"		 static if(is(T ==  char)) { return \""~data~"\"c; }"~
	"	else static if(is(T == wchar)) { return \""~data~"\"w; }"~
	"	else static if(is(T == dchar)) { return \""~data~"\"d; }"~
	// A lone string literal is a truthy condition and would never trigger;
	// the rejection has to be written as static assert(false, message).
	"	else static assert(false, \"T must be char, wchar, or dchar\");"~
	"}";
}
62 
// Declares `T[] whitespaceChars(T)()` from the code string produced by
// multiTypeString; the generated literal is space, \n, \r, \t, \v, \f.
/// Warning: This is missing some unicode whitespace chars
mixin(multiTypeString!("whitespaceChars", r" \n\r\t\v\f"));
65 
/// Rewrites `str` in place so that Windows and Mac9 line endings become
/// the Unix EOL: "\n"
void toUnixEOL(T)(ref T[] str)
{
	auto unixEOL = to!(T[])(nlStr_Linux);

	// Windows endings are replaced before Mac9 ones; presumably because the
	// Windows sequence contains the Mac9 one -- confirm against nlStr_*.
	str = replace(str, to!(T[])(nlStr_Windows), unixEOL);
	str = replace(str, to!(T[])(nlStr_Mac9), unixEOL);
}
72 
/// Rewrites `str` in place so that Windows and Unix line endings become
/// the Mac9 EOL: "\r"
void toMac9EOL(T)(ref T[] str)
{
	auto mac9EOL = to!(T[])(nlStr_Mac9);

	// Windows endings first, then whatever Unix endings remain.
	str = replace(str, to!(T[])(nlStr_Windows), mac9EOL);
	str = replace(str, to!(T[])(nlStr_Linux), mac9EOL);
}
79 
/// Rewrites `str` in place so that every line ending becomes
/// the Win EOL: "\r\n"
void toWinEOL(T)(ref T[] str)
{
	// Step 1: collapse every kind of ending to the Unix form.
	toUnixEOL(str);

	// Step 2: expand each Unix ending to the Windows form.
	auto unixEOL = to!(T[])(nlStr_Linux);
	auto winEOL  = to!(T[])(nlStr_Windows);
	str = replace(str, unixEOL, winEOL);
}
86 
/// Returns `str` with its line endings converted to the convention of the
/// platform being compiled for. Only Windows, OSX and linux are handled;
/// on any other platform the text is returned as given.
T[] toNativeEOL(T)(T[] str)
{
	version(Windows)    toWinEOL(str);
	else version(OSX)   toUnixEOL(str);
	else version(linux) toUnixEOL(str);

	return str;
}
94 
/// Converts text known to use Unix line endings to the native convention.
/// Only a Windows build performs a conversion; elsewhere `str` is returned
/// untouched.
T[] toNativeEOLFromUnix(T)(T[] str)
{
	version(Windows)
		str = str.toNativeEOL();

	return str;
}
100 
/// Converts text known to use Windows line endings to the native
/// convention. Only OSX and linux builds perform a conversion; elsewhere
/// `str` is returned untouched.
T[] toNativeEOLFromWin(T)(T[] str)
{
	version(OSX)        str = str.toNativeEOL();
	else version(linux) str = str.toNativeEOL();

	return str;
}
107 
/// Converts text known to use Mac9 line endings to the native convention.
/// No platform shortcut is taken here: the text always goes through
/// toNativeEOL.
T[] toNativeEOLFromMac9(T)(T[] str)
{
	auto converted = toNativeEOL(str);
	return converted;
}
112 
/// Selects the escaping scheme applied by escape() and unescape().
enum EscapeSequence
{
	DDQS, // D Double Quote String, ex: `"foo\t"` <--> `foo	`
	HTML, // ex: `&amp;` <--> `&`

	//TODO: Implement these
	//URI,  // ex: `%20` <--> ` `
	//SQL,  //TODO: Include different types of SQL escaping (SQL's about as standardized as BASIC)
}
122 
123 /++
124 Note:
125 For the escape and unescape functions, chaining one with the other
126 (ex: "unescape(escape(str))") will result in a string that is
127 semantically equivalent to the original, but it is *not* necessarily
128 guaranteed to be exactly identical to the original string.
129 
130 For example:
131   string str;
132   str = `"\x41\t"`;        // 0x41 is ASCII and UTF-8 for A
133   str = unescapeDDQS(str); // == `A	` (That's an actual tab character)
134   str = escapeDDQS(str);   // == `"A\t"c`
135 
136   Note that "\x41\t" and "A\t"c are equivalent, but not identical.
137 +/
T escape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));

	// Dispatch to the escaper matching the requested scheme; any value
	// without a dedicated escaper is rejected with an exception.
	switch(type)
	{
	case EscapeSequence.DDQS: return escapeDDQS(str);
	case EscapeSequence.HTML: return escapeHTML(str);

	default:
		throw new Exception("Unsupported EscapeSequence");
	}
}
160 
/// Reverses escape() for the given scheme.
/// Throws: Exception when `type` has no matching unescaper.
T unescape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));

	// Dispatch to the unescaper matching the requested scheme.
	switch(type)
	{
	case EscapeSequence.DDQS: return unescapeDDQS(str);
	case EscapeSequence.HTML: return unescapeHTML(str);

	default:
		throw new Exception("Unsupported EscapeSequence");
	}
}
183 
/// Replaces each occurrence of `escapeSequence` in a copy of `str` with the
/// sequence's final code unit.
// NOTE(review): `substitute` is not defined in this file; presumably it comes
// from semitwist.util.all -- confirm.
T unescapeChar(T)(T str, T escapeSequence) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));

	T copy = str.dup;
	auto unescapedForm = escapeSequence[$-1..$];
	return substitute(copy, escapeSequence, unescapedForm);
}
192 
/// Converts a D double-quoted string literal -- including its surrounding
/// quotes and an optional `c`, `w` or `d` postfix -- into the text it denotes.
///
/// Recognized escapes: \\ \" \' \? \r \n \t \a \b \f \v. Any other backslash
/// sequence is copied through unchanged.
///
/// Warning: This doesn't unescape all escape sequences yet.
///
/// Throws: Exception if `str` is not enclosed in double quotes (optionally
/// followed by a single c/w/d postfix).
T unescapeDDQS(T)(T str) if(isSomeString!T)
{
	//mixin(ensureCharType!("T"));
	enum errStr = "str doesn't contain a valid D Double Quote String";

	// The delimiters are validated on the raw input, before any unescaping,
	// so an escaped quote can never be mistaken for a delimiter.
	if(str.length < 2 || str[0] != '"')
		throw new Exception(errStr);

	size_t closeQuote = str.length - 1;
	if(str[closeQuote] != '"')
	{
		// Not a quote: the only thing allowed here is a type postfix
		// immediately preceded by the closing quote.
		auto postfix = str[closeQuote];
		if(postfix != 'c' && postfix != 'w' && postfix != 'd')
			throw new Exception(errStr);

		closeQuote--;
		if(closeQuote == 0 || str[closeQuote] != '"')
			throw new Exception(errStr);
	}

	auto content = str[1..closeQuote];

	// Fast path: with no backslash there is nothing to decode.
	bool hasEscape = false;
	foreach(i; 0..content.length)
	{
		if(content[i] == '\\')
		{
			hasEscape = true;
			break;
		}
	}
	if(!hasEscape)
		return content;

	// Decode in a single left-to-right pass. Every escape introducer and
	// letter is ASCII, so working on code units is safe for all encodings,
	// and the output of one escape is never re-read as the start of another.
	T ret;
	for(size_t i = 0; i < content.length; i++)
	{
		auto unit = content[i];
		if(unit != '\\' || i + 1 == content.length)
		{
			ret ~= unit;
			continue;
		}

		i++;
		switch(content[i])
		{
		case '\\': ret ~= '\\'; break;
		case '"':  ret ~= '"';  break;
		case '\'': ret ~= '\''; break;
		case '?':  ret ~= '?';  break;
		case 'r':  ret ~= '\r'; break;
		case 'n':  ret ~= '\n'; break;
		case 't':  ret ~= '\t'; break;
		case 'a':  ret ~= '\a'; break;
		case 'b':  ret ~= '\b'; break;
		case 'f':  ret ~= '\f'; break;
		case 'v':  ret ~= '\v'; break;
		//TODO: All the others
		default:
			// Unsupported escape: keep it exactly as written
			ret ~= '\\';
			ret ~= content[i];
			break;
		}
	}

	return ret;
}
238 
/// Produces a D double-quoted string literal (quotes included) denoting `str`.
/// Backslash, double quote, CR, LF and tab are written as escape sequences;
/// everything else is copied as-is.
T escapeDDQS(T)(T str) if(isSomeString!T)
{
	// Backslashes are handled first so the backslashes introduced by the
	// later substitutions are not escaped a second time.
	T escaped = ctfe_substitute!(T)(str, `\`, `\\`);
	escaped = ctfe_substitute!(T)(escaped, `"`, `\"`);

	// To prevent accidential conversions to platform-specific EOL
	escaped = ctfe_substitute!(T)(escaped, "\r", `\r`);
	escaped = ctfe_substitute!(T)(escaped, "\n", `\n`);

	// To prevent possible problems with automatic tab->space conversion
	escaped = ctfe_substitute!(T)(escaped, "\t", `\t`);

	// The rest don't need to be escaped
	return `"`~escaped~`"`;
}
252 
253 /+
254 enum doubleQuoteTestStr = `"They said \"10 \\ 5 = 2\""`;
255 
256 pragma(msg, "orig:        "~doubleQuoteTestStr);
257 pragma(msg, "unesc:       "~unescapeDDQS(doubleQuoteTestStr));
258 pragma(msg, "esc:         "~escapeDDQS(doubleQuoteTestStr));
259 pragma(msg, "esc(unesc):  "~escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
260 pragma(msg, "unesc(esc):  "~unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
261 
262 pragma(msg, "unesc:       "~unescape(doubleQuoteTestStr, EscapeSequence.DDQS));
263 pragma(msg, "unesc:       "~doubleQuoteTestStr.unescape(EscapeSequence.DDQS));
264 
265 mixin(unittestSemiTwistDLib("Outputting some things", q{
266 	enum wstring ctEscW = escapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
267 	enum dstring ctEscD = escapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
268 	enum wstring ctUnescW = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
269 	enum dstring ctUnescD = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
270 	writefln("%s%s", "ctEscW:      ", ctEscW);
271 	writefln("%s%s", "ctEscD:      ", ctEscD);
272 	writefln("%s%s", "ctUnescW:    ", ctUnescW);
273 	writefln("%s%s", "ctUnescD:    ", ctUnescD);
274 
275 	writefln("%s%s", "unesc wchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
276 	writefln("%s%s", "unesc dchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
277 	writefln("%s%s", "esc wchar:   ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
278 	writefln("%s%s", "esc dchar:   ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
279 //	writefln("%s%s", "int:         ", unescapeDDQS([cast(int)1,2,3]));
280 
281 	writefln("%s%s", "orig:        ", doubleQuoteTestStr);
282 	writefln("%s%s", "unesc:       ", unescapeDDQS(doubleQuoteTestStr));
283 	writefln("%s%s", "esc:         ", escapeDDQS(doubleQuoteTestStr));
284 	writefln("%s%s", "esc(unesc):  ", escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
285 	writefln("%s%s", "unesc(esc):  ", unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
286 }));
287 +/
288 
/// Decodes the HTML entities &lt; &gt; and &amp; in `str`.
/// Warning: This doesn't unescape all escape sequences yet.
T unescapeHTML(T)(T str) if(isSomeString!T)
{
	// &amp; is decoded last so that text such as "&amp;lt;" yields "&lt;"
	// rather than being decoded twice.
	T withLt  = ctfe_substitute!(T)(str,    "&lt;",  "<");
	T withGt  = ctfe_substitute!(T)(withLt, "&gt;",  ">");
	T decoded = ctfe_substitute!(T)(withGt, "&amp;", "&");

	return decoded;
}
300 
/// Encodes the characters & < > in `str` as HTML entities.
/// Quote characters are left untouched.
T escapeHTML(T)(T str) if(isSomeString!T)
{
	// & is encoded first so the ampersands introduced by the other two
	// entities are not encoded again.
	T withAmp = ctfe_substitute!(T)(str,     "&", "&amp;");
	T withLt  = ctfe_substitute!(T)(withAmp, "<", "&lt;");
	T encoded = ctfe_substitute!(T)(withLt,  ">", "&gt;");

	return encoded;
}
311 
/// Like std.string.indexOf, but with an optional 'start' parameter,
/// and returns s.length when not found (instead of -1).
///
/// A `start` beyond the end of `s` is treated as s.length, so the search
/// simply finds nothing instead of slicing out of bounds.
//TODO*: Unittest these
size_t locate(Char)(in Char[] s, dchar c, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
	// Clamp, matching what locatePrior does with its own start argument
	if(start > s.length)
		start = s.length;

	auto index = std.string.indexOf(s[start..$], c, cs);
	// indexOf is relative to the slice; shift it back to an index into s
	return (index == -1)? s.length : index + start;
}
320 
/// ditto
// Searches backwards: only s[0..start] is examined, and the result is the
// index of the last match in that prefix (s.length when there is none).
size_t locatePrior(Char)(in Char[] s, dchar c, size_t start=int.max, CaseSensitive cs = CaseSensitive.yes)
{
	// The default start is far past any normal string; clamp to the end
	if(start > s.length)
		start = s.length;
		
	auto index = std.string.lastIndexOf(s[0..start], c, cs);
	return (index == -1)? s.length : index;
}
330 
/// ditto
// Substring form. A `start` beyond the end of `s` is treated as s.length,
// so the search finds nothing instead of slicing out of bounds.
size_t locate(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
	// Clamp, matching what locatePrior does with its own start argument
	if(start > s.length)
		start = s.length;

	auto index = std.string.indexOf(s[start..$], sub, cs);
	// indexOf is relative to the slice; shift it back to an index into s
	return (index == -1)? s.length : index + start;
}
337 
/// ditto
// Substring form of the backwards search: only s[0..start] is examined, and
// the result is the index of the last match (s.length when there is none).
size_t locatePrior(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=int.max, CaseSensitive cs = CaseSensitive.yes)
{
	// The default start is far past any normal string; clamp to the end
	if(start > s.length)
		start = s.length;
		
	auto index = std.string.lastIndexOf(s[0..start], sub, cs);
	return (index == -1)? s.length : index;
}
347 
/// Formats `args` exactly as format() does and appends a single "\n".
///
/// Suggested usage:
///   "Hello %s!".formatln("World");
string formatln(T...)(T args)
{
	string line = format(args);
	line ~= "\n";
	return line;
}
354 
//TODO*: Fix stripNonPrintable
// Currently a placeholder: the filtering code below is commented out, so the
// input is returned unchanged.
T stripNonPrintable(T)(T str) if(isSomeString!T)
{
	//T ret = str.dup;
	//auto numRemaining = ret.removeIf( (T c){return !isPrintable(c);} );
	//return ret[0..numRemaining];
	return str;
}
363 
/// Measures the first code point of `str`.
/// Return value is number of code units
size_t nextCodePointSize(T)(T str) if(is(T==string) || is(T==wstring))
{
	// decode() advances the index past the code point it reads, so starting
	// from zero the index ends up equal to that code point's length.
	size_t unitsConsumed = 0;
	decode(str, unitsConsumed);
	return unitsConsumed;
}
371 
/// Indents every line with indentStr
///
/// An empty `str` yields just `indentStr`.
T indent(T)(T str, T indentStr="\t") if(isSomeString!T)
{
	if(str == "")
		return indentStr;

	// The final code unit is kept out of the replacement, so a newline at
	// the very end of the text is not followed by a dangling indent.
	auto head = str[0..$-1];
	auto tail = str[$-1];
	auto lineBreakWithIndent = "\n"~indentStr;

	return indentStr ~ head.replace("\n", lineBreakWithIndent) ~ tail;
}
383 
/// ditto
// Line-array form: every element of `lines` is overwritten with its indented
// version, and the same array is returned.
T[] indent(T)(T[] lines, T indentStr="\t") if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(lineNum; 0..lines.length)
		lines[lineNum] = indentStr ~ lines[lineNum];

	return lines;
}
393 
/// Unindents the lines of text as much as possible while preserving
/// all relative indentation.
///
/// Inconsistent indentation (on lines that contain non-whitespace) is an error
/// and throws an exception at runtime, or asserts when executed at compile-time.
T unindent(T)(T str) if(isSomeString!T)
{
	// Splitting and joining go through the ctfe_* helpers when running at
	// compile time and through the regular split/join otherwise.
	T[] toLines()
	{
		if(__ctfe)
			return str.ctfe_split("\n");
		else
			return str.split("\n");
	}
	T fromLines(T[] pieces)
	{
		if(__ctfe)
			return pieces.ctfe_join("\n");
		else
			return pieces.join("\n");
	}

	if(str == "")
		return "";

	// The original text is passed along as well; unindentImpl uses it when
	// there turns out to be no common indentation to remove.
	return fromLines( unindentImpl(toLines(), str) );
}
417 
/// ditto
// Line-array form: delegates to unindentImpl without an original string.
T[] unindent(T)(T[] lines) if(isSomeString!T)
{
	return unindentImpl(lines);
}
423 
// Shared implementation behind both unindent overloads.
//
//   lines   - the text, one element per line; elements are overwritten
//             in place and the same array is returned
//   origStr - the unsplit text when called from the string overload,
//             otherwise null
//
// Whitespace-only lines become "" and do not take part in choosing the common
// indent. A non-blank line that does not begin with the common indent is
// reported as "Inconsistent indentation": an Exception at runtime, an assert
// failure during CTFE.
private T[] unindentImpl(T)(T[] lines, T origStr=null) if(isSomeString!T)
{
	if(lines == [])
		return [];
		
	bool isNonWhite(dchar ch)
	{
		if(__ctfe)
			return !ctfe_iswhite(ch);
		else
			return !isWhite(ch);
	}
	// Everything before the first non-whitespace character
	T leadingWhiteOf(T str)
		{ return str[ 0 .. $-find!(isNonWhite)(str).length ]; }
	
	// Apply leadingWhiteOf, but emit null instead for whitespace-only lines
	T[] indents;
	if(__ctfe)
		indents = semitwist.util.functional.map( lines,
			(T str){ return str.ctfe_strip()==""? null : leadingWhiteOf(str);}
		);
	else
	{
		// Returns T (not string) so the wstring/dstring instantiations compile
		T mapPredicate(T str){ return str.strip()==""? null : leadingWhiteOf(str);}
		indents = array( std.algorithm.map!(
			mapPredicate//(T str){ return str.strip()==""? null : leadingWhiteOf(str);}
			)(lines) );
	}

	// Picks the shorter of two indents, treating null (blank line) as absent
	T shorterAndNonNull(T a, T b) {
		if(a is null) return b;
		if(b is null) return a;
		
		return (a.length < b.length)? a : b;
	}
	auto shortestIndent = std.algorithm.reduce!(shorterAndNonNull)(indents);
	
	// Nothing in common to remove
	if(shortestIndent is null || shortestIndent == "")
	{
		if(origStr == null)
			return stripLinesLeft(lines);
		else
			return [origStr.stripLeft()];
	}
		
	foreach(i; 0..lines.length)
	{
		if(indents[i] is null)
			lines[i] = "";
		// Each line's OWN indent must begin with the common indent. Testing
		// the whole `indents` array here would only ever compare the first
		// line's indent against shortestIndent.
		else if(indents[i].startsWith(shortestIndent))
			lines[i] = lines[i][shortestIndent.length..$];
		else
		{
			if(__ctfe)
				assert(false, "Inconsistent indentation");
			else
				throw new Exception("Inconsistent indentation");
		}
	}
	
	return lines;
}
486 
/// Removes leading whitespace-only lines from the text.
T stripLinesTop(T)(T str) if(isSomeString!T)
{
	enum top = true, bottom = false, left = false, right = false;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Removes trailing whitespace-only lines from the text.
T stripLinesBottom(T)(T str) if(isSomeString!T)
{
	enum top = false, bottom = true, left = false, right = false;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Removes both leading and trailing whitespace-only lines from the text.
T stripLinesTopBottom(T)(T str) if(isSomeString!T)
{
	enum top = true, bottom = true, left = false, right = false;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
499 
/// Strips leading whitespace from every line of the text.
T stripLinesLeft(T)(T str) if(isSomeString!T)
{
	enum top = false, bottom = false, left = true, right = false;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Strips trailing whitespace from every line of the text.
T stripLinesRight(T)(T str) if(isSomeString!T)
{
	enum top = false, bottom = false, left = false, right = true;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// Strips leading and trailing whitespace from every line of the text.
T stripLinesLeftRight(T)(T str) if(isSomeString!T)
{
	enum top = false, bottom = false, left = true, right = true;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
512 
/// Strips on all four sides: whitespace-only lines at the top and bottom,
/// and leading/trailing whitespace on every remaining line.
T stripLinesBox(T)(T str) if(isSomeString!T)
{
	enum top = true, bottom = true, left = true, right = true;
	return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
517 
// String front end for the stripLines* family: splits `str` on "\n", applies
// the strips selected by the four flags through stripLinesBox_LineImpl, and
// joins the result back together with "\n".
private T stripLinesBox_StrImpl
	(T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
	(T str)
	if(isSomeString!T)
{
	// The ctfe_* helpers are used at compile time, split/join otherwise
	T[] toLines()
	{
		if(__ctfe)
			return str.ctfe_split("\n");
		else
			return str.split("\n");
	}
	T fromLines(T[] pieces)
	{
		if(__ctfe)
			return pieces.ctfe_join("\n");
		else
			return pieces.join("\n");
	}

	if(str == "")
		return "";

	alias stripSelected =
		stripLinesBox_LineImpl!(T, stripTop, stripBottom, stripLeft, stripRight);

	return fromLines( stripSelected(toLines()) );
}
539 
// Line-array core of the stripLines* family: applies whichever strips the
// four compile-time flags select. Top and bottom are handled before the
// per-line left/right stripping.
private T[] stripLinesBox_LineImpl
	(T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
	(T[] lines)
	if(isSomeString!T)
{
	static if(stripTop)
		lines = stripLinesTop(lines);

	static if(stripBottom)
		lines = stripLinesBottom(lines);

	// Both sides together go through the combined routine
	static if(stripLeft && stripRight)
		lines = stripLinesLeftRight(lines);
	else static if(stripLeft)
		lines = stripLinesLeft(lines);
	else static if(stripRight)
		lines = stripLinesRight(lines);

	return lines;
}
560 
/// Line-array form of stripLinesBox: strips on all four sides.
T[] stripLinesBox(T)(T[] str) if(isSomeString!T)
{
	enum top = true, bottom = true, left = true, right = true;
	return stripLinesBox_LineImpl!(T, top, bottom, left, right)(str);
}
565 
/// Drops whitespace-only lines from the start of `lines`.
///
/// Returns a slice of `lines` beginning at the first line that is not blank
/// after stripping. If every line is blank, the final line alone is kept.
/// An empty array is returned as-is.
T[] stripLinesTop(T)(T[] lines) if(isSomeString!T)
{
	// Without this guard lines.length-1 would wrap around on an empty array
	if(lines.length == 0)
		return lines;

	auto firstLine = lines.length-1;

	foreach(i, line; lines)
	if(line.ctfe_strip() != "")
	{
		firstLine = i;
		break;
	}

	return lines[firstLine..$];
}
579 
/// Drops whitespace-only lines from the end of `lines`.
///
/// Returns a slice of `lines` ending at the last line that is not blank
/// after stripping. If every line is blank, the first line alone is kept.
/// An empty array is returned as-is.
T[] stripLinesBottom(T)(T[] lines) if(isSomeString!T)
{
	// Without this guard the final slice [0..1] would be out of range
	if(lines.length == 0)
		return lines;

	size_t lastLine = 0;

	foreach_reverse(i, line; lines)
	if(line.ctfe_strip() != "")
	{
		lastLine = i;
		break;
	}

	return lines[0..lastLine+1];
}
593 
/// Drops whitespace-only lines from both ends of `lines`: the top is
/// trimmed first, then the bottom.
T[] stripLinesTopBottom(T)(T[] lines) if(isSomeString!T)
{
	return stripLinesBottom( stripLinesTop(lines) );
}
600 
/// Replaces every element of `lines` with its ctfe_stripl()-ed form and
/// returns the same array.
T[] stripLinesLeft(T)(T[] lines) if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(lineNum; 0..lines.length)
		lines[lineNum] = lines[lineNum].ctfe_stripl();

	return lines;
}
609 
/// Replaces every element of `lines` with its ctfe_stripr()-ed form and
/// returns the same array.
T[] stripLinesRight(T)(T[] lines) if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(lineNum; 0..lines.length)
		lines[lineNum] = lines[lineNum].ctfe_stripr();

	return lines;
}
618 
/// Replaces every element of `lines` with its ctfe_strip()-ed form and
/// returns the same array.
T[] stripLinesLeftRight(T)(T[] lines) if(isSomeString!T)
{
	// foreach(ref) doesn't work right at compile time: DMD Issue #3835
	foreach(lineNum; 0..lines.length)
		lines[lineNum] = lines[lineNum].ctfe_strip();

	return lines;
}
627 
//TODO*: Unittest this
/// True when std.algorithm.find locates `needle` somewhere in `haystack`.
bool contains(T1,T2)(T1 haystack, T2 needle)
{
	// find() returns the remainder of haystack from the first match onward,
	// which is empty exactly when there was no match.
	auto fromMatch = std.algorithm.find(haystack, needle);
	return fromMatch.length != 0;
}
633 
634 /++
635 Unindents, strips whitespace-only lines from top and bottom,
and strips trailing whitespace from each line.
637 (Also converts Windows "\r\n" line endings to Unix "\n" line endings.)
638 
639 See also the documentation for unindent().
640 
641 Good for making easily-readable multi-line string literals without
642 leaving extra indents and whitespace in the resulting string:
643 
644 Do this:
645 --------------------
646 void foo()
647 {
648 	enum codeStr = q{
649 		// Written in the D Programming Langauge
650 		// by John Doe
651 
652 		int main()
653 		{
654 			return 0;
655 		}
656 	}.normalize();
657 }
658 --------------------
659 
660 Instead of this:
661 --------------------
662 void foo()
663 {
664 	enum codeStr = 
665 q{// Written in the D Programming Langauge
666 // by John Doe
667 
668 int main()
669 {
670 	return 0;
671 }};
672 }
673 --------------------
674 
675 The resulting string is exactly the same.
676 +/
T normalize(T)(T str) if(isSomeString!T)
{
	// Splitting and joining go through the ctfe_* helpers when running at
	// compile time and through the regular split/join otherwise.
	T[] toLines()
	{
		if(__ctfe)
			return str.ctfe_split("\n");
		else
			return str.split("\n");
	}
	T fromLines(T[] pieces)
	{
		if(__ctfe)
			return pieces.ctfe_join("\n");
		else
			return pieces.join("\n");
	}

	if(str == "")
		return "";

	// The actual work happens in the line-array overload
	return fromLines( normalize(toLines()) );
}
695 
/// ditto
// Line-array form. Order of operations: drop blank lines at the top and
// bottom, remove the common indent, then strip trailing whitespace per line.
T[] normalize(T)(T[] lines) if(isSomeString!T)
{
	auto trimmed    = stripLinesTopBottom(lines);
	auto unindented = unindent(trimmed);
	return stripLinesRight(unindented);
}
704 
// `md5` is simply another name for std.digest.md.md5Of; the hand-written
// implementation it replaced is kept, commented out, just below.
alias md5 = std.digest.md.md5Of;
706 /+string md5(string data)
707 {
708 	MD5_CTX context;
709 	context.start();
710 	context.update(data);
711 	ubyte digest[16];
712 	context.finish(digest);
713 	
714 	return digestToString(digest);
715 }+/
716 
/// Returns the byte sequence that encodes the given byte-order mark.
immutable(ubyte)[] bomCodeOf(BOM bom)
{
	static immutable ubyte[] utf8    = [0xEF, 0xBB, 0xBF];
	static immutable ubyte[] utf16le = [0xFF, 0xFE];
	static immutable ubyte[] utf16be = [0xFE, 0xFF];
	static immutable ubyte[] utf32le = [0xFF, 0xFE, 0x00, 0x00];
	static immutable ubyte[] utf32be = [0x00, 0x00, 0xFE, 0xFF];

	// final switch: every BOM member must be listed, with no default
	final switch(bom)
	{
	case BOM.UTF8:    return utf8;
	case BOM.UTF16LE: return utf16le;
	case BOM.UTF16BE: return utf16be;
	case BOM.UTF32LE: return utf32le;
	case BOM.UTF32BE: return utf32be;
	}
}
728 
/// Identifies which byte-order mark `str` begins with.
///
/// Returns: The matching BOM, or BOM.UTF8 when `str` starts with none of
/// the known marks.
BOM bomOf(const(ubyte)[] str)
{
	if(str.startsWith(bomCodeOf(BOM.UTF8   ))) return BOM.UTF8;

	// The UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE),
	// so the 32-bit marks have to be tested before the 16-bit ones or
	// UTF-32LE input would always be reported as UTF-16LE.
	if(str.startsWith(bomCodeOf(BOM.UTF32LE))) return BOM.UTF32LE;
	if(str.startsWith(bomCodeOf(BOM.UTF32BE))) return BOM.UTF32BE;
	if(str.startsWith(bomCodeOf(BOM.UTF16LE))) return BOM.UTF16LE;
	if(str.startsWith(bomCodeOf(BOM.UTF16BE))) return BOM.UTF16BE;
	
	// No mark present: fall back to UTF-8
	return BOM.UTF8;
}
739 
// Compile-time constants naming which 16-bit and 32-bit BOMs match the byte
// order of the machine being compiled for, and which are the opposite.
version(LittleEndian)
{
	enum BOM native16BitBOM    = BOM.UTF16LE;
	enum BOM native32BitBOM    = BOM.UTF32LE;
	enum BOM nonNative16BitBOM = BOM.UTF16BE;
	enum BOM nonNative32BitBOM = BOM.UTF32BE;
}
else
{
	enum BOM native16BitBOM    = BOM.UTF16BE;
	enum BOM native32BitBOM    = BOM.UTF32BE;
	enum BOM nonNative16BitBOM = BOM.UTF16LE;
	enum BOM nonNative32BitBOM = BOM.UTF32LE;
}
754 
/// True for the 16-bit and 32-bit BOMs that match this platform's byte
/// order. BOM.UTF8 is also counted as native.
bool isNativeEndian(BOM bom)
{
	return bom == native16BitBOM || bom == native32BitBOM || bom == BOM.UTF8;
}

/// Exact negation of isNativeEndian.
bool isNonNativeEndian(BOM bom)
{
	return !isNativeEndian(bom);
}
764 
/// True only for the UTF-8 BOM.
bool is8Bit(BOM bom)
{
	return BOM.UTF8 == bom;
}

/// True for either UTF-16 BOM, regardless of byte order.
bool is16Bit(BOM bom)
{
	return bom == BOM.UTF16LE || bom == BOM.UTF16BE;
}

/// True for either UTF-32 BOM, regardless of byte order.
bool is32Bit(BOM bom)
{
	return bom == BOM.UTF32LE || bom == BOM.UTF32BE;
}
779 
/// Maps a BOM to the byte order it denotes. BOM.UTF8 maps to the platform's
/// own byte order (std.system's `endian`).
Endian endianOf(BOM bom)
{
	// DMD 2.055 changed "LittleEndian" to "littleEndian", etc...
	// Pick whichever spelling this Phobos provides, once, up front.
	static if(__traits(compiles, Endian.littleEndian))
	{
		enum little = Endian.littleEndian;
		enum big    = Endian.bigEndian;
	}
	else
	{
		enum little = Endian.LittleEndian;
		enum big    = Endian.BigEndian;
	}

	final switch(bom)
	{
	case BOM.UTF8:
		return endian;

	case BOM.UTF16LE:
	case BOM.UTF32LE:
		return little;

	case BOM.UTF16BE:
	case BOM.UTF32BE:
		return big;
	}
}
802 
/// Compile-time test: true when T is exactly InsensitiveT instantiated with
/// string, wstring or dstring.
template isInsensitive(T)
{
	enum isInsensitive =
		is(T==InsensitiveT!string ) ||
		is(T==InsensitiveT!wstring) ||
		is(T==InsensitiveT!dstring);
}
// Sanity checks: the three aliases qualify, a plain string does not
static assert(isInsensitive!Insensitive);
static assert(isInsensitive!WInsensitive);
static assert(isInsensitive!DInsensitive);
static assert(!isInsensitive!string);
814 
/// Wraps a string so that ordering, equality and hashing ignore case.
///
/// `str` holds the text as given (and is what toString() returns);
/// `foldingCase` caches its lower-cased form, which is what opCmp and
/// toHash operate on.
struct InsensitiveT(T) if(isSomeString!T)
{
	private T str;
	private T foldingCase;
	
	this(T newStr)
	{
		str = newStr;
		updateFoldingCase();
	}
	
	/// Returns the text with its original casing.
	T toString() const
	{
		return str;
	}
	
	// Recomputes the cached lower-case form from `str`
	private void updateFoldingCase()
	{
		// Phobos doesn't actually have a toFolding() right now
		foldingCase = toLower(str);
	}
	
	// The folded text is what gets hashed, so values that compare equal
	// also hash equal. typeid(T) is used so the hasher matches the actual
	// string type rather than always treating the data as `string`.
	static if(useNoThrowSafeToHash)
	{
		nothrow @trusted hash_t toHash() const
		{
			return typeid(T).getHash(&foldingCase);
		}
	}
	else
	{
		const hash_t toHash()
		{
			return typeid(T).getHash(&foldingCase);
		}
	}
	
	/// Assigns from a plain string or from any InsensitiveT instantiation.
	void opAssign(T2)(T2 b) if(isInsensitive!T2 || isSomeString!T2)
	{
		static if(is(T2 == InsensitiveT!T))
		{
			// Same instantiation: reuse the already-folded text
			str = b.str;
			foldingCase = b.foldingCase;
		}
		else static if(isInsensitive!T2)
		{
			// Different character width: convert, then fold again
			str = to!T(b.str);
			updateFoldingCase();
		}
		else
		{
			str = b;
			updateFoldingCase();
		}
	}
	
	/// Concatenation; the result's folded form is computed from scratch.
	InsensitiveT!T opBinary(string op)(const InsensitiveT!T b) if(op=="~")
	{
		return InsensitiveT!T(str ~ b.str);
	}
	
	/// Appending; both the text and the cached folded form are extended.
	InsensitiveT!T opOpAssign(string op)(const InsensitiveT!T b) if(op=="~")
	{
		str ~= b.str;
		foldingCase ~= b.foldingCase;
		return this;
	}
	
	//TODO: Get rid of this "static if" (but not the func) after dropping support for DMD 2.058
	static if(vendor != Vendor.digitalMars || version_minor >= 59)
	bool opEquals(const InsensitiveT!T b) const
	{
		// `b` is an lvalue here, so this resolves to the ref overload below
		return opEquals(b);
	}

	bool opEquals(ref const InsensitiveT!T b) const
	{
		/+if (str is b.str) return true;
		if (str is null || b.str is null) return false;
		return foldingCase == b.foldingCase;+/
		return this.opCmp(b) == 0;
	}
	
	//TODO: Get rid of this "static if" (but not the func) after dropping support for DMD 2.058
	static if(vendor != Vendor.digitalMars || version_minor >= 59)
	int opCmp(const InsensitiveT!T b) const
	{
		// `b` is an lvalue here, so this resolves to the ref overload below
		return opCmp(b);
	}

	/// Orders by the folded text. A null string sorts before any string it
	/// does not compare equal to.
	int opCmp(ref const InsensitiveT!T b) const
	{
		if(str   is null && b.str is null) return 0;
		if(str   == b.str) return 0;
		if(str   is null ) return -1;
		if(b.str is null ) return 1;
		return std.string.cmp(foldingCase, b.foldingCase);
	}
	
	/// Whole-value slice: returns this value itself.
	InsensitiveT!T opSlice()
	{
		return this;
	}

	/// Returns the single code unit at position x of the original text.
	auto opSlice(size_t x)
	{
		return str[x];
	}

	/// Returns a new case-insensitive value wrapping str[x..y].
	InsensitiveT!T opSlice(size_t x, size_t y)
	{
		return InsensitiveT!T(str[x..y]);
	}
}
929 
// Ready-made case-insensitive wrappers for the three built-in string types
alias Insensitive  = InsensitiveT!string;
alias WInsensitive = InsensitiveT!wstring;
alias DInsensitive = InsensitiveT!dstring;
933 
/// Renders each byte of `arr` as two upper-case hexadecimal digits, with no
/// separators. An empty array gives "".
///
/// The parameter is const so that mutable, const and immutable byte arrays
/// are all accepted.
//TODO: Could be replaced with std.digest.toHexString in DMD 2.061
string toHexString(const(ubyte)[] arr)
{
	if(arr.length == 0)
		return "";

	static immutable hexDigits = "0123456789ABCDEF";

	// Two output characters per input byte, filled in one pass
	auto buf = new char[arr.length * 2];
	foreach(i, b; arr)
	{
		buf[2*i]     = hexDigits[b >> 4];
		buf[2*i + 1] = hexDigits[b & 0x0F];
	}

	return buf.idup;
}
954 
// Unit tests for this module, grouped by the function they exercise.
// The whole body is a token string handed to unittestSemiTwistDLib; checks are
// written with the deferAssert/deferEnsure/deferEnsureThrows mixins, where `_`
// in the second token string stands for the value of the first.
mixin(unittestSemiTwistDLib(q{

	// Insensitive
	mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("Test") }));
	mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("TEST") }));
	mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive("ABCD") }));
	mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive(null)   }));
	mixin(deferAssert!(q{ Insensitive(null)   == Insensitive(null)   }));
	mixin(deferAssert!(q{ Insensitive("Test") == Insensitive("TEST") }));
	mixin(deferAssert!(q{ Insensitive("ABCD") != Insensitive("TEST") }));
	mixin(deferAssert!(q{ Insensitive(null)   != Insensitive("TEST") }));

	mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("ES") }));
	mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("es") }));
	mixin(deferAssert!(q{ Insensitive("TEST")[1..3] != Insensitive("AB") }));

	mixin(deferAssert!(q{ Insensitive("TE")~Insensitive("ST") == Insensitive("TesT") }));
	
	Insensitive ins;
	ins = Insensitive("TEST");
	ins = "ab";
	ins ~= Insensitive("cd");

	mixin(deferAssert!(q{ ins == Insensitive("AbcD") }));
	
	int[Insensitive] ins_aa = [Insensitive("ABC"):1, Insensitive("DEF"):2, Insensitive("Xyz"):3];
	mixin(deferAssert!(q{ Insensitive("ABC") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("DEF") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("Xyz") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("aBc") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("dEf") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("xYZ") in ins_aa }));
	mixin(deferAssert!(q{ Insensitive("HI") !in ins_aa }));
	
	mixin(deferAssert!(q{ ins_aa[Insensitive("aBc")] == 1 }));
	mixin(deferAssert!(q{ ins_aa[Insensitive("dEf")] == 2 }));
	mixin(deferAssert!(q{ ins_aa[Insensitive("xYZ")] == 3 }));

	// escapeDDQS, unescapeDDQS
	mixin(deferEnsure!(q{ `hello`.escapeDDQS()     }, q{ _ == `"hello"` }));
	mixin(deferEnsure!(q{ `"hello"`.unescapeDDQS() }, q{ _ == "hello"   }));
	mixin(deferEnsure!(q{ `"I"`.unescapeDDQS()     }, q{ _ == "I"       }));
	
	mixin(deferEnsure!(q{ (`And...`~"\n"~`sam\nick said "Hi!".`).escapeDDQS()  }, q{ _ == `"And...\nsam\\nick said \"Hi!\"."`  }));
	//TODO: Make this one pass
	//mixin(deferEnsure!(q{ `"And...\nsam\\nick said \"Hi!\"."`.unescapeDDQS() }, q{ _ == `And...`~"\n"~`sam\nick said "Hi!".` }));
	mixin(deferEnsureThrows!(q{ "hello".unescapeDDQS(); }, Exception));

	// indent
	mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent("  ") }, q{ _ == "  A\n  \tB\n  \n  C" }));
	mixin(deferEnsure!(q{ "A\nB\n".indent("\t")      }, q{ _ == "\tA\n\tB\n"          }));
	mixin(deferEnsure!(q{ "".indent("\t")            }, q{ _ == "\t"                  }));
	mixin(deferEnsure!(q{ "A".indent("\t")           }, q{ _ == "\tA"                 }));
	mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent("")   }, q{ _ == "A\n\tB\n\nC"         }));

	// unindent
	mixin(deferEnsure!(q{ " \t A\n \t \tB\n \t C\n  \t\n \t D".unindent() }, q{ _ == "A\n\tB\nC\n\nD" }));
	mixin(deferEnsure!(q{ " D\n".unindent()    }, q{ _ == "D\n" }));
	mixin(deferEnsure!(q{ " D\n ".unindent()   }, q{ _ == "D\n" }));
	mixin(deferEnsure!(q{ "D".unindent()       }, q{ _ == "D"   }));
	mixin(deferEnsure!(q{ "".unindent()        }, q{ _ == ""    }));
	mixin(deferEnsure!(q{ " ".unindent()       }, q{ _ == ""    }));
	mixin(deferEnsureThrows!(q{ " \tA\n\t B".unindent(); }, Exception));
	mixin(deferEnsureThrows!(q{ "  a\n \tb".unindent();    }, Exception));

	// unindent at compile-time
	enum ctfe_unindent_dummy1 = " \t A\n \t \tB\n \t C\n  \t\n \t D".unindent();
	enum ctfe_unindent_dummy2 = " D".unindent();
	enum ctfe_unindent_dummy3 = " D\n".unindent();
	enum ctfe_unindent_dummy4 = "".unindent();

	mixin(deferEnsure!(q{ ctfe_unindent_dummy1 }, q{ _ == "A\n\tB\nC\n\nD" }));
	mixin(deferEnsure!(q{ ctfe_unindent_dummy2 }, q{ _ == "D"   }));
	mixin(deferEnsure!(q{ ctfe_unindent_dummy3 }, q{ _ == "D\n" }));
	mixin(deferEnsure!(q{ ctfe_unindent_dummy4 }, q{ _ == ""    }));
	
	//enum ctfe_unindent_dummy5 = "  a\n \tb".unindent(); // Should fail to compile
	
	// contains
	mixin(deferEnsure!(q{ contains("abcde", 'a') }, q{ _==true  }));
	mixin(deferEnsure!(q{ contains("abcde", 'c') }, q{ _==true  }));
	mixin(deferEnsure!(q{ contains("abcde", 'e') }, q{ _==true  }));
	mixin(deferEnsure!(q{ contains("abcde", 'x') }, q{ _==false }));

	// stripLines: Top and Bottom
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTop()       }, q{ _ == " ABC \n \n DEF \n \t \n\t \n" }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBottom()    }, q{ _ == " \t \n\t \n ABC \n \n DEF "   }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTopBottom() }, q{ _ == " ABC \n \n DEF "              }));

	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTop()       }, q{ _ == "ABC\n " }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBottom()    }, q{ _ == "\nABC"  }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTopBottom() }, q{ _ == "ABC"    }));

	mixin(deferEnsure!(q{ "\n".stripLinesTop()       }, q{ _ == "" }));
	mixin(deferEnsure!(q{ "\n".stripLinesBottom()    }, q{ _ == "" }));
	mixin(deferEnsure!(q{ "\n".stripLinesTopBottom() }, q{ _ == "" }));

	mixin(deferEnsure!(q{ "ABC".stripLinesTopBottom()      }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "".stripLinesTopBottom()         }, q{ _ == ""    }));

	// stripLines: Left and Right
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeft()      }, q{ _ == "\n\nABC \n\nDEF \n\n\n" }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesRight()     }, q{ _ == "\n\n ABC\n\n DEF\n\n\n" }));
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight() }, q{ _ == "\n\nABC\n\nDEF\n\n\n"   }));

	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeft()      }, q{ _ == "\nABC\n" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesRight()     }, q{ _ == "\nABC\n" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeftRight() }, q{ _ == "\nABC\n" }));

	mixin(deferEnsure!(q{ "\n".stripLinesLeft()      }, q{ _ == "\n" }));
	mixin(deferEnsure!(q{ "\n".stripLinesRight()     }, q{ _ == "\n" }));
	mixin(deferEnsure!(q{ "\n".stripLinesLeftRight() }, q{ _ == "\n" }));

	mixin(deferEnsure!(q{ "ABC".stripLinesLeftRight() }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "".stripLinesLeftRight()    }, q{ _ == ""    }));

	// stripLinesBox
	mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox() }, q{ _ == "ABC\n\nDEF" }));
	mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBox() }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "\n".stripLinesBox()       }, q{ _ == ""    }));
	mixin(deferEnsure!(q{ "ABC".stripLinesBox()      }, q{ _ == "ABC" }));
	mixin(deferEnsure!(q{ "".stripLinesBox()         }, q{ _ == ""    }));
	
	// stripLines at compile-time
	enum ctfe_stripLinesBox_dummy1 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox();
	enum ctfe_stripLinesBox_dummy2 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight();
	enum ctfe_stripLinesBox_dummy3 = "".stripLinesBox();

	mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy1 }, q{ _ == "ABC\n\nDEF" }));
	mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy2 }, q{ _ == "\n\nABC\n\nDEF\n\n\n" }));
	mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy3 }, q{ _ == "" }));

	// normalize
	mixin(deferEnsure!(q{
				q{
			// test 
			void foo() {  
				int x = 2;
			}
	}.normalize()
	}, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));

	enum ctfe_normalize_dummy1 = q{
			// test 
			void foo() {  
				int x = 2;
			}
	}.normalize();
	mixin(deferEnsure!(q{ ctfe_normalize_dummy1 }, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));
	
	// toHexString
	mixin(deferEnsure!(q{ toHexString([0x00, 0x12, 0x0A, 0xBC]) }, q{ _ == "00120ABC" } ));
	mixin(deferEnsure!(q{ toHexString([0xF0])                   }, q{ _ == "F0"       } ));
	mixin(deferEnsure!(q{ toHexString([])                       }, q{ _ == ""         } ));
}));