1 // SemiTwist Library
2 // Written in the D programming language.
3 
4 module semitwist.util.io;
5 
6 import std.traits;
7 import std.path;
8 import std.conv;
9 import std.file;
10 import std.stdio;
11 import std.stream;
12 import std..string;
13 import std.system;
14 
15 import semitwist.util.all;
16 
17 version(Win32)
18 	import std.c.windows.windows;
19 else version(OSX)
20 	private extern(C) int _NSGetExecutablePath(char* buf, uint* bufsize);
21 else
22 	import std.c.linux.linux;
23 
/++
Loads a Unicode text file and returns its contents as the string type TOut.

The whole file is read into memory as raw bytes and handed to utfConvert,
which takes care of BOM detection/stripping, endianness and the conversion
to TOut (UTF-8, UTF-16 and UTF-32, big or little endian, are accepted).

Params:
	filename = Path of the file to read.

Returns: The decoded contents of the file as a TOut.

Examples:
	string  utf8  = readUTFFile!string ( "ANY_unicode_file.txt" );
	wstring utf16 = readUTFFile!wstring( "ANY_unicode_file.txt" );
	dstring utf32 = readUTFFile!dstring( "ANY_unicode_file.txt" );
+/
TOut readUTFFile(TOut, TFilename)(TFilename filename)
	if(isSomeString!TOut && isSomeString!TFilename)
{
	// std.file.read yields untyped data; utfConvert wants raw immutable bytes.
	immutable(ubyte)[] rawBytes = cast(immutable(ubyte)[])read(filename);
	return utfConvert!TOut(rawBytes);
}
40 
/++
Converts any type of Unicode/UTF data with or without a BOM (UTF-8, UTF-16,
UTF-32, big or little endian), strips the BOM (if it exists), and automatically
converts it to native endianness and whatever string type is specified in TOut.

If there is no BOM, then UTF-8 is assumed.

The input is always interpreted as a raw byte sequence, regardless of the
element type it is passed in as. The encoding is decided solely by the BOM
(via bomOf), not by TInChar.

Params:
	data = The encoded text, optionally starting with a BOM.

Returns: The decoded text as a TOut.

Throws: Exception if the BOM type is not 8, 16 or 32 bit, or if the payload
	length is not a whole number of code units for the detected encoding.

Examples:
	string  utf8  = utfConvert!string ( anyUTFDataWithBOM );
	wstring utf16 = utfConvert!wstring( anyUTFDataWithBOM );
	dstring utf32 = utfConvert!dstring( anyUTFDataWithBOM );
+/
TOut utfConvert(TOut, TInChar)(immutable(TInChar)[] data)
	if( isSomeString!TOut && (isSomeString!(immutable(TInChar)[]) || is(TInChar==ubyte)) )
{
	// Work on bytes from here on: the BOM code is a byte sequence, so both the
	// comparison and the slice below must be measured in bytes, not in
	// TInChar units (which may be 2 or 4 bytes wide).
	auto bytes = cast(immutable(ubyte)[])data;
	auto bom = bomOf(bytes);
	auto bomCode = cast(immutable(ubyte)[])bomCodeOf(bom);
	
	// Strip BOM if it exists
	if(bytes.length >= bomCode.length && bytes[0..bomCode.length] == bomCode)
		bytes = bytes[bomCode.length..$];
	
	// Reinterpreting the bytes as 16/32-bit code units (below and inside
	// byteSwap16/byteSwap32) requires a whole number of units.
	immutable size_t unitSize = is16Bit(bom) ? 2 : (is32Bit(bom) ? 4 : 1);
	if(bytes.length % unitSize != 0)
		throw new Exception(
			"UTF data length (%s bytes) is not a multiple of the %s-byte code unit size"
			.format(bytes.length, unitSize)
		);
	
	// byteSwap16/byteSwap32 return a swapped COPY; the result must be kept.
	if(isNonNativeEndian(bom))
	{
		if(is16Bit(bom))
			bytes = byteSwap16(bytes);
		else if(is32Bit(bom))
			bytes = byteSwap32(bytes);
	}

	// No references to 'bytes' are maintained
	if(is8Bit(bom))
		return to!TOut(cast(string)bytes);
	else if(is16Bit(bom))
		return to!TOut(cast(wstring)bytes);
	else if(is32Bit(bom))
		return to!TOut(cast(dstring)bytes);
	else
		throw new Exception("Unhandled BOM type '%s'".format(bom));
}
84 
/// Returns value with its two bytes exchanged (0x1234 becomes 0x3412).
ushort byteSwapVal16(ushort value)
{
	immutable lowToHigh = (value & 0x00FF) << 8;
	immutable highToLow = value >> 8;
	return cast(ushort)(highToLow | lowToHigh);
}
89 
/// Returns value with its four bytes in reverse order
/// (0x1234_5678 becomes 0x7856_3412).
uint byteSwapVal32(uint value)
{
	immutable uint topToBottom    = value >> 24;
	immutable uint upperToLower   = (value >> 8) & 0x0000_FF00;
	immutable uint lowerToUpper   = (value << 8) & 0x00FF_0000;
	immutable uint bottomToTop    = value << 24;
	return topToBottom | upperToLower | lowerToUpper | bottomToTop;
}
98 
/// Returns value with its byte order reversed, dispatching on the width of T
/// to byteSwapVal16 (ushort) or byteSwapVal32 (uint).
private T byteSwap(T)(T value) if(is(T==ushort) || is(T==uint))
{
	static if(is(T==ushort))
		return byteSwapVal16(value);
	else static if(is(T==uint))
		return byteSwapVal32(value);
	else
		// Guards against the constraint being widened without adding a branch.
		// String concatenation in D is '~', not '+'.
		static assert(0, "T=='"~T.stringof~"' not handled");
}
108 
/// Reverses the byte order of every element of data, overwriting the
/// caller's array.
void byteSwapInPlace(T)(T[] data) if(is(T==ushort) || is(T==uint))
{
	for(size_t i = 0; i < data.length; ++i)
		data[i] = byteSwap(data[i]);
}
114 
/// Returns a newly allocated copy of data in which every element has had its
/// byte order reversed. The input array is left untouched.
private immutable(T)[] byteSwap(T)(immutable(T)[] data) if(is(T==ushort) || is(T==uint))
{
	// Swap inside a private mutable copy...
	T[] swapped = data.dup;
	byteSwapInPlace(swapped);
	
	// ...which nobody else holds a reference to (neither this function nor
	// byteSwapInPlace keeps one), so handing it out as immutable is safe.
	return cast(immutable(T)[])swapped;
}
123 
/// Treats data as a sequence of 16-bit units and returns a copy, typed as
/// immutable(T)[], in which the two bytes of each unit are exchanged.
/// The input is not modified.
immutable(T)[] byteSwap16(T)(const(T)[] data)
{
	// Reinterpret as 16-bit units; the array overload of byteSwap works on a
	// duplicate, so viewing the const input as immutable here is harmless.
	auto units = cast(immutable(ushort)[])data;
	auto swappedUnits = byteSwap(units);
	return cast(immutable(T)[])swappedUnits;
}
128 
/// Treats data as a sequence of 32-bit units and returns a copy, typed as
/// immutable(T)[], in which the four bytes of each unit are reversed.
/// The input is not modified.
immutable(T)[] byteSwap32(T)(const(T)[] data)
{
	// Reinterpret as 32-bit units; the array overload of byteSwap works on a
	// duplicate, so viewing the const input as immutable here is harmless.
	auto units = cast(immutable(uint)[])data;
	auto swappedUnits = byteSwap(units);
	return cast(immutable(T)[])swappedUnits;
}
133 
/++
Reads a zero-terminated string from reader, one code unit at a time, and
returns it without the terminator.

The code unit type read is char, wchar or dchar for T == string, wstring or
dstring respectively; any other T is rejected at compile time.

Params:
	reader = Stream to read from; reading starts at its current position
	         and stops just after the first zero code unit.

Returns: The code units read before the terminating zero.

NOTE(review): behavior when the stream ends before a zero code unit is found
depends on reader.read - presumably it throws; confirm against std.stream.
+/
T readStringz(T)(std.stream.File reader) if(isSomeString!T)
{
	Unqual!T str;
	static if(is(T==string))
		alias char TElem;
	else static if(is(T==wstring))
		alias wchar TElem;
	else static if(is(T==dstring))
		alias dchar TElem;
	else
		// The condition must be an explicit 0: a bare string literal is
		// truthy, so it would never trigger and the message would be lost.
		static assert(0, "'"~T.stringof~"' not allowed.");
		
	TElem c;
	
	// The terminator is appended as well and sliced off again on return.
	do
	{
		reader.read(c);
		str ~= c;
	} while(c != 0);

	// No references saved, nothing can change it.
	return cast(T)(str[0..$-1]);
}
157 
//TODO*: Unittest this
/// Returns data converted to the byte order en.
/// The input is assumed to already be in the machine's native byte order, so
/// the bytes are reversed only when en differs from std.system.endian.
T toEndian(T)(T data, Endian en) if(is(T==ushort) || is(T==uint))
{
	immutable bool alreadyInTargetOrder = (en == endian);
	return alreadyInTargetOrder ? data : byteSwap(data);
}
167 
168 /// Gets the full path to the currently running executable,
169 /// regardless of working directory or PATH env var or anything else.
170 /// Note that this is far more accurate and reliable than using args[0].
171 /+FilePath getExecFilePath()
172 {
173 	string file = new char[4*1024];
174 	int filenameLength;
175 	version (Win32)
176 		filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);
177 	else version(OSX)
178 	{
179 		filenameLength = file.length-1;
180 		_NSGetExecutablePath(file.ptr, &filenameLength);
181 	}
182 	else
183         filenameLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);
184 
185 	auto fp = new FilePath(file[0..filenameLength]);
186 	fp.native();
187 	return fp;
188 }+/
/// Returns the full path to the currently running executable as reported by
/// the operating system (GetModuleFileNameA on Win32, _NSGetExecutablePath on
/// OSX, readlink on selfExeLink elsewhere).
///
/// Throws: Exception if the OS query fails on OSX or via readlink. On Win32 a
/// failed query yields an empty string.
string getExec()
{
	auto file = new char[4*1024];
	size_t filenameLength;
	version (Win32)
	{
		// Returns the number of characters written; 0 on failure, which
		// results in an empty string being returned.
		// NOTE(review): the ANSI variant returns text in the system code
		// page, not UTF-8 - confirm non-ASCII paths are acceptable here.
		filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);
	}
	else version(OSX)
	{
		// The size parameter is a uint*, not a size_t*.
		uint bufSize = cast(uint)(file.length-1);
		if(_NSGetExecutablePath(file.ptr, &bufSize) != 0)
			throw new Exception("Unable to get the path of the running executable (buffer too small)");
		
		// On success the size is NOT updated with the path length; the path
		// is written as a NUL-terminated string, so measure it directly.
		while(filenameLength < file.length && file[filenameLength] != '\0')
			++filenameLength;
	}
	else
	{
		// readlink returns -1 on error; that must never reach the slice below.
		auto linkLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);
		if(linkLength < 0)
			throw new Exception("Unable to get the path of the running executable (readlink failed)");
		filenameLength = cast(size_t)linkLength;
	}

	return to!string(file[0..filenameLength]);
}
208 
/// The file name of the currently running executable, i.e. the result of
/// getExec with the directory part removed.
string getExecName()
{
	auto fullPath = getExec();
	return baseName(fullPath);
}
215 
/// The directory containing the currently running executable: the directory
/// part of getExec with dirSep appended as a trailing path separator.
string getExecPath()
{
	auto directory = dirName(getExec());
	return directory ~ dirSep;
}
222 
// Unit tests for the byte-swapping helpers and utfConvert.
// Each deferEnsure pairs an expression with a condition on its result ('_').
mixin(unittestSemiTwistDLib(q{
	// Scalar byte swapping
	mixin(deferEnsure!(
		q{ byteSwapVal16(0x1234     ) },
		q{ _ == 0x3412      }
	));
	mixin(deferEnsure!(
		q{ byteSwapVal32(0x1234_5678) },
		q{ _ == 0x7856_3412 }
	));

	// Array byte swapping
	mixin(deferEnsure!(
		q{ byteSwap16(cast(immutable(ushort)[])[0x1234, 0x5678, 0x9ABC, 0xDEF0]) },
		q{ _ == cast(ushort[])[0x3412, 0x7856, 0xBC9A, 0xF0DE] }
	));
	mixin(deferEnsure!(
		q{ byteSwap32(cast(immutable(uint)[]  )[0x1234____5678, 0x9ABC____DEF0]) },
		q{ _ == cast(uint[]  )[0x7856_3412, 0xF0DE_BC9A]       }
	));
	
	// utfConvert: UTF-8 input with a BOM, then BOM-less input
	mixin(deferEnsure!(
		q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")) },
		q{ _== ("AB\nCD"~"\r"~"\nEF") }
	));
	mixin(deferEnsure!(
		q{ utfConvert!string ("ABCDEF") },
		q{ _== ("ABCDEF" ) }
	));
	mixin(deferEnsure!(
		q{ utfConvert!dstring("ABCDEF") },
		q{ _== ("ABCDEF"d) }
	));

	//TODO: Check into the weird disappearing \r:
	//mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").escapeDDQS() }));
	//mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").length }));
	//mixin(traceVal!(q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")).escapeDDQS() }));
}));