// SemiTwist Library
// Written in the D programming language.

module semitwist.util.io;

import std.traits;
import std.path;
import std.conv;
import std.file;
import std.stdio;
import std.stream;
import std.string;
import std.system;

import semitwist.util.all;

version(Win32)
	import std.c.windows.windows;
else version(OSX)
	private extern(C) int _NSGetExecutablePath(char* buf, uint* bufsize);
else
	import std.c.linux.linux;

/++
Reads any type of Unicode/UTF text file (UTF-8, UTF-16, UTF-32, big or little
endian), detects BOM, and automatically converts it to native endianness and
whatever string type is specified in TOut.

The whole file is loaded with std.file.read and the raw bytes are handed to
utfConvert, which does the BOM detection and conversion.

Params:
	filename = Path of the file to read (any string type).

Returns: The file's text as a TOut.

Examples:
	string  utf8  = readUTFFile!string ( "ANY_unicode_file.txt" );
	wstring utf16 = readUTFFile!wstring( "ANY_unicode_file.txt" );
	dstring utf32 = readUTFFile!dstring( "ANY_unicode_file.txt" );
+/
TOut readUTFFile(TOut, TFilename)(TFilename filename)
	if(isSomeString!TOut && isSomeString!TFilename)
{
	auto data = cast(immutable(ubyte)[])read(filename);
	return utfConvert!TOut(data);
}

/++
Converts any type of Unicode/UTF string with or without a BOM (UTF-8, UTF-16,
UTF-32, big or little endian), strips the BOM (if it exists), and automatically
converts it to native endianness and whatever string type is specified in TOut.

If there is no BOM, then UTF-8 is assumed.

The input is always treated as a raw byte sequence, regardless of TInChar:
BOM detection, BOM stripping and byte swapping all operate on bytes, and only
the final result is reinterpreted as string/wstring/dstring.

Params:
	data = The encoded text, as a string type or as immutable(ubyte)[].

Returns: The decoded text converted to TOut.

Throws: Exception if the detected BOM is not 8-, 16- or 32-bit according to
the is8Bit/is16Bit/is32Bit helpers.

Examples:
	string  utf8  = utfConvert!string ( anyUTFDataWithBOM );
	wstring utf16 = utfConvert!wstring( anyUTFDataWithBOM );
	dstring utf32 = utfConvert!dstring( anyUTFDataWithBOM );
+/
TOut utfConvert(TOut, TInChar)(immutable(TInChar)[] data)
	if( isSomeString!TOut && (isSomeString!(immutable(TInChar)[]) || is(TInChar==ubyte)) )
{
	// BOM codes are byte sequences, so every length and comparison below must
	// be done in bytes. Indexing 'data' directly would count in TInChar units
	// and strip too much (or compare the wrong values) for wchar/dchar input.
	auto bytes = cast(immutable(ubyte)[])data;

	auto bom = bomOf(bytes);
	auto bomCode = bomCodeOf(bom);

	// Strip BOM if it exists
	if(bytes.length >= bomCode.length && bytes[0..bomCode.length] == bomCode)
		bytes = bytes[bomCode.length..$];

	if(isNonNativeEndian(bom))
	{
		// byteSwap16/byteSwap32 return a NEW swapped array (they do not swap
		// in place), so the result must be used. It is then reinterpreted as
		// the matching UTF string type before conversion to TOut.
		if(is16Bit(bom))
			return to!TOut(cast(wstring)byteSwap16(bytes));
		else if(is32Bit(bom))
			return to!TOut(cast(dstring)byteSwap32(bytes));
	}

	// No references to 'data' are maintained
	if(is8Bit(bom))
		return to!TOut(cast(string)bytes);
	else if(is16Bit(bom))
		return to!TOut(cast(wstring)bytes);
	else if(is32Bit(bom))
		return to!TOut(cast(dstring)bytes);
	else
		throw new Exception("Unhandled BOM type '%s'".format(bom));
}

/// Returns value with its two bytes exchanged (0x1234 becomes 0x3412).
ushort byteSwapVal16(ushort value)
{
	return cast(ushort)( (value >> 8) | ((value & 0x00FF) << 8) );
}

/// Returns value with its four bytes in reverse order
/// (0x1234_5678 becomes 0x7856_3412).
uint byteSwapVal32(uint value)
{
	return
		(value >> 24) |
		((value & 0x00FF_0000) >> 8) |
		((value & 0x0000_FF00) << 8) |
		((value & 0x0000_00FF) << 24);
}

// Scalar dispatch: forwards to byteSwapVal16 or byteSwapVal32 based on T.
private T byteSwap(T)(T value) if(is(T==ushort) || is(T==uint))
{
	static if(is(T==ushort))
		return byteSwapVal16(value);
	else static if(is(T==uint))
		return byteSwapVal32(value);
	else
		static assert(0, "T=='"~T.stringof~"' not handled");
}

/// Byte-swaps every element of data, modifying the array in place.
void byteSwapInPlace(T)(T[] data) if(is(T==ushort) || is(T==uint))
{
	foreach(ref value; data)
		value = byteSwap(value);
}

// Array version: returns a byte-swapped copy, leaving 'data' untouched.
private immutable(T)[] byteSwap(T)(immutable(T)[] data) if(is(T==ushort) || is(T==uint))
{
	T[] mutableData = data.dup;
	byteSwapInPlace(mutableData);

	// Neither this nor byteSwapInPlace squirrels away a copy
	return cast(immutable(T)[])mutableData;
}

/// Reinterprets data as a sequence of 16-bit units and returns a new array
/// (reinterpreted back to T) in which the bytes of each unit are swapped.
/// The input is not modified.
immutable(T)[] byteSwap16(T)(const(T)[] data)
{
	return cast(immutable(T)[])byteSwap(cast(immutable(ushort)[])data);
}

/// Reinterprets data as a sequence of 32-bit units and returns a new array
/// (reinterpreted back to T) in which the bytes of each unit are reversed.
/// The input is not modified.
immutable(T)[] byteSwap32(T)(const(T)[] data)
{
	return cast(immutable(T)[])byteSwap(cast(immutable(uint)[])data);
}

/// Reads code units of T's element type from reader, one at a time, until a
/// zero code unit is read. Returns everything read before that terminator
/// (the terminator itself is not included).
/// T must be exactly string, wstring or dstring.
T readStringz(T)(std.stream.File reader) if(isSomeString!T)
{
	Unqual!T str;
	static if(is(T==string))
		alias char TElem;
	else static if(is(T==wstring))
		alias wchar TElem;
	else static if(is(T==dstring))
		alias dchar TElem;
	else
		// The condition must be an explicit 0: a bare string literal is
		// truthy, so 'static assert("...")' would never fire.
		static assert(0, "'"~T.stringof~"' not allowed.");

	TElem c;

	do
	{
		reader.read(c);
		str ~= c;
	} while(c != 0);

	// No references saved, nothing can change it.
	// The last element is the zero terminator appended by the loop; drop it.
	return cast(T)(str[0..$-1]);
}

//TODO*: Unittest this
// This assumes that data is already in native endianness
/// Returns data unchanged when en is the native endianness (std.system.endian),
/// otherwise returns it byte-swapped.
T toEndian(T)(T data, Endian en) if(is(T==ushort) || is(T==uint))
{
	if(en == endian)
		return data;
	else
		return byteSwap(data);
}

/// Gets the full path to the currently running executable,
/// regardless of working directory or PATH env var or anything else.
/// Note that this is far more accurate and reliable than using args[0].
/+FilePath getExecFilePath()
{
	string file = new char[4*1024];
	int filenameLength;
	version (Win32)
		filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);
	else version(OSX)
	{
		filenameLength = file.length-1;
		_NSGetExecutablePath(file.ptr, &filenameLength);
	}
	else
		filenameLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);

	auto fp = new FilePath(file[0..filenameLength]);
	fp.native();
	return fp;
}+/
// Returns: the executable's full path as a string.
// Throws: Exception on OSX/POSIX when the OS call reports failure. On Win32 a
// failed GetModuleFileNameA (return value 0) still yields an empty string.
string getExec()
{
	auto file = new char[4*1024];
	size_t filenameLength;
	version (Win32)
		filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);
	else version(OSX)
	{
		// _NSGetExecutablePath receives the buffer capacity through a uint*
		// and does not report the path length on success, so the length is
		// measured by scanning for the NUL terminator it writes.
		uint bufSize = cast(uint)(file.length-1);

		// char.init is 0xFF, not 0; zero-fill so the scan below always stops.
		file[] = '\0';

		if(_NSGetExecutablePath(file.ptr, &bufSize) != 0)
			throw new Exception("Unable to determine the path of the running executable");

		while(filenameLength < file.length && file[filenameLength] != '\0')
			++filenameLength;
	}
	else
	{
		// readlink returns a negative value on failure; storing that directly
		// in a size_t would turn it into a huge slice bound.
		// NOTE(review): selfExeLink is not declared in this part of the file;
		// presumably it comes from semitwist.util.all - confirm.
		auto linkLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);
		if(linkLength < 0)
			throw new Exception("Unable to determine the path of the running executable");

		filenameLength = cast(size_t)linkLength;
	}

	//auto fp = new FilePath(file[0..filenameLength]);
	return to!string(file[0..filenameLength]);
//	return getExecFilePath().toString().trim();
}

/// Like getExec, but doesn't include the path.
string getExecName()
{
	return getExec().baseName();
//	return getExecFilePath().file().trim();
}

/// Like getExec, but only returns the path (including trailing path separator).
string getExecPath()
{
	// Take the directory part of the executable's full path, then append the
	// path separator so the result ends with one.
	auto execDir = getExec().dirName();
	return execDir ~ dirSep;
	//return getExecFilePath().path().trim();
}

// Unit tests for the byte-swapping helpers and utfConvert, registered through
// the project's unittestSemiTwistDLib/deferEnsure mixins.
mixin(unittestSemiTwistDLib(q{
	// byteSwap
	mixin(deferEnsure!(q{ byteSwapVal16(0x1234 ) }, q{ _ == 0x3412 }));
	mixin(deferEnsure!(q{ byteSwapVal32(0x1234_5678) }, q{ _ == 0x7856_3412 }));

	mixin(deferEnsure!(q{ byteSwap16(cast(immutable(ushort)[])[0x1234, 0x5678, 0x9ABC, 0xDEF0]) }, q{ _ == cast(ushort[])[0x3412, 0x7856, 0xBC9A, 0xF0DE] }));
	mixin(deferEnsure!(q{ byteSwap32(cast(immutable(uint)[] )[0x1234____5678, 0x9ABC____DEF0]) }, q{ _ == cast(uint[] )[0x7856_3412, 0xF0DE_BC9A] }));

	// utfConvert
	mixin(deferEnsure!(q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")) }, q{ _== ("AB\nCD"~"\r"~"\nEF") }));
	mixin(deferEnsure!(q{ utfConvert!string ("ABCDEF") }, q{ _== ("ABCDEF" ) }));
	mixin(deferEnsure!(q{ utfConvert!dstring("ABCDEF") }, q{ _== ("ABCDEF"d) }));
	//TODO: Check into the weird disappearing \r:
	//mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").escapeDDQS() }));
	//mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").length }));
	//mixin(traceVal!(q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")).escapeDDQS() }));
}));