string.d - The provided D code is a function implementation…

/core/string.d

http://github.com/wilkie/djehuty · D · 1140 lines · 906 code · 189 blank · 45 comment · 278 complexity · 2eb43493567fa1bbb9a0d1e00481290c MD5 · raw file

/*
 * string.d
 *
 * This file contains the native String class. A string class that wraps a
 * string that is represented in the native preferred unicode class.
 *
 * Author: Dave Wilkinson
 *
 */

module core.string;

// a class to encapsulate the expected string format for
// the underlying OS platform in use

import core.definitions;
import core.unicode;
import core.variant;

import data.iterable;

import io.console;

import math.currency;

public import core.string;

string toStrv(Variadic vars) {
	string ret = "";
	foreach(var; vars) {
		ret ~= var.toString();
	}
	return ret;
}

string toStr(...) {
	Variadic vars = new Variadic(_arguments, _argptr);

	return toStrv(vars);
}

// Standard string functions (for C)

// strlen
size_t strlen(char* chr) {
	size_t ret = 0;
	while(*chr++) {
		ret++;
	}
	return ret;
}

size_t strlen(wchar* chr) {
	size_t ret = 0;
	while(*chr++) {
		ret++;
	}
	return ret;
}

void strncpy(char* dest, char* src, int length) {
	while(((*dest++ = *src++) != 0) && --length){}
	*(--dest) = '\0';
}

void strncat(char* dest, char* src, int length) {
	while(*dest++){}
	strncpy(--dest,src,length);
}

int strcmp(char* a, char* b) {
	while((*a == *b) && *a++ && *b++){}
	return *a - *b;
}

int strncmp(char* a, char* b, int length) {
	while(--length && (*a == *b) && *a++ && *b++){}
	return *a - *b;
}

void strcpy(char* dest, char* src) {
	while((*dest++ = *src++) != 0){}
}

void strcat(char* dest, char* src) {
	while(*dest++){}
	strcpy(--dest,src);
}

void strupr(char* str) {
	while(*str) {
		if (*str >= 'a' || *str <= 'z') {
			*str -= 32;
		}
		str++;
	}
}

void strlwr(char* str) {
    while(*str) {
		if (*str >= 'A' || *str <= 'Z') {
			*str += 32;
		}
		str++;
	}
}

string trim(string chrs) {
	size_t idx_s, idx_e;

	idx_e = chrs.length;

	while (idx_s < chrs.length && (chrs[idx_s] == ' ' || chrs[idx_s] == '\t' || chrs[idx_s] == '\n' || chrs[idx_s] == '\r')) {
		idx_s++;
	}

	while (idx_e > 0 && (chrs[idx_e-1] == ' ' || chrs[idx_e-1] == '\t' || chrs[idx_e-1] == '\n' || chrs[idx_e-1] == '\r')) {
		idx_e--;
	}

	if (idx_s >= idx_e) {
		return "";
	}

	return chrs[idx_s..idx_e].dup;
}

string[] split(string input, string delims) {
	string[] retstring;
	size_t last;

	foreach(size_t i, char c; input) {
		foreach(char d; delims) {
			if (c == d) {
				retstring ~= input[last..i];
				last = i+1;
			}
		}
	}
	
	retstring ~= input[last..$];

	return retstring;
}

string[] split(string input, char delim) {
	string[] retstring;
	size_t last;

	foreach(size_t i, char c; input) {
		if (c == delim) {
			retstring ~= input[last..i];
			last = i+1;
		}
	}

	retstring ~= input[last..$];

	return retstring;
}

template _nextInt(T) {
	bool _nextInt(T)(string str, out T value) {
		int curpos;
		if (str.length == 0) {
			return false;
		}

		for(curpos=0; curpos<str.length; curpos++) {
			if (str[curpos] != ' ' &&
					str[curpos] != '\t' &&
					str[curpos] != '\r' &&
					str[curpos] != '\n') {

				break;
			}
		}

		bool negative = false;

		if (str[curpos] == '-') {
			negative = true;
			curpos++;
			if (curpos == str.length) { return false; }
		}

		if (str[curpos] < '0' ||
				str[curpos] > '9') {

			return false;
		}

		long tmpval = 0;

		for (;curpos<str.length;curpos++) {
			if (str[curpos] < '0' ||
					str[curpos] > '9') {

				break;
			}

			tmpval *= 10;
			tmpval += cast(long)(str[curpos] - '0');
		}

		if (negative) { tmpval = -tmpval; }

		value = cast(T)tmpval;

		return true;
	}
}

// Description: This function will return the next integer value found in the string.
bool nextInt(string str, out int value) {
	return _nextInt!(int)(str, value);
}

bool nextInt(string str, out uint value) {
	return _nextInt!(uint)(str, value);
}

bool nextInt(string str, out long value) {
	return _nextInt!(long)(str, value);
}

bool nextInt(string str, out ulong value) {
	return _nextInt!(ulong)(str, value);
}

bool nextInt(string str, out short value) {
	return _nextInt!(short)(str, value);
}

bool nextInt(string str, out ushort value) {
	return _nextInt!(ushort)(str, value);
}

// Description: Will build and return a String object representing a slice of the current String.
// start: The position to start from.
// len: The length of the slice.  Pass -1 to get the remaining string.
string substring(string str, int start, int len = -1) {
	uint[] _indices = Unicode.calcIndices(str);

	if (start >= _indices.length || len == 0) {
		return "";
	}

	if (len < 0) { len = -1; }

	if (len >= 0 && start + len >= _indices.length) {
		len = -1;
	}

	// subdivide

	if (len == -1) {
		start = _indices[start];
		string ret = "";
		ret = str[start..$].dup;
		return ret;
	}

	// this is the index for one character past the
	// end of the substring of the original string...hence, len is
	// now the exclusive end of the range to slice the array.
	len = _indices[start+len];

	start = _indices[start];

	string ret = "";
	ret = str[start..len].dup;
	return ret;
}

string replace(string str, dchar find, dchar replace) {
	string ret = str.dup;
	uint[] _indices = Unicode.calcIndices(str);

	for(int i = 0; i < _indices.length; i++) {
		dchar cmpChar = Unicode.toUtf32Char(ret[_indices[i]..$]);
		if (cmpChar == find) {
			dchar[1] chrs = [replace];
			ret = ret[0.._indices[i]] ~ Unicode.toUtf8(chrs) ~ ret[_indices[i+1]..$];
			_indices = Unicode.calcIndices(ret);
		}
	}

	return ret;
}

int findReverse(string source, string search, uint start = uint.max) {
	if (start == uint.max) {
		start = source.length;
	}

	if (search == "") {
		return -1;
	}

	uint[] _indices = Unicode.calcIndices(source);
	uint[] search_indices = Unicode.calcIndices(search);

	bool found;

	int o;
	foreach_reverse(i, aPos; _indices[0..start]) {
		found = true;
		o=i-1;
		foreach(bPos; search_indices) {
			o++;
			if(o >= _indices.length) {
				found = false;
				break;
			}

			dchar aChr, bChr;

			aChr = Unicode.toUtf32Char(source[_indices[o]..$]);
			bChr = Unicode.toUtf32Char(search[bPos..$]);

			if(aChr != bChr) {
				found = false;
				break;
			}
		}
		if (found) {
			return i;
		}
	}

	return -1;
}

int find(string source, string search, uint start = 0) {
	// look through string for term search
	// in some, hopefully later on, efficient manner

	uint[] _indices = Unicode.calcIndices(source);
	uint[] search_indices = Unicode.calcIndices(search);

	if (search == "") {
		return -1;
	}

	if (start >= _indices.length) {
		return -1;
	}

	bool found;

	int o;

	foreach (i, aPos; _indices[start..$]) {
		found = true;
		o=i-1+start;
		foreach (bPos; search_indices) {
			o++;
			if (o >= _indices.length) {
				found = false;
				break;
			}

			dchar aChr, bChr;

			aChr = Unicode.toUtf32Char(source[_indices[o]..$]);
			bChr = Unicode.toUtf32Char(search[bPos..$]);

			if (aChr != bChr) {
				found = false;
				break;
			}
		}
		if (found) {
			return i+start;
		}
	}

	return -1;
}

string times(string str, uint amount) {
	if (amount == 0) {
		return "";
	}

	string ret = "";
	for(int i = 0; i < amount; i++) {
		ret ~= str;
	}
	return ret;
}

string format(string format, ...) {
	Variadic vars = new Variadic(_arguments, _argptr);
	return formatv(format, vars);
}

string formatv(string format, Variadic vars) {
	string ret = "";
	string specifier = "";
	bool inFormat = false;
	bool intoFormat = false;
	foreach(chr; format) {
		if (intoFormat && chr != '{') {
			intoFormat = false;
			inFormat = true;
			specifier = "";
		}

		if (inFormat) {
			// look for format end
			if (chr == '}') {
				inFormat = false;

				int index = 0;
				int width = 0;
				int precision = 0;
				int base = 10;
				bool unsigned = false;
				long value;
				ulong uvalue;
				float fvalue;
				double dvalue;

				bool formatIndex = false;
				bool formatNumber = false;
				bool formatFloat = false;
				bool formatDouble = false;
				bool formatUpper = false;
				bool formatCurrency = false;

				if (specifier.nextInt(index)) {
					string newSpecifier = specifier.substring(toStr(index).length);
					if (newSpecifier.length > 0 && newSpecifier[0] != ':') {
						// Not an index
					}
					else {
						specifier = newSpecifier;
						formatIndex = true;
					}
				}

				// interpret format specifier
				if (specifier.length > 0) {
					if (specifier[0] == ':') {
						specifier = specifier[1..$];
					}
				}
				if (specifier.length == 0) {
					specifier = ":";
				}

				switch(specifier[0]) {
					case 'd':
					case 'D':	// Decimal
						base = 10;
						formatNumber = true;
						specifier[1..$].nextInt(width);
						break;
					case 'x':
					case 'X':	// Hexidecimal
						base = 16;
						unsigned = true;
						formatNumber = true;
						specifier[1..$].nextInt(width);
						break;
					case 'o':
					case 'O':	// Octal
						base = 8;
						unsigned = true;
						formatNumber = true;
						specifier[1..$].nextInt(width);
						break;
					case 'u':
					case 'U':	// Unsigned Integer
						base = 10;
						unsigned = true;
						formatNumber = true;
						break;
					case 'c':	// Currency
						base = 10;
						unsigned = false;
						formatNumber = true;
						formatCurrency = true;
						break;
					default:
						// Other specifier series
						// Parse them

						width = 0;
						precision = 0;
						bool gettingPrecision = false;
						foreach(c; specifier) {

							// Zero Placeholders
							if (c == '0') {
								if (gettingPrecision) {
									precision++;
								}
								else {
									width++;
								}
							}
							else if (c == '.') {
								if (gettingPrecision) {
									throw new Exception("Format Exception");
								}
								gettingPrecision = true;
							}
						}
						if (width > 0 || precision > 0) {
							formatNumber = true;
						}
						break;
				}

				specifier = specifier[0..1];

				// Pull an argument off of the stack
				Variant var;
				if (formatIndex) {
					if (index < vars.length) {
						var = vars[index];
					}
					else {
						// TODO: More descriptive and uniform exception?
						throw new Exception("Invalid Format String");
					}
				}
				else {
					var = vars.next();
				}

				Type type = var.type;
				if (formatNumber) {
					switch (type) {
						case Type.Byte:
						case Type.Ubyte:
						case Type.Short:
						case Type.Ushort:
						case Type.Int:
						case Type.Uint:
						case Type.Long:
							value = var.to!(long);
							uvalue = var.to!(ulong);
							break;
						case Type.Ulong:
							unsigned = true;
							uvalue = var.to!(ulong);
							break;
						case Type.Float:
							fvalue = var.to!(float);
							formatFloat = true;
							break;
						case Type.Double:
							dvalue = var.to!(double);
							formatDouble = true;
							break;
						default:
							break;
					}

					string result = "";
					if (formatCurrency) {
						if (formatFloat) {
							result ~= (new Currency(fvalue)).toString();
						}
						else if (formatDouble) {
							result ~= (new Currency(dvalue)).toString();
						}
						else {
							result ~= var.toString();
						}
					}
					else if (formatFloat || formatDouble) {
						string[] foo;
						if (formatFloat) {
							foo = pftoa(fvalue, base);
						}
						else {
							foo = pdtoa(dvalue, base);
						}
						result = foo[0];
						while (result.length < width) {
							result = "0" ~ result;
						}
						string dec = foo[1];
						while (dec.length < precision) {
							dec ~= "0";
						}
						result ~= "." ~ dec;
					}
					else if (formatDouble) {
						result = dtoa(dvalue, base);
					}
					else {
						if (unsigned) {
							result = utoa(uvalue, base);
						}
						else {
							result = itoa(value, base);
						}

						while (result.length < width) {
							result = "0" ~ result;
						}
					}
					if (specifier.uppercase() == specifier) {
						result = result.uppercase();
					}
					ret ~= result;
				}
				else {
					ret ~= var.toString();
				}
			}
			specifier ~= chr;
		}
		else {
			// Go into a format specifier
			if (chr == '{' && intoFormat == false) {
				intoFormat = true;
				continue;
			}
			intoFormat = false;
			ret ~= chr;
		}
	}
	return ret;
}

string lowercase(string str) {
	string ret = str.dup; 
	foreach(ref chr; ret) {
		if (chr >= 'A' && chr <= 'Z') {
			chr = cast(char)((cast(byte)chr) + 32);
		}
	}
	return ret;
}

string uppercase(string str) {
	string ret = str.dup; 
	foreach(ref chr; ret) {
		if (chr >= 'a' && chr <= 'z') {
			chr = cast(char)((cast(byte)chr) - 32);
		}
	}
	return ret;
}

string charAt(string str, uint idx) {
	uint[] _indices = Unicode.calcIndices(str);
	int start, end;
	if (_indices.length > idx) {
		start = _indices[idx];
	}
	else {
		return null;
	}
	if (_indices.length > idx+1) {
		end = _indices[idx+1];
	}
	else {
		end = str.length;
	}
	return str[start..end];
}

string insertAt(string str, string what, uint idx) {
	uint[] _indices = Unicode.calcIndices(str);
	int pos;
	if (_indices.length > idx) {
		pos = _indices[idx];
	}
	else if (idx == _indices.length) {
		pos = str.length;
	}
	else {
		return null;
	}

	return str[0..pos] ~ what ~ str[pos..$];
}

int utflen(string str) {
	uint[] _indices = Unicode.calcIndices(str);
	return _indices.length;
}

int toInt(string str) {
	int ret;
	if (str.nextInt(ret)) {
		return ret;
	}
	return 0;
}

private:
string itoa(long val, uint base = 10) {
	int intlen;
	long tmp = val;

    bool negative;

    if (tmp < 0) {
        negative = true;
        tmp = -tmp;
        intlen = 2;
    }
    else {
        negative = false;
        intlen = 1;
    }

    while (tmp >= base) {
        tmp /= base;
        intlen++;
    }

    //allocate

    string ret = new char[intlen];

    intlen--;

    if (negative) {
        tmp = -val;
    } else {
        tmp = val;
    }

    do {
    	uint off = cast(uint)(tmp % base);
    	char replace;
    	if (off < 10) {
    		replace = cast(char)('0' + off);
    	}
    	else if (off < 36) {
    		off -= 10;
    		replace = cast(char)('a' + off);
    	}
        ret[intlen] = replace;
        tmp /= base;
        intlen--;
    } while (tmp != 0);


    if (negative) {
        ret[intlen] = '-';
    }

    return ret;
}

string utoa(ulong val, uint base = 10) {
	int intlen;
	ulong tmp = val;

    intlen = 1;

    while (tmp >= base) {
        tmp /= base;
        intlen++;
    }

    //allocate
    tmp = val;

    string ret = new char[intlen];

    intlen--;

    do {
    	uint off = cast(uint)(tmp % base);
    	char replace;
    	if (off < 10) {
    		replace = cast(char)('0' + off);
    	}
    	else if (off < 36) {
    		off -= 10;
    		replace = cast(char)('a' + off);
    	}
        ret[intlen] = replace;
        tmp /= base;
        intlen--;
    } while (tmp != 0);

    return ret;
}

private union intFloat {
	int l;
	float f;
}

private union longDouble {
	long l;
	double f;
}

private union longReal {
	struct inner {
		short exp;
		long frac;
	}

	inner l;
	real f;
}

string ctoa(cfloat val, uint base = 10) {
	if (val is cfloat.infinity) {
		return "inf";
	}
	else if (val.re !<>= 0.0 && val.im !<>= 0.0) {
		return "nan";
	}

	return ftoa(val.re, base) ~ " + " ~ ftoa(val.im, base) ~ "i";
}

string ctoa(cdouble val, uint base = 10) {
	if (val is cdouble.infinity) {
		return "inf";
	}
	else if (val.re !<>= 0.0 && val.im !<>= 0.0) {
		return "nan";
	}

	return dtoa(val.re, base) ~ " + " ~ ftoa(val.im, base) ~ "i";
}

string ctoa(creal val, uint base = 10) {
	if (val is creal.infinity) {
		return "inf";
	}
	else if (val is creal.nan) {
		return "nan";
	}

	return rtoa(val.re, base) ~ " + " ~ ftoa(val.im, base) ~ "i";
}

string ftoa(float val, uint base = 10) {
	string[] foo = pftoa(val, base);

	string ret = foo[0];
	if (foo[1].length > 0) {
		ret ~= "." ~ foo[1];
	}
	return ret;
}

string[] pftoa(float val, uint base = 10) {
	if (val == float.infinity) {
		return ["inf",""];
	}
	else if (val !<>= 0.0) {
		return ["nan",""];
	}
	else if (val == 0.0) {
		return ["0",""];
	}

	long mantissa;
	long intPart;
	long fracPart;

	short exp;

	intFloat iF;
	iF.f = val;

	// Conform to the IEEE standard
	exp = ((iF.l >> 23) & 0xff) - 127;
	mantissa = (iF.l & 0x7fffff) | 0x800000;
	fracPart = 0;
	intPart = 0;

	if (exp >= 31) {
		return ["0",""];
	}
	else if (exp < -23) {
		return ["0",""];
	}
	else if (exp >= 23) {
		intPart = mantissa << (exp - 23);
	}
	else if (exp >= 0) {
		intPart = mantissa >> (23 - exp);
		fracPart = (mantissa << (exp + 1)) & 0xffffff;
	}
	else { // exp < 0
		fracPart = (mantissa & 0xffffff) >> (-(exp + 1));
	}

	string[] ret = ["",""];
	if (iF.l < 0) {
		ret[0] = "-";
	}

	ret[0] ~= itoa(intPart, base);

	for (uint k; k < 7; k++) {
		fracPart *= 10;
		ret[1] ~= cast(char)((fracPart >> 24) + '0');
		fracPart &= 0xffffff;
	}
	
	// round last digit
	bool roundUp = (ret[1][$-1] >= '5');
	ret[1] = ret[1][0..$-1];

	while (roundUp) {
		// Look for a completely empty float
		if (ret[0].length + ret[1].length == 0) {
			return ["0",""];
		}
		else if (ret[1].length > 0 && ret[1][$-1] == '9') {
			ret[1] = ret[1][0..$-1];
			continue;
		}
		ret[1][$-1]++;
		break;
	}

	// get rid of useless trailing zeroes
	foreach_reverse(uint i, chr; ret[1]) {
		if (chr != '0') {
			ret[1] = ret[1][0..i+1];
			break;
		}
	}

	return ret;
}

string dtoa(double val, uint base = 10) {
	string[] foo = pdtoa(val, base);

	string ret = foo[0];
	if (foo[1].length > 0) {
		ret ~= "." ~ foo[1];
	}
	
	return ret;
}

string[] pdtoa(double val, uint base = 10) {
	if (val is double.infinity) {
		return ["inf",""];
	}
	else if (val !<>= 0.0) {
		return ["nan",""];
	}
	else if (val == 0.0) {
		return ["0",""];
	}

	long mantissa;
	long intPart;
	long fracPart;

	long exp;

	longDouble iF;
	iF.f = val;

	// Conform to the IEEE standard
	exp = ((iF.l >> 52) & 0x7ff);
	if (exp == 0) {
		return ["0",""];
	}
	else if (exp == 0x7ff) {
		return ["inf",""];
	}
	exp -= 1023;

	mantissa = (iF.l & 0xfffffffffffff) | 0x10000000000000;
	fracPart = 0;
	intPart = 0;

	if (exp < -52) {
		return ["0",""];
	}
	else if (exp >= 52) {
		intPart = mantissa << (exp - 52);
	}
	else if (exp >= 0) {
		intPart = mantissa >> (52 - exp);
		fracPart = (mantissa << (exp + 1)) & 0x1fffffffffffff;
	}
	else { // exp < 0
		fracPart = (mantissa & 0x1fffffffffffff) >> (-(exp + 1));
	}

	string ret[] = ["", ""];
	if (iF.l < 0) {
		ret[0] = "-";
	}

	ret[0] ~= itoa(intPart, base);

	for (uint k; k < 7; k++) {
		fracPart *= 10;
		ret[1] ~= cast(char)((fracPart >> 53) + '0');
		fracPart &= 0x1fffffffffffff;
	}
	
	// round last digit
	bool roundUp = (ret[1][$-1] >= '5');
	ret[1] = ret[1][0..$-1];

	while (roundUp) {
		if (ret[0].length == 0 && ret[1].length == 0) {
			return ["0",""];
		}
		else if (ret[1][$-1] == '9') {
			ret[1] = ret[1][0..$-1];
			continue;
		}
		ret[1][$-1]++;
		break;
	}

	// get rid of useless zeroes (and point if necessary)
	foreach_reverse(uint i, chr; ret[1]) {
		if (chr != '0') {
			ret[1] = ret[1][0..i+1];
			break;
		}
	}

	return ret;
}

string rtoa(real val, uint base = 10) {
	static if (real.sizeof == 10) {
		// Support for 80-bit extended precision

		if (val is real.infinity) {
			return "inf";
		}
		else if (val !<>= 0.0) {
			return "nan";
		}
		else if (val == 0.0) {
			return "0";
		}

		long mantissa;
		long intPart;
		long fracPart;
	
		long exp;

		longReal iF;
		iF.f = val;

		// Conform to the IEEE standard
		exp = iF.l.exp & 0x7fff;
		if (exp == 0) {
			return "0";
		}
		else if (exp == 32767) {
			return "inf";
		}
		exp -= 16383;

		mantissa = iF.l.frac;
		fracPart = 0;
		intPart = 0;
	
		if (exp >= 31) {
			return "0";
		}
		else if (exp < -64) {
			return "0";
		}
		else if (exp >= 64) {
			intPart = mantissa << (exp - 64);
		}
		else if (exp >= 0) {
			intPart = mantissa >> (64 - exp);
			fracPart = mantissa << (exp + 1);
		}
		else { // exp < 0
			fracPart = mantissa >> (-(exp + 1));
		}

		string ret;
		if (iF.l.exp < 0) {
			ret = "-";
		}
	
		ret ~= itoa(intPart, base);
		ret ~= '.';

		for (uint k; k < 7; k++) {
			fracPart *= 10;
			ret ~= cast(char)((fracPart >> 64) + '0');
		}
		
		// round last digit
		bool roundUp = (ret[$-1] >= '5');
		ret = ret[0..$-1];
	
		while (roundUp) {
			if (ret.length == 0) {
				return "0";
			}
			else if (ret[$-1] == '.' || ret[$-1] == '9') {
				ret = ret[0..$-1];
				continue;
			}
			ret[$-1]++;
			break;
		}
	
		// get rid of useless zeroes (and point if necessary)
		foreach_reverse(uint i, chr; ret) {
			if (chr != '0' && chr != '.') {
				ret = ret[0..i+1];
				break;
			}
			else if (chr == '.') {
				ret = ret[0..i];
				break;
			}
		}

		return ret;
	}
	else {
		return ftoa(cast(double)val, base);
	}
}
Summary ✨

The provided D code is a function implementation for converting real numbers to strings with a specified base (defaulting to decimal). It handles various edge cases, such as infinity and NaN values, and rounds the result to prevent trailing zeros. The output string representation follows the standard format for the given base.
Alerts (2)

Complexity hotspot; line 113 (total complexity: 9)
113
Complexity hotspot; line 117 (total complexity: 9)
117