####
# Syntax: str_get_regexp (in_string, reg_exp);
# Return value: String
# Parameters:
#    in_string: string to retrieve from
#    reg_exp: the regular expression to retrieve
# Description: Searches for reg_exp in the string in_string and returns the
#    matched text, or an empty string if there is no match.
####
function str_get_regexp (in instr, in regexp) {
	# Returns the first portion of instr matched by regexp, or "" when
	# nothing matches.  Exactly two arguments are required.
	extern RLENGTH, RSTART;	# set by match(): length and start of the match
	auto found = "";

	if (nargs() != 2)
		return (report_param_msg ("str_get_regexp"));

	# match() is non-zero only on success; cut out the matched span.
	if (match (instr, regexp))
		found = substr (instr, RSTART, RLENGTH);

	return (found);
}

####
# Syntax: str_replace (in_string, search_chars, replace_str);
# Return value: String
# Parameters:
#    in_string: string to be modified
#    search_chars: characters to search for.  This is treated as a group
#        of characters, not a string.  So "abc" will search for all occurrences
#        of the characters "a", "b" and "c"; they do not have to be in the
#        same sequence, or in proximity of each other.
#    replace_str: the string substituted for _each_ of the characters specified
#        in the search_chars parameter
# Description: Replaces all occurrences of the characters in search_chars with replace_str.
#    Each character in search_chars will be replaced by the whole replace_str.
####
public function str_replace (in str, in search_str, in replace_str) {
	# Splits str on the characters of search_str and glues the pieces back
	# together with replace_str between them.  Exactly three arguments are
	# required.
	auto pieces[], piece_total, result = "", idx = 1;

	if (nargs() != 3)
		return (report_param_msg ("str_replace"));

	piece_total = split (str, pieces, search_str);

	# Every piece except the last is followed by the replacement text.
	while (idx < piece_total) {
		result = result & pieces[idx] & replace_str;
		idx++;
	}

	# idx now refers to the final piece (or to piece 1 when split()
	# produced nothing); append it without a trailing replacement.
	result = result & pieces[idx];
	return result;
}

####
# Syntax: str_replace_regexp (in_string, search_string, replace_string);
# Return value: String
# Parameters:
#    in_string: string to be modified
#    search_string: regular expression to search for.  It is always treated as
#        a regular expression, so escape any special characters that should
#        match literally.
#    replace_string: string to replace each occurrence of search_string with.
#        If it starts with an exclamation mark ("!"), the "!" is dropped and
#        each "$x" in the remainder stands for the matched text.
# Description: Replaces all occurrences of the regular expression search_string with the replace_string.
#    Note that search_string is treated as a regular expression, so "." is not a literal period.
#    If replace_string starts with a "!", it can contain the special expression "$x", which
#    refers to the matched text (a la Perl).  So you can escape a list of characters
#    easily by writing:
#      str_replace_regexp (string, "[]^$[.*]", "!\\$x");
#    Example:
#      str_replace_regexp (string, "Hello", "Goodbye");
#    will replace all occurrences of "Hello" with "Goodbye".
####
public function str_replace_regexp (in str, in search_str, in replace_str) {
	# Replaces every match of the regular expression search_str in str.
	#   str:         string to be modified
	#   search_str:  regular expression to look for
	#   replace_str: replacement text; when it starts with "!", the "!" is
	#                dropped and each "$x" in the remainder is expanded to
	#                the text that was matched
	# Returns the modified string.  Exactly three arguments are required.
	extern RLENGTH, RSTART;	# set by match(): length and start of the match
	auto out_str = "", tmp_str = str, regexp_replace = FALSE;
	auto matched, tmpl, match_RLENGTH, match_RSTART;

	if (nargs() != 3)
		return (report_param_msg ("str_replace_regexp"));

	if (substr (replace_str, 1, 1) == "!") {
		regexp_replace = TRUE;
		replace_str = substr (replace_str, 2);
	}

	while (match (tmp_str, search_str)) {
		# Save the outer match position: the "$x" expansion below calls
		# match() again and overwrites RSTART/RLENGTH.
		match_RLENGTH = RLENGTH;
		match_RSTART = RSTART;

		# A zero-length match would never shorten tmp_str and the loop
		# would not terminate; stop and keep the remainder unchanged.
		if (match_RLENGTH < 1)
			break;

		out_str = out_str & substr (tmp_str, 1, match_RSTART - 1);
		if (regexp_replace) {
			# Expand "$x" inline instead of calling this function
			# recursively with the matched text as its replace_str: a
			# matched text starting with "!" would then be mistaken for
			# the template marker and lose its first character.
			matched = substr (tmp_str, match_RSTART, match_RLENGTH);
			tmpl = replace_str;
			while (match (tmpl, "\$x")) {
				out_str = out_str & substr (tmpl, 1, RSTART - 1) & matched;
				tmpl = substr (tmpl, RSTART + RLENGTH);
			}
			out_str = out_str & tmpl;
		} else {
			out_str = out_str & replace_str;
		}
		# Continue searching after the text just replaced.
		tmp_str = substr (tmp_str, match_RSTART + match_RLENGTH);
	}
	out_str = out_str & tmp_str;
	return out_str;
}


####
# Syntax: get_token (string, token_index_to_get[, separator]);
# Return value: the value of the token
# Parameters:
#    string: string to tokenize
#    token_index_to_get: the token index value, range of value, or information
#        to retrieve.  e.g.: "1", "2-5", "last", "count", "5-last".
#    separator: separator to be used to split up the string.  This can take
#        in multiple characters, and accepts regular expressions (use "!" as the
#        first character in the separator string)
# Description: Returns the value of the token string indexed, separated by the separator.
#    By default, the separator is "\t" (the tab character).
#    ** get_token can now handle regular expression in the separator parameter.
#       to use regular expression, precede the separator with a "!".
####
function get_token (in line, in tokenindex, in separator) {
	# Splits line into tokens and returns the requested token(s).
	#   line:       string to tokenize
	#   tokenindex: a single index ("3"), a range ("2-5", "5-last"), or one
	#               of the keywords "first", "last", "count"
	#   separator:  optional, defaults to "\t".  A separator longer than one
	#               character is matched as a whole; if it starts with "!"
	#               the remainder is used as a regular expression.
	# Returns the token count for "count"; otherwise the selected tokens,
	# re-joined with the text that separated them in line.
	auto i, tra[], tracount = 0, outstr = "", startind = 1, endind = "last", tmpra[];
	auto tmp_str, match_RLENGTH, match_RSTART;
	auto sepra[], use_matched_sep = FALSE, first_token = TRUE;
	extern RLENGTH, RSTART; # for regexp uses, stores the length of match and position of match

	# nargs must be called as a function; without the parentheses the
	# three-argument form was always rejected.
	if (nargs() == 2) separator = "\t";
	else if (nargs() != 3)
		return (report_param_msg ("get_token"));

	# Find out if a range is specified ("x-y")
	if (match (tokenindex, "-")) {
		split (tokenindex, tmpra, "-");

		startind = tmpra[1];
		endind = tmpra[2];
	} else {
		# just one item
		startind = endind = tokenindex;
	}

	# See if the separator string uses 1 char or more
	if (length (separator) > 1) {
		# if the first char is a "!", then this is a regexp.
		if (substr (separator, 1, 1) == "!") {
			separator = substr (separator, 2);
		} else {
			# literal separator: escape regexp special characters so that
			# match() treats them as plain text
			separator = str_replace_regexp (separator, "[]^$[.*]", "!\\$x");
		}
		# From here on separator is a pattern, not the literal text, so the
		# text of each matched separator is recorded for re-joining.
		use_matched_sep = TRUE;
		tmp_str = line;
		while (match (tmp_str, separator)) {
			match_RSTART = RSTART;
			match_RLENGTH = RLENGTH;
			# A zero-length match would never shorten tmp_str; stop
			# tokenizing rather than loop forever.
			if (match_RLENGTH < 1)
				break;
			tra[++tracount] = substr (tmp_str, 1, match_RSTART - 1);
			# sepra[n] is the text found between token n and token n+1
			sepra[tracount] = substr (tmp_str, match_RSTART, match_RLENGTH);
			tmp_str = substr (tmp_str, match_RSTART + match_RLENGTH);
		}
		tra[++tracount] = tmp_str;
	} else {
		tracount = split (line, tra, separator);
	}

	if (tolower(startind) == "count") return tracount;
	if (tolower(startind) == "first" || startind <= 1) startind = 1;
	if (tolower(startind) == "last") startind = tracount;
	if (tolower(endind) == "last") endind = tracount;

	for (i = startind; i <= endind; i++) {
		# A separator goes before every token except the first one.  A flag
		# is used rather than testing outstr != "", which dropped the
		# separator after leading empty tokens.
		if (first_token) {
			first_token = FALSE;
		} else {
			if (use_matched_sep) outstr = outstr & sepra[i - 1];
			else outstr = outstr & separator;
		}
		outstr = outstr & tra[i];
	}

	return outstr;
}
