GMLscripts.com

Discuss and collaborate on GML scripts
Invert

You are not logged in.

#1 2021-05-25 06:47:03

maras
Member
Registered: 2021-04-25
Posts: 22
Website

string_match - a RegEx like script

Not as powerful as RegEx, but good enough for some basic stuff.
The tutorial is below the code.

/// string_match(str, match, show_err<optional>)
//
// checks if a string is formatted correctly
//
// returns
//     true/false    (if show_err = false)
//     true/string   (if show_err = true)
//
//     str             string to check, string
//     match           match format, string
//     show_err        show where the check failed (optional), bool
//
//    made for GMS2
//
/// GMLscripts.com/license

function string_match(str, match, show_err) {

	// Checks whether `str` satisfies the format described by `match`.
	//
	// Match format, as implemented below:
	//   [ ... ]     a condition group, applied to the next segment of `str`
	//   $ ... $     a literal that must occur in `str`; it separates segments
	// Inside a condition group:
	//   <|abc0-9|>  every character of the segment must be listed / in a range
	//   <!|abc|>    no character of the segment may be listed / in a range
	//   L(min-max)  segment length must be in range (either bound may be omitted)
	//   :(P==<|..|>) / :(P!=<|..|>)
	//               characters at positions P must (or must not) be in the set;
	//               P is a comma separated list of: a number, n (last char),
	//               n-k (k = one digit), or <|chars|>+k / <|chars|>-k
	//               (k characters after / before every occurrence of chars)
	//
	// Returns true on success. On failure returns false, or a descriptive
	// string when show_err is true.
	//
	// The helpers are local function values and a few are repeated inside the
	// helper that uses them, because a GML function literal does not capture
	// the local variables of the function that encloses it.

	// show_err is optional: an omitted argument arrives as undefined.
	if (is_undefined(show_err)) show_err = false;

	if (str == "" || match == "") {
		if (show_err) return "str_is_empty";
		return false;
	}

	// Runs every condition found in cond_str against one segment of the string.
	// Returns true, or an error string (false for a failed length check is
	// converted to an error string here as well).
	var __compare_conditions_with_string = function(cond_str, str) {

		// Evaluates one ":( positions ==|!= <|chars|> )" condition.
		// cond_str is the text between ":(" and the closing ")".
		var __check_custom_condition = function(cond_str, str) {

			// Returns the text between the (skip+1)-th occurrence of substr1 and
			// the next occurrence of substr2. Without a closing substr2 the rest
			// of the string is returned; without substr1 the result is "".
			var __string_get_between = function(str, substr1, substr2, skip) {

				var first_found = false;
				var str_between = "";
				var times_found = 0;

				var total_len = string_length(str);
				var substr1_length = string_length(substr1);
				var substr2_length = string_length(substr2);

				for (var i = 1; i <= total_len; i++) {
					if (!first_found) {
						// Compare the whole delimiter, not just its first character.
						if (string_copy(str, i, substr1_length) == substr1) {
							if (is_undefined(skip) || times_found++ == skip) {
								first_found = true;
								i += substr1_length - 1; // step over the rest of the delimiter
							}
						}
					}
					else {
						if (string_copy(str, i, substr2_length) == substr2) return str_between;
						str_between += string_char_at(str, i);
					}
				}

				return str_between;
			};

			// Checks every character of str against char_str, a list of single
			// characters and "a-z" style ranges (range ends must be letters/digits).
			// allow == true : returns the first character NOT covered by char_str.
			// allow == false: returns the first character that IS covered.
			// Returns true when no offending character exists.
			var __check_chars_and_char_ranges = function(char_str, str, allow) {

				var ranges = []; // flat list: [lo0, hi0, lo1, hi1, ...] as char codes
				var chars = "";
				var char_str_len = string_length(char_str);

				for (var i = 1; i <= char_str_len; i++) {
					var ch = string_char_at(char_str, i);
					var ch1 = string_char_at(char_str, i + 1);
					var ch2 = string_char_at(char_str, i + 2);

					if (ch1 == "-" && ch2 != "" && string_lettersdigits(ch) == ch && string_lettersdigits(ch2) == ch2) {
						var arr_l = array_length(ranges);
						ranges[arr_l] = ord(ch);
						ranges[arr_l + 1] = ord(ch2);
						i += 2; // consume "-" and the upper bound
					}
					else chars += ch;
				}

				var ranges_len = array_length(ranges);
				var chars_len = string_length(chars);
				var str_len = string_length(str);

				for (var i = 1; i <= str_len; i++) {
					var this_ord = string_ord_at(str, i);
					var in_range = false;
					var in_chars = false;

					for (var i2 = 1; i2 <= chars_len; i2++) {
						in_chars = string_ord_at(chars, i2) == this_ord;
						if (in_chars) { if (allow) break; else return chr(this_ord); }
					}

					for (var r = 0; r < ranges_len; r += 2) {
						in_range = clamp(this_ord, ranges[r], ranges[r + 1]) == this_ord;
						if (in_range) { if (allow) break; else return chr(this_ord); }
					}

					if (allow && !in_range && !in_chars) return chr(this_ord);
				}

				return true;
			};

			// Splits str on substr. A trailing empty piece is not included.
			var __string_split = function(str, substr) {

				var sub_len = string_length(substr);
				var n = string_count(substr, str);
				var arr = [];

				if (n == 0) return [str];

				for (var i = 0; i < n; i++) {
					var p = string_pos(substr, str) - 1;
					arr[i] = string_copy(str, 1, p);
					str = string_delete(str, 1, p + sub_len);
				}
				if (str != "") arr[i] = str;

				return arr;
			};

			var separator = string_pos("==", cond_str) != 0 ? "==" : "!=";
			var split = __string_split(cond_str, separator);

			// Without a right-hand side there is nothing to compare with; report
			// it instead of reading split[1] out of range.
			if (array_length(split) < 2) return "invalid_condition: :(" + cond_str + ")";

			var first_part = string_pos(",", split[0]) != 0 ? __string_split(split[0], ",") : [split[0]];
			var second_part = split[1];

			// A "<|...|>" position may itself contain commas and has then been cut
			// apart by the split above. Re-join such an entry with the following
			// pieces up to the one holding its closing "|>". Entries that are
			// already closed are left alone so that the next position is kept.
			var first_part_fixed = [];
			var part_count = array_length(first_part);
			for (var i = 0; i < part_count; i++) {
				var s = first_part[i];
				if (string_char_at(s, 1) == "<" && string_pos("|>", s) == 0) {
					var close_at = -1;
					for (var j = i + 1; j < part_count; j++) {
						if (string_pos("|>", first_part[j]) != 0) { close_at = j; break; }
					}
					if (close_at != -1) {
						for (var j = i + 1; j <= close_at; j++) s += "," + first_part[j];
						i = close_at;
					}
				}
				first_part_fixed[array_length(first_part_fixed)] = s;
			}

			// Translate every entry into either an absolute position (a real) or
			// a relative one: [characters to look for, "+" or "-", offset text].
			var positions = [];
			var fixed_count = array_length(first_part_fixed);
			for (var i = 0; i < fixed_count; i++) {
				var s = first_part_fixed[i];
				var firstchar = string_char_at(s, 1);

				if (firstchar == "<") {
					// Scan from the end for the sign; what follows it is the offset.
					var offset_str = "";
					for (var i2 = string_length(s); i2 > 0; i2--) {
						var ch = string_char_at(s, i2);
						if (ch == "+" || ch == "-") {
							var btw = __string_get_between(s, "<|", "|>", 0);

							// Expand "a-c" style ranges into the explicit characters.
							var _chars = "";
							var btw_len = string_length(btw);
							for (var _i = 1; _i <= btw_len; _i++) {
								var _ch = string_char_at(btw, _i);
								var _ch1 = string_char_at(btw, _i + 1);
								var _ch2 = string_char_at(btw, _i + 2);

								if (_ch1 == "-" && _ch2 != "" && string_lettersdigits(_ch) == _ch && string_lettersdigits(_ch2) == _ch2) {
									_i += 2;
									var ord1 = ord(_ch);
									var ord2 = ord(_ch2);
									for (var rang = ord1; rang <= ord2; rang++) { _chars += chr(rang); }
								}
								else _chars += _ch;
							}

							positions[array_length(positions)] = [_chars, ch, offset_str];
							break;
						}
						offset_str = ch + offset_str;
					}
				}
				else if (firstchar == "n") {
					// "n" is the last character, "n-k" the k-th character before it.
					var ch1 = string_char_at(s, 2);
					var ch2 = string_char_at(s, 3);
					if (ch1 == "-" && ch2 != "" && string_digits(ch2) == ch2) {
						positions[array_length(positions)] = string_length(str) - real(ch2);
					}
					else positions[array_length(positions)] = string_length(str);
				}
				else if (s != "" && string_digits(s) == s) positions[array_length(positions)] = real(s);
			}

			var comparing_with = __string_get_between(second_part, "<|", "|>", 0);
			var must_match = separator == "==";
			var str_len = string_length(str);

			var pos_count = array_length(positions);
			for (var i = 0; i < pos_count; i++) {
				var p = positions[i];
				if (is_real(p)) {
					var ch_at = string_char_at(str, p);
					var result = __check_chars_and_char_ranges(comparing_with, ch_at, must_match);
					if (result != true) return "forbidden_char: '"+result+"' in: '"+str+"' at_pos: '"+string(p)+"' condition: :("+cond_str+")";
				}
				else {
					// A relative position needs a purely numeric offset; anything
					// else (e.g. the "-" of a range mistaken for a sign) is ignored.
					if (p[2] == "" || string_digits(p[2]) != p[2]) continue;
					var offset = real(p[2]);

					var chars = p[0];
					var chars_len = string_length(chars);
					for (var i2 = 1; i2 <= chars_len; i2++) {
						var curr_chr = string_char_at(chars, i2);
						for (var i3 = 1; i3 <= str_len; i3++) {
							if (curr_chr != string_char_at(str, i3)) continue;

							var pos = p[1] == "+" ? i3 + offset : i3 - offset;
							if (pos > 0 && pos < str_len + 1) {
								var result = __check_chars_and_char_ranges(comparing_with, string_char_at(str, pos), must_match);
								if (result != true) return "forbidden_char: '"+result+"' in: '"+str+"' at_pos: '"+string(pos)+"' condition: :("+cond_str+")";
							}
						}
					}
				}
			}

			return true;
		};

		// Same contract as the helper of the same name inside
		// __check_custom_condition: returns true, or the first offending
		// character of str (not allowed when allow == true, forbidden otherwise).
		var __check_chars_and_char_ranges = function(char_str, str, allow) {

			var ranges = []; // flat list: [lo0, hi0, lo1, hi1, ...] as char codes
			var chars = "";
			var char_str_len = string_length(char_str);

			for (var i = 1; i <= char_str_len; i++) {
				var ch = string_char_at(char_str, i);
				var ch1 = string_char_at(char_str, i + 1);
				var ch2 = string_char_at(char_str, i + 2);

				if (ch1 == "-" && ch2 != "" && string_lettersdigits(ch) == ch && string_lettersdigits(ch2) == ch2) {
					var arr_l = array_length(ranges);
					ranges[arr_l] = ord(ch);
					ranges[arr_l + 1] = ord(ch2);
					i += 2; // consume "-" and the upper bound
				}
				else chars += ch;
			}

			var ranges_len = array_length(ranges);
			var chars_len = string_length(chars);
			var str_len = string_length(str);

			for (var i = 1; i <= str_len; i++) {
				var this_ord = string_ord_at(str, i);
				var in_range = false;
				var in_chars = false;

				for (var i2 = 1; i2 <= chars_len; i2++) {
					in_chars = string_ord_at(chars, i2) == this_ord;
					if (in_chars) { if (allow) break; else return chr(this_ord); }
				}

				for (var r = 0; r < ranges_len; r += 2) {
					in_range = clamp(this_ord, ranges[r], ranges[r + 1]) == this_ord;
					if (in_range) { if (allow) break; else return chr(this_ord); }
				}

				if (allow && !in_range && !in_chars) return chr(this_ord);
			}

			return true;
		};

		// Checks the length of str against range, written as "(min-max)".
		// A missing or zero bound is treated as "no limit on that side".
		var __check_str_length_is_in_range = function(range, str) {

			// Returns the text between the (skip+1)-th occurrence of substr1 and
			// the next occurrence of substr2 (or the end of the string).
			var __string_get_between = function(str, substr1, substr2, skip) {

				var first_found = false;
				var str_between = "";
				var times_found = 0;

				var total_len = string_length(str);
				var substr1_length = string_length(substr1);
				var substr2_length = string_length(substr2);

				for (var i = 1; i <= total_len; i++) {
					if (!first_found) {
						if (string_copy(str, i, substr1_length) == substr1) {
							if (is_undefined(skip) || times_found++ == skip) {
								first_found = true;
								i += substr1_length - 1;
							}
						}
					}
					else {
						if (string_copy(str, i, substr2_length) == substr2) return str_between;
						str_between += string_char_at(str, i);
					}
				}

				return str_between;
			};

			// Do not hand an empty string to real(); an absent bound counts as 0.
			var min_digits = string_digits(__string_get_between(range, "(", "-", 0));
			var max_digits = string_digits(__string_get_between(range, "-", ")", 0));
			var val_min = (min_digits == "") ? 0 : real(min_digits);
			var val_max = (max_digits == "") ? 0 : real(max_digits);
			var len = string_length(str);

			if (val_min != 0 && val_max != 0) return clamp(len, val_min, val_max) == len;
			if (val_max != 0) return len <= val_max;
			return len >= val_min;
		};

		var cond_str_len = string_length(cond_str);
		var str_btw = ""; // text collected for the condition currently being read

		var getting_allowed_chars = false;      // inside <| ... |>
		var getting_not_allowed_chars = false;  // inside <!| ... |>
		var getting_length_range = false;       // inside L( ... )
		var getting_custom_condition = false;   // inside :( ... )
		var skipping_string = false;            // inside a <|..|> nested in :( ... )

		for (var i = 1; i <= cond_str_len; i++) {
			var ch = string_char_at(cond_str, i);
			var ch1 = string_char_at(cond_str, i + 1);
			var ch2 = string_char_at(cond_str, i + 2);

			// Evaluated before the switch so that an opening/closing token is
			// judged by the state that was active when it was reached.
			var is_collecting_str = getting_not_allowed_chars || getting_allowed_chars || getting_length_range || getting_custom_condition;

			switch (ch) {
				case "<":
					if (!is_collecting_str) {
						if (ch1 == "!" && ch2 == "|") {
							getting_not_allowed_chars = true;
							str_btw = "";
							i += 2;
							continue;
						}
						else if (ch1 == "|") {
							getting_allowed_chars = true;
							str_btw = "";
							i++;
							continue;
						}
					}

					// A ")" inside the character list must not end the custom condition.
					if (ch1 == "|" && getting_custom_condition) skipping_string = true;
				break;

				case "|":
					if (getting_allowed_chars && ch1 == ">") {
						getting_allowed_chars = false;
						var result = __check_chars_and_char_ranges(str_btw, str, true);
						if (result != true) return "forbidden_char: '"+result+"' in: '"+str+"' condition: '<|"+str_btw+"|>'";
						i++;
						continue;
					}
					else if (getting_not_allowed_chars && ch1 == ">") {
						getting_not_allowed_chars = false;
						var result = __check_chars_and_char_ranges(str_btw, str, false);
						if (result != true) return "forbidden_char: '"+result+"' in: '"+str+"' condition: '<!|"+str_btw+"|>'";
						i++;
						continue;
					}

					if (ch1 == ">" && getting_custom_condition) skipping_string = false;
				break;

				case "L":
					if (!is_collecting_str && ch1 == "(") {
						getting_length_range = true;
						str_btw = "";
						continue; // the "(" itself is collected on the next pass
					}
				break;

				case ")":
					if (getting_length_range) {
						getting_length_range = false;
						var result = __check_str_length_is_in_range(str_btw + ")", str);
						if (result != true) return "str_length_not_in_range: '"+str+"'("+string(string_length(str))+") condition: L'"+str_btw+")'";
						continue;
					}
					else if (getting_custom_condition && !skipping_string) {
						getting_custom_condition = false;
						var result = __check_custom_condition(str_btw, str);
						if (result != true) return result;
						continue;
					}
				break;

				case ":":
					if (!is_collecting_str && ch1 == "(") {
						getting_custom_condition = true;
						str_btw = "";
						i++;
						continue;
					}
				break;
			}

			if (is_collecting_str) str_btw += ch;
		}

		return true;
	};

	// Splits the match format into its parts and cuts str at the literals.
	// Returns [m_arr, str_arr, literals_found]:
	//   m_arr           condition groups as one-element arrays, literals as strings
	//   str_arr         the non-empty segments of str between the literals
	//   literals_found  false when some literal does not occur in str
	var __divide_match_and_strings = function(m, str) {

		var m_len = string_length(m);
		var m_arr = [];
		var is_condition = false;
		var is_str = false;
		var str_btw = "";

		// SEPARATE MATCH ARRAYS AND STRINGS
		for (var i = 1; i <= m_len; i++) {
			var ch = string_char_at(m, i);
			var ch1 = string_char_at(m, i + 1);

			switch (ch) {
				case "[":
					if (!is_condition && !is_str) {
						is_condition = true;
						str_btw = "";
						continue;
					}
				break;
				case "]":
					// A group only ends where a literal starts or the format ends,
					// so "]" may appear inside the group itself.
					if (is_condition && (ch1 == "$" || ch1 == "")) {
						is_condition = false;
						m_arr[array_length(m_arr)] = [str_btw];
						continue;
					}
				break;
				case "$":
					if (!is_condition && !is_str) {
						is_str = true;
						str_btw = "";
						continue;
					}
					else if (is_str && (ch1 == "[" || ch1 == "")) {
						is_str = false;
						m_arr[array_length(m_arr)] = str_btw;
						continue;
					}
				break;
			}

			if (is_condition || is_str) str_btw += ch;
		}

		// SPLIT STRINGS
		var m_arr_len = array_length(m_arr);
		var str_arr = [];
		var literals_found = true;

		for (var i = 0; i < m_arr_len; i++) {
			var c = m_arr[i];
			if (is_array(c)) continue;
			if (c == "") continue; // an empty literal separates nothing

			var found_at = string_pos(c, str);
			if (found_at == 0) {
				// The literal is required by the format; without it the string
				// cannot match, and cutting at position -1 would mangle it.
				literals_found = false;
				break;
			}

			var p = found_at - 1;
			var copy = string_copy(str, 1, p);

			if (copy != "") str_arr[array_length(str_arr)] = copy;
			str = string_delete(str, 1, p + string_length(c));
		}
		if (str != "") str_arr[array_length(str_arr)] = str;

		return [m_arr, str_arr, literals_found];
	};

	var divided = __divide_match_and_strings(match, str);
	var match_arr = divided[0];
	var str_arr = divided[1];

	if (array_length(match_arr) == 0 || array_length(str_arr) == 0) {
		if (show_err) return "str_is_empty";
		return false;
	}

	if (!divided[2]) {
		if (show_err) return "string_not_complete";
		return false;
	}

	var m_len = array_length(match_arr);
	var i2 = 0; // index of the next unchecked segment in str_arr

	for (var i = 0; i < m_len; i++) {
		var m = match_arr[i];
		if (!is_array(m)) continue; // literals were consumed while splitting

		if (array_length(m) == 0) {
			if (show_err) return "str_is_empty";
			return false;
		}

		if (i2 >= array_length(str_arr) || str_arr[i2] == "") {
			if (show_err) return "string_not_complete";
			return false;
		}

		var result = __compare_conditions_with_string(m[0], str_arr[i2++]);
		if (result != true) {
			if (show_err) return result;
			return false;
		}
	}

	return true;
}

txFF8nc.png

Last edited by maras (2021-06-29 15:51:00)

Offline

#2 2021-05-25 13:53:02

xot
Administrator
Registered: 2007-08-18
Posts: 1,238

Re: string_match - a RegEx like script

Thanks for the submission, this looks very impressive. It's rather complex and will need some testing and maybe some structural changes to better accommodate this library of scripts. I'm not happy with so many helper functions in the global scope. I want to look into making them method functions inside the primary function. It's not something I've dealt with up to now.


Abusing forum power since 1986.

Offline

#3 2021-05-25 15:40:50

maras
Member
Registered: 2021-04-25
Posts: 22
Website

Re: string_match - a RegEx like script

I actually hesitated a bit when I was submitting this script because of the length and complexity

xot wrote:

I'm not happy with so many helper functions in the global scope.

So... I rewrote all the functions and now they are all local
I'm new to GMS2, so I didn't even know that local functions are now a thing (finally, tbh).

Last edited by maras (2021-05-25 15:45:46)

Offline

Board footer

Powered by FluxBB