GMLscripts.com

Discuss and collaborate on GML scripts
Invert

You are not logged in.

#1 2016-05-01 15:53:33

Juju
Member
Registered: 2015-10-01
Posts: 45

csv_to_grid

GMS2 VERSION

Expand
/// @desc csv_to_grid
/// @param file
/// @param [force_strings]
/// @param [cell_delimiter]
/// @param [string_delimiter]
/// @param [mac_newline]
//  
//  Loads a CSV file into a new ds_grid that is sized to fit the data.
//  
//  CAUTION: Please ensure your files are in UTF-8 encoding.
//  
//  You may pass <undefined> to use the default value for optional arguments.
//  arg0   string   Filename for the source UTF-8 CSV file
//  arg1   bool     Whether to force all cells to be a string. Defaults to false
//  arg2   string   The delimiter used to separate cells. Defaults to a comma
//  arg3   string   The delimiter used to define strings in the CSV file. Defaults to a double-quote
//  arg4   bool     Newline compatibility mode for Mac (0A). Defaults to Windows standard newline (0D,0A)
//  
//  Unless strings are forced, an unquoted cell is stored as a real when it is made up only of
//  digits, an optional leading minus sign and at most one decimal point. An empty unquoted cell
//  that is closed by a cell delimiter or newline is stored as 0. Every other cell is a string.
//  The two-character sequence \n inside a cell is stored as chr(13).
//  
//  returns: The new ds_grid (the caller should destroy it when finished with it), or <undefined>
//           if the arguments are invalid or the file could not be loaded.
//  
//  (c) Juju Adams 26th May 2017
//  @jujuadams

//Handle arguments
if ( argument_count < 1 ) or ( argument_count > 5 ) {
	show_error( "Incorrect number of arguments (" + string( argument_count ) + ")", false );
	return undefined;
}

var _filename         = argument[0];
var _force_strings    = false;
var _cell_delimiter   = chr(44); //comma
var _string_delimiter = chr(34); //double-quote
var _newline_alt      = false;

if ( argument_count >= 2 ) and ( !is_undefined( argument[1] ) ) _force_strings    = argument[1];
if ( argument_count >= 3 ) and ( !is_undefined( argument[2] ) ) _cell_delimiter   = argument[2];
if ( argument_count >= 4 ) and ( !is_undefined( argument[3] ) ) _string_delimiter = argument[3];
if ( argument_count >= 5 ) and ( !is_undefined( argument[4] ) ) _newline_alt      = argument[4];

//Check for silliness...
if ( string_length( _cell_delimiter ) != 1 ) or ( string_length( _string_delimiter ) != 1 ) {
	show_error( "Delimiters must be one character", false );
	return undefined;
}

//More variables...
var _cell_delimiter_ord   = ord( _cell_delimiter   );
var _string_delimiter_ord = ord( _string_delimiter );

var _sheet_width  = 0; //Number of cells written so far on the current row
var _sheet_height = 1; //Number of rows started so far
var _max_width    = 0; //Widest row seen so far

var _prev_val   = 0;
var _val        = 0;
var _str        = "";
var _in_string  = false;
var _is_decimal = !_force_strings;
var _has_digit  = false; //Whether the current cell contains at least one digit
var _has_point  = false; //Whether the current cell already contains a decimal point
var _newline    = false; //Declared here because it is read after the loop, even if the loop never sets it

//Load CSV file as a buffer
var _buffer = buffer_load( _filename );
if ( _buffer < 0 ) {
	show_error( "Could not load file " + string( _filename ), false );
	return undefined;
}
var _size = buffer_get_size( _buffer );

//Only create the grid once the file is loaded so a failed load cannot leak it
var _grid = ds_grid_create( 1, 1 ); _grid[# 0, 0 ] = "";

//Handle byte order marks from some UTF-8 encoders (EF BB BF at the start of the file)
//The bytes are peeked, and only when the file is long enough, so tiny files are never over-read
var _has_bom = false;
if ( _size >= 3 ) {
	if ( buffer_peek( _buffer, 0, buffer_u8 ) == 239 ) {
		if ( buffer_peek( _buffer, 1, buffer_u8 ) == 187 ) {
			if ( buffer_peek( _buffer, 2, buffer_u8 ) == 191 ) _has_bom = true;
		}
	}
}

if ( _has_bom ) {
	buffer_seek( _buffer, buffer_seek_start, 3 );
	_size -= 3;
} else {
	show_debug_message( "CAUTION: csv_to_grid: " + _filename + ": CSV file might not be UTF-8 encoded (no BOM)" );
	buffer_seek( _buffer, buffer_seek_start, 0 );
}

//Iterate over the buffer. _i counts bytes consumed after the BOM, so multi-byte characters advance it further
for( var _i = 0; _i < _size; _i++ ) {

	_prev_val = _val;
	_val = buffer_read( _buffer, buffer_u8 );

	//Handle UTF-8 encoding
	if ( ( _val & 224 ) == 192 ) { //two-byte

		_val  = (                              _val & 31 ) <<  6;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 );
		_i++;

	} else if ( ( _val & 240 ) == 224 ) { //three-byte

		_val  = (                              _val & 15 ) << 12;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 ) <<  6;
		_val +=   buffer_read( _buffer, buffer_u8 ) & 63;
		_i += 2;

	} else if ( ( _val & 248 ) == 240 ) { //four-byte

		_val  = (                              _val &  7 ) << 18;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 ) << 12;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 ) <<  6;
		_val +=   buffer_read( _buffer, buffer_u8 ) & 63;
		_i += 3;

	}

	//If we've found a string delimiter
	if ( _val == _string_delimiter_ord ) {

		//This definitely isn't a decimal number!
		_is_decimal = false;

		//If we're in a string...
		if ( _in_string ) {

			//If the next character is a string delimiter itself, skip this character
			//(only peek when there is a next byte, so the last byte of the file is never over-read)
			if ( _i + 1 < _size ) {
				if ( buffer_peek( _buffer, buffer_tell( _buffer ), buffer_u8 ) == _string_delimiter_ord ) continue;
			}

			//If the previous character is a string delimiter itself, add the string delimiter to the working string
			if ( _prev_val == _string_delimiter_ord ) {
				_str += _string_delimiter;
				continue;
			}

		}

		//Toggle "we're in a string" behaviour
		_in_string = !_in_string;
		continue;

	}

	if ( _newline_alt ) {
		_newline = ( _val == 10 );
	} else {
		_newline = ( _prev_val == 13 ) and ( _val == 10 );

		//If we've found a newline and we're in a string, skip over the chr(10) character
		//(the chr(13) before it has already been added to the working string)
		if ( _in_string ) and ( _newline ) continue;
	}

	//If we've found a new cell
	if ( ( _val == _cell_delimiter_ord ) or ( _newline ) ) and ( !_in_string ) {

		_sheet_width++;

		//If this cell is now longer than the maximum width of the grid, expand the grid
		if ( _sheet_width > _max_width ) {

			_max_width = _sheet_width;
			ds_grid_resize( _grid, _max_width, _sheet_height );

			//Clear cells vertically above to overwrite the default 0-value
			if ( _sheet_height >= 2 ) ds_grid_set_region( _grid, _max_width-1, 0, _max_width-1, _sheet_height-2, "" );

		}

		//Write the working string to a grid cell
		//A cell such as "-" or "." has no digits, so it is left as a string rather than passed to real()
		if ( _is_decimal ) {
			if ( _str == "" ) {
				_str = 0;
			} else if ( _has_digit ) {
				_str = real( _str );
			}
		}

		_grid[# _sheet_width-1, _sheet_height-1 ] = _str;

		_str = "";
		_in_string = false;
		_is_decimal = !_force_strings;
		_has_digit = false;
		_has_point = false;

		//A newline outside of a string triggers a new line... unsurprisingly
		if ( _newline ) {

			//Clear cells horizontally to overwrite the default 0-value
			if ( _sheet_width < _max_width ) ds_grid_set_region( _grid, _sheet_width, _sheet_height-1, _max_width-1, _sheet_height-1, "" );

			_sheet_width = 0;
			_sheet_height++;
			ds_grid_resize( _grid, _max_width, _sheet_height );
		}

		continue;

	}

	//Check if we've read a "\n" dual-character
	//The backslash is already in the working string, so it is swapped for a chr(13)
	if ( _prev_val == 92 ) and ( _val == 110 ) {
		_str = string_delete( _str, string_length( _str ), 1 ) + chr(13);
		continue;
	}

	//No newlines should appear outside of a string delimited cell
	if ( ( _val == 10 ) or ( _val == 13 ) ) and ( !_in_string ) continue;

	//Check if this character still fits the shape of a decimal number
	if ( _val == 45 ) { //minus sign, only valid as the first character of the cell
		if ( _str != "" ) _is_decimal = false;
	} else if ( _val == 46 ) { //decimal point, only valid once per cell
		if ( _has_point ) _is_decimal = false;
		_has_point = true;
	} else if ( _val >= 48 ) and ( _val <= 57 ) { //digit
		_has_digit = true;
	} else {
		_is_decimal = false;
	}

	//Finally add this character to the working string!
	_str += chr( _val );

}

//Catch hanging work string on end-of-file
if ( _str != "" ) {

	_sheet_width++;

	if ( _sheet_width > _max_width ) {
		_max_width = _sheet_width;
		ds_grid_resize( _grid, _max_width, _sheet_height );
		if ( _sheet_height >= 2 ) ds_grid_set_region( _grid, _max_width-1, 0, _max_width-1, _sheet_height-2, "" );
	}

	if ( _is_decimal ) and ( _has_digit ) _str = real( _str );
	_grid[# _sheet_width-1, _sheet_height-1 ] = _str;

}

//If the last character was a newline then we'll have an erroneous extra row at the bottom
if ( _newline ) ds_grid_resize( _grid, _max_width, _sheet_height-1 );

buffer_delete( _buffer );
return _grid;

Last edited by Juju (2020-08-19 06:43:18)

Offline

#2 2017-03-05 11:01:33

Juju
Member
Registered: 2015-10-01
Posts: 45

Re: csv_to_grid

GMS1

Expand
///csv_to_grid( file, force strings, cell delimiter, string delimiter, mac newline )
//  
//  Loads a CSV file into a new ds_grid that is sized to fit the data.
//  
//  CAUTION: Please ensure your files are in UTF-8 encoding.
//  
//  You may pass <undefined> to use the default value for optional arguments.
//  arg0   string   Filename for the source UTF-8 CSV file
//  arg1   bool     Whether to force all cells to be a string. Defaults to false
//  arg2   string   The delimiter used to separate cells. Defaults to a comma
//  arg3   string   The delimiter used to define strings in the CSV file. Defaults to a double-quote
//  arg4   bool     Newline compatibility mode for Mac (0A). Defaults to Windows standard newline (0D,0A)
//  
//  Unless strings are forced, an unquoted cell is converted with real() when it is made up only of
//  digits, an optional leading minus sign and at most one decimal point. Every other cell is a string.
//  A "#" character inside a cell is stored as chr(13).
//  
//  returns: The new ds_grid (the caller should destroy it when finished with it), or <undefined>
//           if the arguments are invalid or the file could not be loaded.
//  
//  (c) Juju Adams 26th May 2017
//  @jujuadams

//Handle arguments
if ( argument_count < 1 ) or ( argument_count > 5 ) {
	show_error( "Incorrect number of arguments (" + string( argument_count ) + ")", false );
	return undefined;
}

var _filename         = argument[0];
var _force_strings    = false;
var _cell_delimiter   = chr(44); //comma
var _string_delimiter = chr(34); //double-quote
var _newline_alt      = false;

if ( argument_count >= 2 ) and ( !is_undefined( argument[1] ) ) _force_strings    = argument[1];
if ( argument_count >= 3 ) and ( !is_undefined( argument[2] ) ) _cell_delimiter   = argument[2];
if ( argument_count >= 4 ) and ( !is_undefined( argument[3] ) ) _string_delimiter = argument[3];
if ( argument_count >= 5 ) and ( !is_undefined( argument[4] ) ) _newline_alt      = argument[4];

//Check for silliness...
if ( string_length( _cell_delimiter ) != 1 ) or ( string_length( _string_delimiter ) != 1 ) {
	show_error( "Delimiters must be one character", false );
	return undefined;
}

//More variables...
var _cell_delimiter_ord   = ord( _cell_delimiter   );
var _string_delimiter_ord = ord( _string_delimiter );

var _sheet_width  = 0; //Number of cells written so far on the current row
var _sheet_height = 1; //Number of rows started so far
var _max_width    = 0; //Widest row seen so far

var _prev_val   = 0;
var _val        = 0;
var _str        = "";
var _in_string  = false;
var _is_decimal = !_force_strings;
var _has_digit  = false; //Whether the current cell contains at least one digit
var _has_point  = false; //Whether the current cell already contains a decimal point
var _newline    = false; //Declared here because it is read after the loop, even if the loop never sets it

//Load CSV file as a buffer
var _buffer = buffer_load( _filename );
if ( _buffer < 0 ) {
	show_error( "Could not load file " + string( _filename ), false );
	return undefined;
}
var _size = buffer_get_size( _buffer );

//Only create the grid once the file is loaded so a failed load cannot leak it
var _grid = ds_grid_create( 1, 1 ); _grid[# 0, 0 ] = "";

//Handle byte order marks from some UTF-8 encoders (EF BB BF at the start of the file)
//The bytes are peeked, and only when the file is long enough, so tiny files are never over-read
var _has_bom = false;
if ( _size >= 3 ) {
	if ( buffer_peek( _buffer, 0, buffer_u8 ) == 239 ) {
		if ( buffer_peek( _buffer, 1, buffer_u8 ) == 187 ) {
			if ( buffer_peek( _buffer, 2, buffer_u8 ) == 191 ) _has_bom = true;
		}
	}
}

if ( _has_bom ) {
	buffer_seek( _buffer, buffer_seek_start, 3 );
	_size -= 3;
} else {
	show_debug_message( "CAUTION: csv_to_grid: " + _filename + ": CSV file might not be UTF-8 encoded (no BOM)" );
	buffer_seek( _buffer, buffer_seek_start, 0 );
}

//Iterate over the buffer. _i counts bytes consumed after the BOM, so multi-byte characters advance it further
for( var _i = 0; _i < _size; _i++ ) {

	_prev_val = _val;
	_val = buffer_read( _buffer, buffer_u8 );

	//Handle UTF-8 encoding
	if ( ( _val & 224 ) == 192 ) { //two-byte

		_val  = (                              _val & 31 ) <<  6;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 );
		_i++;

	} else if ( ( _val & 240 ) == 224 ) { //three-byte

		_val  = (                              _val & 15 ) << 12;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 ) <<  6;
		_val +=   buffer_read( _buffer, buffer_u8 ) & 63;
		_i += 2;

	} else if ( ( _val & 248 ) == 240 ) { //four-byte

		_val  = (                              _val &  7 ) << 18;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 ) << 12;
		_val += ( buffer_read( _buffer, buffer_u8 ) & 63 ) <<  6;
		_val +=   buffer_read( _buffer, buffer_u8 ) & 63;
		_i += 3;

	}

	//If we've found a string delimiter
	if ( _val == _string_delimiter_ord ) {

		//This definitely isn't a decimal number!
		_is_decimal = false;

		//If we're in a string...
		if ( _in_string ) {

			//If the next character is a string delimiter itself, skip this character
			//(only peek when there is a next byte, so the last byte of the file is never over-read)
			if ( _i + 1 < _size ) {
				if ( buffer_peek( _buffer, buffer_tell( _buffer ), buffer_u8 ) == _string_delimiter_ord ) continue;
			}

			//If the previous character is a string delimiter itself, add the string delimiter to the working string
			if ( _prev_val == _string_delimiter_ord ) {
				_str += _string_delimiter;
				continue;
			}

		}

		//Toggle "we're in a string" behaviour
		_in_string = !_in_string;
		continue;

	}

	if ( _newline_alt ) {
		_newline = ( _val == 10 );
	} else {
		_newline = ( _prev_val == 13 ) and ( _val == 10 );

		//If we've found a newline and we're in a string, skip over the chr(10) character
		//(the chr(13) before it has already been added to the working string)
		if ( _in_string ) and ( _newline ) continue;
	}

	//If we've found a new cell
	if ( ( _val == _cell_delimiter_ord ) or ( _newline ) ) and ( !_in_string ) {

		_sheet_width++;

		//If this cell is now longer than the maximum width of the grid, expand the grid
		if ( _sheet_width > _max_width ) {

			_max_width = _sheet_width;
			ds_grid_resize( _grid, _max_width, _sheet_height );

			//Clear cells vertically above to overwrite the default 0-value
			if ( _sheet_height >= 2 ) ds_grid_set_region( _grid, _max_width-1, 0, _max_width-1, _sheet_height-2, "" );

		}

		//Write the working string to a grid cell
		//Empty cells are still passed to real() exactly as before; a non-empty cell with no digits
		//(such as "-" or ".") is left as a string rather than passed to real()
		if ( _is_decimal ) and ( ( _str == "" ) or ( _has_digit ) ) _str = real( _str );
		_grid[# _sheet_width-1, _sheet_height-1 ] = _str;

		_str = "";
		_in_string = false;
		_is_decimal = !_force_strings;
		_has_digit = false;
		_has_point = false;

		//A newline outside of a string triggers a new line... unsurprisingly
		if ( _newline ) {

			//Clear cells horizontally to overwrite the default 0-value
			if ( _sheet_width < _max_width ) ds_grid_set_region( _grid, _sheet_width, _sheet_height-1, _max_width-1, _sheet_height-1, "" );

			_sheet_width = 0;
			_sheet_height++;
			ds_grid_resize( _grid, _max_width, _sheet_height );
		}

		continue;

	}

	//Check if we've read a "#" character
	//The current character is tested (not the previous one) so the "#" itself becomes chr(13)
	//and the character that follows it is kept
	if ( _val == 35 ) {
		_is_decimal = false;
		_str += chr(13);
		continue;
	}

	//No newlines should appear outside of a string delimited cell
	if ( ( _val == 10 ) or ( _val == 13 ) ) and ( !_in_string ) continue;

	//Check if this character still fits the shape of a decimal number
	if ( _val == 45 ) { //minus sign, only valid as the first character of the cell
		if ( _str != "" ) _is_decimal = false;
	} else if ( _val == 46 ) { //decimal point, only valid once per cell
		if ( _has_point ) _is_decimal = false;
		_has_point = true;
	} else if ( _val >= 48 ) and ( _val <= 57 ) { //digit
		_has_digit = true;
	} else {
		_is_decimal = false;
	}

	//Finally add this character to the working string!
	_str += chr( _val );

}

//Catch hanging work string on end-of-file
if ( _str != "" ) {

	_sheet_width++;

	if ( _sheet_width > _max_width ) {
		_max_width = _sheet_width;
		ds_grid_resize( _grid, _max_width, _sheet_height );
		if ( _sheet_height >= 2 ) ds_grid_set_region( _grid, _max_width-1, 0, _max_width-1, _sheet_height-2, "" );
	}

	if ( _is_decimal ) and ( _has_digit ) _str = real( _str );
	_grid[# _sheet_width-1, _sheet_height-1 ] = _str;

}

//If the last character was a newline then we'll have an erroneous extra row at the bottom
if ( _newline ) ds_grid_resize( _grid, _max_width, _sheet_height-1 );

buffer_delete( _buffer );
return _grid;

Last edited by Juju (2017-06-03 06:43:42)

Offline

#3 2017-03-05 11:02:07

Juju
Member
Registered: 2015-10-01
Posts: 45

Re: csv_to_grid

The "inverse".

Expand
///grid_to_csv_string( grid, [field delimiter], [string delimiter], [newline] )
//  
//  Formats the contents of a ds_grid as CSV text, one line per grid row.
//  Reals are written with string(), non-empty strings are wrapped in the string delimiter
//  (with any embedded string delimiter doubled), and empty strings are written as nothing at all.
//  
//  You may pass <undefined> to use the default value for optional arguments.
//  arg0   ds_grid   The grid holding the data to be formatted/encoded to CSV
//  arg1   string    The field delimiter used to separate cells (cannot be a decimal point). Defaults to a comma
//  arg2   string    The string delimiter used to define strings in the CSV file. Defaults to a double-quote
//  arg3   string    The newline character(s) used to defined a new row. Defaults to the Windows standard (0D,0A)
//  
//  returns: A string containing a formatted CSV file, including line breaks.
//           An empty string is returned if the arguments are invalid.
//  
//  (c) Juju Adams 5th March 2017 - All Rights Reserved
//  @jujuadams

//Handle arguments (either condition on its own makes the call invalid, hence "or")
if ( argument_count < 1 ) or ( argument_count > 4 ) {
    show_error( "Incorrect number of arguments (" + string( argument_count ) + ")", false );
    return "";
}

var _grid             = argument[0];
var _field_delimiter  = chr(44); //comma
var _string_delimiter = chr(34); //double-quote
var _newline          = chr(13) + chr(10); //default Windows (Notepad) newline

if ( argument_count >= 2 ) and ( !is_undefined( argument[1] ) ) _field_delimiter  = argument[1];
if ( argument_count >= 3 ) and ( !is_undefined( argument[2] ) ) _string_delimiter = argument[2];
if ( argument_count >= 4 ) and ( !is_undefined( argument[3] ) ) _newline          = argument[3];

//A decimal point would be indistinguishable from the one inside written numbers
if ( _field_delimiter == chr(46) ) { //decimal point
    show_error( "Field delimiter cannot be a decimal point!", false );
    return "";
}

var _str = "";
var _width  = ds_grid_width( _grid );
var _height = ds_grid_height( _grid );

for( var _y = 0; _y < _height; _y++ ) {
    for( var _x = 0; _x < _width; _x++ ) {
        
        var _value = _grid[# _x, _y ];
        if ( is_real( _value ) ) {
            //NOTE(review): string() may round reals when formatting - confirm the precision is acceptable
            _str += string( _value );
        } else if ( is_string( _value ) ) {
            if ( _value != "" ) { //Catch empty strings
                //Double up any string delimiter inside the value so it survives being quoted
                _str += _string_delimiter + string_replace_all( _value, _string_delimiter, _string_delimiter+_string_delimiter ) + _string_delimiter;
            }
        } else {
            //Other datatypes are reported and written as an empty cell
            show_debug_message( "ds_grid_to_csv: CAUTION - grid " + string( _grid ) + " value at " + string( _x ) + "," + string( _y ) + " is not a valid datatype!" );
        }
        
        //No field delimiter after the last cell of a row
        if ( _x < _width-1 ) _str += _field_delimiter;
        
    }
    
    _str += _newline;
    
}

return _str;

Last edited by Juju (2017-12-12 05:31:37)

Offline

#4 2017-05-11 05:30:37

Juju
Member
Registered: 2015-10-01
Posts: 45

Re: csv_to_grid

Old GMS1 version.

Expand
///csv_to_grid( file, [cell delimiter], [text delimiter] )
//  
//  Loads a .csv file into a ds_grid that is automatically sized to the data.
//  Useful for translations, dialogue trees, mods etc.
//  
//  Every cell is stored as a string. A Windows newline (0D,0A) inside a text-delimited cell
//  is stored as "#".
//  
//  arg0   string   Filename of the source CSV file
//  arg1   string   The delimiter used to separate cells. Defaults to a comma (also used when "" is passed)
//  arg2   string   The delimiter used to define text in the CSV file. Defaults to a double-quote (also used when "" is passed)
//  
//  returns: The new ds_grid (the caller should destroy it when finished with it), or <noone> if the
//           number of arguments is unsupported or the file could not be loaded.
//  
//  January 2016
//  @jujuadams

//Collect the arguments. You don't strictly have to do this but it's good practice for larger scripts in case you need to change things around
if ( argument_count < 1 ) or ( argument_count > 3 ) {
    cout( "scr_juju_csv_load_to_grid: Error! Unsupported number of arguments" );
    return noone;
}

var file = argument[0];
var cellDelimiter = "";
var textDelimiter = "";
if ( argument_count >= 2 ) cellDelimiter = argument[1];
if ( argument_count >= 3 ) textDelimiter = argument[2];

//An empty delimiter selects the default
if ( cellDelimiter == "" ) cellDelimiter = ",";
if ( textDelimiter == "" ) textDelimiter = chr(34);

var cellDelimiterOrd = ord( cellDelimiter );
var textDelimiterOrd = ord( textDelimiter );

//Load the file as a buffer (the same way the newer versions of this script do)
var buffer = buffer_load( file );
if ( buffer < 0 ) {
    cout( "scr_juju_csv_load_to_grid: Error! Could not load file" );
    return noone;
}
buffer_seek( buffer, buffer_seek_start, 0 );

//Initialise width and height of the spreadsheet
var sheetWidth = 0;
var sheetHeight = 1;

var prevVal = 0;
var nextVal = 0;
var val = 0;
var str = "";
var inText = false;
var grid = noone; //Created lazily when the first cell is completed

var size = buffer_get_size( buffer );
for( var i = 0; i < size; i++ ) {
    
    prevVal = val;
    val = buffer_read( buffer, buffer_u8 );
    
    //Carriage returns are never stored; they are only remembered through prevVal to spot 0D,0A pairs
    if ( val == 13 ) continue;
    
    if ( val == textDelimiterOrd ) {
        
        if ( inText ) {
            
            //Only peek when there is a next byte so the end of the buffer is never over-read
            nextVal = -1;
            if ( i + 1 < size ) nextVal = buffer_peek( buffer, buffer_tell( buffer ), buffer_u8 );
            
            //First half of a doubled text delimiter: skip it
            if ( nextVal == textDelimiterOrd ) continue;
            
            //Second half of a doubled text delimiter: store one literal delimiter
            if ( prevVal == textDelimiterOrd ) {
                str += textDelimiter;
                continue;
            }
        }
        
        inText = !inText;
        continue;
        
    }
    
    //A newline inside text is stored as "#"
    if ( inText ) and ( ( prevVal == 13 ) and ( val == 10 ) ) {   
        str += "#";
        continue;
    }
    
    //A cell delimiter or a newline outside of text finishes the current cell
    if ( ( val == cellDelimiterOrd ) or ( ( prevVal == 13 ) and ( val == 10 ) ) ) and ( !inText ) {
        
        sheetWidth++;
        if ( grid == noone ) {
            grid = ds_grid_create( max( 1, sheetWidth ), max( 1, sheetHeight ) );
            ds_grid_clear( grid, "" );
        } else ds_grid_resize( grid, max( sheetWidth, ds_grid_width( grid ) ), sheetHeight );
        
        ds_grid_set( grid, sheetWidth - 1, sheetHeight - 1, str );
        str = "";
        inText = false;
        
        //A newline also starts the next row
        if ( val == 10 ) {
            sheetWidth = 0;
            sheetHeight++;
        }
        
        continue;
    }
    
    str += chr( val );
    
}

//Catch a final cell that is not followed by a cell delimiter or newline
if ( str != "" ) {
    
    sheetWidth++;
    if ( grid == noone ) {
        grid = ds_grid_create( max( 1, sheetWidth ), max( 1, sheetHeight ) );
        ds_grid_clear( grid, "" );
    } else ds_grid_resize( grid, max( sheetWidth, ds_grid_width( grid ) ), sheetHeight );
    
    ds_grid_set( grid, sheetWidth - 1, sheetHeight - 1, str );
    
}

buffer_delete( buffer );

//A file with no cells at all still returns a valid grid holding a single empty cell
if ( grid == noone ) {
    grid = ds_grid_create( 1, 1 );
    ds_grid_clear( grid, "" );
}

//Cells added by resizing hold the default 0-value, so replace anything that isn't a string
sheetWidth = ds_grid_width( grid );
sheetHeight = ds_grid_height( grid );
for( var yy = 0; yy < sheetHeight; yy++ ) {
    for( var xx = 0; xx < sheetWidth; xx++ ) {
        val = ds_grid_get( grid, xx, yy );
        if ( !is_string( val ) ) ds_grid_set( grid, xx, yy, "" );
    }
}

//Return the grid, ready for use elsewhere
return grid;

Last edited by Juju (2017-05-26 03:54:53)

Offline

Board footer

Powered by FluxBB