You are not logged in.
Pages: 1
/// xml_to_json( file/buffer, [verbose?] )
//
// Basic XML to JSON converter. Not particularly robust but does the job for dev-facing applications.
// This script can be given either: a file path for loading directly from a file; or a buffer index
// that contains XML content (buffer mode is useful for handling REST API responses).
//
// Note that some more advanced XML functionality is not supported.
//
// Nodes are maps within a JSON. Attributes are affixed with an @ character
// e.g. the attribute id="12" will be stored under the key "@id" with the string value "12"
//
// No value is stored in the JSON as a true real number. Numeric information in the XML file is
// stored to the JSON as a string.
//
// A node that has neither attributes nor child nodes is collapsed: its entry in the parent is
// replaced by the text that was read outside of tags (a string) instead of a nested map.
// Repeated sibling nodes that share a name are gathered into a list stored under that name.
//
// As GM lacks the inverse of ds_map_add_map/ds_map_add_list, lists are stored in ds_map by using
// the usual integer index and adding 0.1 to that number. To check if a value in a ds_map is a
// linked map or linked list, use "if (floor(value)==value)" to check for a linked map and the
// logical opposite "!=" for a linked list.
//
// file/buffer   a file path (string) or a buffer index
// [verbose?]    send processing info to the compile form, boolean/optional (defaults false)
//
// Returns the ds_map index of the JSON root, or noone if a file path was given and the file
// could not be opened. A buffer created from a file path is deleted before returning; a buffer
// passed in by the caller is left allocated and rewound to its start.
//
/// GMLscripts.com/license
var in = argument[0];
var verbose = false;
if ( argument_count >= 2 ) verbose = argument[1];
var scriptName = "scr_juju_xml_to_json(" + string( in ) + ")";

//Declared up front so that neither input mode relies on a declaration made inside the other branch
var buffer;

//Treat the input as a file name/location
if ( is_string( in ) ) {
    var f = file_bin_open( in, 0 );
    if ( f < 0 ) {
        show_debug_message( scriptName + ": Error! " + in + " cannot be opened!" );
        return noone;
    }
    if ( verbose ) show_debug_message( scriptName + ": Loading " + in + ", size = " + string( file_bin_size( f ) ) );
    //The binary file handle is only used to size the buffer; the content itself is loaded by buffer_load_ext
    buffer = buffer_create( file_bin_size( f ), buffer_grow, 1 );
    file_bin_close( f );
    buffer_load_ext( buffer, in, 0 );
//Treat the input as a buffer
} else {
    buffer = in;
}

buffer_seek( buffer, buffer_seek_start, 0 );
if ( verbose ) show_debug_message( scriptName + ": Buffer is " + string( buffer_get_size( buffer ) ) + " bytes" );

//Find ASCII codes for important functional symbols
var ordTagOpen   = ord( "<" );
var ordTagClose  = ord( ">" );
var ordQMark     = ord( "?" );
var ordTerminate = ord( "/" );
var ordDQuote    = ord( '"' );
var ordSQuote    = ord( "'" );
var ordSpace     = ord( " " );
var ordEquals    = ord( "=" );

//Create JSON root. Every node temporarily carries "_GMname" and "_GMparent" so the parser can walk back up the tree
var json = ds_map_create();
ds_map_add( json, "_GMname", "_root" );
ds_map_add( json, "_GMparent", json );

//Set up state variables
var map          = json;      //Node currently being filled
var parentMap    = undefined; //Parent of the current node
var insideTag    = false;     //True between "<" and ">"
var mapName      = "";        //Name of the current node
var insideQuery  = false;     //True once a "?" has been seen inside the current tag
var namingMap    = false;     //True while the tag's name is still being read
var insideString = false;     //True between quote marks inside a tag
var keyName      = "";        //Attribute name waiting for its value
var cache        = "";        //Characters collected for the current name/value
var outerCache   = "";        //Characters collected outside of tags (text content)
var terminate    = false;     //True once a "/" has been seen inside the current tag
var closingTag   = false;     //True when that "/" came before the tag name, i.e. </name> rather than <name/>
var get          = undefined;
var list         = undefined;
var val          = undefined;

//Keep reading values until EoF
repeat( buffer_get_size( buffer ) ) {

    //Strip a value off a buffer
    val = buffer_read( buffer, buffer_u8 );

    //If the character is a quote mark inside a tag, toggle string mode
    if ( ( val == ordDQuote ) or ( val == ordSQuote ) ) and ( insideTag ) {

        insideString = !insideString;

    } else {

        //If we're inside a string
        if ( insideString ) {

            //Add the character to the cache verbatim - no symbol is functional inside a string
            cache += chr( val );

        } else {

            //Branch out depending on what character was inputted
            switch ( val ) {

                case ordTagOpen: // <
                    if ( insideTag ) and ( verbose ) show_debug_message( scriptName + ": Warning! OpenTag inside tag!" );
                    insideTag   = true;
                    namingMap   = true;
                    keyName     = "";
                    cache       = "";
                    terminate   = false;
                    closingTag  = false;
                    insideQuery = false;
                break;

                case ordTagClose: // >
                case ordSpace:

                    if ( val == ordSpace ) and ( !insideTag ) {
                        //If we've seen a space outside a tag, treat the space as part of a cache
                        outerCache += " ";
                        break;
                    }

                    //If we're inside a tag that opens a node (i.e. anything but a closing </name> tag), flush the pending name or attribute.
                    //Self-closing tags (<name/> and <name attr="x"/>) must pass through here too, otherwise the node is never created
                    //or its last attribute is lost, which is why this tests closingTag rather than terminate.
                    if ( insideTag ) and ( !closingTag ) {

                        //If we're naming the node (the first token in the tag)
                        if ( namingMap ) {

                            if ( verbose ) show_debug_message( scriptName + ": Adding <" + cache + "> to <" + ds_map_find_value( map, "_GMname" ) + ">" );

                            //Juggle the state variables
                            mapName   = cache;
                            cache     = "";
                            namingMap = false;
                            parentMap = map;

                            //Add a new node and add some internal attributes (used to more efficiently traverse the JSON)
                            map = ds_map_create();
                            ds_map_add( map, "_GMname", mapName );
                            ds_map_add( map, "_GMparent", parentMap );

                            //Try to find this new node in the parent node
                            get = ds_map_find_value( parentMap, mapName );

                            //If the new node isn't already in the parent node
                            if ( is_undefined( get ) ) {

                                ds_map_add_map( parentMap, mapName, map );

                            //If the existing entry is a real number (i.e. there is already a ds_map or ds_list)
                            } else if ( !is_string( get ) ) {

                                //Dirty hack to tell if a value in the JSON is a ds_map or ds_list - ds_list have a fractional part (0.1) whereas ds_map don't
                                if ( floor( get ) == get ) {

                                    //Create a list to replace the ds_map in the parent node. Note addition of fractional part!
                                    list = ds_list_create() + 0.1;
                                    ds_list_add( list, get ); ds_list_mark_as_map( list, ds_list_size( list ) - 1 ); //Add old value to the list

                                    //Do a merry song and dance to replace the node in the parent (you can't straight up change a map for a list so we use an intermediate real value)
                                    ds_map_replace( parentMap, mapName, noone );
                                    ds_map_delete( parentMap, mapName );
                                    ds_map_add_list( parentMap, mapName, list );
                                    get = list;

                                    if ( verbose ) show_debug_message( scriptName + ": Replacing <" + mapName + "> with list " + string( list ) );

                                }

                                //Add this new node to the list
                                ds_list_add( get, map ); ds_list_mark_as_map( get, ds_list_size( get ) - 1 );
                                //Report the list that was actually appended to; "list" is only assigned when a list was created just now
                                if ( verbose ) show_debug_message( scriptName + ": Adding new entry to list " + string( get ) );

                            //If the node in the parent is actually a string - we treat this effectively the same as replacing a ds_map above
                            } else {

                                //Create a list to replace the string in the parent node. Note addition of fractional part!
                                list = ds_list_create() + 0.1;
                                ds_list_add( list, get ); //Add old value to the list

                                //Do a merry song and dance to replace the node in the parent (you can't straight up change a map for a list so we use an intermediate real value)
                                ds_map_replace( parentMap, mapName, noone );
                                ds_map_delete( parentMap, mapName );
                                ds_map_add_list( parentMap, mapName, list );

                                //Add this new node to the list. The entry must be marked on the list itself ("get" is the old string value here)
                                ds_list_add( list, map ); ds_list_mark_as_map( list, ds_list_size( list ) - 1 );

                                if ( verbose ) {
                                    show_debug_message( scriptName + ": Replacing <" + mapName + "> with list " + string( list ) );
                                    show_debug_message( scriptName + ": Adding new entry to list " + string( list ) );
                                }

                            }

                        //If we're not naming the node then we're adding an attribute
                        } else {

                            if ( keyName != "" ) {
                                keyName = "@" + keyName; //Affix an @ to the front to indicate an attribute
                                ds_map_add( map, keyName, cache );
                                //Log before the state is cleared so the message shows the attribute that was stored
                                if ( verbose ) show_debug_message( scriptName + ": Adding " + keyName + "=" + cache + " to <" + ds_map_find_value( map, "_GMname" ) + ">" );
                                keyName = "";
                                cache   = "";
                            }

                        }

                    }

                    //If we've seen the close tag symbol rather than just a space
                    if ( val == ordTagClose ) { // >

                        if ( !insideTag ) and ( verbose ) show_debug_message( scriptName + ": Warning! CloseTag outisde tag!" );

                        //If the tag is being terminated or we're inside a header
                        if ( terminate ) or ( insideQuery ) {

                            if ( map == json ) {

                                //There is no open node left to close (stray closing tag). Leave the root untouched so its bookkeeping keys survive until the final clean up.
                                if ( verbose ) show_debug_message( scriptName + ": Warning! Terminating tag with no open node, ignoring" );

                            } else {

                                if ( verbose ) show_debug_message( scriptName + ": Terminating <" + mapName + ">" );
                                if ( mapName != ds_map_find_value( map, "_GMname" ) ) and ( verbose ) show_debug_message( scriptName + ": Warning! Termiante mismatch to map name <" + ds_map_find_value( map, "_GMname" ) + ">" );

                                //If this node has no children nor attributes (remember that all nodes start with two internal properties _GMname and _GMparent)
                                if ( ds_map_size( map ) <= 2 ) {

                                    //Try to find this node in the parent node
                                    get = ds_map_find_value( parentMap, mapName );
                                    if ( verbose ) show_debug_message( scriptName + ": Looking for <" + mapName + "> in <" + ds_map_find_value( parentMap, "_GMname" ) + "> = " + string( get ) );

                                    //Dirty hack to tell if a value in the JSON is a ds_map or ds_list - ds_list have a fractional part (0.1) whereas ds_map don't
                                    if ( floor( get ) == get ) {

                                        //Add the contents of the outerCache (outside a tag) as a string to the parent ds_map
                                        ds_map_replace( parentMap, mapName, outerCache );
                                        if ( verbose ) show_debug_message( scriptName + ": <" + mapName + '> empty, setting contents of tag in parent to "' + outerCache + '"' );

                                    } else {

                                        //Add the contents of the outerCache (outside a tag) as a string to the parent ds_list. This node is always the list's last entry.
                                        ds_list_replace( get, ds_list_size( get ) - 1, outerCache );
                                        if ( verbose ) show_debug_message( scriptName + ": <" + mapName + '> empty, adding "' + outerCache + '" to list ' + string( get ) );

                                    }

                                    //Remove this node - we don't need it any more
                                    ds_map_destroy( map );

                                //If this node has some children or some attributes, clear up its internal properties
                                } else {

                                    ds_map_delete( map, "_GMname" );
                                    ds_map_delete( map, "_GMparent" );

                                }

                                //Traverse up a layer, back to the parent
                                map       = parentMap;
                                mapName   = ds_map_find_value( map, "_GMname" );
                                parentMap = ds_map_find_value( map, "_GMparent" );

                            }

                        }

                        //Reset some state variables
                        outerCache  = "";
                        cache       = "";
                        insideTag   = false;
                        insideQuery = false;

                    }

                break;

                case ordQMark: // ?
                    if ( insideTag ) {
                        if ( namingMap ) cache += "?";
                        insideQuery = true;
                    } else {
                        outerCache += chr( val ); //If we're outside a tag, add the character to a different cache
                    }
                break;

                case ordEquals: // =
                    if ( insideTag ) and ( !namingMap ) {
                        //Everything cached so far is the attribute name; the value follows
                        keyName = cache;
                        cache   = "";
                    } else {
                        cache += chr( val );
                        if ( !insideTag ) outerCache += chr( val ); //If we're outside a tag, add the character to a different cache
                    }
                break;

                case ordTerminate: // /
                    if ( insideTag ) {
                        //A slash before any name character marks a closing tag (</name>); a slash anywhere else in the tag marks a self-closing tag (<name/>)
                        if ( namingMap ) and ( cache == "" ) closingTag = true;
                        terminate = true;
                    } else {
                        cache += chr( val );
                        if ( !insideTag ) outerCache += chr( val ); //If we're outside a tag, add the character to a different cache
                    }
                break;

                default: //Any other character
                    cache += chr( val );
                    if ( !insideTag ) outerCache += chr( val ); //If we're outside a tag, add the character to a different cache
                break;

            }

        }

    }

}

//Clean up the root of the JSON
ds_map_delete( json, "_GMname" );
ds_map_delete( json, "_GMparent" );

//Delete the buffer if this script created it, otherwise rewind the caller's buffer
if ( is_string( in ) ) buffer_delete( buffer ) else buffer_seek( buffer, buffer_seek_start, 0 );

return json;
Last edited by Juju (2016-05-12 09:54:57)
Offline
Bit of a monster this one and not entirely a robust solution. The dirty real value hack to get GM to differentiate ds_map_add_map and ds_map_add_list is something that very well could break without warning in the future!
Still, it works most of the time and is good enough to use for controlled situations (dialogue tree/localisation file etc).
Last edited by Juju (2016-02-27 15:52:18)
Offline
I would have kissed you full on the mouth if you had given me this 12 months ago.
Abusing forum power since 1986.
Offline
Never too late xoxo
Any suggestions on things that need to be added? I realise that this has no error checking whatsoever which is a weakness.
Offline
Bug fix.
Offline
Another bug fix.
Offline
Pages: 1