OASIS Mailing List Archives
View the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: Images embedded in XML



On Sun, 8 Apr 2001, Eric Bohlman wrote:

> IMHO that's an extremely unfair comparison.  I very much doubt that in a few 
> minutes you could come up with a reliable, tested, general-purpose data 
> serializer, as opposed to a quick application-specific hack. 

The serializer is the *easy* part :-)

Here's some untested C++ code for a serialiser and a SAX-like parser:

// Record tags. Each record in the stream starts with one of these byte
// values; the serialiser writes it with fputc() and the parser reads it
// back with fgetc().

// Nodes (like XML elements)
#define TAG_NODE_START 0
#define TAG_NODE_END 1

// UTF-8 string
#define TAG_STRING 2

// Signed and unsigned 32-bit integers
#define TAG_S32 3
#define TAG_U32 4

// Comment
#define TAG_COMMENT 5

// Declare namespace for node names
#define TAG_NAMESPACE 6
// Original (misspelled) name, kept so code written against it still builds.
#define TAG_NAMESAPCE TAG_NAMESPACE

// End of the data stream
#define TAG_END 255

// These are machine dependent :-( `long` is only guaranteed to hold *at
// least* 32 bits and is 64 bits wide on many platforms, so never assume
// sizeof(U32) == 4 when moving values to or from the byte stream.
typedef unsigned long int U32;
typedef signed long int S32;

// Serialiser:

class Serialiser {
protected:
	FILE *fp; // what we write to
	short int namespace_counter; // namespace number allocator

	void _write_tag(int elemtype) {
		fputc(elemtype,fp);
	}

	void _write_string(char *string) {
		U32 len = htonl(strlen(string));
		fwrite(&len,1,4,fp);
	}

public:
	Serialiser(FILE *_fp) {
		fp = _fp;
		namespace_counter = 0;
	}
	

	void write_node_start(char *node_type,short int namespace) {
		_write_tag(fp,TAG_NODE_START);
		namespace = htons(namespace);
		fwrite(&namespace,1,2,fp);
		_write_string(fp,node_type);
	}

	void write_node_end() {
		_write_tag(fp,TAG_NODE_END);
	}

	void write_string(char *string) {
		_write_tag(fp,TAG_STRING);
		_write_string(fp,string);
	}

	void write_S32(S32 i) {
		_write_tag(fp,TAG_S32);
		i = htonl(i);
		fwrite(&i,1,4,fp);
	}

	void write_U32(U32 i) {
		_write_tag(fp,TAG_U32);
		i = htonl(i);
		fwrite(&i,1,4,fp);
	}

	void write_comment(char *string) {
		_write_tag(fp,TAG_STRING);
		_write_string(fp,string);
	}

	short int write_namespace(char *uri) {
		_write_tag(fp,TAG_NAMESPACE);
		_write_string(fp,uri);
		return namespace_counter++;
	}
	
}

void write_end(FILE *fp) {
	_write_tag(fp,TAG_END);
}

};

// Parser

// Read one length-prefixed string from fp: a 32-bit big-endian byte count
// followed by that many bytes.
//
// Returns a freshly malloc()ed, NUL-terminated copy that the caller must
// free(), or NULL if the stream ends early, a read fails, the length is
// too large to allocate, or malloc() fails.
char *_read_string(FILE *fp) {
	unsigned char prefix[4];
	if (fread(prefix,1,4,fp) != 4)
		return NULL;

	// Assemble the length byte by byte so the result does not depend on
	// the host's byte order or on how wide the integer type is.
	unsigned long len = (static_cast<unsigned long>(prefix[0]) << 24)
		| (static_cast<unsigned long>(prefix[1]) << 16)
		| (static_cast<unsigned long>(prefix[2]) << 8)
		| static_cast<unsigned long>(prefix[3]);

	// len + 1 below must not wrap around to zero.
	if (len >= static_cast<size_t>(-1))
		return NULL;
	size_t size = static_cast<size_t>(len);

	char *str = static_cast<char *>(malloc(size + 1));
	if (str == NULL)
		return NULL;

	// A short read means the stream was truncated; do not hand back a
	// partly filled buffer.
	if (fread(str,1,size,fp) != size) {
		free(str);
		return NULL;
	}
	str[size] = '\0'; // close the string
	return str;
}

class EventSink {
	// All strings passed in are malloced and need to be free()ed one
	// day
	virtual void start_node(char *nodename,short int namespace,char 
		*namespace_uri) = 0;
	virtual void end_node() = 0;
	virtual void string(char *str) = 0;
	virtual void S32(S32 i) = 0;
	virtual void U32(U32 i) = 0;
	virtual void comment(char *string) { /* do nothing by default */ }
	virtual void namespace(short int namespace, char *uri) { /* do
		nothing by default */ }
};

// This is a hack; a better implementation would use a growable array
#define MAX_NAMESPACES 256

void parse(FILE *fp,EventSink &sink) {
	int tag;
	S32 s32;
	U32 u32;
	short int namespace;
	char *string;
	char *namespaces[MAX_NAMESPACES];
	short int namespace_counter;
	do {
		tag = fgetc(*fp);
		switch(tag) {
		case TAG_NODE_START:
		string = _read_string(fp);
		fread(&namespace,1,2,fp);
		namespace = ntohs(namespace);
		assert(namespace < namespace_counter); // No buffer
						// overruns, please!
		sink.start_node(string,namespace,namespaces[namespace]);
		break;

		case TAG_NODE_END:
		sink.end_node();
		break;

		case TAG_STRING:
		sink.string(_read_string(fp));
		break;

		case TAG_U32:
		fread(&u32,1,4,fp);
		sink.U32(ntohl(u32));
		break;

		case TAG_S32:
		fread(&s32,1,4,fp);
		sink.S32(ntohl(s32));
		break;

		case TAG_COMMENT:
		sink.comment(_read_string(fp));
		break;

		case TAG_NAMESPACE:
		string = _read_string(fp);
		namespaces[namespace_counter] = string;
		sink.namespace(namespace_counter,string);
		namespace_counter++;
		break;

		case TAG_END:
		break;

		default:
		// throw an exception?
		assert(1==2);

		}
	} while (tag != TAG_END);

	for(int i=0;i<namespace_counter;i++)
		free(namespaces[i]);
}

> Most of the time 
> spent writing an XML parser isn't spent on bare-metal coding; it's spent on 
> the less geeky aspects of programming like testing, requirements analysis, API 
> documentation and development (the API that's only for your own personal use 
> is always the easiest to develop, since you already have it internalized), 
> design for maintainability, and maintenance. 

Yes, I'm not disputing that, but if we compare like for like (actual
coding time on both sides - the time spent documenting and friends is
usually fairly proportional to this), the above code is simpler than an
XML parser of equivalent feature-set (I could add PIs and entity
declarations/references to it in another hour, too).

> Application-specific hacks are always more efficient than
> general-purpose libraries when the entire project is under the
> complete control of a lonergeek cowboy coder who gets to define all
> the requirements himself and who's going to be the only person ever to
> look at the code.  In the real world, things are a little different.

But a binary format doesn't need to be application-specific! That's not
what I'm talking about here! I'm talking about defining a binary format
that *replaces textual XML as we know it*.
 
ABS

-- 
                               Alaric B. Snell
 http://www.alaric-snell.com/  http://RFC.net/  http://www.warhead.org.uk/
   Any sufficiently advanced technology can be emulated in software