[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
RE: Images embedded in XML
- From: "Al B. Snell" <alaric@alaric-snell.com>
- To: Eric Bohlman <ebohlman@earthlink.net>
- Date: Sun, 08 Apr 2001 13:14:10 +0100 (BST)
On Sun, 8 Apr 2001, Eric Bohlman wrote:
> IMHO that's an extremely unfair comparison. I very much doubt that in a few
> minutes you could come up with a reliable, tested, general-purpose data
> serializer, as opposed to a quick application-specific hack.
The serializer is the *easy* part :-)
Here's some untested C++ code for a serialiser and a SAX-like parser:
// Record tags: the one-byte value written at the start of each record.
// Nodes (like XML elements)
#define TAG_NODE_START 0
#define TAG_NODE_END 1
// UTF-8 string
#define TAG_STRING 2
// Signed and unsigned 32-bit integers
#define TAG_S32 3
#define TAG_U32 4
// Comment
#define TAG_COMMENT 5
// Declare namespace for node names
#define TAG_NAMESPACE 6
// End of the data stream
#define TAG_END 255
// Fixed-width integer types.  The stream stores these values as exactly four
// bytes, so they must be 32 bits wide on every platform ("long" is 64 bits
// on LP64 systems).
#include <stdint.h>
typedef uint32_t U32;
typedef int32_t S32;
// Serialiser:
class Serialiser {
protected:
FILE *fp; // what we write to
short int namespace_counter; // namespace number allocator
void _write_tag(int elemtype) {
fputc(elemtype,fp);
}
void _write_string(char *string) {
U32 len = htonl(strlen(string));
fwrite(&len,1,4,fp);
}
public:
Serialiser(FILE *_fp) {
fp = _fp;
namespace_counter = 0;
}
void write_node_start(char *node_type,short int namespace) {
_write_tag(fp,TAG_NODE_START);
namespace = htons(namespace);
fwrite(&namespace,1,2,fp);
_write_string(fp,node_type);
}
void write_node_end() {
_write_tag(fp,TAG_NODE_END);
}
void write_string(char *string) {
_write_tag(fp,TAG_STRING);
_write_string(fp,string);
}
void write_S32(S32 i) {
_write_tag(fp,TAG_S32);
i = htonl(i);
fwrite(&i,1,4,fp);
}
void write_U32(U32 i) {
_write_tag(fp,TAG_U32);
i = htonl(i);
fwrite(&i,1,4,fp);
}
void write_comment(char *string) {
_write_tag(fp,TAG_STRING);
_write_string(fp,string);
}
short int write_namespace(char *uri) {
_write_tag(fp,TAG_NAMESPACE);
_write_string(fp,uri);
return namespace_counter++;
}
}
void write_end(FILE *fp) {
_write_tag(fp,TAG_END);
}
};
// Parser
// Read one length-prefixed string: a 32-bit big-endian byte count followed
// by that many bytes.
//
// Returns a freshly malloc()ed, NUL-terminated copy that the caller must
// free(), or NULL if the stream ends early, a read fails, or the allocation
// fails.
char *_read_string(FILE *fp) {
    unsigned char prefix[4];
    unsigned long len;
    char *str;

    if (fread(prefix, 1, sizeof prefix, fp) != sizeof prefix) {
        return NULL;
    }
    // Decode byte by byte so the result does not depend on host byte order.
    len = ((unsigned long)prefix[0] << 24)
        | ((unsigned long)prefix[1] << 16)
        | ((unsigned long)prefix[2] << 8)
        | (unsigned long)prefix[3];

    // The length comes from the input; make sure len + 1 cannot wrap to 0
    // on platforms where size_t is only 32 bits wide.
    if (len >= (size_t)-1) {
        return NULL;
    }

    str = (char *)malloc((size_t)len + 1);
    if (str == NULL) {
        return NULL;
    }
    if (fread(str, 1, (size_t)len, fp) != (size_t)len) {
        free(str); // truncated payload: do not hand back a partial string
        return NULL;
    }
    str[len] = '\0'; // close the string
    return str;
}
class EventSink {
// All strings passed in are malloced and need to be free()ed one
// day
virtual void start_node(char *nodename,short int namespace,char
*namespace_uri) = 0;
virtual void end_node() = 0;
virtual void string(char *str) = 0;
virtual void S32(S32 i) = 0;
virtual void U32(U32 i) = 0;
virtual void comment(char *string) { /* do nothing by default */ }
virtual void namespace(short int namespace, char *uri) { /* do
nothing by default */ }
};
// This is a hack; a better implementation would use a growable array
#define MAX_NAMESPACES 256
void parse(FILE *fp,EventSink &sink) {
int tag;
S32 s32;
U32 u32;
short int namespace;
char *string;
char *namespaces[MAX_NAMESPACES];
short int namespace_counter;
do {
tag = fgetc(*fp);
switch(tag) {
case TAG_NODE_START:
string = _read_string(fp);
fread(&namespace,1,2,fp);
namespace = ntohs(namespace);
assert(namespace < namespace_counter); // No buffer
// overruns, please!
sink.start_node(string,namespace,namespaces[namespace]);
break;
case TAG_NODE_END:
sink.end_node();
break;
case TAG_STRING:
sink.string(_read_string(fp));
break;
case TAG_U32:
fread(&u32,1,4,fp);
sink.U32(ntohl(u32));
break;
case TAG_S32:
fread(&s32,1,4,fp);
sink.S32(ntohl(s32));
break;
case TAG_COMMENT:
sink.comment(_read_string(fp));
break;
case TAG_NAMESPACE:
string = _read_string(fp);
namespaces[namespace_counter] = string;
sink.namespace(namespace_counter,string);
namespace_counter++;
break;
case TAG_END:
break;
default:
// throw an exception?
assert(1==2);
}
} while (tag != TAG_END);
for(int i=0;i<namespace_counter;i++)
free(namespaces[i]);
}
> Most of the time
> spent writing an XML parser isn't spent on bare-metal coding; it's spent on
> the less geeky aspects of programming like testing, requirements analysis, API
> documentation and development (the API that's only for your own personal use
> is always the easiest to develop, since you already have it internalized),
> design for maintainability, and maintenance.
Yes, I'm not disputing that, but if we compare like for like (actual
coding time on both sides - the time spent documenting and friends is
usually fairly proportional to this), the above code is simpler than an
XML parser of equivalent feature-set (I could add PIs and entity
declarations/references to it in another hour, too).
> Application-specific hacks are always more efficient than
> general-purpose libraries when the entire project is under the
> complete control of a lonergeek cowboy coder who gets to define all
> the requirements himself and who's going to be the only person ever to
> look at the code. In the real world, things are a little different.
But a binary format doesn't need to be application-specific! That's not
what I'm talking about here! I'm talking about defining a binary format
that *replaces textual XML as we know it*.
ABS
--
Alaric B. Snell
http://www.alaric-snell.com/ http://RFC.net/ http://www.warhead.org.uk/
Any sufficiently advanced technology can be emulated in software