[
Lists Home |
Date Index |
Thread Index
]
- To: <xml-dev@lists.xml.org>
- Subject: sax and entities
- From: "Scott Purcell" <spurcell@vertisinc.com>
- Date: Fri, 20 Jun 2003 07:52:31 -0500
- Thread-index: AcM3KtAPaa5szJq8Qjq/obq7LgL2sw==
- Thread-topic: sax and entities
Hello,
I am using the org.xml.sax classes and extending the DefaultHandler. See code below.
I am parsing the XML (shown below the class file), which contains entity references, e.g. &reg;.
The XML declares entities in its internal DTD subset to turn &reg; into the equivalent character. But instead of parsing, I get the following error:
exception: org.xml.sax.SAXParseException: invalid Character Entitiy
Has anyone had this issue with entities? Maybe I am handling this improperly?
Thanks,
Scott
// code
/**
 * Parses the XML file named by {@code filename} with a validating SAX parser.
 *
 * The SAX callbacks are delivered to a second instance of this class, created
 * with the one-argument constructor from {@code db_id}. Parse, parser
 * configuration and I/O failures raised inside the try block are reported on
 * the console and are not rethrown.
 *
 * @param filename path of the XML file to parse; also stored in {@code this.filename}
 * @param db_id    handed to the one-argument constructor that builds the handler
 * @throws Exception anything raised outside the try block or not matched by
 *                   one of the catch clauses below
 */
public xmlProductUploadParser(String filename, String db_id) throws Exception
{
    System.out.println("FOO2");
    this.filename = filename;

    // A separate instance acts as the receiver of the SAX events.
    DefaultHandler contentHandler = new xmlProductUploadParser(db_id);

    // Request a validating parser from the factory.
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setValidating(true);
    System.out.println("FOO3");

    try {
        // Output writer wrapping standard out.
        out = new OutputStreamWriter(System.out, "UTF8");

        // Build the parser, then run it over the input file.
        SAXParser parser = parserFactory.newSAXParser();
        File input = new File(filename);
        parser.parse(input, contentHandler);
    } catch (SAXParseException parseError) {
        // Raised by the parser itself: report where it happened and why.
        String location = "\n** Parsing error" + ", line " + parseError.getLineNumber()+ ", uri " + parseError.getSystemId();
        System.out.println(location);
        System.out.println(" " + parseError.getMessage() );

        // Prefer the wrapped exception's stack trace when there is one.
        Exception wrapped = parseError.getException();
        Exception toReport = (wrapped == null) ? parseError : wrapped;
        toReport.printStackTrace();
    } catch (SAXException saxError) {
        // Raised by the application's handler code or during parser initialisation.
        Exception wrapped = saxError.getException();
        Exception toReport = (wrapped == null) ? saxError : wrapped;
        toReport.printStackTrace();
    } catch (ParserConfigurationException configError) {
        // The factory could not build a parser with the requested options.
        configError.printStackTrace();
    } catch (IOException ioError) {
        // Failure while reading the input or setting up the writer.
        ioError.printStackTrace();
    }
}
// XML document with internal DTD subset (partial)
<?xml version='1.0' encoding='ISO-8859-1' standalone='no'?>
<!DOCTYPE XML_PRODUCT_GROUP [<!ELEMENT XML_PRODUCT_GROUP (LANGUAGE, TEXT_DATA*)>
<!-- LANGUAGE: exactly one of each of the five children below, in this order. -->
<!ELEMENT LANGUAGE (ORIGINAL_LANGUAGE, TRANSLATION_LANGUAGE, VERSION_NO, ORIGINAL_LANGUAGE_ID, TRANSLATION_LANGUAGE_ID)>
<!ELEMENT ORIGINAL_LANGUAGE (#PCDATA)>
<!ELEMENT TRANSLATION_LANGUAGE (#PCDATA)>
<!ELEMENT VERSION_NO (#PCDATA)>
<!ELEMENT ORIGINAL_LANGUAGE_ID (#PCDATA)>
<!ELEMENT TRANSLATION_LANGUAGE_ID (#PCDATA)>
<!-- TEXT_DATA: one PRODUCT_ID, followed by zero or more of each listed group, in the order given. -->
<!ELEMENT TEXT_DATA (PRODUCT_ID, TECH_SPEC_GRP_FOOTNOTE*, TECH_SPEC_GRP_INFO*, TECH_SPEC_INFO*, TECH_SPEC_FOOTNOTE*, TECH_SPEC_DATA_FOOTNOTE*, TECH_SPEC_DATA_INFO*, CAT_DATA*, DATA*, FOOTNOTE*, OPEN_META*)>
<!ELEMENT PRODUCT_ID (#PCDATA)>
<!-- Each group below is a fixed sequence of required children; every child is a (#PCDATA) leaf holding character data only. -->
<!ELEMENT TECH_SPEC_GRP_FOOTNOTE (TECH_SPEC_GRP_FNOTE_ID, TECH_SPEC_GRP_FNOTE)>
<!ELEMENT TECH_SPEC_GRP_FNOTE_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_INFO (TECH_SPEC_GRP_ID, HEADING, LONG_DESCRIPTION)>
<!ELEMENT TECH_SPEC_GRP_ID (#PCDATA)>
<!ELEMENT HEADING (#PCDATA)>
<!ELEMENT LONG_DESCRIPTION (#PCDATA)>
<!ELEMENT TECH_SPEC_INFO (TECH_SPEC_ID, TECH_SPEC_NAME)>
<!ELEMENT TECH_SPEC_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_NAME (#PCDATA)>
<!ELEMENT TECH_SPEC_FOOTNOTE (TECH_SPEC_FNOTEID, TECH_SPEC_FNOTE)>
<!ELEMENT TECH_SPEC_FNOTEID (#PCDATA)>
<!ELEMENT TECH_SPEC_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_FOOTNOTE (TECH_SPEC_DATA_FNOTE_ID, TECH_SPEC_DATA_FNOTE)>
<!ELEMENT TECH_SPEC_DATA_FNOTE_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_INFO (TECH_SPEC_DATA_ID, TECH_SPEC_DATA_VALUETEXT)>
<!ELEMENT TECH_SPEC_DATA_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_VALUETEXT (#PCDATA)>
<!ELEMENT CAT_DATA (CAT_ARG_ID, CAT_ARG_NAME, CAT_ARG_NOTE)>
<!ELEMENT CAT_ARG_ID (#PCDATA)>
<!ELEMENT CAT_ARG_NAME (#PCDATA)>
<!ELEMENT CAT_ARG_NOTE (#PCDATA)>
<!ELEMENT DATA (DATA_ID, ATTRIBUTE_ID, TEXT_VALUE, SORT_ORDER)>
<!ELEMENT DATA_ID (#PCDATA)>
<!ELEMENT ATTRIBUTE_ID (#PCDATA)>
<!ELEMENT TEXT_VALUE (#PCDATA)>
<!ELEMENT SORT_ORDER (#PCDATA)>
<!ELEMENT FOOTNOTE (FOOT_DATA_ID, FOOT_ATTRIBUTE_ID, FOOT_TEXT_VALUE, FOOT_SORT_ORDER)>
<!ELEMENT FOOT_DATA_ID (#PCDATA)>
<!ELEMENT FOOT_ATTRIBUTE_ID (#PCDATA)>
<!ELEMENT FOOT_TEXT_VALUE (#PCDATA)>
<!ELEMENT FOOT_SORT_ORDER (#PCDATA)>
<!ELEMENT OPEN_META (OPEN_META_ID, OPEN_META_VALUE)>
<!ELEMENT OPEN_META_ID (#PCDATA)>
<!ELEMENT OPEN_META_VALUE (#PCDATA)>
<!ENTITY nbsp "&#160;">
<!-- no-break space (U+00A0); given as a numeric character reference so the invisible character cannot be mistaken for, or silently replaced by, an ordinary space -->
<!ENTITY iexcl "¡">
<!-- inverted exclamation mark -->
<!ENTITY cent "¢">
<!-- cent sign -->
<!ENTITY pound "£">
<!-- pound sterling sign -->
<!ENTITY curren "¤">
<!-- general currency sign -->
<!ENTITY yen "¥">
<!-- yen sign -->
<!ENTITY brvbar "¦">
<!-- broken (vertical) bar -->
<!ENTITY sect "§">
<!-- section sign -->
<!ENTITY uml "¨">
<!-- umlaut (dieresis) -->
<!ENTITY copy "©">
<!-- copyright sign -->
<!ENTITY ordf "ª">
<!-- ordinal indicator, feminine -->
<!ENTITY laquo "«">
<!-- angle quotation mark, left -->
<!ENTITY not "¬">
<!-- not sign -->
<!ENTITY shy "&#173;">
<!-- soft hyphen (U+00AD); given as a numeric character reference because the character is invisible and an empty value would make the entity expand to nothing -->
<!ENTITY reg "®">
<!-- registered sign -->
<!ENTITY macr "¯">
<!-- macron -->
<!ENTITY deg "°">
<!-- degree sign -->
<!ENTITY plusmn "±">
<!-- plus-or-minus sign -->
<!ENTITY sup2 "²">
<!-- superscript two -->
<!ENTITY sup3 "³">
<!-- superscript three -->
<!ENTITY acute "´">
<!-- acute accent -->
<!ENTITY micro "µ">
<!-- micro sign -->
<!ENTITY para "¶">
<!-- pilcrow (paragraph sign) -->
<!ENTITY middot "·">
<!-- middle dot -->
<!ENTITY cedil "¸">
<!-- cedilla -->
<!ENTITY sup1 "¹">
<!-- superscript one -->
<!ENTITY ordm "º">
<!-- ordinal indicator, masculine -->
<!ENTITY raquo "»">
<!-- angle quotation mark, right -->
<!ENTITY frac14 "¼">
<!-- fraction one-quarter -->
<!ENTITY frac12 "½">
<!-- fraction one-half -->
<!ENTITY frac34 "¾">
<!-- fraction three-quarters -->
<!ENTITY iquest "¿">
<!-- inverted question mark -->
<!ENTITY Agrave "À">
<!-- capital A, grave accent -->
<!ENTITY Aacute "Á">
<!-- capital A, acute accent -->
<!ENTITY Acirc "Â">
<!-- capital A, circumflex accent -->
<!ENTITY Atilde "Ã">
<!-- capital A, tilde -->
<!ENTITY Auml "Ä">
<!-- capital A, dieresis or umlaut mark -->
<!ENTITY Aring "Å">
<!-- capital A, ring -->
<!ENTITY AElig "Æ">
<!-- capital AE diphthong (ligature) -->
<!ENTITY Ccedil "Ç">
<!-- capital C, cedilla -->
<!ENTITY Egrave "È">
<!-- capital E, grave accent -->
<!ENTITY Eacute "É">
<!-- capital E, acute accent -->
<!ENTITY Ecirc "Ê">
<!-- capital E, circumflex accent -->
<!ENTITY Euml "Ë">
<!-- capital E, dieresis or umlaut mark -->
<!ENTITY Igrave "Ì">
<!-- capital I, grave accent -->
<!ENTITY Iacute "Í">
<!-- capital I, acute accent -->
<!ENTITY Icirc "Î">
<!-- capital I, circumflex accent -->
<!ENTITY Iuml "Ï">
<!-- capital I, dieresis or umlaut mark -->
<!ENTITY ETH "Ð">
<!-- capital Eth, Icelandic -->
<!ENTITY Ntilde "Ñ">
<!-- capital N, tilde -->
<!ENTITY Ograve "Ò">
<!-- capital O, grave accent -->
<!ENTITY Oacute "Ó">
<!-- capital O, acute accent -->
<!ENTITY Ocirc "Ô">
<!-- capital O, circumflex accent -->
<!ENTITY Otilde "Õ">
<!-- capital O, tilde -->
<!ENTITY Ouml "Ö">
<!-- capital O, dieresis or umlaut mark -->
<!ENTITY times "×">
<!-- multiply sign -->
<!ENTITY Oslash "Ø">
<!-- capital O, slash -->
<!ENTITY Ugrave "Ù">
<!-- capital U, grave accent -->
<!ENTITY Uacute "Ú">
<!-- capital U, acute accent -->
<!ENTITY Ucirc "Û">
<!-- capital U, circumflex accent -->
<!ENTITY Uuml "Ü">
<!-- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute "Ý">
<!-- capital Y, acute accent -->
<!ENTITY THORN "Þ">
<!-- capital THORN, Icelandic -->
<!ENTITY szlig "ß">
<!-- small sharp s, German (sz ligature) -->
<!ENTITY agrave "à">
<!-- small a, grave accent -->
<!ENTITY aacute "á">
<!-- small a, acute accent -->
<!ENTITY acirc "â">
<!-- small a, circumflex accent -->
<!ENTITY atilde "ã">
<!-- small a, tilde -->
<!ENTITY auml "ä">
<!-- small a, dieresis or umlaut mark -->
<!ENTITY aring "å">
<!-- small a, ring -->
<!ENTITY aelig "æ">
<!-- small ae diphthong (ligature) -->
<!ENTITY ccedil "ç">
<!-- small c, cedilla -->
<!ENTITY egrave "è">
<!-- small e, grave accent -->
<!ENTITY eacute "é">
<!-- small e, acute accent -->
<!ENTITY ecirc "ê">
<!-- small e, circumflex accent -->
<!ENTITY euml "ë">
<!-- small e, dieresis or umlaut mark -->
<!ENTITY igrave "ì">
<!-- small i, grave accent -->
<!ENTITY iacute "í">
<!-- small i, acute accent -->
<!ENTITY icirc "î">
<!-- small i, circumflex accent -->
<!ENTITY iuml "ï">
<!-- small i, dieresis or umlaut mark -->
<!ENTITY eth "ð">
<!-- small eth, Icelandic -->
<!ENTITY ntilde "ñ">
<!-- small n, tilde -->
<!ENTITY ograve "ò">
<!-- small o, grave accent -->
<!ENTITY oacute "ó">
<!-- small o, acute accent -->
<!ENTITY ocirc "ô">
<!-- small o, circumflex accent -->
<!ENTITY otilde "õ">
<!-- small o, tilde -->
<!ENTITY ouml "ö">
<!-- small o, dieresis or umlaut mark -->
<!ENTITY divide "÷">
<!-- divide sign -->
<!ENTITY oslash "ø">
<!-- small o, slash -->
<!ENTITY ugrave "ù">
<!-- small u, grave accent -->
<!ENTITY uacute "ú">
<!-- small u, acute accent -->
<!ENTITY ucirc "û">
<!-- small u, circumflex accent -->
<!ENTITY uuml "ü">
<!-- small u, dieresis or umlaut mark -->
<!ENTITY yacute "ý">
<!-- small y, acute accent -->
<!ENTITY thorn "þ">
<!-- small thorn, Icelandic -->
<!ENTITY yuml "ÿ">
<!-- small y, dieresis or umlaut mark -->
<!ENTITY bull "·">
<!-- bullet -->
<!ENTITY trade "(tm)">
<!--trade mark-->
<!ENTITY ndash "-">
<!--En dash -->
<!ENTITY ldquo "&quot;">
<!-- left double quotation mark, approximated by the ASCII double quote; the quote is escaped because a bare " cannot appear inside a "-delimited entity value -->
<!ENTITY rdquo "&quot;">
<!-- right double quotation mark, approximated by the ASCII double quote; escaped for the same reason -->
<!ENTITY lsquo "'">
<!--EM left sing quote -->
<!ENTITY rsquo "'">
<!--EM right sing quote -->
<!ENTITY hellp "...">
<!-- horizontal ellipsis, approximated by three ASCII full stops; name kept as-is for documents that already reference it -->
<!ENTITY hellip "...">
<!-- same replacement text under the conventional entity name "hellip" -->
<!ENTITY mdash "-">
<!--EM dash -->
]>
<XML_PRODUCT_GROUP>
<LANGUAGE>
<ORIGINAL_LANGUAGE>English</ORIGINAL_LANGUAGE>
<TRANSLATION_LANGUAGE>Portugese</TRANSLATION_LANGUAGE>
<VERSION_NO>5</VERSION_NO>
<ORIGINAL_LANGUAGE_ID>eng</ORIGINAL_LANGUAGE_ID>
<TRANSLATION_LANGUAGE_ID>por</TRANSLATION_LANGUAGE_ID>
</LANGUAGE>
|