OASIS Mailing List ArchivesView the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

 


 

   sax and entities

[ Lists Home | Date Index | Thread Index ]
  • To: <xml-dev@lists.xml.org>
  • Subject: sax and entities
  • From: "Scott Purcell" <spurcell@vertisinc.com>
  • Date: Fri, 20 Jun 2003 07:52:31 -0500
  • Thread-index: AcM3KtAPaa5szJq8Qjq/obq7LgL2sw==
  • Thread-topic: sax and entities

Hello,
I am using the org.xml.sax classes and extending the DefaultHandler. See code below.

I am parsing the XML file shown below the class; it contains entity references, e.g. &reg;.
The internal DTD declares entities so that &reg; should be replaced by its equivalent character. But instead of parsing, I get the following error:
exception: org.xml.sax.SAXParseException: invalid Character Entitiy

Has anyone had this issue with entities? Maybe I am handling this improperly?

Thanks,
Scott

// code
/**
 * Builds a parser object for the given file and immediately runs a
 * validating SAX parse over it.
 *
 * A second instance, created through the single-argument constructor
 * (not shown in this excerpt), is handed to the SAX parser as the
 * content handler. Parser, configuration and I/O failures are reported
 * on the console and are not rethrown.
 *
 * @param filename path of the XML file to parse
 * @param db_id    value passed on to the handler instance's constructor
 * @throws Exception declared by the signature; the failures caught below
 *                   are printed rather than propagated
 */
public xmlProductUploadParser(String filename, String db_id) throws Exception
{
    System.out.println("FOO2");
    this.filename = filename;

    // The handler that receives the SAX callbacks is a separate instance.
    DefaultHandler saxHandler = new xmlProductUploadParser(db_id);

    // Ask the factory for a validating parser.
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setValidating(true);
    System.out.println("FOO3");

    try {
        // Writer over standard output, stored on the instance.
        out = new OutputStreamWriter(System.out, "UTF8");

        // Run the parse.
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse(new File(filename), saxHandler);

    } catch (SAXParseException parseError) {
        // Raised by the parser itself: report where it happened.
        String location = "\n** Parsing error" + ", line " + parseError.getLineNumber()
                + ", uri " + parseError.getSystemId();
        System.out.println(location);
        System.out.println(" " + parseError.getMessage());

        // Prefer the wrapped exception when one is present.
        Exception toReport = (parseError.getException() != null)
                ? parseError.getException()
                : parseError;
        toReport.printStackTrace();

    } catch (SAXException saxError) {
        // Raised by the application code or during parser initialisation.
        Exception toReport = (saxError.getException() != null)
                ? saxError.getException()
                : saxError;
        toReport.printStackTrace();

    } catch (ParserConfigurationException configError) {
        // A parser with the requested options could not be built.
        configError.printStackTrace();

    } catch (IOException ioError) {
        // Reading the file (or creating the writer) failed.
        ioError.printStackTrace();
    }
}


// dtd (partial)
<?xml version='1.0' encoding='ISO-8859-1' standalone='no'?>
<!DOCTYPE XML_PRODUCT_GROUP [
<!-- Internal DTD subset for XML_PRODUCT_GROUP documents.
     Part 1 declares the element structure; part 2 declares the general
     entities that document text may reference (for example &reg;). -->

<!-- Part 1: element declarations -->
<!ELEMENT XML_PRODUCT_GROUP (LANGUAGE, TEXT_DATA*)>
<!ELEMENT LANGUAGE (ORIGINAL_LANGUAGE, TRANSLATION_LANGUAGE, VERSION_NO, ORIGINAL_LANGUAGE_ID, TRANSLATION_LANGUAGE_ID)>
<!ELEMENT ORIGINAL_LANGUAGE (#PCDATA)>
<!ELEMENT TRANSLATION_LANGUAGE (#PCDATA)>
<!ELEMENT VERSION_NO (#PCDATA)>
<!ELEMENT ORIGINAL_LANGUAGE_ID (#PCDATA)>
<!ELEMENT TRANSLATION_LANGUAGE_ID (#PCDATA)>
<!ELEMENT TEXT_DATA (PRODUCT_ID, TECH_SPEC_GRP_FOOTNOTE*, TECH_SPEC_GRP_INFO*, TECH_SPEC_INFO*, TECH_SPEC_FOOTNOTE*, TECH_SPEC_DATA_FOOTNOTE*, TECH_SPEC_DATA_INFO*, CAT_DATA*, DATA*, FOOTNOTE*, OPEN_META*)>
<!ELEMENT PRODUCT_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_FOOTNOTE (TECH_SPEC_GRP_FNOTE_ID, TECH_SPEC_GRP_FNOTE)>
<!ELEMENT TECH_SPEC_GRP_FNOTE_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_INFO (TECH_SPEC_GRP_ID, HEADING, LONG_DESCRIPTION)>
<!ELEMENT TECH_SPEC_GRP_ID (#PCDATA)>
<!ELEMENT HEADING (#PCDATA)>
<!ELEMENT LONG_DESCRIPTION (#PCDATA)>
<!ELEMENT TECH_SPEC_INFO (TECH_SPEC_ID, TECH_SPEC_NAME)>
<!ELEMENT TECH_SPEC_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_NAME (#PCDATA)>
<!ELEMENT TECH_SPEC_FOOTNOTE (TECH_SPEC_FNOTEID, TECH_SPEC_FNOTE)>
<!ELEMENT TECH_SPEC_FNOTEID (#PCDATA)>
<!ELEMENT TECH_SPEC_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_FOOTNOTE (TECH_SPEC_DATA_FNOTE_ID, TECH_SPEC_DATA_FNOTE)>
<!ELEMENT TECH_SPEC_DATA_FNOTE_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_INFO (TECH_SPEC_DATA_ID, TECH_SPEC_DATA_VALUETEXT)>
<!ELEMENT TECH_SPEC_DATA_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_VALUETEXT (#PCDATA)>
<!ELEMENT CAT_DATA (CAT_ARG_ID, CAT_ARG_NAME, CAT_ARG_NOTE)>
<!ELEMENT CAT_ARG_ID (#PCDATA)>
<!ELEMENT CAT_ARG_NAME (#PCDATA)>
<!ELEMENT CAT_ARG_NOTE (#PCDATA)>
<!ELEMENT DATA (DATA_ID, ATTRIBUTE_ID, TEXT_VALUE, SORT_ORDER)>
<!ELEMENT DATA_ID (#PCDATA)>
<!ELEMENT ATTRIBUTE_ID (#PCDATA)>
<!ELEMENT TEXT_VALUE (#PCDATA)>
<!ELEMENT SORT_ORDER (#PCDATA)>
<!ELEMENT FOOTNOTE (FOOT_DATA_ID, FOOT_ATTRIBUTE_ID, FOOT_TEXT_VALUE, FOOT_SORT_ORDER)>
<!ELEMENT FOOT_DATA_ID (#PCDATA)>
<!ELEMENT FOOT_ATTRIBUTE_ID (#PCDATA)>
<!ELEMENT FOOT_TEXT_VALUE (#PCDATA)>
<!ELEMENT FOOT_SORT_ORDER (#PCDATA)>
<!ELEMENT OPEN_META (OPEN_META_ID, OPEN_META_VALUE)>
<!ELEMENT OPEN_META_ID (#PCDATA)>
<!ELEMENT OPEN_META_VALUE (#PCDATA)>

<!-- Part 2: general entities.
     Each Latin-1 entity is declared as a numeric character reference to
     its ISO 8859-1 / Unicode code point (160 to 255, the standard Latin-1
     entity set). Numeric references keep the declarations independent of
     the encoding the file is saved in, and guarantee that every entity
     has a non-empty, well-formed replacement value. -->
<!ENTITY nbsp "&#160;">
<!-- no-break space -->
<!ENTITY iexcl "&#161;">
<!-- inverted exclamation mark -->
<!ENTITY cent "&#162;">
<!-- cent sign -->
<!ENTITY pound "&#163;">
<!-- pound sterling sign -->
<!ENTITY curren "&#164;">
<!-- general currency sign -->
<!ENTITY yen "&#165;">
<!-- yen sign -->
<!ENTITY brvbar "&#166;">
<!-- broken (vertical) bar -->
<!ENTITY sect "&#167;">
<!-- section sign -->
<!ENTITY uml "&#168;">
<!-- umlaut (dieresis) -->
<!ENTITY copy "&#169;">
<!-- copyright sign -->
<!ENTITY ordf "&#170;">
<!-- ordinal indicator, feminine -->
<!ENTITY laquo "&#171;">
<!-- angle quotation mark, left -->
<!ENTITY not "&#172;">
<!-- not sign -->
<!ENTITY shy "&#173;">
<!-- soft hyphen -->
<!ENTITY reg "&#174;">
<!-- registered sign -->
<!ENTITY macr "&#175;">
<!-- macron -->
<!ENTITY deg "&#176;">
<!-- degree sign -->
<!ENTITY plusmn "&#177;">
<!-- plus-or-minus sign -->
<!ENTITY sup2 "&#178;">
<!-- superscript two -->
<!ENTITY sup3 "&#179;">
<!-- superscript three -->
<!ENTITY acute "&#180;">
<!-- acute accent -->
<!ENTITY micro "&#181;">
<!-- micro sign -->
<!ENTITY para "&#182;">
<!-- pilcrow (paragraph sign) -->
<!ENTITY middot "&#183;">
<!-- middle dot -->
<!ENTITY cedil "&#184;">
<!-- cedilla -->
<!ENTITY sup1 "&#185;">
<!-- superscript one -->
<!ENTITY ordm "&#186;">
<!-- ordinal indicator, masculine -->
<!ENTITY raquo "&#187;">
<!-- angle quotation mark, right -->
<!ENTITY frac14 "&#188;">
<!-- fraction one-quarter -->
<!ENTITY frac12 "&#189;">
<!-- fraction one-half -->
<!ENTITY frac34 "&#190;">
<!-- fraction three-quarters -->
<!ENTITY iquest "&#191;">
<!-- inverted question mark -->
<!ENTITY Agrave "&#192;">
<!-- capital A, grave accent -->
<!ENTITY Aacute "&#193;">
<!-- capital A, acute accent -->
<!ENTITY Acirc "&#194;">
<!-- capital A, circumflex accent -->
<!ENTITY Atilde "&#195;">
<!-- capital A, tilde -->
<!ENTITY Auml "&#196;">
<!-- capital A, dieresis or umlaut mark -->
<!ENTITY Aring "&#197;">
<!-- capital A, ring -->
<!ENTITY AElig "&#198;">
<!-- capital AE diphthong (ligature) -->
<!ENTITY Ccedil "&#199;">
<!-- capital C, cedilla -->
<!ENTITY Egrave "&#200;">
<!-- capital E, grave accent -->
<!ENTITY Eacute "&#201;">
<!-- capital E, acute accent -->
<!ENTITY Ecirc "&#202;">
<!-- capital E, circumflex accent -->
<!ENTITY Euml "&#203;">
<!-- capital E, dieresis or umlaut mark -->
<!ENTITY Igrave "&#204;">
<!-- capital I, grave accent -->
<!ENTITY Iacute "&#205;">
<!-- capital I, acute accent -->
<!ENTITY Icirc "&#206;">
<!-- capital I, circumflex accent -->
<!ENTITY Iuml "&#207;">
<!-- capital I, dieresis or umlaut mark -->
<!ENTITY ETH "&#208;">
<!-- capital Eth, Icelandic -->
<!ENTITY Ntilde "&#209;">
<!-- capital N, tilde -->
<!ENTITY Ograve "&#210;">
<!-- capital O, grave accent -->
<!ENTITY Oacute "&#211;">
<!-- capital O, acute accent -->
<!ENTITY Ocirc "&#212;">
<!-- capital O, circumflex accent -->
<!ENTITY Otilde "&#213;">
<!-- capital O, tilde -->
<!ENTITY Ouml "&#214;">
<!-- capital O, dieresis or umlaut mark -->
<!ENTITY times "&#215;">
<!-- multiply sign -->
<!ENTITY Oslash "&#216;">
<!-- capital O, slash -->
<!ENTITY Ugrave "&#217;">
<!-- capital U, grave accent -->
<!ENTITY Uacute "&#218;">
<!-- capital U, acute accent -->
<!ENTITY Ucirc "&#219;">
<!-- capital U, circumflex accent -->
<!ENTITY Uuml "&#220;">
<!-- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute "&#221;">
<!-- capital Y, acute accent -->
<!ENTITY THORN "&#222;">
<!-- capital THORN, Icelandic -->
<!ENTITY szlig "&#223;">
<!-- small sharp s, German (sz ligature) -->
<!ENTITY agrave "&#224;">
<!-- small a, grave accent -->
<!ENTITY aacute "&#225;">
<!-- small a, acute accent -->
<!ENTITY acirc "&#226;">
<!-- small a, circumflex accent -->
<!ENTITY atilde "&#227;">
<!-- small a, tilde -->
<!ENTITY auml "&#228;">
<!-- small a, dieresis or umlaut mark -->
<!ENTITY aring "&#229;">
<!-- small a, ring -->
<!ENTITY aelig "&#230;">
<!-- small ae diphthong (ligature) -->
<!ENTITY ccedil "&#231;">
<!-- small c, cedilla -->
<!ENTITY egrave "&#232;">
<!-- small e, grave accent -->
<!ENTITY eacute "&#233;">
<!-- small e, acute accent -->
<!ENTITY ecirc "&#234;">
<!-- small e, circumflex accent -->
<!ENTITY euml "&#235;">
<!-- small e, dieresis or umlaut mark -->
<!ENTITY igrave "&#236;">
<!-- small i, grave accent -->
<!ENTITY iacute "&#237;">
<!-- small i, acute accent -->
<!ENTITY icirc "&#238;">
<!-- small i, circumflex accent -->
<!ENTITY iuml "&#239;">
<!-- small i, dieresis or umlaut mark -->
<!ENTITY eth "&#240;">
<!-- small eth, Icelandic -->
<!ENTITY ntilde "&#241;">
<!-- small n, tilde -->
<!ENTITY ograve "&#242;">
<!-- small o, grave accent -->
<!ENTITY oacute "&#243;">
<!-- small o, acute accent -->
<!ENTITY ocirc "&#244;">
<!-- small o, circumflex accent -->
<!ENTITY otilde "&#245;">
<!-- small o, tilde -->
<!ENTITY ouml "&#246;">
<!-- small o, dieresis or umlaut mark -->
<!ENTITY divide "&#247;">
<!-- divide sign -->
<!ENTITY oslash "&#248;">
<!-- small o, slash -->
<!ENTITY ugrave "&#249;">
<!-- small u, grave accent -->
<!ENTITY uacute "&#250;">
<!-- small u, acute accent -->
<!ENTITY ucirc "&#251;">
<!-- small u, circumflex accent -->
<!ENTITY uuml "&#252;">
<!-- small u, dieresis or umlaut mark -->
<!ENTITY yacute "&#253;">
<!-- small y, acute accent -->
<!ENTITY thorn "&#254;">
<!-- small thorn, Icelandic -->
<!ENTITY yuml "&#255;">
<!-- small y, dieresis or umlaut mark -->
<!ENTITY bull "&#8226;">
<!-- bullet (U+2022; outside Latin-1, so it must stay a numeric reference) -->

<!-- The remaining entities are plain ASCII stand-ins for typographic characters. -->
<!ENTITY trade "(tm)">
<!-- trade mark, ASCII stand-in -->
<!ENTITY ndash "-">
<!-- en dash, ASCII stand-in -->
<!ENTITY ldquo "&#34;">
<!-- left double quote, as a straight quote; written as a character
     reference because a bare quote would terminate the entity value -->
<!ENTITY rdquo "&#34;">
<!-- right double quote, as a straight quote (same reason as ldquo) -->
<!ENTITY lsquo "'">
<!-- left single quote, as a straight apostrophe -->
<!ENTITY rsquo "'">
<!-- right single quote, as a straight apostrophe -->
<!ENTITY hellp "...">
<!-- ellipsis, ASCII stand-in; the name is kept as "hellp" so existing
     references keep resolving (the conventional name is "hellip") -->
<!ENTITY mdash "-">
<!-- em dash, ASCII stand-in -->
]>
<XML_PRODUCT_GROUP>
<LANGUAGE>
<ORIGINAL_LANGUAGE>English</ORIGINAL_LANGUAGE>
<TRANSLATION_LANGUAGE>Portugese</TRANSLATION_LANGUAGE>
<VERSION_NO>5</VERSION_NO>
<ORIGINAL_LANGUAGE_ID>eng</ORIGINAL_LANGUAGE_ID>
<TRANSLATION_LANGUAGE_ID>por</TRANSLATION_LANGUAGE_ID>
</LANGUAGE>









 

News | XML in Industry | Calendar | XML Registry
Marketplace | Resources | MyXML.org | Sponsors | Privacy Statement

Copyright 2001 XML.org. This site is hosted by OASIS