[Update] XMLEventWriter and numeric character references
From XMLStreamWriterImpl:
// Scan the content one char (UTF-16 code unit) at a time. Chars that need no
// escaping are not written immediately; they stay pending as a run that
// begins at startWritePos.
for (int index = 0; index < end; index++) {
char ch = content.charAt(index);
// The encoder is asked about a single char, never about a pair.
// NOTE(review): if fEncoder is a java.nio.charset.CharsetEncoder, canEncode(char)
// is documented to return false for any lone surrogate, so each half of a
// surrogate pair would be escaped on its own -- confirm against the field's type.
if (fEncoder != null && !fEncoder.canEncode(ch)){
// Flush the pending run that precedes the char being escaped.
fWriter.write(content, startWritePos, index - startWritePos );
// Escape this char as underlying encoder cannot handle it
fWriter.write( "&#x" );
// Hex value of the single char, not of a combined code point.
fWriter.write(Integer.toHexString(ch));
fWriter.write( ';' );
// The next pending run starts right after the escaped char.
startWritePos = index + 1;
continue;
}
// (Excerpt ends here; the remainder of the loop body is not shown.)
So, short of writing a custom XMLStreamWriter, I can add a clause to my processor
that re-emits a surrogate pair as a single numeric character reference:
while (eventReader.hasNext()) {
XMLEvent e = eventReader.nextEvent();
if (e.isCharacters() && e.asCharacters().getData().length() == 2) {
if (Character.isHighSurrogate(e.asCharacters().getData().charAt(0))
&&
Character.isLowSurrogate(e.asCharacters().getData().charAt(1))) {
int cp = Character.toCodePoint(e.asCharacters().getData().charAt(0),
e.asCharacters().getData().charAt(1));
eventWriter.add(eventFactory.createEntityReference("#x"
+ Integer.toHexString(cp).toUpperCase(), null));
} else
eventWriter.add(e);
} else
eventWriter.add(e);
}