1
2
3
4
5
6
7
8 package org.dom4j.io;
9
10 import java.io.InputStream;
11 import java.io.Reader;
12 import java.util.Iterator;
13
14 import javax.xml.namespace.QName;
15 import javax.xml.stream.XMLEventReader;
16 import javax.xml.stream.XMLInputFactory;
17 import javax.xml.stream.XMLStreamConstants;
18 import javax.xml.stream.XMLStreamException;
19 import javax.xml.stream.events.Attribute;
20 import javax.xml.stream.events.Characters;
21 import javax.xml.stream.events.Comment;
22 import javax.xml.stream.events.EndElement;
23 import javax.xml.stream.events.EntityReference;
24 import javax.xml.stream.events.Namespace;
25 import javax.xml.stream.events.ProcessingInstruction;
26 import javax.xml.stream.events.StartDocument;
27 import javax.xml.stream.events.StartElement;
28 import javax.xml.stream.events.XMLEvent;
29
30 import org.dom4j.CharacterData;
31 import org.dom4j.Document;
32 import org.dom4j.DocumentFactory;
33 import org.dom4j.Element;
34 import org.dom4j.Entity;
35 import org.dom4j.Node;
36
37 /***
38 * Reads a DOM4J {@link Document}, as well as other {@link Node}s, from a StAX
39 * {@link XMLEventReader}.
40 *
41 * @author Christian Niles
42 */
43 public class STAXEventReader {
44 /*** Reference to the DocumentFactory used to build DOM4J nodes. */
45 private DocumentFactory factory;
46
47 /*** A StAX input factory, used to construct streams from IO streams. */
48 private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
49
50 /***
51 * Constructs a default <code>STAXEventReader</code> instance with a
52 * default {@link DocumentFactory}.
53 */
54 public STAXEventReader() {
55 this.factory = DocumentFactory.getInstance();
56 }
57
58 /***
59 * Constructs a <code>STAXEventReader</code> instance that uses the
60 * specified {@link DocumentFactory}to construct DOM4J {@link Node}s.
61 *
62 * @param factory
63 * The DocumentFactory to use when constructing DOM4J nodes, or
64 * <code>null</code> if a default should be used.
65 */
66 public STAXEventReader(DocumentFactory factory) {
67 if (factory != null) {
68 this.factory = factory;
69 } else {
70 this.factory = DocumentFactory.getInstance();
71 }
72 }
73
74 /***
75 * Sets the DocumentFactory to be used when constructing DOM4J nodes.
76 *
77 * @param documentFactory
78 * The DocumentFactory to use when constructing DOM4J nodes, or
79 * <code>null</code> if a default should be used.
80 */
81 public void setDocumentFactory(DocumentFactory documentFactory) {
82 if (documentFactory != null) {
83 this.factory = documentFactory;
84 } else {
85 this.factory = DocumentFactory.getInstance();
86 }
87 }
88
89 /***
90 * Constructs a StAX event stream from the provided I/O stream and reads a
91 * DOM4J document from it.
92 *
93 * @param is
94 * The I/O stream from which the Document will be read.
95 *
96 * @return The Document that was read from the stream.
97 *
98 * @throws XMLStreamException
99 * If an error occurs reading content from the stream.
100 */
101 public Document readDocument(InputStream is) throws XMLStreamException {
102 return readDocument(is, null);
103 }
104
105 /***
106 * Constructs a StAX event stream from the provided I/O character stream and
107 * reads a DOM4J document from it.
108 *
109 * @param reader
110 * The character stream from which the Document will be read.
111 *
112 * @return The Document that was read from the stream.
113 *
114 * @throws XMLStreamException
115 * If an error occurs reading content from the stream.
116 */
117 public Document readDocument(Reader reader) throws XMLStreamException {
118 return readDocument(reader, null);
119 }
120
121 /***
122 * Constructs a StAX event stream from the provided I/O stream and reads a
123 * DOM4J document from it.
124 *
125 * @param is
126 * The I/O stream from which the Document will be read.
127 * @param systemId
128 * A system id used to resolve entities.
129 *
130 * @return The Document that was read from the stream.
131 *
132 * @throws XMLStreamException
133 * If an error occurs reading content from the stream.
134 */
135 public Document readDocument(InputStream is, String systemId)
136 throws XMLStreamException {
137 XMLEventReader eventReader = inputFactory.createXMLEventReader(
138 systemId, is);
139
140 try {
141 return readDocument(eventReader);
142 } finally {
143 eventReader.close();
144 }
145 }
146
147 /***
148 * Constructs a StAX event stream from the provided I/O character stream and
149 * reads a DOM4J document from it.
150 *
151 * @param reader
152 * The character stream from which the Document will be read.
153 * @param systemId
154 * A system id used to resolve entities.
155 *
156 * @return The Document that was read from the stream.
157 *
158 * @throws XMLStreamException
159 * If an error occurs reading content from the stream.
160 */
161 public Document readDocument(Reader reader, String systemId)
162 throws XMLStreamException {
163 XMLEventReader eventReader = inputFactory.createXMLEventReader(
164 systemId, reader);
165
166 try {
167 return readDocument(eventReader);
168 } finally {
169 eventReader.close();
170 }
171 }
172
173 /***
174 * Reads a {@link Node}from the event stream. If the next event is a
175 * {@link StartElement}, all events until the closing {@link EndElement}
176 * will be read, and the resulting nodes will be added to the returned
177 * {@link Element}.
178 *
179 * <p>
180 * <strong>Pre-Conditions </strong>: The stream must be positioned before an
181 * event other than an <code>EndElement</code>,<code>EndDocument</code>,
182 * or any DTD-related events, which are not currently supported.
183 * </p>
184 *
185 * @param reader
186 * The reader from which events will be read.
187 *
188 * @return A DOM4J {@link Node}constructed from the read events.
189 *
190 * @throws XMLStreamException
191 * If an error occurs reading from the stream, or the stream was
192 * positioned before an unsupported event.
193 */
194 public Node readNode(XMLEventReader reader) throws XMLStreamException {
195 XMLEvent event = reader.peek();
196
197 if (event.isStartElement()) {
198 return readElement(reader);
199 } else if (event.isCharacters()) {
200 return readCharacters(reader);
201 } else if (event.isStartDocument()) {
202 return readDocument(reader);
203 } else if (event.isProcessingInstruction()) {
204 return readProcessingInstruction(reader);
205 } else if (event.isEntityReference()) {
206 return readEntityReference(reader);
207 } else if (event.isAttribute()) {
208 return readAttribute(reader);
209 } else if (event.isNamespace()) {
210 return readNamespace(reader);
211 } else {
212 throw new XMLStreamException("Unsupported event: " + event);
213 }
214 }
215
216 /***
217 * Reads a DOM4J {@link Document}from the provided stream. The stream
218 * should be positioned at the start of a document, or before a {@link
219 * StartElement} event.
220 *
221 * @param reader
222 * The event stream from which to read the {@link Document}.
223 *
224 * @return The {@link Document}that was read from the stream.
225 *
226 * @throws XMLStreamException
227 * If an error occurs reading events from the stream.
228 */
229 public Document readDocument(XMLEventReader reader)
230 throws XMLStreamException {
231 Document doc = null;
232
233 while (reader.hasNext()) {
234 XMLEvent nextEvent = reader.peek();
235 int type = nextEvent.getEventType();
236
237 switch (type) {
238 case XMLStreamConstants.START_DOCUMENT:
239
240 StartDocument event = (StartDocument) reader.nextEvent();
241
242 if (doc == null) {
243
244 if (event.encodingSet()) {
245 String encodingScheme = event
246 .getCharacterEncodingScheme();
247 doc = factory.createDocument(encodingScheme);
248 } else {
249 doc = factory.createDocument();
250 }
251 } else {
252
253 String msg = "Unexpected StartDocument event";
254 throw new XMLStreamException(msg, event.getLocation());
255 }
256
257 break;
258
259 case XMLStreamConstants.END_DOCUMENT:
260 case XMLStreamConstants.SPACE:
261 case XMLStreamConstants.CHARACTERS:
262
263
264 reader.nextEvent();
265
266 break;
267
268 default:
269
270 if (doc == null) {
271
272 doc = factory.createDocument();
273 }
274
275 Node n = readNode(reader);
276 doc.add(n);
277 }
278 }
279
280 return doc;
281 }
282
283 /***
284 * Reads a DOM4J Element from the provided event stream. The stream must be
285 * positioned before an {@link StartElement}event. In addition to the
286 * initial start event, all events up to and including the closing {@link
287 * EndElement} will be read, and included with the returned element.
288 *
289 * @param eventReader
290 * The event stream from which to read the Element.
291 *
292 * @return The Element that was read from the stream.
293 *
294 * @throws XMLStreamException
295 * If an error occured reading events from the stream, or the
296 * stream was not positioned before a {@linkStartElement}event.
297 */
298 public Element readElement(XMLEventReader eventReader)
299 throws XMLStreamException {
300 XMLEvent event = eventReader.peek();
301
302 if (event.isStartElement()) {
303
304 StartElement startTag = eventReader.nextEvent().asStartElement();
305 Element elem = createElement(startTag);
306
307
308 while (true) {
309 if (!eventReader.hasNext()) {
310 String msg = "Unexpected end of stream while reading"
311 + " element content";
312 throw new XMLStreamException(msg);
313 }
314
315 XMLEvent nextEvent = eventReader.peek();
316
317 if (nextEvent.isEndElement()) {
318 EndElement endElem = eventReader.nextEvent().asEndElement();
319
320 if (!endElem.getName().equals(startTag.getName())) {
321 throw new XMLStreamException("Expected "
322 + startTag.getName() + " end-tag, but found"
323 + endElem.getName());
324 }
325
326 break;
327 }
328
329 Node child = readNode(eventReader);
330 elem.add(child);
331 }
332
333 return elem;
334 } else {
335 throw new XMLStreamException("Expected Element event, found: "
336 + event);
337 }
338 }
339
340 /***
341 * Constructs a DOM4J Attribute from the provided event stream. The stream
342 * must be positioned before an {@link Attribute}event.
343 *
344 * @param reader
345 * The event stream from which to read the Attribute.
346 *
347 * @return The Attribute that was read from the stream.
348 *
349 * @throws XMLStreamException
350 * If an error occured reading events from the stream, or the
351 * stream was not positioned before an {@linkAttribute}event.
352 */
353 public org.dom4j.Attribute readAttribute(XMLEventReader reader)
354 throws XMLStreamException {
355 XMLEvent event = reader.peek();
356
357 if (event.isAttribute()) {
358 Attribute attr = (Attribute) reader.nextEvent();
359
360 return createAttribute(null, attr);
361 } else {
362 throw new XMLStreamException("Expected Attribute event, found: "
363 + event);
364 }
365 }
366
367 /***
368 * Constructs a DOM4J Namespace from the provided event stream. The stream
369 * must be positioned before a {@link Namespace}event.
370 *
371 * @param reader
372 * The event stream from which to read the Namespace.
373 *
374 * @return The Namespace that was read from the stream.
375 *
376 * @throws XMLStreamException
377 * If an error occured reading events from the stream, or the
378 * stream was not positioned before a {@linkNamespace}event.
379 */
380 public org.dom4j.Namespace readNamespace(XMLEventReader reader)
381 throws XMLStreamException {
382 XMLEvent event = reader.peek();
383
384 if (event.isNamespace()) {
385 Namespace ns = (Namespace) reader.nextEvent();
386
387 return createNamespace(ns);
388 } else {
389 throw new XMLStreamException("Expected Namespace event, found: "
390 + event);
391 }
392 }
393
394 /***
395 * Constructs a DOM4J Text or CDATA section from the provided event stream.
396 * The stream must be positioned before a {@link Characters}event.
397 *
398 * @param reader
399 * The event stream from which to read the Text or CDATA.
400 *
401 * @return The Text or CDATA that was read from the stream.
402 *
403 * @throws XMLStreamException
404 * If an error occured reading events from the stream, or the
405 * stream was not positioned before a {@linkCharacters}event.
406 */
407 public CharacterData readCharacters(XMLEventReader reader)
408 throws XMLStreamException {
409 XMLEvent event = reader.peek();
410
411 if (event.isCharacters()) {
412 Characters characters = reader.nextEvent().asCharacters();
413
414 return createCharacterData(characters);
415 } else {
416 throw new XMLStreamException("Expected Characters event, found: "
417 + event);
418 }
419 }
420
421 /***
422 * Constructs a DOM4J Comment from the provided event stream. The stream
423 * must be positioned before a {@link Comment}event.
424 *
425 * @param reader
426 * The event stream from which to read the Comment.
427 *
428 * @return The Comment that was read from the stream.
429 *
430 * @throws XMLStreamException
431 * If an error occured reading events from the stream, or the
432 * stream was not positioned before a {@linkComment}event.
433 */
434 public org.dom4j.Comment readComment(XMLEventReader reader)
435 throws XMLStreamException {
436 XMLEvent event = reader.peek();
437
438 if (event instanceof Comment) {
439 return createComment((Comment) reader.nextEvent());
440 } else {
441 throw new XMLStreamException("Expected Comment event, found: "
442 + event);
443 }
444 }
445
446 /***
447 * Constructs a DOM4J Entity from the provided event stream. The stream must
448 * be positioned before an {@link EntityReference}event.
449 *
450 * @param reader
451 * The event stream from which to read the {@link
452 * EntityReference}.
453 *
454 * @return The {@link org.dom4j.Entity}that was read from the stream.
455 *
456 * @throws XMLStreamException
457 * If an error occured reading events from the stream, or the
458 * stream was not positioned before an {@linkEntityReference}
459 * event.
460 */
461 public Entity readEntityReference(XMLEventReader reader)
462 throws XMLStreamException {
463 XMLEvent event = reader.peek();
464
465 if (event.isEntityReference()) {
466 EntityReference entityRef = (EntityReference) reader.nextEvent();
467
468 return createEntity(entityRef);
469 } else {
470 throw new XMLStreamException("Expected EntityRef event, found: "
471 + event);
472 }
473 }
474
475 /***
476 * Constructs a DOM4J ProcessingInstruction from the provided event stream.
477 * The stream must be positioned before a {@link ProcessingInstruction}
478 * event.
479 *
480 * @param reader
481 * The event stream from which to read the ProcessingInstruction.
482 *
483 * @return The ProcessingInstruction that was read from the stream.
484 *
485 * @throws XMLStreamException
486 * If an error occured reading events from the stream, or the
487 * stream was not positioned before a {@link
488 * ProcessingInstruction} event.
489 */
490 public org.dom4j.ProcessingInstruction readProcessingInstruction(
491 XMLEventReader reader) throws XMLStreamException {
492 XMLEvent event = reader.peek();
493
494 if (event.isProcessingInstruction()) {
495 ProcessingInstruction pi = (ProcessingInstruction) reader
496 .nextEvent();
497
498 return createProcessingInstruction(pi);
499 } else {
500 throw new XMLStreamException("Expected PI event, found: " + event);
501 }
502 }
503
504 /***
505 * Constructs a new DOM4J Element from the provided StartElement event. All
506 * attributes and namespaces will be added to the returned element.
507 *
508 * @param startEvent
509 * The StartElement event from which to construct the new DOM4J
510 * Element.
511 *
512 * @return The Element constructed from the provided StartElement event.
513 */
514 public Element createElement(StartElement startEvent) {
515 QName qname = startEvent.getName();
516 org.dom4j.QName elemName = createQName(qname);
517
518 Element elem = factory.createElement(elemName);
519
520
521 for (Iterator i = startEvent.getAttributes(); i.hasNext();) {
522 Attribute attr = (Attribute) i.next();
523 elem.addAttribute(createQName(attr.getName()), attr.getValue());
524 }
525
526
527 for (Iterator i = startEvent.getNamespaces(); i.hasNext();) {
528 Namespace ns = (Namespace) i.next();
529 elem.addNamespace(ns.getPrefix(), ns.getNamespaceURI());
530 }
531
532 return elem;
533 }
534
535 /***
536 * Constructs a new DOM4J Attribute from the provided StAX Attribute event.
537 *
538 * @param elem
539 * DOCUMENT ME!
540 * @param attr
541 * The Attribute event from which to construct the new DOM4J
542 * Attribute.
543 *
544 * @return The Attribute constructed from the provided Attribute event.
545 */
546 public org.dom4j.Attribute createAttribute(Element elem, Attribute attr) {
547 return factory.createAttribute(elem, createQName(attr.getName()), attr
548 .getValue());
549 }
550
551 /***
552 * Constructs a new DOM4J Namespace from the provided StAX Namespace event.
553 *
554 * @param ns
555 * The Namespace event from which to construct the new DOM4J
556 * Namespace.
557 *
558 * @return The Namespace constructed from the provided Namespace event.
559 */
560 public org.dom4j.Namespace createNamespace(Namespace ns) {
561 return factory.createNamespace(ns.getPrefix(), ns.getNamespaceURI());
562 }
563
564 /***
565 * Constructs a new DOM4J Text or CDATA object from the provided Characters
566 * event.
567 *
568 * @param characters
569 * The Characters event from which to construct the new DOM4J
570 * Text or CDATA object.
571 *
572 * @return The Text or CDATA object constructed from the provided Characters
573 * event.
574 */
575 public CharacterData createCharacterData(Characters characters) {
576 String data = characters.getData();
577
578 if (characters.isCData()) {
579 return factory.createCDATA(data);
580 } else {
581 return factory.createText(data);
582 }
583 }
584
585 /***
586 * Constructs a new DOM4J Comment from the provided StAX Comment event.
587 *
588 * @param comment
589 * The Comment event from which to construct the new DOM4J
590 * Comment.
591 *
592 * @return The Comment constructed from the provided Comment event.
593 */
594 public org.dom4j.Comment createComment(Comment comment) {
595 return factory.createComment(comment.getText());
596 }
597
598 /***
599 * Constructs a new DOM4J Entity from the provided StAX EntityReference
600 * event.
601 *
602 * @param entityRef
603 * The EntityReference event from which to construct the new
604 * DOM4J Entity.
605 *
606 * @return The Entity constructed from the provided EntityReference event.
607 */
608 public org.dom4j.Entity createEntity(EntityReference entityRef) {
609 return factory.createEntity(entityRef.getName(), entityRef
610 .getDeclaration().getReplacementText());
611 }
612
613 /***
614 * Constructs a new DOM4J ProcessingInstruction from the provided StAX
615 * ProcessingInstruction event.
616 *
617 * @param pi
618 * The ProcessingInstruction event from which to construct the
619 * new DOM4J ProcessingInstruction.
620 *
621 * @return The ProcessingInstruction constructed from the provided
622 * ProcessingInstruction event.
623 */
624 public org.dom4j.ProcessingInstruction createProcessingInstruction(
625 ProcessingInstruction pi) {
626 return factory
627 .createProcessingInstruction(pi.getTarget(), pi.getData());
628 }
629
630 /***
631 * Constructs a new DOM4J QName from the provided JAXP QName.
632 *
633 * @param qname
634 * The JAXP QName from which to create a DOM4J QName.
635 *
636 * @return The newly constructed DOM4J QName.
637 */
638 public org.dom4j.QName createQName(QName qname) {
639 return factory.createQName(qname.getLocalPart(), qname.getPrefix(),
640 qname.getNamespaceURI());
641 }
642 }
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679