1
2
3
4
5
6
7
8 package org.dom4j.io;
9
10 import java.io.BufferedReader;
11 import java.io.CharArrayReader;
12 import java.io.File;
13 import java.io.FileReader;
14 import java.io.IOException;
15 import java.io.InputStream;
16 import java.io.InputStreamReader;
17 import java.io.Reader;
18 import java.net.URL;
19
20 import org.dom4j.Document;
21 import org.dom4j.DocumentException;
22 import org.dom4j.DocumentFactory;
23 import org.dom4j.Element;
24 import org.dom4j.ElementHandler;
25 import org.dom4j.QName;
26
27 import org.xmlpull.v1.XmlPullParser;
28 import org.xmlpull.v1.XmlPullParserException;
29 import org.xmlpull.v1.XmlPullParserFactory;
30
31 /***
32 * <p>
33 * <code>XPP3Reader</code> is a Reader of DOM4J documents that uses the fast
34 * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 3.x </a>.
35 * It is very fast for use in SOAP style environments.
36 * </p>
37 *
38 * @author <a href="mailto:pelle@neubia.com">Pelle Braendgaard </a>
39 * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
40 * @version $Revision: 1.3 $
41 */
42 public class XPP3Reader {
43 /*** <code>DocumentFactory</code> used to create new document objects */
44 private DocumentFactory factory;
45
46 /*** <code>XmlPullParser</code> used to parse XML */
47 private XmlPullParser xppParser;
48
49 /*** <code>XmlPullParser</code> used to parse XML */
50 private XmlPullParserFactory xppFactory;
51
52 /*** DispatchHandler to call when each <code>Element</code> is encountered */
53 private DispatchHandler dispatchHandler;
54
55 public XPP3Reader() {
56 }
57
58 public XPP3Reader(DocumentFactory factory) {
59 this.factory = factory;
60 }
61
62 /***
63 * <p>
64 * Reads a Document from the given <code>File</code>
65 * </p>
66 *
67 * @param file
68 * is the <code>File</code> to read from.
69 *
70 * @return the newly created Document instance
71 *
72 * @throws DocumentException
73 * if an error occurs during parsing.
74 * @throws IOException
75 * if a URL could not be made for the given File
76 * @throws XmlPullParserException
77 * DOCUMENT ME!
78 */
79 public Document read(File file) throws DocumentException, IOException,
80 XmlPullParserException {
81 String systemID = file.getAbsolutePath();
82
83 return read(new BufferedReader(new FileReader(file)), systemID);
84 }
85
86 /***
87 * <p>
88 * Reads a Document from the given <code>URL</code>
89 * </p>
90 *
91 * @param url
92 * <code>URL</code> to read from.
93 *
94 * @return the newly created Document instance
95 *
96 * @throws DocumentException
97 * if an error occurs during parsing.
98 * @throws IOException
99 * DOCUMENT ME!
100 * @throws XmlPullParserException
101 * DOCUMENT ME!
102 */
103 public Document read(URL url) throws DocumentException, IOException,
104 XmlPullParserException {
105 String systemID = url.toExternalForm();
106
107 return read(createReader(url.openStream()), systemID);
108 }
109
110 /***
111 * <p>
112 * Reads a Document from the given URL or filename.
113 * </p>
114 *
115 * <p>
116 * If the systemID contains a <code>':'</code> character then it is
117 * assumed to be a URL otherwise its assumed to be a file name. If you want
118 * finer grained control over this mechansim then please explicitly pass in
119 * either a {@link URL}or a {@link File}instance instead of a {@link
120 * String} to denote the source of the document.
121 * </p>
122 *
123 * @param systemID
124 * is a URL for a document or a file name.
125 *
126 * @return the newly created Document instance
127 *
128 * @throws DocumentException
129 * if an error occurs during parsing.
130 * @throws IOException
131 * if a URL could not be made for the given File
132 * @throws XmlPullParserException
133 * DOCUMENT ME!
134 */
135 public Document read(String systemID) throws DocumentException,
136 IOException, XmlPullParserException {
137 if (systemID.indexOf(':') >= 0) {
138
139 return read(new URL(systemID));
140 } else {
141
142 return read(new File(systemID));
143 }
144 }
145
146 /***
147 * <p>
148 * Reads a Document from the given stream
149 * </p>
150 *
151 * @param in
152 * <code>InputStream</code> to read from.
153 *
154 * @return the newly created Document instance
155 *
156 * @throws DocumentException
157 * if an error occurs during parsing.
158 * @throws IOException
159 * DOCUMENT ME!
160 * @throws XmlPullParserException
161 * DOCUMENT ME!
162 */
163 public Document read(InputStream in) throws DocumentException, IOException,
164 XmlPullParserException {
165 return read(createReader(in));
166 }
167
168 /***
169 * <p>
170 * Reads a Document from the given <code>Reader</code>
171 * </p>
172 *
173 * @param reader
174 * is the reader for the input
175 *
176 * @return the newly created Document instance
177 *
178 * @throws DocumentException
179 * if an error occurs during parsing.
180 * @throws IOException
181 * DOCUMENT ME!
182 * @throws XmlPullParserException
183 * DOCUMENT ME!
184 */
185 public Document read(Reader reader) throws DocumentException, IOException,
186 XmlPullParserException {
187 getXPPParser().setInput(reader);
188
189 return parseDocument();
190 }
191
192 /***
193 * <p>
194 * Reads a Document from the given array of characters
195 * </p>
196 *
197 * @param text
198 * is the text to parse
199 *
200 * @return the newly created Document instance
201 *
202 * @throws DocumentException
203 * if an error occurs during parsing.
204 * @throws IOException
205 * DOCUMENT ME!
206 * @throws XmlPullParserException
207 * DOCUMENT ME!
208 */
209 public Document read(char[] text) throws DocumentException, IOException,
210 XmlPullParserException {
211 getXPPParser().setInput(new CharArrayReader(text));
212
213 return parseDocument();
214 }
215
216 /***
217 * <p>
218 * Reads a Document from the given stream
219 * </p>
220 *
221 * @param in
222 * <code>InputStream</code> to read from.
223 * @param systemID
224 * is the URI for the input
225 *
226 * @return the newly created Document instance
227 *
228 * @throws DocumentException
229 * if an error occurs during parsing.
230 * @throws IOException
231 * DOCUMENT ME!
232 * @throws XmlPullParserException
233 * DOCUMENT ME!
234 */
235 public Document read(InputStream in, String systemID)
236 throws DocumentException, IOException, XmlPullParserException {
237 return read(createReader(in), systemID);
238 }
239
240 /***
241 * <p>
242 * Reads a Document from the given <code>Reader</code>
243 * </p>
244 *
245 * @param reader
246 * is the reader for the input
247 * @param systemID
248 * is the URI for the input
249 *
250 * @return the newly created Document instance
251 *
252 * @throws DocumentException
253 * if an error occurs during parsing.
254 * @throws IOException
255 * DOCUMENT ME!
256 * @throws XmlPullParserException
257 * DOCUMENT ME!
258 */
259 public Document read(Reader reader, String systemID)
260 throws DocumentException, IOException, XmlPullParserException {
261 Document document = read(reader);
262 document.setName(systemID);
263
264 return document;
265 }
266
267
268
269 public XmlPullParser getXPPParser() throws XmlPullParserException {
270 if (xppParser == null) {
271 xppParser = getXPPFactory().newPullParser();
272 }
273
274 return xppParser;
275 }
276
277 public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
278 if (xppFactory == null) {
279 xppFactory = XmlPullParserFactory.newInstance();
280 }
281
282 xppFactory.setNamespaceAware(true);
283
284 return xppFactory;
285 }
286
287 public void setXPPFactory(XmlPullParserFactory xPPfactory) {
288 this.xppFactory = xPPfactory;
289 }
290
291 /***
292 * DOCUMENT ME!
293 *
294 * @return the <code>DocumentFactory</code> used to create document
295 * objects
296 */
297 public DocumentFactory getDocumentFactory() {
298 if (factory == null) {
299 factory = DocumentFactory.getInstance();
300 }
301
302 return factory;
303 }
304
305 /***
306 * <p>
307 * This sets the <code>DocumentFactory</code> used to create new
308 * documents. This method allows the building of custom DOM4J tree objects
309 * to be implemented easily using a custom derivation of
310 * {@link DocumentFactory}
311 * </p>
312 *
313 * @param documentFactory
314 * <code>DocumentFactory</code> used to create DOM4J objects
315 */
316 public void setDocumentFactory(DocumentFactory documentFactory) {
317 this.factory = documentFactory;
318 }
319
320 /***
321 * Adds the <code>ElementHandler</code> to be called when the specified
322 * path is encounted.
323 *
324 * @param path
325 * is the path to be handled
326 * @param handler
327 * is the <code>ElementHandler</code> to be called by the event
328 * based processor.
329 */
330 public void addHandler(String path, ElementHandler handler) {
331 getDispatchHandler().addHandler(path, handler);
332 }
333
334 /***
335 * Removes the <code>ElementHandler</code> from the event based processor,
336 * for the specified path.
337 *
338 * @param path
339 * is the path to remove the <code>ElementHandler</code> for.
340 */
341 public void removeHandler(String path) {
342 getDispatchHandler().removeHandler(path);
343 }
344
345 /***
346 * When multiple <code>ElementHandler</code> instances have been
347 * registered, this will set a default <code>ElementHandler</code> to be
348 * called for any path which does <b>NOT </b> have a handler registered.
349 *
350 * @param handler
351 * is the <code>ElementHandler</code> to be called by the event
352 * based processor.
353 */
354 public void setDefaultHandler(ElementHandler handler) {
355 getDispatchHandler().setDefaultHandler(handler);
356 }
357
358
359
360 protected Document parseDocument() throws DocumentException, IOException,
361 XmlPullParserException {
362 DocumentFactory df = getDocumentFactory();
363 Document document = df.createDocument();
364 Element parent = null;
365 XmlPullParser pp = getXPPParser();
366 pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
367
368 while (true) {
369 int type = pp.nextToken();
370
371 switch (type) {
372 case XmlPullParser.PROCESSING_INSTRUCTION: {
373 String text = pp.getText();
374 int loc = text.indexOf(" ");
375
376 if (loc >= 0) {
377 String target = text.substring(0, loc);
378 String txt = text.substring(loc + 1);
379 document.addProcessingInstruction(target, txt);
380 } else {
381 document.addProcessingInstruction(text, "");
382 }
383
384 break;
385 }
386
387 case XmlPullParser.COMMENT: {
388 if (parent != null) {
389 parent.addComment(pp.getText());
390 } else {
391 document.addComment(pp.getText());
392 }
393
394 break;
395 }
396
397 case XmlPullParser.CDSECT: {
398 if (parent != null) {
399 parent.addCDATA(pp.getText());
400 } else {
401 String msg = "Cannot have text content outside of the "
402 + "root document";
403 throw new DocumentException(msg);
404 }
405
406 break;
407 }
408
409 case XmlPullParser.ENTITY_REF:
410 break;
411
412 case XmlPullParser.END_DOCUMENT:
413 return document;
414
415 case XmlPullParser.START_TAG: {
416 QName qname = (pp.getPrefix() == null) ? df.createQName(pp
417 .getName(), pp.getNamespace()) : df.createQName(pp
418 .getName(), pp.getPrefix(), pp.getNamespace());
419 Element newElement = df.createElement(qname);
420 int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
421 int nsEnd = pp.getNamespaceCount(pp.getDepth());
422
423 for (int i = nsStart; i < nsEnd; i++) {
424 if (pp.getNamespacePrefix(i) != null) {
425 newElement.addNamespace(pp.getNamespacePrefix(i),
426 pp.getNamespaceUri(i));
427 }
428 }
429
430 for (int i = 0; i < pp.getAttributeCount(); i++) {
431 QName qa = (pp.getAttributePrefix(i) == null) ? df
432 .createQName(pp.getAttributeName(i)) : df
433 .createQName(pp.getAttributeName(i), pp
434 .getAttributePrefix(i), pp
435 .getAttributeNamespace(i));
436 newElement.addAttribute(qa, pp.getAttributeValue(i));
437 }
438
439 if (parent != null) {
440 parent.add(newElement);
441 } else {
442 document.add(newElement);
443 }
444
445 parent = newElement;
446
447 break;
448 }
449
450 case XmlPullParser.END_TAG: {
451 if (parent != null) {
452 parent = parent.getParent();
453 }
454
455 break;
456 }
457
458 case XmlPullParser.TEXT: {
459 String text = pp.getText();
460
461 if (parent != null) {
462 parent.addText(text);
463 } else {
464 String msg = "Cannot have text content outside of the "
465 + "root document";
466 throw new DocumentException(msg);
467 }
468
469 break;
470 }
471
472 default:
473 break;
474 }
475 }
476 }
477
478 protected DispatchHandler getDispatchHandler() {
479 if (dispatchHandler == null) {
480 dispatchHandler = new DispatchHandler();
481 }
482
483 return dispatchHandler;
484 }
485
486 protected void setDispatchHandler(DispatchHandler dispatchHandler) {
487 this.dispatchHandler = dispatchHandler;
488 }
489
490 /***
491 * Factory method to create a Reader from the given InputStream.
492 *
493 * @param in
494 * DOCUMENT ME!
495 *
496 * @return DOCUMENT ME!
497 *
498 * @throws IOException
499 * DOCUMENT ME!
500 */
501 protected Reader createReader(InputStream in) throws IOException {
502 return new BufferedReader(new InputStreamReader(in));
503 }
504 }
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541