1
2
3
4
5
6
7
8 package org.dom4j.io;
9
10 import java.io.BufferedReader;
11 import java.io.File;
12 import java.io.FileReader;
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.InputStreamReader;
16 import java.io.Reader;
17 import java.net.URL;
18
19 import org.dom4j.Document;
20 import org.dom4j.DocumentException;
21 import org.dom4j.DocumentFactory;
22 import org.dom4j.Element;
23 import org.dom4j.ElementHandler;
24 import org.dom4j.xpp.ProxyXmlStartTag;
25
26 import org.gjt.xpp.XmlEndTag;
27 import org.gjt.xpp.XmlPullParser;
28 import org.gjt.xpp.XmlPullParserException;
29 import org.gjt.xpp.XmlPullParserFactory;
30
31 /***
32 * <p>
33 * <code>XPPReader</code> is a Reader of DOM4J documents that uses the fast <a
34 * href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x </a>. It
35 * does not currently support comments, CDATA or ProcessingInstructions or
36 * validation but it is very fast for use in SOAP style environments.
37 * </p>
38 *
39 * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
40 * @version $Revision: 1.7 $
41 */
42 public class XPPReader {
43 /*** <code>DocumentFactory</code> used to create new document objects */
44 private DocumentFactory factory;
45
46 /*** <code>XmlPullParser</code> used to parse XML */
47 private XmlPullParser xppParser;
48
49 /*** <code>XmlPullParser</code> used to parse XML */
50 private XmlPullParserFactory xppFactory;
51
52 /*** DispatchHandler to call when each <code>Element</code> is encountered */
53 private DispatchHandler dispatchHandler;
54
55 public XPPReader() {
56 }
57
58 public XPPReader(DocumentFactory factory) {
59 this.factory = factory;
60 }
61
62 /***
63 * <p>
64 * Reads a Document from the given <code>File</code>
65 * </p>
66 *
67 * @param file
68 * is the <code>File</code> to read from.
69 *
70 * @return the newly created Document instance
71 *
72 * @throws DocumentException
73 * if an error occurs during parsing.
74 * @throws IOException
75 * if a URL could not be made for the given File
76 * @throws XmlPullParserException
77 * DOCUMENT ME!
78 */
79 public Document read(File file) throws DocumentException, IOException,
80 XmlPullParserException {
81 String systemID = file.getAbsolutePath();
82
83 return read(new BufferedReader(new FileReader(file)), systemID);
84 }
85
86 /***
87 * <p>
88 * Reads a Document from the given <code>URL</code>
89 * </p>
90 *
91 * @param url
92 * <code>URL</code> to read from.
93 *
94 * @return the newly created Document instance
95 *
96 * @throws DocumentException
97 * if an error occurs during parsing.
98 * @throws IOException
99 * DOCUMENT ME!
100 * @throws XmlPullParserException
101 * DOCUMENT ME!
102 */
103 public Document read(URL url) throws DocumentException, IOException,
104 XmlPullParserException {
105 String systemID = url.toExternalForm();
106
107 return read(createReader(url.openStream()), systemID);
108 }
109
110 /***
111 * <p>
112 * Reads a Document from the given URL or filename.
113 * </p>
114 *
115 * <p>
116 * If the systemID contains a <code>':'</code> character then it is
117 * assumed to be a URL otherwise its assumed to be a file name. If you want
118 * finer grained control over this mechansim then please explicitly pass in
119 * either a {@link URL}or a {@link File}instance instead of a {@link
120 * String} to denote the source of the document.
121 * </p>
122 *
123 * @param systemID
124 * is a URL for a document or a file name.
125 *
126 * @return the newly created Document instance
127 *
128 * @throws DocumentException
129 * if an error occurs during parsing.
130 * @throws IOException
131 * if a URL could not be made for the given File
132 * @throws XmlPullParserException
133 * DOCUMENT ME!
134 */
135 public Document read(String systemID) throws DocumentException,
136 IOException, XmlPullParserException {
137 if (systemID.indexOf(':') >= 0) {
138
139 return read(new URL(systemID));
140 } else {
141
142 return read(new File(systemID));
143 }
144 }
145
146 /***
147 * <p>
148 * Reads a Document from the given stream
149 * </p>
150 *
151 * @param in
152 * <code>InputStream</code> to read from.
153 *
154 * @return the newly created Document instance
155 *
156 * @throws DocumentException
157 * if an error occurs during parsing.
158 * @throws IOException
159 * DOCUMENT ME!
160 * @throws XmlPullParserException
161 * DOCUMENT ME!
162 */
163 public Document read(InputStream in) throws DocumentException, IOException,
164 XmlPullParserException {
165 return read(createReader(in));
166 }
167
168 /***
169 * <p>
170 * Reads a Document from the given <code>Reader</code>
171 * </p>
172 *
173 * @param reader
174 * is the reader for the input
175 *
176 * @return the newly created Document instance
177 *
178 * @throws DocumentException
179 * if an error occurs during parsing.
180 * @throws IOException
181 * DOCUMENT ME!
182 * @throws XmlPullParserException
183 * DOCUMENT ME!
184 */
185 public Document read(Reader reader) throws DocumentException, IOException,
186 XmlPullParserException {
187 getXPPParser().setInput(reader);
188
189 return parseDocument();
190 }
191
192 /***
193 * <p>
194 * Reads a Document from the given array of characters
195 * </p>
196 *
197 * @param text
198 * is the text to parse
199 *
200 * @return the newly created Document instance
201 *
202 * @throws DocumentException
203 * if an error occurs during parsing.
204 * @throws IOException
205 * DOCUMENT ME!
206 * @throws XmlPullParserException
207 * DOCUMENT ME!
208 */
209 public Document read(char[] text) throws DocumentException, IOException,
210 XmlPullParserException {
211 getXPPParser().setInput(text);
212
213 return parseDocument();
214 }
215
216 /***
217 * <p>
218 * Reads a Document from the given stream
219 * </p>
220 *
221 * @param in
222 * <code>InputStream</code> to read from.
223 * @param systemID
224 * is the URI for the input
225 *
226 * @return the newly created Document instance
227 *
228 * @throws DocumentException
229 * if an error occurs during parsing.
230 * @throws IOException
231 * DOCUMENT ME!
232 * @throws XmlPullParserException
233 * DOCUMENT ME!
234 */
235 public Document read(InputStream in, String systemID)
236 throws DocumentException, IOException, XmlPullParserException {
237 return read(createReader(in), systemID);
238 }
239
240 /***
241 * <p>
242 * Reads a Document from the given <code>Reader</code>
243 * </p>
244 *
245 * @param reader
246 * is the reader for the input
247 * @param systemID
248 * is the URI for the input
249 *
250 * @return the newly created Document instance
251 *
252 * @throws DocumentException
253 * if an error occurs during parsing.
254 * @throws IOException
255 * DOCUMENT ME!
256 * @throws XmlPullParserException
257 * DOCUMENT ME!
258 */
259 public Document read(Reader reader, String systemID)
260 throws DocumentException, IOException, XmlPullParserException {
261 Document document = read(reader);
262 document.setName(systemID);
263
264 return document;
265 }
266
267
268
269 public XmlPullParser getXPPParser() throws XmlPullParserException {
270 if (xppParser == null) {
271 xppParser = getXPPFactory().newPullParser();
272 }
273
274 return xppParser;
275 }
276
277 public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
278 if (xppFactory == null) {
279 xppFactory = XmlPullParserFactory.newInstance();
280 }
281
282 return xppFactory;
283 }
284
285 public void setXPPFactory(XmlPullParserFactory xPPFactory) {
286 this.xppFactory = xPPFactory;
287 }
288
289 /***
290 * DOCUMENT ME!
291 *
292 * @return the <code>DocumentFactory</code> used to create document
293 * objects
294 */
295 public DocumentFactory getDocumentFactory() {
296 if (factory == null) {
297 factory = DocumentFactory.getInstance();
298 }
299
300 return factory;
301 }
302
303 /***
304 * <p>
305 * This sets the <code>DocumentFactory</code> used to create new
306 * documents. This method allows the building of custom DOM4J tree objects
307 * to be implemented easily using a custom derivation of
308 * {@link DocumentFactory}
309 * </p>
310 *
311 * @param documentFactory
312 * <code>DocumentFactory</code> used to create DOM4J objects
313 */
314 public void setDocumentFactory(DocumentFactory documentFactory) {
315 this.factory = documentFactory;
316 }
317
318 /***
319 * Adds the <code>ElementHandler</code> to be called when the specified
320 * path is encounted.
321 *
322 * @param path
323 * is the path to be handled
324 * @param handler
325 * is the <code>ElementHandler</code> to be called by the event
326 * based processor.
327 */
328 public void addHandler(String path, ElementHandler handler) {
329 getDispatchHandler().addHandler(path, handler);
330 }
331
332 /***
333 * Removes the <code>ElementHandler</code> from the event based processor,
334 * for the specified path.
335 *
336 * @param path
337 * is the path to remove the <code>ElementHandler</code> for.
338 */
339 public void removeHandler(String path) {
340 getDispatchHandler().removeHandler(path);
341 }
342
343 /***
344 * When multiple <code>ElementHandler</code> instances have been
345 * registered, this will set a default <code>ElementHandler</code> to be
346 * called for any path which does <b>NOT </b> have a handler registered.
347 *
348 * @param handler
349 * is the <code>ElementHandler</code> to be called by the event
350 * based processor.
351 */
352 public void setDefaultHandler(ElementHandler handler) {
353 getDispatchHandler().setDefaultHandler(handler);
354 }
355
356
357
358 protected Document parseDocument() throws DocumentException, IOException,
359 XmlPullParserException {
360 Document document = getDocumentFactory().createDocument();
361 Element parent = null;
362 XmlPullParser parser = getXPPParser();
363 parser.setNamespaceAware(true);
364
365 ProxyXmlStartTag startTag = new ProxyXmlStartTag();
366 XmlEndTag endTag = xppFactory.newEndTag();
367
368 while (true) {
369 int type = parser.next();
370
371 switch (type) {
372 case XmlPullParser.END_DOCUMENT:
373 return document;
374
375 case XmlPullParser.START_TAG: {
376 parser.readStartTag(startTag);
377
378 Element newElement = startTag.getElement();
379
380 if (parent != null) {
381 parent.add(newElement);
382 } else {
383 document.add(newElement);
384 }
385
386 parent = newElement;
387
388 break;
389 }
390
391 case XmlPullParser.END_TAG: {
392 parser.readEndTag(endTag);
393
394 if (parent != null) {
395 parent = parent.getParent();
396 }
397
398 break;
399 }
400
401 case XmlPullParser.CONTENT: {
402 String text = parser.readContent();
403
404 if (parent != null) {
405 parent.addText(text);
406 } else {
407 String msg = "Cannot have text content outside of the "
408 + "root document";
409 throw new DocumentException(msg);
410 }
411
412 break;
413 }
414
415 default:
416 throw new DocumentException("Error: unknown type: " + type);
417 }
418 }
419 }
420
421 protected DispatchHandler getDispatchHandler() {
422 if (dispatchHandler == null) {
423 dispatchHandler = new DispatchHandler();
424 }
425
426 return dispatchHandler;
427 }
428
429 protected void setDispatchHandler(DispatchHandler dispatchHandler) {
430 this.dispatchHandler = dispatchHandler;
431 }
432
433 /***
434 * Factory method to create a Reader from the given InputStream.
435 *
436 * @param in
437 * DOCUMENT ME!
438 *
439 * @return DOCUMENT ME!
440 *
441 * @throws IOException
442 * DOCUMENT ME!
443 */
444 protected Reader createReader(InputStream in) throws IOException {
445 return new BufferedReader(new InputStreamReader(in));
446 }
447 }
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484