View Javadoc

1   /*
2    * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    */
7   
8   package org.dom4j.io;
9   
10  import java.io.BufferedReader;
11  import java.io.CharArrayReader;
12  import java.io.File;
13  import java.io.FileReader;
14  import java.io.IOException;
15  import java.io.InputStream;
16  import java.io.InputStreamReader;
17  import java.io.Reader;
18  import java.net.URL;
19  
20  import org.dom4j.Document;
21  import org.dom4j.DocumentException;
22  import org.dom4j.DocumentFactory;
23  import org.dom4j.Element;
24  import org.dom4j.ElementHandler;
25  import org.dom4j.QName;
26  
27  import org.xmlpull.v1.XmlPullParser;
28  import org.xmlpull.v1.XmlPullParserException;
29  import org.xmlpull.v1.XmlPullParserFactory;
30  
31  /***
32   * <p>
33   * <code>XPP3Reader</code> is a Reader of DOM4J documents that uses the fast
34   * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 3.x </a>.
35   * It is very fast for use in SOAP style environments.
36   * </p>
37   * 
38   * @author <a href="mailto:pelle@neubia.com">Pelle Braendgaard </a>
39   * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
40   * @version $Revision: 1.3 $
41   */
42  public class XPP3Reader {
43      /*** <code>DocumentFactory</code> used to create new document objects */
44      private DocumentFactory factory;
45  
46      /*** <code>XmlPullParser</code> used to parse XML */
47      private XmlPullParser xppParser;
48  
49      /*** <code>XmlPullParser</code> used to parse XML */
50      private XmlPullParserFactory xppFactory;
51  
52      /*** DispatchHandler to call when each <code>Element</code> is encountered */
53      private DispatchHandler dispatchHandler;
54  
55      public XPP3Reader() {
56      }
57  
58      public XPP3Reader(DocumentFactory factory) {
59          this.factory = factory;
60      }
61  
62      /***
63       * <p>
64       * Reads a Document from the given <code>File</code>
65       * </p>
66       * 
67       * @param file
68       *            is the <code>File</code> to read from.
69       * 
70       * @return the newly created Document instance
71       * 
72       * @throws DocumentException
73       *             if an error occurs during parsing.
74       * @throws IOException
75       *             if a URL could not be made for the given File
76       * @throws XmlPullParserException
77       *             DOCUMENT ME!
78       */
79      public Document read(File file) throws DocumentException, IOException,
80              XmlPullParserException {
81          String systemID = file.getAbsolutePath();
82  
83          return read(new BufferedReader(new FileReader(file)), systemID);
84      }
85  
86      /***
87       * <p>
88       * Reads a Document from the given <code>URL</code>
89       * </p>
90       * 
91       * @param url
92       *            <code>URL</code> to read from.
93       * 
94       * @return the newly created Document instance
95       * 
96       * @throws DocumentException
97       *             if an error occurs during parsing.
98       * @throws IOException
99       *             DOCUMENT ME!
100      * @throws XmlPullParserException
101      *             DOCUMENT ME!
102      */
103     public Document read(URL url) throws DocumentException, IOException,
104             XmlPullParserException {
105         String systemID = url.toExternalForm();
106 
107         return read(createReader(url.openStream()), systemID);
108     }
109 
110     /***
111      * <p>
112      * Reads a Document from the given URL or filename.
113      * </p>
114      * 
115      * <p>
116      * If the systemID contains a <code>':'</code> character then it is
117      * assumed to be a URL otherwise its assumed to be a file name. If you want
118      * finer grained control over this mechansim then please explicitly pass in
119      * either a {@link URL}or a {@link File}instance instead of a {@link
120      * String} to denote the source of the document.
121      * </p>
122      * 
123      * @param systemID
124      *            is a URL for a document or a file name.
125      * 
126      * @return the newly created Document instance
127      * 
128      * @throws DocumentException
129      *             if an error occurs during parsing.
130      * @throws IOException
131      *             if a URL could not be made for the given File
132      * @throws XmlPullParserException
133      *             DOCUMENT ME!
134      */
135     public Document read(String systemID) throws DocumentException,
136             IOException, XmlPullParserException {
137         if (systemID.indexOf(':') >= 0) {
138             // lets assume its a URL
139             return read(new URL(systemID));
140         } else {
141             // lets assume that we are given a file name
142             return read(new File(systemID));
143         }
144     }
145 
146     /***
147      * <p>
148      * Reads a Document from the given stream
149      * </p>
150      * 
151      * @param in
152      *            <code>InputStream</code> to read from.
153      * 
154      * @return the newly created Document instance
155      * 
156      * @throws DocumentException
157      *             if an error occurs during parsing.
158      * @throws IOException
159      *             DOCUMENT ME!
160      * @throws XmlPullParserException
161      *             DOCUMENT ME!
162      */
163     public Document read(InputStream in) throws DocumentException, IOException,
164             XmlPullParserException {
165         return read(createReader(in));
166     }
167 
168     /***
169      * <p>
170      * Reads a Document from the given <code>Reader</code>
171      * </p>
172      * 
173      * @param reader
174      *            is the reader for the input
175      * 
176      * @return the newly created Document instance
177      * 
178      * @throws DocumentException
179      *             if an error occurs during parsing.
180      * @throws IOException
181      *             DOCUMENT ME!
182      * @throws XmlPullParserException
183      *             DOCUMENT ME!
184      */
185     public Document read(Reader reader) throws DocumentException, IOException,
186             XmlPullParserException {
187         getXPPParser().setInput(reader);
188 
189         return parseDocument();
190     }
191 
192     /***
193      * <p>
194      * Reads a Document from the given array of characters
195      * </p>
196      * 
197      * @param text
198      *            is the text to parse
199      * 
200      * @return the newly created Document instance
201      * 
202      * @throws DocumentException
203      *             if an error occurs during parsing.
204      * @throws IOException
205      *             DOCUMENT ME!
206      * @throws XmlPullParserException
207      *             DOCUMENT ME!
208      */
209     public Document read(char[] text) throws DocumentException, IOException,
210             XmlPullParserException {
211         getXPPParser().setInput(new CharArrayReader(text));
212 
213         return parseDocument();
214     }
215 
216     /***
217      * <p>
218      * Reads a Document from the given stream
219      * </p>
220      * 
221      * @param in
222      *            <code>InputStream</code> to read from.
223      * @param systemID
224      *            is the URI for the input
225      * 
226      * @return the newly created Document instance
227      * 
228      * @throws DocumentException
229      *             if an error occurs during parsing.
230      * @throws IOException
231      *             DOCUMENT ME!
232      * @throws XmlPullParserException
233      *             DOCUMENT ME!
234      */
235     public Document read(InputStream in, String systemID)
236             throws DocumentException, IOException, XmlPullParserException {
237         return read(createReader(in), systemID);
238     }
239 
240     /***
241      * <p>
242      * Reads a Document from the given <code>Reader</code>
243      * </p>
244      * 
245      * @param reader
246      *            is the reader for the input
247      * @param systemID
248      *            is the URI for the input
249      * 
250      * @return the newly created Document instance
251      * 
252      * @throws DocumentException
253      *             if an error occurs during parsing.
254      * @throws IOException
255      *             DOCUMENT ME!
256      * @throws XmlPullParserException
257      *             DOCUMENT ME!
258      */
259     public Document read(Reader reader, String systemID)
260             throws DocumentException, IOException, XmlPullParserException {
261         Document document = read(reader);
262         document.setName(systemID);
263 
264         return document;
265     }
266 
267     // Properties
268     // -------------------------------------------------------------------------
269     public XmlPullParser getXPPParser() throws XmlPullParserException {
270         if (xppParser == null) {
271             xppParser = getXPPFactory().newPullParser();
272         }
273 
274         return xppParser;
275     }
276 
277     public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
278         if (xppFactory == null) {
279             xppFactory = XmlPullParserFactory.newInstance();
280         }
281 
282         xppFactory.setNamespaceAware(true);
283 
284         return xppFactory;
285     }
286 
287     public void setXPPFactory(XmlPullParserFactory xPPfactory) {
288         this.xppFactory = xPPfactory;
289     }
290 
291     /***
292      * DOCUMENT ME!
293      * 
294      * @return the <code>DocumentFactory</code> used to create document
295      *         objects
296      */
297     public DocumentFactory getDocumentFactory() {
298         if (factory == null) {
299             factory = DocumentFactory.getInstance();
300         }
301 
302         return factory;
303     }
304 
305     /***
306      * <p>
307      * This sets the <code>DocumentFactory</code> used to create new
308      * documents. This method allows the building of custom DOM4J tree objects
309      * to be implemented easily using a custom derivation of
310      * {@link DocumentFactory}
311      * </p>
312      * 
313      * @param documentFactory
314      *            <code>DocumentFactory</code> used to create DOM4J objects
315      */
316     public void setDocumentFactory(DocumentFactory documentFactory) {
317         this.factory = documentFactory;
318     }
319 
320     /***
321      * Adds the <code>ElementHandler</code> to be called when the specified
322      * path is encounted.
323      * 
324      * @param path
325      *            is the path to be handled
326      * @param handler
327      *            is the <code>ElementHandler</code> to be called by the event
328      *            based processor.
329      */
330     public void addHandler(String path, ElementHandler handler) {
331         getDispatchHandler().addHandler(path, handler);
332     }
333 
334     /***
335      * Removes the <code>ElementHandler</code> from the event based processor,
336      * for the specified path.
337      * 
338      * @param path
339      *            is the path to remove the <code>ElementHandler</code> for.
340      */
341     public void removeHandler(String path) {
342         getDispatchHandler().removeHandler(path);
343     }
344 
345     /***
346      * When multiple <code>ElementHandler</code> instances have been
347      * registered, this will set a default <code>ElementHandler</code> to be
348      * called for any path which does <b>NOT </b> have a handler registered.
349      * 
350      * @param handler
351      *            is the <code>ElementHandler</code> to be called by the event
352      *            based processor.
353      */
354     public void setDefaultHandler(ElementHandler handler) {
355         getDispatchHandler().setDefaultHandler(handler);
356     }
357 
358     // Implementation methods
359     // -------------------------------------------------------------------------
360     protected Document parseDocument() throws DocumentException, IOException,
361             XmlPullParserException {
362         DocumentFactory df = getDocumentFactory();
363         Document document = df.createDocument();
364         Element parent = null;
365         XmlPullParser pp = getXPPParser();
366         pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
367 
368         while (true) {
369             int type = pp.nextToken();
370 
371             switch (type) {
372                 case XmlPullParser.PROCESSING_INSTRUCTION: {
373                     String text = pp.getText();
374                     int loc = text.indexOf(" ");
375 
376                     if (loc >= 0) {
377                         String target = text.substring(0, loc);
378                         String txt = text.substring(loc + 1);
379                         document.addProcessingInstruction(target, txt);
380                     } else {
381                         document.addProcessingInstruction(text, "");
382                     }
383 
384                     break;
385                 }
386 
387                 case XmlPullParser.COMMENT: {
388                     if (parent != null) {
389                         parent.addComment(pp.getText());
390                     } else {
391                         document.addComment(pp.getText());
392                     }
393 
394                     break;
395                 }
396 
397                 case XmlPullParser.CDSECT: {
398                     if (parent != null) {
399                         parent.addCDATA(pp.getText());
400                     } else {
401                         String msg = "Cannot have text content outside of the "
402                                 + "root document";
403                         throw new DocumentException(msg);
404                     }
405 
406                     break;
407                 }
408 
409                 case XmlPullParser.ENTITY_REF:
410                     break;
411 
412                 case XmlPullParser.END_DOCUMENT:
413                     return document;
414 
415                 case XmlPullParser.START_TAG: {
416                     QName qname = (pp.getPrefix() == null) ? df.createQName(pp
417                             .getName(), pp.getNamespace()) : df.createQName(pp
418                             .getName(), pp.getPrefix(), pp.getNamespace());
419                     Element newElement = df.createElement(qname);
420                     int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
421                     int nsEnd = pp.getNamespaceCount(pp.getDepth());
422 
423                     for (int i = nsStart; i < nsEnd; i++) {
424                         if (pp.getNamespacePrefix(i) != null) {
425                             newElement.addNamespace(pp.getNamespacePrefix(i),
426                                     pp.getNamespaceUri(i));
427                         }
428                     }
429 
430                     for (int i = 0; i < pp.getAttributeCount(); i++) {
431                         QName qa = (pp.getAttributePrefix(i) == null) ? df
432                                 .createQName(pp.getAttributeName(i)) : df
433                                 .createQName(pp.getAttributeName(i), pp
434                                         .getAttributePrefix(i), pp
435                                         .getAttributeNamespace(i));
436                         newElement.addAttribute(qa, pp.getAttributeValue(i));
437                     }
438 
439                     if (parent != null) {
440                         parent.add(newElement);
441                     } else {
442                         document.add(newElement);
443                     }
444 
445                     parent = newElement;
446 
447                     break;
448                 }
449 
450                 case XmlPullParser.END_TAG: {
451                     if (parent != null) {
452                         parent = parent.getParent();
453                     }
454 
455                     break;
456                 }
457 
458                 case XmlPullParser.TEXT: {
459                     String text = pp.getText();
460 
461                     if (parent != null) {
462                         parent.addText(text);
463                     } else {
464                         String msg = "Cannot have text content outside of the "
465                                 + "root document";
466                         throw new DocumentException(msg);
467                     }
468 
469                     break;
470                 }
471 
472                 default:
473                     break;
474             }
475         }
476     }
477 
478     protected DispatchHandler getDispatchHandler() {
479         if (dispatchHandler == null) {
480             dispatchHandler = new DispatchHandler();
481         }
482 
483         return dispatchHandler;
484     }
485 
486     protected void setDispatchHandler(DispatchHandler dispatchHandler) {
487         this.dispatchHandler = dispatchHandler;
488     }
489 
490     /***
491      * Factory method to create a Reader from the given InputStream.
492      * 
493      * @param in
494      *            DOCUMENT ME!
495      * 
496      * @return DOCUMENT ME!
497      * 
498      * @throws IOException
499      *             DOCUMENT ME!
500      */
501     protected Reader createReader(InputStream in) throws IOException {
502         return new BufferedReader(new InputStreamReader(in));
503     }
504 }
505 
506 /*
507  * Redistribution and use of this software and associated documentation
508  * ("Software"), with or without modification, are permitted provided that the
509  * following conditions are met:
510  * 
511  * 1. Redistributions of source code must retain copyright statements and
512  * notices. Redistributions must also contain a copy of this document.
513  * 
514  * 2. Redistributions in binary form must reproduce the above copyright notice,
515  * this list of conditions and the following disclaimer in the documentation
516  * and/or other materials provided with the distribution.
517  * 
518  * 3. The name "DOM4J" must not be used to endorse or promote products derived
519  * from this Software without prior written permission of MetaStuff, Ltd. For
520  * written permission, please contact dom4j-info@metastuff.com.
521  * 
522  * 4. Products derived from this Software may not be called "DOM4J" nor may
523  * "DOM4J" appear in their names without prior written permission of MetaStuff,
524  * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
525  * 
526  * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
527  * 
528  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
529  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
530  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
531  * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
532  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
533  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
534  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
535  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
536  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
537  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
538  * POSSIBILITY OF SUCH DAMAGE.
539  * 
540  * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
541  */