/*
 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 *
 * This software is open source.
 * See the bottom of this file for the licence.
 */
7   
8   package org.dom4j.io;
9   
10  import java.io.BufferedReader;
11  import java.io.File;
12  import java.io.FileReader;
13  import java.io.IOException;
14  import java.io.InputStream;
15  import java.io.InputStreamReader;
16  import java.io.Reader;
17  import java.net.URL;
18  
19  import org.dom4j.Document;
20  import org.dom4j.DocumentException;
21  import org.dom4j.DocumentFactory;
22  import org.dom4j.Element;
23  import org.dom4j.ElementHandler;
24  import org.dom4j.xpp.ProxyXmlStartTag;
25  
26  import org.gjt.xpp.XmlEndTag;
27  import org.gjt.xpp.XmlPullParser;
28  import org.gjt.xpp.XmlPullParserException;
29  import org.gjt.xpp.XmlPullParserFactory;
30  
31  /***
32   * <p>
33   * <code>XPPReader</code> is a Reader of DOM4J documents that uses the fast <a
34   * href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x </a>. It
35   * does not currently support comments, CDATA or ProcessingInstructions or
36   * validation but it is very fast for use in SOAP style environments.
37   * </p>
38   * 
39   * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
40   * @version $Revision: 1.7 $
41   */
42  public class XPPReader {
43      /*** <code>DocumentFactory</code> used to create new document objects */
44      private DocumentFactory factory;
45  
46      /*** <code>XmlPullParser</code> used to parse XML */
47      private XmlPullParser xppParser;
48  
49      /*** <code>XmlPullParser</code> used to parse XML */
50      private XmlPullParserFactory xppFactory;
51  
52      /*** DispatchHandler to call when each <code>Element</code> is encountered */
53      private DispatchHandler dispatchHandler;
54  
55      public XPPReader() {
56      }
57  
58      public XPPReader(DocumentFactory factory) {
59          this.factory = factory;
60      }
61  
62      /***
63       * <p>
64       * Reads a Document from the given <code>File</code>
65       * </p>
66       * 
67       * @param file
68       *            is the <code>File</code> to read from.
69       * 
70       * @return the newly created Document instance
71       * 
72       * @throws DocumentException
73       *             if an error occurs during parsing.
74       * @throws IOException
75       *             if a URL could not be made for the given File
76       * @throws XmlPullParserException
77       *             DOCUMENT ME!
78       */
79      public Document read(File file) throws DocumentException, IOException,
80              XmlPullParserException {
81          String systemID = file.getAbsolutePath();
82  
83          return read(new BufferedReader(new FileReader(file)), systemID);
84      }
85  
86      /***
87       * <p>
88       * Reads a Document from the given <code>URL</code>
89       * </p>
90       * 
91       * @param url
92       *            <code>URL</code> to read from.
93       * 
94       * @return the newly created Document instance
95       * 
96       * @throws DocumentException
97       *             if an error occurs during parsing.
98       * @throws IOException
99       *             DOCUMENT ME!
100      * @throws XmlPullParserException
101      *             DOCUMENT ME!
102      */
103     public Document read(URL url) throws DocumentException, IOException,
104             XmlPullParserException {
105         String systemID = url.toExternalForm();
106 
107         return read(createReader(url.openStream()), systemID);
108     }
109 
110     /***
111      * <p>
112      * Reads a Document from the given URL or filename.
113      * </p>
114      * 
115      * <p>
116      * If the systemID contains a <code>':'</code> character then it is
117      * assumed to be a URL otherwise its assumed to be a file name. If you want
118      * finer grained control over this mechansim then please explicitly pass in
119      * either a {@link URL}or a {@link File}instance instead of a {@link
120      * String} to denote the source of the document.
121      * </p>
122      * 
123      * @param systemID
124      *            is a URL for a document or a file name.
125      * 
126      * @return the newly created Document instance
127      * 
128      * @throws DocumentException
129      *             if an error occurs during parsing.
130      * @throws IOException
131      *             if a URL could not be made for the given File
132      * @throws XmlPullParserException
133      *             DOCUMENT ME!
134      */
135     public Document read(String systemID) throws DocumentException,
136             IOException, XmlPullParserException {
137         if (systemID.indexOf(':') >= 0) {
138             // lets assume its a URL
139             return read(new URL(systemID));
140         } else {
141             // lets assume that we are given a file name
142             return read(new File(systemID));
143         }
144     }
145 
146     /***
147      * <p>
148      * Reads a Document from the given stream
149      * </p>
150      * 
151      * @param in
152      *            <code>InputStream</code> to read from.
153      * 
154      * @return the newly created Document instance
155      * 
156      * @throws DocumentException
157      *             if an error occurs during parsing.
158      * @throws IOException
159      *             DOCUMENT ME!
160      * @throws XmlPullParserException
161      *             DOCUMENT ME!
162      */
163     public Document read(InputStream in) throws DocumentException, IOException,
164             XmlPullParserException {
165         return read(createReader(in));
166     }
167 
168     /***
169      * <p>
170      * Reads a Document from the given <code>Reader</code>
171      * </p>
172      * 
173      * @param reader
174      *            is the reader for the input
175      * 
176      * @return the newly created Document instance
177      * 
178      * @throws DocumentException
179      *             if an error occurs during parsing.
180      * @throws IOException
181      *             DOCUMENT ME!
182      * @throws XmlPullParserException
183      *             DOCUMENT ME!
184      */
185     public Document read(Reader reader) throws DocumentException, IOException,
186             XmlPullParserException {
187         getXPPParser().setInput(reader);
188 
189         return parseDocument();
190     }
191 
192     /***
193      * <p>
194      * Reads a Document from the given array of characters
195      * </p>
196      * 
197      * @param text
198      *            is the text to parse
199      * 
200      * @return the newly created Document instance
201      * 
202      * @throws DocumentException
203      *             if an error occurs during parsing.
204      * @throws IOException
205      *             DOCUMENT ME!
206      * @throws XmlPullParserException
207      *             DOCUMENT ME!
208      */
209     public Document read(char[] text) throws DocumentException, IOException,
210             XmlPullParserException {
211         getXPPParser().setInput(text);
212 
213         return parseDocument();
214     }
215 
216     /***
217      * <p>
218      * Reads a Document from the given stream
219      * </p>
220      * 
221      * @param in
222      *            <code>InputStream</code> to read from.
223      * @param systemID
224      *            is the URI for the input
225      * 
226      * @return the newly created Document instance
227      * 
228      * @throws DocumentException
229      *             if an error occurs during parsing.
230      * @throws IOException
231      *             DOCUMENT ME!
232      * @throws XmlPullParserException
233      *             DOCUMENT ME!
234      */
235     public Document read(InputStream in, String systemID)
236             throws DocumentException, IOException, XmlPullParserException {
237         return read(createReader(in), systemID);
238     }
239 
240     /***
241      * <p>
242      * Reads a Document from the given <code>Reader</code>
243      * </p>
244      * 
245      * @param reader
246      *            is the reader for the input
247      * @param systemID
248      *            is the URI for the input
249      * 
250      * @return the newly created Document instance
251      * 
252      * @throws DocumentException
253      *             if an error occurs during parsing.
254      * @throws IOException
255      *             DOCUMENT ME!
256      * @throws XmlPullParserException
257      *             DOCUMENT ME!
258      */
259     public Document read(Reader reader, String systemID)
260             throws DocumentException, IOException, XmlPullParserException {
261         Document document = read(reader);
262         document.setName(systemID);
263 
264         return document;
265     }
266 
267     // Properties
268     // -------------------------------------------------------------------------
269     public XmlPullParser getXPPParser() throws XmlPullParserException {
270         if (xppParser == null) {
271             xppParser = getXPPFactory().newPullParser();
272         }
273 
274         return xppParser;
275     }
276 
277     public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
278         if (xppFactory == null) {
279             xppFactory = XmlPullParserFactory.newInstance();
280         }
281 
282         return xppFactory;
283     }
284 
285     public void setXPPFactory(XmlPullParserFactory xPPFactory) {
286         this.xppFactory = xPPFactory;
287     }
288 
289     /***
290      * DOCUMENT ME!
291      * 
292      * @return the <code>DocumentFactory</code> used to create document
293      *         objects
294      */
295     public DocumentFactory getDocumentFactory() {
296         if (factory == null) {
297             factory = DocumentFactory.getInstance();
298         }
299 
300         return factory;
301     }
302 
303     /***
304      * <p>
305      * This sets the <code>DocumentFactory</code> used to create new
306      * documents. This method allows the building of custom DOM4J tree objects
307      * to be implemented easily using a custom derivation of
308      * {@link DocumentFactory}
309      * </p>
310      * 
311      * @param documentFactory
312      *            <code>DocumentFactory</code> used to create DOM4J objects
313      */
314     public void setDocumentFactory(DocumentFactory documentFactory) {
315         this.factory = documentFactory;
316     }
317 
318     /***
319      * Adds the <code>ElementHandler</code> to be called when the specified
320      * path is encounted.
321      * 
322      * @param path
323      *            is the path to be handled
324      * @param handler
325      *            is the <code>ElementHandler</code> to be called by the event
326      *            based processor.
327      */
328     public void addHandler(String path, ElementHandler handler) {
329         getDispatchHandler().addHandler(path, handler);
330     }
331 
332     /***
333      * Removes the <code>ElementHandler</code> from the event based processor,
334      * for the specified path.
335      * 
336      * @param path
337      *            is the path to remove the <code>ElementHandler</code> for.
338      */
339     public void removeHandler(String path) {
340         getDispatchHandler().removeHandler(path);
341     }
342 
343     /***
344      * When multiple <code>ElementHandler</code> instances have been
345      * registered, this will set a default <code>ElementHandler</code> to be
346      * called for any path which does <b>NOT </b> have a handler registered.
347      * 
348      * @param handler
349      *            is the <code>ElementHandler</code> to be called by the event
350      *            based processor.
351      */
352     public void setDefaultHandler(ElementHandler handler) {
353         getDispatchHandler().setDefaultHandler(handler);
354     }
355 
356     // Implementation methods
357     // -------------------------------------------------------------------------
358     protected Document parseDocument() throws DocumentException, IOException,
359             XmlPullParserException {
360         Document document = getDocumentFactory().createDocument();
361         Element parent = null;
362         XmlPullParser parser = getXPPParser();
363         parser.setNamespaceAware(true);
364 
365         ProxyXmlStartTag startTag = new ProxyXmlStartTag();
366         XmlEndTag endTag = xppFactory.newEndTag();
367 
368         while (true) {
369             int type = parser.next();
370 
371             switch (type) {
372                 case XmlPullParser.END_DOCUMENT:
373                     return document;
374 
375                 case XmlPullParser.START_TAG: {
376                     parser.readStartTag(startTag);
377 
378                     Element newElement = startTag.getElement();
379 
380                     if (parent != null) {
381                         parent.add(newElement);
382                     } else {
383                         document.add(newElement);
384                     }
385 
386                     parent = newElement;
387 
388                     break;
389                 }
390 
391                 case XmlPullParser.END_TAG: {
392                     parser.readEndTag(endTag);
393 
394                     if (parent != null) {
395                         parent = parent.getParent();
396                     }
397 
398                     break;
399                 }
400 
401                 case XmlPullParser.CONTENT: {
402                     String text = parser.readContent();
403 
404                     if (parent != null) {
405                         parent.addText(text);
406                     } else {
407                         String msg = "Cannot have text content outside of the "
408                                 + "root document";
409                         throw new DocumentException(msg);
410                     }
411 
412                     break;
413                 }
414 
415                 default:
416                     throw new DocumentException("Error: unknown type: " + type);
417             }
418         }
419     }
420 
421     protected DispatchHandler getDispatchHandler() {
422         if (dispatchHandler == null) {
423             dispatchHandler = new DispatchHandler();
424         }
425 
426         return dispatchHandler;
427     }
428 
429     protected void setDispatchHandler(DispatchHandler dispatchHandler) {
430         this.dispatchHandler = dispatchHandler;
431     }
432 
433     /***
434      * Factory method to create a Reader from the given InputStream.
435      * 
436      * @param in
437      *            DOCUMENT ME!
438      * 
439      * @return DOCUMENT ME!
440      * 
441      * @throws IOException
442      *             DOCUMENT ME!
443      */
444     protected Reader createReader(InputStream in) throws IOException {
445         return new BufferedReader(new InputStreamReader(in));
446     }
447 }
448 
/*
 * Redistribution and use of this software and associated documentation
 * ("Software"), with or without modification, are permitted provided that the
 * following conditions are met:
 *
 * 1. Redistributions of source code must retain copyright statements and
 * notices. Redistributions must also contain a copy of this document.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. The name "DOM4J" must not be used to endorse or promote products derived
 * from this Software without prior written permission of MetaStuff, Ltd. For
 * written permission, please contact dom4j-info@metastuff.com.
 *
 * 4. Products derived from this Software may not be called "DOM4J" nor may
 * "DOM4J" appear in their names without prior written permission of MetaStuff,
 * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
 *
 * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
 *
 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 */