1
2
3
4
5
6
7
8 package org.dom4j.io;
9
10 import java.io.File;
11 import java.io.InputStream;
12 import java.io.Reader;
13 import java.net.URL;
14 import java.util.HashMap;
15 import java.util.Iterator;
16 import java.util.Map;
17
18 import org.dom4j.Document;
19 import org.dom4j.DocumentException;
20 import org.dom4j.DocumentFactory;
21
22 import org.xml.sax.InputSource;
23 import org.xml.sax.SAXException;
24 import org.xml.sax.XMLReader;
25
26 /***
27 * The SAXModifier reads, modifies and writes XML documents using SAX.
28 *
29 * <p>
30 * Registered {@link ElementModifier}objects can provide modifications to (part
31 * of) the xml tree, while the document is still being processed. This makes it
32 * possible to change large xml documents without having them in memory.
33 * </p>
34 *
35 * <p>
36 * The modified document is written when the {@link XMLWriter}is specified.
37 * </p>
38 *
39 * @author Wonne Keysers (Realsoftware.be)
40 *
41 * @see org.dom4j.io.SAXReader
42 * @see org.dom4j.io.XMLWriter
43 */
44 public class SAXModifier {
45 private XMLWriter xmlWriter;
46
47 private XMLReader xmlReader;
48
49 private boolean pruneElements;
50
51 private SAXModifyReader modifyReader;
52
53 private HashMap modifiers = new HashMap();
54
55 /***
56 * Creates a new modifier. <br>
57 * The XMLReader to parse the source will be created via the
58 * org.xml.sax.driver system property or JAXP if the system property is not
59 * set.
60 */
61 public SAXModifier() {
62 }
63
64 /***
65 * Creates a new modifier. <br>
66 * The XMLReader to parse the source will be created via the
67 * org.xml.sax.driver system property or JAXP if the system property is not
68 * set.
69 *
70 * @param pruneElements
71 * Set to true when the modified document must NOT be kept in
72 * memory.
73 */
74 public SAXModifier(boolean pruneElements) {
75 this.pruneElements = pruneElements;
76 }
77
78 /***
79 * Creates a new modifier that will the specified {@link
80 * org.xml.sax.XMLReader} to parse the source.
81 *
82 * @param xmlReader
83 * The XMLReader to use
84 */
85 public SAXModifier(XMLReader xmlReader) {
86 this.xmlReader = xmlReader;
87 }
88
89 /***
90 * Creates a new modifier that will the specified {@link
91 * org.xml.sax.XMLReader} to parse the source.
92 *
93 * @param xmlReader
94 * The XMLReader to use
95 * @param pruneElements
96 * Set to true when the modified document must NOT be kept in
97 * memory.
98 */
99 public SAXModifier(XMLReader xmlReader, boolean pruneElements) {
100 this.xmlReader = xmlReader;
101 }
102
103 /***
104 * Reads a Document from the given {@link java.io.File}and writes it to the
105 * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
106 * objects are invoked on the fly.
107 *
108 * @param source
109 * is the <code>File</code> to read from.
110 *
111 * @return the newly created Document instance
112 *
113 * @throws DocumentException
114 * DocumentException org.dom4j.DocumentException} if an error
115 * occurs during parsing.
116 */
117 public Document modify(File source) throws DocumentException {
118 try {
119 return installModifyReader().read(source);
120 } catch (SAXModifyException ex) {
121 Throwable cause = ex.getCause();
122 throw new DocumentException(cause.getMessage(), cause);
123 }
124 }
125
126 /***
127 * Reads a Document from the given {@link org.xml.sax.InputSource}and
128 * writes it to the specified {@link XMLWriter}using SAX. Registered
129 * {@link ElementModifier}objects are invoked on the fly.
130 *
131 * @param source
132 * is the <code>org.xml.sax.InputSource</code> to read from.
133 *
134 * @return the newly created Document instance
135 *
136 * @throws DocumentException
137 * DocumentException org.dom4j.DocumentException} if an error
138 * occurs during parsing.
139 */
140 public Document modify(InputSource source) throws DocumentException {
141 try {
142 return installModifyReader().read(source);
143 } catch (SAXModifyException ex) {
144 Throwable cause = ex.getCause();
145 throw new DocumentException(cause.getMessage(), cause);
146 }
147 }
148
149 /***
150 * Reads a Document from the given {@link java.io.InputStream}and writes it
151 * to the specified {@link XMLWriter}using SAX. Registered {@link
152 * ElementModifier} objects are invoked on the fly.
153 *
154 * @param source
155 * is the <code>java.io.InputStream</code> to read from.
156 *
157 * @return the newly created Document instance
158 *
159 * @throws DocumentException
160 * DocumentException org.dom4j.DocumentException} if an error
161 * occurs during parsing.
162 */
163 public Document modify(InputStream source) throws DocumentException {
164 try {
165 return installModifyReader().read(source);
166 } catch (SAXModifyException ex) {
167 Throwable cause = ex.getCause();
168 throw new DocumentException(cause.getMessage(), cause);
169 }
170 }
171
172 /***
173 * Reads a Document from the given {@link java.io.InputStream}and writes it
174 * to the specified {@link XMLWriter}using SAX. Registered {@link
175 * ElementModifier} objects are invoked on the fly.
176 *
177 * @param source
178 * is the <code>java.io.InputStream</code> to read from.
179 * @param systemId
180 * DOCUMENT ME!
181 *
182 * @return the newly created Document instance
183 *
184 * @throws DocumentException
185 * DocumentException org.dom4j.DocumentException} if an error
186 * occurs during parsing.
187 */
188 public Document modify(InputStream source, String systemId)
189 throws DocumentException {
190 try {
191 return installModifyReader().read(source);
192 } catch (SAXModifyException ex) {
193 Throwable cause = ex.getCause();
194 throw new DocumentException(cause.getMessage(), cause);
195 }
196 }
197
198 /***
199 * Reads a Document from the given {@link java.io.Reader}and writes it to
200 * the specified {@link XMLWriter}using SAX. Registered {@link
201 * ElementModifier} objects are invoked on the fly.
202 *
203 * @param source
204 * is the <code>java.io.Reader</code> to read from.
205 *
206 * @return the newly created Document instance
207 *
208 * @throws DocumentException
209 * DocumentException org.dom4j.DocumentException} if an error
210 * occurs during parsing.
211 */
212 public Document modify(Reader source) throws DocumentException {
213 try {
214 return installModifyReader().read(source);
215 } catch (SAXModifyException ex) {
216 Throwable cause = ex.getCause();
217 throw new DocumentException(cause.getMessage(), cause);
218 }
219 }
220
221 /***
222 * Reads a Document from the given {@link java.io.Reader}and writes it to
223 * the specified {@link XMLWriter}using SAX. Registered {@link
224 * ElementModifier} objects are invoked on the fly.
225 *
226 * @param source
227 * is the <code>java.io.Reader</code> to read from.
228 * @param systemId
229 * DOCUMENT ME!
230 *
231 * @return the newly created Document instance
232 *
233 * @throws DocumentException
234 * DocumentException org.dom4j.DocumentException} if an error
235 * occurs during parsing.
236 */
237 public Document modify(Reader source, String systemId)
238 throws DocumentException {
239 try {
240 return installModifyReader().read(source);
241 } catch (SAXModifyException ex) {
242 Throwable cause = ex.getCause();
243 throw new DocumentException(cause.getMessage(), cause);
244 }
245 }
246
247 /***
248 * Reads a Document from the given {@link java.net.URL}and writes it to the
249 * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
250 * objects are invoked on the fly.
251 *
252 * @param source
253 * is the <code>java.net.URL</code> to read from.
254 *
255 * @return the newly created Document instance
256 *
257 * @throws DocumentException
258 * DocumentException org.dom4j.DocumentException} if an error
259 * occurs during parsing.
260 */
261 public Document modify(URL source) throws DocumentException {
262 try {
263 return installModifyReader().read(source);
264 } catch (SAXModifyException ex) {
265 Throwable cause = ex.getCause();
266 throw new DocumentException(cause.getMessage(), cause);
267 }
268 }
269
270 /***
271 * Reads a Document from the given URL or filename and writes it to the
272 * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
273 * objects are invoked on the fly.
274 *
275 * @param source
276 * is the URL or filename to read from.
277 *
278 * @return the newly created Document instance
279 *
280 * @throws DocumentException
281 * DocumentException org.dom4j.DocumentException} if an error
282 * occurs during parsing.
283 */
284 public Document modify(String source) throws DocumentException {
285 try {
286 return installModifyReader().read(source);
287 } catch (SAXModifyException ex) {
288 Throwable cause = ex.getCause();
289 throw new DocumentException(cause.getMessage(), cause);
290 }
291 }
292
293 /***
294 * Adds the {@link ElementModifier}to be called when the specified element
295 * path is encounted while parsing the source.
296 *
297 * @param path
298 * The element path to be handled
299 * @param modifier
300 * The {@link ElementModifier}to be called by the event based
301 * processor.
302 */
303 public void addModifier(String path, ElementModifier modifier) {
304 this.modifiers.put(path, modifier);
305 }
306
307 /***
308 * Removes all registered {@link ElementModifier}instances from the event
309 * based processor.
310 */
311 public void resetModifiers() {
312 this.modifiers.clear();
313 getSAXModifyReader().resetHandlers();
314 }
315
316 /***
317 * Removes the {@link ElementModifier}from the event based processor, for
318 * the specified element path.
319 *
320 * @param path
321 * The path to remove the {@link ElementModifier}for.
322 */
323 public void removeModifier(String path) {
324 this.modifiers.remove(path);
325 getSAXModifyReader().removeHandler(path);
326 }
327
328 /***
329 * Get the {@link org.dom4j.DocumentFactory}used to create the DOM4J
330 * document structure
331 *
332 * @return <code>DocumentFactory</code> that will be used
333 */
334 public DocumentFactory getDocumentFactory() {
335 return getSAXModifyReader().getDocumentFactory();
336 }
337
338 /***
339 * Sets the {@link org.dom4j.DocumentFactory}used to create the DOM4J
340 * document tree.
341 *
342 * @param factory
343 * <code>DocumentFactory</code> to be used
344 */
345 public void setDocumentFactory(DocumentFactory factory) {
346 getSAXModifyReader().setDocumentFactory(factory);
347 }
348
349 /***
350 * Returns the current {@link XMLWriter}.
351 *
352 * @return XMLWriter
353 */
354 public XMLWriter getXMLWriter() {
355 return this.xmlWriter;
356 }
357
358 /***
359 * Sets the {@link XMLWriter}used to write the modified document.
360 *
361 * @param writer
362 * The writer to use.
363 */
364 public void setXMLWriter(XMLWriter writer) {
365 this.xmlWriter = writer;
366 }
367
368 /***
369 * Returns true when xml elements are not kept in memory while parsing. The
370 * {@link org.dom4j.Document}returned by the modify methods will be null.
371 *
372 * @return Returns the pruneElements.
373 */
374 public boolean isPruneElements() {
375 return pruneElements;
376 }
377
378 private SAXReader installModifyReader() throws DocumentException {
379 try {
380 SAXModifyReader reader = getSAXModifyReader();
381
382 if (isPruneElements()) {
383 modifyReader.setDispatchHandler(new PruningDispatchHandler());
384 }
385
386 reader.resetHandlers();
387
388 Iterator modifierIt = this.modifiers.entrySet().iterator();
389
390 while (modifierIt.hasNext()) {
391 Map.Entry entry = (Map.Entry) modifierIt.next();
392
393 SAXModifyElementHandler handler = new SAXModifyElementHandler(
394 (ElementModifier) entry.getValue());
395 reader.addHandler((String) entry.getKey(), handler);
396 }
397
398 reader.setXMLWriter(getXMLWriter());
399 reader.setXMLReader(getXMLReader());
400
401 return reader;
402 } catch (SAXException ex) {
403 throw new DocumentException(ex.getMessage(), ex);
404 }
405 }
406
407 private XMLReader getXMLReader() throws SAXException {
408 if (this.xmlReader == null) {
409 xmlReader = SAXHelper.createXMLReader(false);
410 }
411
412 return this.xmlReader;
413 }
414
415 private SAXModifyReader getSAXModifyReader() {
416 if (modifyReader == null) {
417 modifyReader = new SAXModifyReader();
418 }
419
420 return modifyReader;
421 }
422 }
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459