XMLAccess xref

View Javadoc
1   /*
2    * Copyright 1999-2006 Faculty of Mathematics, Physics and Informatics, Comenius
3    * University, Bratislava. This file is protected by the Mozilla Public License
4    * version 1.1 (the "License"); you may not use this file except in compliance
5    * with the License. You may obtain a copy of the License at
6    * http://euromath2.sourceforge.net/license.html Unless required by applicable
7    * law or agreed to in writing, software distributed under the License is
8    * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
9    * KIND, either express or implied. See the License for the specific language
10   * governing permissions and limitations under the License.
11   */
12  package sk.uniba.euromath.document;
13  import java.io.IOException;
14  import java.io.OutputStream;
15  import java.net.MalformedURLException;
16  import java.net.URISyntaxException;
17  import java.net.URL;
18  import java.util.Collections;
19  import java.util.HashSet;
20  import java.util.Map;
21  import java.util.Set;
22  import java.util.WeakHashMap;
23  import org.apache.commons.lang.StringUtils;
24  import org.w3c.dom.CDATASection;
25  import org.w3c.dom.CharacterData;
26  import org.w3c.dom.Document;
27  import org.w3c.dom.DocumentFragment;
28  import org.w3c.dom.Element;
29  import org.w3c.dom.Node;
30  import org.w3c.dom.Text;
31  import org.w3c.dom.ls.DOMImplementationLS;
32  import org.w3c.dom.ls.LSOutput;
33  import org.w3c.dom.ls.LSSerializer;
34  import org.w3c.dom.traversal.DocumentTraversal;
35  import org.w3c.dom.traversal.NodeFilter;
36  import org.w3c.dom.traversal.NodeIterator;
37  import sk.baka.ikslibs.UndoManager;
38  import sk.baka.ikslibs.ids.FailEnum;
39  import sk.baka.ikslibs.ids.IDManager;
40  import sk.baka.ikslibs.modify.DOMMutils;
41  import sk.baka.ikslibs.ptr.DomPointer;
42  import sk.baka.ikslibs.ref.EntityManager;
43  import sk.baka.ikslibs.splitted.ISplittedListener;
44  import sk.baka.ikslibs.splitted.SplittedDocChangeCollector;
45  import sk.baka.ikslibs.splitted.SplittedDocHolder;
46  import sk.baka.xml.gene.ExportUtils;
47  import sk.baka.xml.schematic.DocumentSchema;
48  import sk.baka.xml.schematic.pluginterface.SchemaException;
49  import sk.uniba.euromath.document.schema.SchematicUtils;
50  import sk.uniba.euromath.tools.URLDir;
51  /***
52   * Provides access to the document. Accessible from out of document package.
53   * Class is not intended to be instantiated by clients.
54   * @author Martin Vysny
55   */
56  public final class XMLAccess {
57  	/***
58  	 * Enclosed document.
59  	 */
60  	private final Document doc;
61  	/***
62  	 * Manages ID for this document.
63  	 */
64  	private final IDManager idMan;
65  	/***
66  	 * Manages namespaces and prefixes for this document.
67  	 */
68  	private final NamespaceManager nsManager;
69  	/***
70  	 * The document schema instance.
71  	 */
72  	private final DocumentSchema schema;
73  	/***
74  	 * The entity manager.
75  	 */
76  	private final EntityManager entityManager;
77  	/***
78  	 * Splitted document.
79  	 */
80  	private final SplittedDocHolder splittedDoc;
81  	/***
82  	 * Collects changes in the splitted document.
83  	 */
84  	private final SplittedDocChangeCollector splittedChanges;
85  	/***
86  	 * Returns object that collects changes in the splitted document.
87  	 * @return object that collects changes in the splitted document, never
88  	 * <code>null</code>.
89  	 */
90  	public final SplittedDocChangeCollector getSplittedChanges() {
91  		return splittedChanges;
92  	}
93  	/***
94  	 * The name of the document file, including the extension, without the path
95  	 * specifier.
96  	 */
97  	public final String fileName;
98  	/***
99  	 * The name of the document file, including the extension and full path. URI
100 	 * address.
101 	 */
102 	public final URL fileURL;
103 	/***
104 	 * URL pointing to the directory where the document is located.
105 	 */
106 	private final URLDir root;
107 	/***
108 	 * Table of views, that manages the transformations over the document. Value
109 	 * is ignored.
110 	 */
111 	final Map<DocumentView, Object> views = new WeakHashMap<DocumentView, Object>();
112 	/***
113 	 * The document modifier instance.
114 	 */
115 	private final DocumentModifier docModifier;
116 	/***
117 	 * DocumentModifyHelper instace for clients.
118 	 */
119 	private final DocumentModifyHelper docModifyHelper;
120 	/***
121 	 * The document listeners manager instance.
122 	 */
123 	private final DocumentListeners docListeners;
124 	/***
125 	 * The undo manager.
126 	 */
127 	private final UndoManager undoManager;
128 	/***
129 	 * Constructs a new instance of XMLAccess. For each opened document there is
130 	 * exactly one instance. Produced via the {@link DocumentFactory} factory.
131 	 * @param doc Document that this object will work with.
132 	 * @param textEntities all entities encountered during the parse.
133 	 * @param root URL pointing to the directory where the document is located.
134 	 * @param fileName the name of the document file, including the extension,
135 	 * without the path specifier.
136 	 * @throws DocumentException when error occurs during document processing.
137 	 */
138 	XMLAccess(Document doc, Map<String, String> textEntities, URLDir root,
139 			String fileName) throws DocumentException {
140 		super();
141 		this.doc = doc;
142 		this.root = root;
143 		this.fileName = fileName;
144 		try {
145 			this.fileURL = root.resolve(fileName);
146 		} catch (URISyntaxException ex) {
147 			throw new DocumentException(ex);
148 		} catch (MalformedURLException ex) {
149 			throw new DocumentException(ex);
150 		}
151 		idMan = new IDManager(doc, ExportUtils.GENE_ID_ATTRIBUTE_QNAME,
152 				FailEnum.FAIL_WHEN_ID_EXISTS);
153 		idMan.observe(doc);
154 		entityManager = new EntityManager(doc);
155 		entityManager.createEntities(textEntities, true);
156 		nsManager = new NamespaceManager(doc);
157 		nsManager.observe(doc);
158 		removeWhitespaces();
159 		schema = new DocumentSchema(doc, SchematicUtils.getPool());
160 		splittedChanges = new SplittedDocChangeCollector();
161 		splittedDoc = SplittedDocHolder.newFromDocument(doc,
162 				SplittedDocHolder.PI_GENEREF_TARGET, Collections
163 						.singletonList((ISplittedListener) splittedChanges));
164 		docListeners = new DocumentListeners(this);
165 		undoManager = new UndoManager();
166 		docModifier = new DocumentModifier(this, views.keySet(), docListeners);
167 		docModifyHelper = new DocumentModifyHelper(this);
168 		undoManager.observe(doc);
169 	}
170 	/***
171 	 * Whitespace-normalizes all text nodes.
172 	 */
173 	private void removeWhitespaces() {
174 		if (doc.getDocumentElement() != null) {
175 			// remove all whitespaced text nodes.
176 			NodeIterator iterator = ((DocumentTraversal) doc)
177 					.createNodeIterator(doc.getDocumentElement(),
178 							NodeFilter.SHOW_TEXT, null, false);
179 			Node actNode = iterator.nextNode();
180 			while (actNode != null) {
181 				if (isNodeContentEmpty(actNode)) {
182 					// the node has empty or whitespace-only content. remove it.
183 					iterator.previousNode();
184 					actNode.getParentNode().removeChild(actNode);
185 				} else {
186 					DOMMutils.normalizeWhitespaces(actNode);
187 				}
188 				// get next element
189 				actNode = iterator.nextNode();
190 			}
191 			iterator.detach();
192 		}
193 	}
194 	/***
195 	 * Returns true if node has empty text contents (the <code>Text</code> or
196 	 * <code>CDATASection</code> node), or contains whitespaces only (
197 	 * <code>Text</code>).
198 	 * @param node node to check.
199 	 * @return true if <code>node.getData()</code> returns <code>null</code>
200 	 * or empty string.
201 	 */
202 	private boolean isNodeContentEmpty(Node node) {
203 		if (!(node instanceof Text) && !(node instanceof CDATASection))
204 			return false;
205 		final String data = ((CharacterData) node).getData();
206 		if (StringUtils.isEmpty(data))
207 			return true;
208 		if (node instanceof CDATASection)
209 			return false;
210 		// test for whitespaces
211 		for (int i = 0; i < data.length(); i++) {
212 			char c = data.charAt(i);
213 			if (!Character.isWhitespace(c))
214 				return false;
215 		}
216 		return true;
217 	}
218 	/***
219 	 * Checks the node if it is from our document.
220 	 * @param node node to check.
221 	 * @throws IllegalArgumentException if node is not from bound document
222 	 */
223 	public void checkNode(Node node) {
224 		if (!isOurNode(node))
225 			throw new IllegalArgumentException("Node is not from our document."); //$NON-NLS-1$
226 	}
227 	/***
228 	 * Checks the pointer if it points into our document.
229 	 * @param ptr pointer to check.
230 	 * @throws IllegalArgumentException if pointer is not from bound document
231 	 */
232 	public void checkPtr(DomPointer ptr) {
233 		if ((ptr.getDocument() == null) || !isOurNode(ptr.getDocument()))
234 			throw new IllegalArgumentException(
235 					"Pointer is not from our document."); //$NON-NLS-1$
236 	}
237 	/***
238 	 * Checks the node if it is from our document.
239 	 * @param node node to check.
240 	 * @return true if the node was created by this document, false otherwise.
241 	 */
242 	public boolean isOurNode(Node node) {
243 		if (node.getNodeType() == Node.DOCUMENT_NODE)
244 			return node == doc;
245 		return (node.getOwnerDocument() == doc);
246 	}
247 	/***
248 	 * Returns <code>IDManager</code> instance for this document.
249 	 * @return An <code>IDManager</code> for this document.
250 	 */
251 	public IDManager getIDManager() {
252 		return idMan;
253 	}
254 	/***
255 	 * Returns <code>DomToSplitted</code> instance for this document.
256 	 * @return An <code>DomToSplitted</code> for this document.
257 	 */
258 	public SplittedDocHolder getSplittedDoc() {
259 		return splittedDoc;
260 	}
261 	/***
262 	 * Returns the nametree from the original document, that the
263 	 * <code>emp:mark</code> element points to.
264 	 * @param mark the mark element, or <code>null</code>.
265 	 * @return the nametree represented as a document fragment. This fragment
266 	 * must not be modified. If <code>null</code> is provided then root
267 	 * fragment is returned.
268 	 * @throws IllegalArgumentException if the element is not
269 	 * <code>emp:mark</code> or the ID does not exist.
270 	 */
271 	public DocumentFragment getSource(Node mark) {
272 		if (mark == null)
273 			return splittedDoc.getRootFragment();
274 		return splittedDoc.getDomFragment(splittedDoc.getRef(mark));
275 	}
276 	/***
277 	 * Opens new view on the document. The view must be initialized.
278 	 * @param view the view to register.
279 	 */
280 	public void openView(final DocumentView view) {
281 		if (!view.isInitialized())
282 			throw new IllegalArgumentException("The view is not initialized");//$NON-NLS-1$
283 		if (view.isClosed())
284 			throw new IllegalArgumentException("The view is closed");//$NON-NLS-1$
285 		views.put(view, null);
286 	}
287 	/***
288 	 * Unregisters the view. The view is no more used by the transformation
289 	 * engine and is subject to garbage collection.
290 	 * @param view the document view to close. Fails if the view was not opened
291 	 * for this document.
292 	 */
293 	public void closeView(final DocumentView view) {
294 		if (views.remove(view) == null)
295 			throw new IllegalArgumentException("Illegal view."); //$NON-NLS-1$
296 		view.close();
297 	}
298 	/***
299 	 * Returns instance of the undo manager for this document.
300 	 * @return the undo manager.
301 	 */
302 	public UndoManager getUndoManager() {
303 		return undoManager;
304 	}
305 	/***
306 	 * Returns all namespaces present in the document.
307 	 * @return set of namespaces. <code>null</code> namespace (nor empty
308 	 * namespace) does not occur in the returned set. Deprecated, use namespace
309 	 * manager methods.
310 	 */
311 	@Deprecated
312 	public Set<String> getAllNamespaces() {
313 		final Set<String> result = new HashSet<String>(getNsManager()
314 				.getAllNamespaces());
315 		result.remove(""); //$NON-NLS-1$
316 		return result;
317 	}
318 	/***
319 	 * <p>
320 	 * Access to the XML document. This document has this special feature: Every
321 	 * element has {@link ExportUtils#GENE_ID_ATTRIBUTE_QNAME} attribute,
322 	 * denoting ID of that element.
323 	 * </p>
324 	 * @return the DOM document.
325 	 */
326 	public Document getDocument() {
327 		return doc;
328 	}
329 	/***
330 	 * Validates this document.
331 	 * @throws SchemaException if something goes wrong in the process of
332 	 * validation.
333 	 */
334 	public void validate() throws SchemaException {
335 		getSchema().validate();
336 	}
337 	/***
338 	 * Returns document modifier, which is used to transparently modify the
339 	 * document.
340 	 * @return the document modifier instance.
341 	 */
342 	public DocumentModifier getModifier() {
343 		return docModifier;
344 	}
345 	/***
346 	 * Loads global schemas for all namespaces, present in the document, for
347 	 * which no local schema was loaded. It must be called before the Schema
348 	 * interface is used, to ensure that all schemata are properly loaded.
349 	 * @throws SchemaException if error happens during loading of schemas.
350 	 * @throws IOException if i/o error occurs
351 	 */
352 	public void loadGlobalSchemas() throws SchemaException, IOException {
353 		final Set<String> allNamespaces = new HashSet<String>(getNsManager()
354 				.getAllNamespaces());
355 		allNamespaces.remove(""); //$NON-NLS-1$
356 		getSchema().getRefs().loadSchemas(allNamespaces);
357 	}
358 	/***
359 	 * Document's content modifier helper. For chosen operation returns all
360 	 * possibilities of document modification, that will result in valid
361 	 * document.
362 	 * @return the document schema instance.
363 	 */
364 	public DocumentSchema getSchema() {
365 		return schema;
366 	}
367 	/***
368 	 * Returns namespace manager for this document.
369 	 * @return namespace manager for this document.
370 	 */
371 	public NamespaceManager getNsManager() {
372 		return nsManager;
373 	}
374 	/***
375 	 * Returns entity manager for this document.
376 	 * @return entity manager for this document.
377 	 */
378 	public sk.baka.ikslibs.ref.EntityManager getEntityManager() {
379 		return entityManager;
380 	}
381 	/***
382 	 * Serializes in-memory document to specified output stream.
383 	 * @param out stream, where to store saved xml.
384 	 * @param encoding the encoding. If <code>null</code>, then UTF-8 is
385 	 * used.
386 	 * @param prettyFormatting if output xml will be pretty-formatted - readable
387 	 * by user. Warning: this adds some whitespaces to xml, thus modifying
388 	 * result xml. User must be sure that these whitespaces are discardable.
389 	 */
390 	public void saveDocument(OutputStream out, String encoding,
391 			boolean prettyFormatting) {
392 		final DOMImplementationLS impl = (DOMImplementationLS) doc
393 				.getImplementation();
394 		final LSSerializer serializer = impl.createLSSerializer();
395 		final LSOutput output = impl.createLSOutput();
396 		output.setEncoding(encoding == null ? "UTF-8" : encoding); //$NON-NLS-1$
397 		output.setByteStream(out);
398 		// duplicates document and prepares it for output.
399 		final Document serializeDoc = (Document) doc.cloneNode(true);
400 		// removes all emp:ids
401 		final NodeIterator i = ((DocumentTraversal) serializeDoc)
402 				.createNodeIterator(serializeDoc.getDocumentElement(),
403 						NodeFilter.SHOW_ELEMENT, null, false);
404 		Element actNode = (Element) i.nextNode();
405 		while (actNode != null) {
406 			// remove gene:id attribute
407 			actNode.removeAttributeNS(ExportUtils.GENE_ID_ATTRIBUTE_QNAME
408 					.getNamespaceURI(), ExportUtils.GENE_ID_ATTRIBUTE_QNAME
409 					.getLocalPart());
410 			// get next element
411 			actNode = (Element) i.nextNode();
412 		}
413 		i.detach();
414 		// create namespace nodes
415 		getNsManager().createXmlnsAttributes(serializeDoc.getDocumentElement());
416 		// serialize to stream
417 		serializer.write(serializeDoc, output);
418 	}
419 	/***
420 	 * Returns encoding, in which the document is serialized. May return
421 	 * <code>null</code> if the encoding was not specified.
422 	 * @return the encoding.
423 	 */
424 	public String getEncoding() {
425 		return doc.getXmlEncoding();
426 	}
427 	/***
428 	 * Serializes transformed document "as-is", no emp:id removal is performed.
429 	 * Used only for debug. Serializes only root fragment.
430 	 * @param out the outputstream.
431 	 */
432 	public void serialize(OutputStream out) {
433 		final DOMImplementationLS impl = (DOMImplementationLS) doc
434 				.getImplementation();
435 		final LSSerializer serializer = impl.createLSSerializer();
436 		final LSOutput output = impl.createLSOutput();
437 		output.setEncoding("UTF-8"); //$NON-NLS-1$
438 		output.setByteStream(out);
439 		// serialize to stream
440 		serializer.write(getSplittedDoc().getRootFragment(), output);
441 	}
442 	/***
443 	 * Returns document listeners manager for this document.
444 	 * @return document listeners manager for this document.
445 	 */
446 	public DocumentListeners getListeners() {
447 		return docListeners;
448 	}
449 	/***
450 	 * Returns full URI address of the document.
451 	 * @return document location.
452 	 */
453 	public URL getDocumentURL() {
454 		return fileURL;
455 	}
456 	/***
457 	 * Returns the name of the document file, including the extension, without
458 	 * the path specifier.
459 	 * @return the file name without the path.
460 	 */
461 	public String getFileName() {
462 		return fileName;
463 	}
464 	/***
465 	 * Returns URL pointing to the directory where the document is located.
466 	 * @return location of the XML file.
467 	 */
468 	public URLDir getRoot() {
469 		return root;
470 	}
471 	/***
472 	 * Returns clients Helper for modifying.
473 	 * @return DocumentModifyHelper instance
474 	 */
475 	public DocumentModifyHelper getDocumentModifyHelper() {
476 		return docModifyHelper;
477 	}
478 }