1
2
3
4
5
6
7
8
9
10
11
12 package sk.uniba.euromath.document;
13 import java.io.IOException;
14 import java.io.OutputStream;
15 import java.net.MalformedURLException;
16 import java.net.URISyntaxException;
17 import java.net.URL;
18 import java.util.Collections;
19 import java.util.HashSet;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.WeakHashMap;
23 import org.apache.commons.lang.StringUtils;
24 import org.w3c.dom.CDATASection;
25 import org.w3c.dom.CharacterData;
26 import org.w3c.dom.Document;
27 import org.w3c.dom.DocumentFragment;
28 import org.w3c.dom.Element;
29 import org.w3c.dom.Node;
30 import org.w3c.dom.Text;
31 import org.w3c.dom.ls.DOMImplementationLS;
32 import org.w3c.dom.ls.LSOutput;
33 import org.w3c.dom.ls.LSSerializer;
34 import org.w3c.dom.traversal.DocumentTraversal;
35 import org.w3c.dom.traversal.NodeFilter;
36 import org.w3c.dom.traversal.NodeIterator;
37 import sk.baka.ikslibs.UndoManager;
38 import sk.baka.ikslibs.ids.FailEnum;
39 import sk.baka.ikslibs.ids.IDManager;
40 import sk.baka.ikslibs.modify.DOMMutils;
41 import sk.baka.ikslibs.ptr.DomPointer;
42 import sk.baka.ikslibs.ref.EntityManager;
43 import sk.baka.ikslibs.splitted.ISplittedListener;
44 import sk.baka.ikslibs.splitted.SplittedDocChangeCollector;
45 import sk.baka.ikslibs.splitted.SplittedDocHolder;
46 import sk.baka.xml.gene.ExportUtils;
47 import sk.baka.xml.schematic.DocumentSchema;
48 import sk.baka.xml.schematic.pluginterface.SchemaException;
49 import sk.uniba.euromath.document.schema.SchematicUtils;
50 import sk.uniba.euromath.tools.URLDir;
51 /***
52 * Provides access to the document. Accessible from out of document package.
53 * Class is not intended to be instantiated by clients.
54 * @author Martin Vysny
55 */
56 public final class XMLAccess {
57 /***
58 * Enclosed document.
59 */
60 private final Document doc;
61 /***
62 * Manages ID for this document.
63 */
64 private final IDManager idMan;
65 /***
66 * Manages namespaces and prefixes for this document.
67 */
68 private final NamespaceManager nsManager;
69 /***
70 * The document schema instance.
71 */
72 private final DocumentSchema schema;
73 /***
74 * The entity manager.
75 */
76 private final EntityManager entityManager;
77 /***
78 * Splitted document.
79 */
80 private final SplittedDocHolder splittedDoc;
81 /***
82 * Collects changes in the splitted document.
83 */
84 private final SplittedDocChangeCollector splittedChanges;
85 /***
86 * Returns object that collects changes in the splitted document.
87 * @return object that collects changes in the splitted document, never
88 * <code>null</code>.
89 */
90 public final SplittedDocChangeCollector getSplittedChanges() {
91 return splittedChanges;
92 }
93 /***
94 * The name of the document file, including the extension, without the path
95 * specifier.
96 */
97 public final String fileName;
98 /***
99 * The name of the document file, including the extension and full path. URI
100 * address.
101 */
102 public final URL fileURL;
103 /***
104 * URL pointing to the directory where the document is located.
105 */
106 private final URLDir root;
107 /***
108 * Table of views, that manages the transformations over the document. Value
109 * is ignored.
110 */
111 final Map<DocumentView, Object> views = new WeakHashMap<DocumentView, Object>();
112 /***
113 * The document modifier instance.
114 */
115 private final DocumentModifier docModifier;
116 /***
117 * DocumentModifyHelper instace for clients.
118 */
119 private final DocumentModifyHelper docModifyHelper;
120 /***
121 * The document listeners manager instance.
122 */
123 private final DocumentListeners docListeners;
124 /***
125 * The undo manager.
126 */
127 private final UndoManager undoManager;
128 /***
129 * Constructs a new instance of XMLAccess. For each opened document there is
130 * exactly one instance. Produced via the {@link DocumentFactory} factory.
131 * @param doc Document that this object will work with.
132 * @param textEntities all entities encountered during the parse.
133 * @param root URL pointing to the directory where the document is located.
134 * @param fileName the name of the document file, including the extension,
135 * without the path specifier.
136 * @throws DocumentException when error occurs during document processing.
137 */
138 XMLAccess(Document doc, Map<String, String> textEntities, URLDir root,
139 String fileName) throws DocumentException {
140 super();
141 this.doc = doc;
142 this.root = root;
143 this.fileName = fileName;
144 try {
145 this.fileURL = root.resolve(fileName);
146 } catch (URISyntaxException ex) {
147 throw new DocumentException(ex);
148 } catch (MalformedURLException ex) {
149 throw new DocumentException(ex);
150 }
151 idMan = new IDManager(doc, ExportUtils.GENE_ID_ATTRIBUTE_QNAME,
152 FailEnum.FAIL_WHEN_ID_EXISTS);
153 idMan.observe(doc);
154 entityManager = new EntityManager(doc);
155 entityManager.createEntities(textEntities, true);
156 nsManager = new NamespaceManager(doc);
157 nsManager.observe(doc);
158 removeWhitespaces();
159 schema = new DocumentSchema(doc, SchematicUtils.getPool());
160 splittedChanges = new SplittedDocChangeCollector();
161 splittedDoc = SplittedDocHolder.newFromDocument(doc,
162 SplittedDocHolder.PI_GENEREF_TARGET, Collections
163 .singletonList((ISplittedListener) splittedChanges));
164 docListeners = new DocumentListeners(this);
165 undoManager = new UndoManager();
166 docModifier = new DocumentModifier(this, views.keySet(), docListeners);
167 docModifyHelper = new DocumentModifyHelper(this);
168 undoManager.observe(doc);
169 }
170 /***
171 * Whitespace-normalizes all text nodes.
172 */
173 private void removeWhitespaces() {
174 if (doc.getDocumentElement() != null) {
175
176 NodeIterator iterator = ((DocumentTraversal) doc)
177 .createNodeIterator(doc.getDocumentElement(),
178 NodeFilter.SHOW_TEXT, null, false);
179 Node actNode = iterator.nextNode();
180 while (actNode != null) {
181 if (isNodeContentEmpty(actNode)) {
182
183 iterator.previousNode();
184 actNode.getParentNode().removeChild(actNode);
185 } else {
186 DOMMutils.normalizeWhitespaces(actNode);
187 }
188
189 actNode = iterator.nextNode();
190 }
191 iterator.detach();
192 }
193 }
194 /***
195 * Returns true if node has empty text contents (the <code>Text</code> or
196 * <code>CDATASection</code> node), or contains whitespaces only (
197 * <code>Text</code>).
198 * @param node node to check.
199 * @return true if <code>node.getData()</code> returns <code>null</code>
200 * or empty string.
201 */
202 private boolean isNodeContentEmpty(Node node) {
203 if (!(node instanceof Text) && !(node instanceof CDATASection))
204 return false;
205 final String data = ((CharacterData) node).getData();
206 if (StringUtils.isEmpty(data))
207 return true;
208 if (node instanceof CDATASection)
209 return false;
210
211 for (int i = 0; i < data.length(); i++) {
212 char c = data.charAt(i);
213 if (!Character.isWhitespace(c))
214 return false;
215 }
216 return true;
217 }
218 /***
219 * Checks the node if it is from our document.
220 * @param node node to check.
221 * @throws IllegalArgumentException if node is not from bound document
222 */
223 public void checkNode(Node node) {
224 if (!isOurNode(node))
225 throw new IllegalArgumentException("Node is not from our document.");
226 }
227 /***
228 * Checks the pointer if it points into our document.
229 * @param ptr pointer to check.
230 * @throws IllegalArgumentException if pointer is not from bound document
231 */
232 public void checkPtr(DomPointer ptr) {
233 if ((ptr.getDocument() == null) || !isOurNode(ptr.getDocument()))
234 throw new IllegalArgumentException(
235 "Pointer is not from our document.");
236 }
237 /***
238 * Checks the node if it is from our document.
239 * @param node node to check.
240 * @return true if the node was created by this document, false otherwise.
241 */
242 public boolean isOurNode(Node node) {
243 if (node.getNodeType() == Node.DOCUMENT_NODE)
244 return node == doc;
245 return (node.getOwnerDocument() == doc);
246 }
247 /***
248 * Returns <code>IDManager</code> instance for this document.
249 * @return An <code>IDManager</code> for this document.
250 */
251 public IDManager getIDManager() {
252 return idMan;
253 }
254 /***
255 * Returns <code>DomToSplitted</code> instance for this document.
256 * @return An <code>DomToSplitted</code> for this document.
257 */
258 public SplittedDocHolder getSplittedDoc() {
259 return splittedDoc;
260 }
261 /***
262 * Returns the nametree from the original document, that the
263 * <code>emp:mark</code> element points to.
264 * @param mark the mark element, or <code>null</code>.
265 * @return the nametree represented as a document fragment. This fragment
266 * must not be modified. If <code>null</code> is provided then root
267 * fragment is returned.
268 * @throws IllegalArgumentException if the element is not
269 * <code>emp:mark</code> or the ID does not exist.
270 */
271 public DocumentFragment getSource(Node mark) {
272 if (mark == null)
273 return splittedDoc.getRootFragment();
274 return splittedDoc.getDomFragment(splittedDoc.getRef(mark));
275 }
276 /***
277 * Opens new view on the document. The view must be initialized.
278 * @param view the view to register.
279 */
280 public void openView(final DocumentView view) {
281 if (!view.isInitialized())
282 throw new IllegalArgumentException("The view is not initialized");
283 if (view.isClosed())
284 throw new IllegalArgumentException("The view is closed");
285 views.put(view, null);
286 }
287 /***
288 * Unregisters the view. The view is no more used by the transformation
289 * engine and is subject to garbage collection.
290 * @param view the document view to close. Fails if the view was not opened
291 * for this document.
292 */
293 public void closeView(final DocumentView view) {
294 if (views.remove(view) == null)
295 throw new IllegalArgumentException("Illegal view.");
296 view.close();
297 }
298 /***
299 * Returns instance of the undo manager for this document.
300 * @return the undo manager.
301 */
302 public UndoManager getUndoManager() {
303 return undoManager;
304 }
305 /***
306 * Returns all namespaces present in the document.
307 * @return set of namespaces. <code>null</code> namespace (nor empty
308 * namespace) does not occur in the returned set. Deprecated, use namespace
309 * manager methods.
310 */
311 @Deprecated
312 public Set<String> getAllNamespaces() {
313 final Set<String> result = new HashSet<String>(getNsManager()
314 .getAllNamespaces());
315 result.remove("");
316 return result;
317 }
318 /***
319 * <p>
320 * Access to the XML document. This document has this special feature: Every
321 * element has {@link ExportUtils#GENE_ID_ATTRIBUTE_QNAME} attribute,
322 * denoting ID of that element.
323 * </p>
324 * @return the DOM document.
325 */
326 public Document getDocument() {
327 return doc;
328 }
329 /***
330 * Validates this document.
331 * @throws SchemaException if something goes wrong in the process of
332 * validation.
333 */
334 public void validate() throws SchemaException {
335 getSchema().validate();
336 }
337 /***
338 * Returns document modifier, which is used to transparently modify the
339 * document.
340 * @return the document modifier instance.
341 */
342 public DocumentModifier getModifier() {
343 return docModifier;
344 }
345 /***
346 * Loads global schemas for all namespaces, present in the document, for
347 * which no local schema was loaded. It must be called before the Schema
348 * interface is used, to ensure that all schemata are properly loaded.
349 * @throws SchemaException if error happens during loading of schemas.
350 * @throws IOException if i/o error occurs
351 */
352 public void loadGlobalSchemas() throws SchemaException, IOException {
353 final Set<String> allNamespaces = new HashSet<String>(getNsManager()
354 .getAllNamespaces());
355 allNamespaces.remove("");
356 getSchema().getRefs().loadSchemas(allNamespaces);
357 }
358 /***
359 * Document's content modifier helper. For chosen operation returns all
360 * possibilities of document modification, that will result in valid
361 * document.
362 * @return the document schema instance.
363 */
364 public DocumentSchema getSchema() {
365 return schema;
366 }
367 /***
368 * Returns namespace manager for this document.
369 * @return namespace manager for this document.
370 */
371 public NamespaceManager getNsManager() {
372 return nsManager;
373 }
374 /***
375 * Returns entity manager for this document.
376 * @return entity manager for this document.
377 */
378 public sk.baka.ikslibs.ref.EntityManager getEntityManager() {
379 return entityManager;
380 }
381 /***
382 * Serializes in-memory document to specified output stream.
383 * @param out stream, where to store saved xml.
384 * @param encoding the encoding. If <code>null</code>, then UTF-8 is
385 * used.
386 * @param prettyFormatting if output xml will be pretty-formatted - readable
387 * by user. Warning: this adds some whitespaces to xml, thus modifying
388 * result xml. User must be sure that these whitespaces are discardable.
389 */
390 public void saveDocument(OutputStream out, String encoding,
391 boolean prettyFormatting) {
392 final DOMImplementationLS impl = (DOMImplementationLS) doc
393 .getImplementation();
394 final LSSerializer serializer = impl.createLSSerializer();
395 final LSOutput output = impl.createLSOutput();
396 output.setEncoding(encoding == null ? "UTF-8" : encoding);
397 output.setByteStream(out);
398
399 final Document serializeDoc = (Document) doc.cloneNode(true);
400
401 final NodeIterator i = ((DocumentTraversal) serializeDoc)
402 .createNodeIterator(serializeDoc.getDocumentElement(),
403 NodeFilter.SHOW_ELEMENT, null, false);
404 Element actNode = (Element) i.nextNode();
405 while (actNode != null) {
406
407 actNode.removeAttributeNS(ExportUtils.GENE_ID_ATTRIBUTE_QNAME
408 .getNamespaceURI(), ExportUtils.GENE_ID_ATTRIBUTE_QNAME
409 .getLocalPart());
410
411 actNode = (Element) i.nextNode();
412 }
413 i.detach();
414
415 getNsManager().createXmlnsAttributes(serializeDoc.getDocumentElement());
416
417 serializer.write(serializeDoc, output);
418 }
419 /***
420 * Returns encoding, in which the document is serialized. May return
421 * <code>null</code> if the encoding was not specified.
422 * @return the encoding.
423 */
424 public String getEncoding() {
425 return doc.getXmlEncoding();
426 }
427 /***
428 * Serializes transformed document "as-is", no emp:id removal is performed.
429 * Used only for debug. Serializes only root fragment.
430 * @param out the outputstream.
431 */
432 public void serialize(OutputStream out) {
433 final DOMImplementationLS impl = (DOMImplementationLS) doc
434 .getImplementation();
435 final LSSerializer serializer = impl.createLSSerializer();
436 final LSOutput output = impl.createLSOutput();
437 output.setEncoding("UTF-8");
438 output.setByteStream(out);
439
440 serializer.write(getSplittedDoc().getRootFragment(), output);
441 }
442 /***
443 * Returns document listeners manager for this document.
444 * @return document listeners manager for this document.
445 */
446 public DocumentListeners getListeners() {
447 return docListeners;
448 }
449 /***
450 * Returns full URI address of the document.
451 * @return document location.
452 */
453 public URL getDocumentURL() {
454 return fileURL;
455 }
456 /***
457 * Returns the name of the document file, including the extension, without
458 * the path specifier.
459 * @return the file name without the path.
460 */
461 public String getFileName() {
462 return fileName;
463 }
464 /***
465 * Returns URL pointing to the directory where the document is located.
466 * @return location of the XML file.
467 */
468 public URLDir getRoot() {
469 return root;
470 }
471 /***
472 * Returns clients Helper for modifying.
473 * @return DocumentModifyHelper instance
474 */
475 public DocumentModifyHelper getDocumentModifyHelper() {
476 return docModifyHelper;
477 }
478 }