View Javadoc

1   /*
2    * Copyright 1999-2006 Faculty of Mathematics, Physics and Informatics, Comenius
3    * University, Bratislava. This file is protected by the Mozilla Public License
4    * version 1.1 (the License); you may not use this file except in compliance
5    * with the License. You may obtain a copy of the License at
6    * http://euromath2.sourceforge.net/license.html Unless required by applicable
7    * law or agreed to in writing, software distributed under the License is
8    * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
9    * KIND, either express or implied. See the License for the specific language
10   * governing permissions and limitations under the License.
11   */
12  package sk.uniba.euromath.document;
13  import org.w3c.dom.Attr;
14  import org.w3c.dom.CDATASection;
15  import org.w3c.dom.CharacterData;
16  import org.w3c.dom.Comment;
17  import org.w3c.dom.DocumentFragment;
18  import org.w3c.dom.Element;
19  import org.w3c.dom.Node;
20  import org.w3c.dom.ProcessingInstruction;
21  import org.w3c.dom.Text;
22  import org.w3c.dom.traversal.DocumentTraversal;
23  import org.w3c.dom.traversal.NodeFilter;
24  import org.w3c.dom.traversal.NodeIterator;
25  import sk.baka.ikslibs.DOMUtils;
26  import sk.baka.ikslibs.interval.DOMInterval;
27  import sk.baka.ikslibs.levelmapper.NodeListID;
28  import sk.baka.ikslibs.modify.DOMMutils;
29  import sk.baka.ikslibs.ptr.DomPointer;
30  import sk.uniba.euromath.tools.StringTools;
31  /***
32   * Provides access to document's contents. It doesn't provide functions to
33   * modify document - that is the task of <code>DocumentModifier</code>.
34   * Nodes, returned by this function, must not be modified.
35   * @author Martin Vysny
36   */
37  public final class DocumentContent {
38  	private final DomCore doc;
39  	/***
40  	 * Creates instance of <code>DocumentContent</code>. Removes all
41  	 * whitespace texts.
42  	 * @param doc reference to document proxy.
43  	 */
44  	DocumentContent(DomCore doc) {
45  		super();
46  		this.doc = doc;
47  		if (doc.getDocument().getDocumentElement() != null) {
48  			// remove all whitespaced text nodes.
49  			NodeIterator iterator = ((DocumentTraversal) doc.getDocument())
50  					.createNodeIterator(doc.getDocument().getDocumentElement(),
51  							NodeFilter.SHOW_TEXT, null, false);
52  			Node actNode = iterator.nextNode();
53  			while (actNode != null) {
54  				if (isNodeContentEmpty(actNode)) {
55  					// the node has empty or whitespace-only content. remove it.
56  					iterator.previousNode();
57  					actNode.getParentNode().removeChild(actNode);
58  				} else {
59  					DOMMutils.normalizeWhitespaces(actNode);
60  				}
61  				// get next element
62  				actNode = iterator.nextNode();
63  			}
64  			iterator.detach();
65  		}
66  	}
67  	/***
68  	 * Returns true if node has empty text contents (the <code>Text</code> or
69  	 * <code>CDATASection</code> node), or contains whitespaces only (
70  	 * <code>Text</code>).
71  	 * @param node node to check.
72  	 * @return true if <code>node.getData()</code> returns <code>null</code>
73  	 * or empty string.
74  	 */
75  	private boolean isNodeContentEmpty(Node node) {
76  		if (!(node instanceof Text) && !(node instanceof CDATASection))
77  			return false;
78  		final String data = ((CharacterData) node).getData();
79  		if (StringTools.nullStr(data) == null)
80  			return true;
81  		if (node instanceof CDATASection)
82  			return false;
83  		// test for whitespaces
84  		for (int i = 0; i < data.length(); i++) {
85  			char c = data.charAt(i);
86  			if (!Character.isWhitespace(c))
87  				return false;
88  		}
89  		return true;
90  	}
91  	/***
92  	 * Returns regular element, identified by given id.
93  	 * @param id id of node.
94  	 * @return Element, denoted by given id.
95  	 * @throws DocumentException if id doesn't exist or it doesn't denote
96  	 * regular element.
97  	 * @deprecated
98  	 */
99  	@Deprecated
100 	public Element getElement(String id) throws DocumentException {
101 		return doc.getIDManager().getElement(id);
102 	}
103 	/***
104 	 * Gets text (or CData) nodes, containing text identified by ID.
105 	 * @param id id of the desired node.
106 	 * @return text nodes denoted by given id.
107 	 * @throws DocumentException if id doesn't exist or it doesn't denote text
108 	 * or CData node.
109 	 * @deprecated
110 	 */
111 	@Deprecated
112 	public NodeListID getTextNode(String id) throws DocumentException {
113 		return doc.getIDManager().getTextNode(id);
114 	}
115 	/***
116 	 * Gets processing instruction node, identified by ID.
117 	 * @param id id of the desired node.
118 	 * @return the node
119 	 * @throws DocumentException if id doesn't exist or it doesn't denote
120 	 * <code>ProcessingInstruction</code> node.
121 	 * @deprecated
122 	 */
123 	@Deprecated
124 	public ProcessingInstruction getPINode(String id) throws DocumentException {
125 		return doc.getIDManager().getPINode(id);
126 	}
127 	/***
128 	 * Gets comment node, identified by ID.
129 	 * @param id id of the desired node.
130 	 * @return the node
131 	 * @throws DocumentException if id doesn't exist or it doesn't denote
132 	 * <code>Comment</code> node.
133 	 * @deprecated
134 	 */
135 	@Deprecated
136 	public Comment getCommentNode(String id) throws DocumentException {
137 		return doc.getIDManager().getCommentNode(id);
138 	}
139 	/***
140 	 * Gets attribute node, identified by ID.
141 	 * @param id id of the desired node.
142 	 * @return the node
143 	 * @throws DocumentException if id doesn't exist or it doesn't denote
144 	 * <code>Attr</code> node.
145 	 * @deprecated
146 	 */
147 	@Deprecated
148 	public Attr getAttrNode(String id) throws DocumentException {
149 		return doc.getIDManager().getAttrNode(id);
150 	}
151 	/***
152 	 * Returns data, associated with given node. Returns a text value.
153 	 * @param id id of node.
154 	 * @return character data, that the node contains. Never <code>null</code>.
155 	 * @throws DocumentException if node doesn't exist or it denotes an element.
156 	 * @deprecated
157 	 */
158 	@Deprecated
159 	public String getData(String id) throws DocumentException {
160 		return doc.getIDManager().getData(id);
161 	}
162 	/***
163 	 * Returns data, associated with given node. Returns a text value.
164 	 * @param node the node.
165 	 * @return character data, that the node contains. Never <code>null</code>.
166 	 * @deprecated
167 	 */
168 	@Deprecated
169 	public static String getData(Node node) {
170 		return DOMUtils.getData(node);
171 	}
172 	/***
173 	 * Modifies data, associated with given node. It does not notify splitted
174 	 * document of the change!
175 	 * @param node the node.
176 	 * @param data character data, that the node will contain. <code>null</code>
177 	 * is threated as an empty string.
178 	 * @return true, if data was changed, or false when original data was same
179 	 * as new data.
180 	 * @deprecated
181 	 */
182 	@Deprecated
183 	static boolean setData(Node node, String data) {
184 		return DOMMutils.setData(node, data);
185 	}
186 	/***
187 	 * Checks, whether given element contains some elements also, or it contains
188 	 * text/comments/pi only.
189 	 * @param id id of element.
190 	 * @return true if element contains at least one regular element, false
191 	 * otherwise.
192 	 * @throws DocumentException if id doesn't exist or it doesn't denote
193 	 * regular element.
194 	 * @deprecated
195 	 */
196 	@Deprecated
197 	public boolean containsElements(String id) throws DocumentException {
198 		Element e = getElement(id);
199 		return DOMUtils.containsElements(e);
200 	}
201 	/***
202 	 * Computes text value of element or entity reference by concatenating all
203 	 * textual values of descendant text/cdata nodes.
204 	 * @param id id of element.
205 	 * @return textual value of element, never <code>null</code>.
206 	 * @throws DocumentException if id doesn't exist or it doesn't denote
207 	 * regular element.
208 	 * @deprecated
209 	 */
210 	@Deprecated
211 	public String getContainerText(String id) {
212 		Element e = doc.getIDManager().getElement(id);
213 		return getContainerText(e);
214 	}
215 	/***
216 	 * Computes text value of element or entity by concatenating all textual
217 	 * values of descendant text/cdata nodes.
218 	 * @param node node to query.
219 	 * @return textual value of element, never <code>null</code>.
220 	 * @deprecated
221 	 */
222 	@Deprecated
223 	public static String getContainerText(Node node) {
224 		return StringTools.nonNullStr(node.getTextContent());
225 	}
226 	/***
227 	 * Returns element that is the nameroot of tree containing given node.
228 	 * @param node the node.
229 	 * @return nameroot (an element that belongs to same namespace as given node
230 	 * (if node is not element then its parent/owner is used instead), its
231 	 * parent is null or has different namespace and it is nearest such node to
232 	 * given node).
233 	 * @deprecated
234 	 */
235 	@Deprecated
236 	public Element getNameRoot(Node node) {
237 		return DOMUtils.getNameRoot(node);
238 	}
239 	/***
240 	 * Tries to copy all nodes in the specified range (the node pointed to by
241 	 * <code>to</code> parameter is not copied). Copied nodes are returned in
242 	 * a <code>DocumentFragment</code> instance. Both pointers must have the
243 	 * same parent element.
244 	 * @param from start of the removal interval. The node where this pointer
245 	 * points shall be copied first.
246 	 * @param to end of the copy interval.
247 	 * @return <code>DocumentFragment</code> instance containing nodes that
248 	 * have been cut from the document. Never <code>null</code>. No element
249 	 * in the target fragment will have <code>emp:id</code> attribute.
250 	 * @deprecated
251 	 */
252 	@Deprecated
253 	public DocumentFragment copy(DomPointer from, DomPointer to) {
254 		return new DOMInterval(from,to).toRange().cloneContents();
255 	}
256 	/***
257 	 * Checks the node if it is from our document.
258 	 * @param node node to check.
259 	 * @throws IllegalArgumentException if node is not from bound document
260 	 */
261 	public void checkNode(Node node) {
262 		doc.checkNode(node);
263 	}
264 	/***
265 	 * Checks the pointer if it points into our document.
266 	 * @param ptr pointer to check.
267 	 * @throws IllegalArgumentException if pointer is not from bound document
268 	 */
269 	public void checkPtr(DomPointer ptr) {
270 		doc.checkPtr(ptr);
271 	}
272 	/***
273 	 * Checks the node if it is from our document.
274 	 * @param node node to check.
275 	 * @return true if the node was created by this document, false otherwise.
276 	 */
277 	public boolean isOurNode(Node node) {
278 		return doc.isOurNode(node);
279 	}
280 	/***
281 	 * Iterates over the content nodes (comment, pi, text and element nodes)
282 	 * located directly in given node (i.e. there is no element node between
283 	 * <code>node</code> and returned nodes; iterator does not iterate through
284 	 * other elements).
285 	 * @param node contents of this node are to be iterated.
286 	 * @return iterator iterating through direct content nodes.
287 	 * @deprecated
288 	 */
289 	@Deprecated
290 	public ContentNodesIterator iterateContent(Node node) {
291 		return new ContentNodesIterator(node);
292 	}
293 }