1
2
3
4 package net.sourceforge.pmd.lang.xml.ast;
5
6 import java.io.IOException;
7 import java.io.Reader;
8 import java.lang.reflect.InvocationHandler;
9 import java.lang.reflect.Method;
10 import java.lang.reflect.Proxy;
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.Collections;
14 import java.util.HashMap;
15 import java.util.Iterator;
16 import java.util.LinkedHashSet;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Set;
20 import java.util.Stack;
21
22 import javax.xml.parsers.DocumentBuilder;
23 import javax.xml.parsers.ParserConfigurationException;
24 import javax.xml.parsers.SAXParser;
25 import javax.xml.parsers.SAXParserFactory;
26
27 import net.sourceforge.pmd.lang.ast.ParseException;
28 import net.sourceforge.pmd.lang.ast.RootNode;
29 import net.sourceforge.pmd.lang.ast.xpath.Attribute;
30 import net.sourceforge.pmd.lang.xml.XmlParserOptions;
31 import net.sourceforge.pmd.util.CompoundIterator;
32
33 import org.apache.xerces.dom.CoreDocumentImpl;
34 import org.apache.xerces.dom.EntityImpl;
35 import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
36 import org.w3c.dom.Attr;
37 import org.w3c.dom.CDATASection;
38 import org.w3c.dom.Comment;
39 import org.w3c.dom.Document;
40 import org.w3c.dom.DocumentType;
41 import org.w3c.dom.Element;
42 import org.w3c.dom.Entity;
43 import org.w3c.dom.EntityReference;
44 import org.w3c.dom.NamedNodeMap;
45 import org.w3c.dom.Node;
46 import org.w3c.dom.ProcessingInstruction;
47 import org.w3c.dom.Text;
48 import org.xml.sax.Attributes;
49 import org.xml.sax.InputSource;
50 import org.xml.sax.Locator;
51 import org.xml.sax.SAXException;
52 import org.xml.sax.XMLReader;
53 import org.xml.sax.ext.DefaultHandler2;
54
55 public class XmlParser {
56 protected final XmlParserOptions parserOptions;
57 protected Map<Node, XmlNode> nodeCache = new HashMap<Node, XmlNode>();
58
59 public XmlParser(XmlParserOptions parserOptions) {
60 this.parserOptions = parserOptions;
61 }
62
63 protected Document parseDocument(Reader reader) throws ParseException {
64 nodeCache.clear();
65 try {
66 SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
67 saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
68 saxParserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
69 saxParserFactory.setNamespaceAware(parserOptions.isNamespaceAware());
70 saxParserFactory.setValidating(parserOptions.isValidating());
71 saxParserFactory.setXIncludeAware(parserOptions.isXincludeAware());
72 SAXParser saxParser = saxParserFactory.newSAXParser();
73
74 LineNumberAwareSaxHandler handler = new LineNumberAwareSaxHandler(parserOptions);
75 XMLReader xmlReader = saxParser.getXMLReader();
76 xmlReader.setContentHandler(handler);
77 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
78 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
79 xmlReader.setEntityResolver(parserOptions.getEntityResolver());
80
81 xmlReader.parse(new InputSource(reader));
82 return handler.getDocument();
83 } catch (ParserConfigurationException e) {
84 throw new ParseException(e);
85 } catch (SAXException e) {
86 throw new ParseException(e);
87 } catch (IOException e) {
88 throw new ParseException(e);
89 }
90 }
91
92
93
94
95
96 private static class LineNumberAwareSaxHandler extends DefaultHandler2 {
97 public static final String BEGIN_LINE = "pmd:beginLine";
98 public static final String BEGIN_COLUMN = "pmd:beginColumn";
99 public static final String END_LINE = "pmd:endLine";
100 public static final String END_COLUMN = "pmd:endColumn";
101
102 private Stack<Node> nodeStack = new Stack<Node>();
103 private StringBuilder text = new StringBuilder();
104 private int beginLineText = -1;
105 private int beginColumnText = -1;
106 private Locator locator;
107 private final DocumentBuilder documentBuilder;
108 private final Document document;
109 private boolean cdataEnded = false;
110
111 private boolean coalescing;
112 private boolean expandEntityReferences;
113 private boolean ignoringComments;
114 private boolean ignoringElementContentWhitespace;
115 private boolean namespaceAware;
116
117 public LineNumberAwareSaxHandler(XmlParserOptions options) throws ParserConfigurationException {
118
119 this.documentBuilder = new DocumentBuilderFactoryImpl().newDocumentBuilder();
120
121 this.document = this.documentBuilder.newDocument();
122 this.coalescing = options.isCoalescing();
123 this.expandEntityReferences = options.isExpandEntityReferences();
124 this.ignoringComments = options.isIgnoringComments();
125 this.ignoringElementContentWhitespace = options.isIgnoringElementContentWhitespace();
126 this.namespaceAware = options.isNamespaceAware();
127 }
128
129 public Document getDocument() {
130 return document;
131 }
132
133 @Override
134 public void setDocumentLocator(Locator locator) {
135 this.locator = locator;
136 }
137 @Override
138 public void startElement(String uri, String localName, String qName, Attributes attributes)
139 throws SAXException {
140 addTextIfNeeded(false);
141
142 Element element;
143 if (namespaceAware) {
144 element = document.createElementNS(uri, qName);
145 } else {
146 element = document.createElement(qName);
147 }
148
149 for (int i = 0; i < attributes.getLength(); i++) {
150 String attQName = attributes.getQName(i);
151 String attNamespaceURI = attributes.getURI(i);
152 String attValue = attributes.getValue(i);
153 Attr a;
154 if (namespaceAware) {
155 a = document.createAttributeNS(attNamespaceURI, attQName);
156 element.setAttributeNodeNS(a);
157 } else {
158 a = document.createAttribute(attQName);
159 element.setAttributeNode(a);
160 }
161 a.setValue(attValue);
162 }
163
164 element.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
165 element.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
166
167 nodeStack.push(element);
168 }
169 private void addTextIfNeeded(boolean alwaysAdd) {
170 if (text.length() > 0) {
171 addTextNode(text.toString(), cdataEnded || alwaysAdd);
172 text.setLength(0);
173 cdataEnded = false;
174 }
175 }
176 private void addTextNode(String s, boolean alwaysAdd) {
177 if (alwaysAdd || !ignoringElementContentWhitespace || s.trim().length() > 0) {
178 Text textNode = document.createTextNode(s);
179 textNode.setUserData(BEGIN_LINE, beginLineText, null);
180 textNode.setUserData(BEGIN_COLUMN, beginColumnText, null);
181 textNode.setUserData(END_LINE, locator.getLineNumber(), null);
182 textNode.setUserData(END_COLUMN, locator.getColumnNumber(), null);
183 appendChild(textNode);
184 }
185 }
186 @Override
187 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
188 this.characters(ch, start, length);
189 }
190 @Override
191 public void characters(char[] ch, int start, int length) throws SAXException {
192 if (text.length() == 0) {
193 beginLineText = locator.getLineNumber();
194 beginColumnText = locator.getColumnNumber();
195 }
196 text.append(ch, start, length);
197 }
198 @Override
199 public void endElement(String uri, String localName, String qName) throws SAXException {
200 addTextIfNeeded(false);
201 Node element = nodeStack.pop();
202 element.setUserData(END_LINE, locator.getLineNumber(), null);
203 element.setUserData(END_COLUMN, locator.getColumnNumber(), null);
204 appendChild(element);
205 }
206 @Override
207 public void startDocument() throws SAXException {
208 document.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
209 document.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
210 }
211 @Override
212 public void endDocument() throws SAXException {
213 addTextIfNeeded(false);
214 document.setUserData(END_LINE, locator.getLineNumber(), null);
215 document.setUserData(END_COLUMN, locator.getColumnNumber(), null);
216 }
217 @Override
218 public void startCDATA() throws SAXException {
219 if (!coalescing) {
220 addTextIfNeeded(true);
221 }
222 }
223 @Override
224 public void endCDATA() throws SAXException {
225 if (!coalescing) {
226 CDATASection cdataSection = document.createCDATASection(text.toString());
227 cdataSection.setUserData(BEGIN_LINE, beginLineText, null);
228 cdataSection.setUserData(BEGIN_COLUMN, beginColumnText, null);
229 cdataSection.setUserData(END_LINE, locator.getLineNumber(), null);
230 cdataSection.setUserData(END_COLUMN, locator.getColumnNumber(), null);
231 appendChild(cdataSection);
232 text.setLength(0);
233 cdataEnded = true;
234 }
235 }
236 @Override
237 public void comment(char[] ch, int start, int length) throws SAXException {
238 if (!ignoringComments) {
239 addTextIfNeeded(false);
240 Comment comment = document.createComment(new String(ch, start, length));
241 comment.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
242 comment.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
243 comment.setUserData(END_LINE, locator.getLineNumber(), null);
244 comment.setUserData(END_COLUMN, locator.getColumnNumber(), null);
245 appendChild(comment);
246 }
247 }
248 @Override
249 public void startDTD(String name, String publicId, String systemId) throws SAXException {
250 DocumentType docType = documentBuilder
251 .getDOMImplementation()
252 .createDocumentType(name, publicId, systemId);
253 docType.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
254 docType.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
255 document.appendChild(docType);
256 }
257 @Override
258 public void startEntity(String name) throws SAXException {
259 if (!expandEntityReferences) {
260 addTextIfNeeded(false);
261 }
262 }
263 @Override
264 public void endEntity(String name) throws SAXException {
265 if (!expandEntityReferences) {
266 EntityReference entity = document.createEntityReference(name);
267 entity.setUserData(BEGIN_LINE, beginLineText, null);
268 entity.setUserData(BEGIN_COLUMN, beginColumnText, null);
269 entity.setUserData(END_LINE, locator.getLineNumber(), null);
270 entity.setUserData(END_COLUMN, locator.getColumnNumber(), null);
271 appendChild(entity);
272 text.setLength(0);
273 }
274 }
275 @Override
276 public void endDTD() throws SAXException {
277 DocumentType doctype = document.getDoctype();
278 doctype.setUserData(END_LINE, locator.getLineNumber(), null);
279 doctype.setUserData(END_COLUMN, locator.getColumnNumber(), null);
280 }
281 @Override
282 public void internalEntityDecl(String name, String value) throws SAXException {
283 Entity entity = new ChangeableEntity(document, name);
284 entity.appendChild(document.createTextNode(value));
285
286 NamedNodeMap entities = document.getDoctype().getEntities();
287 entities.setNamedItem(entity);
288 }
289 @Override
290 public void processingInstruction(String target, String data) throws SAXException {
291 ProcessingInstruction pi = document.createProcessingInstruction(target, data);
292 appendChild(pi);
293 }
294 private void appendChild(Node node) {
295 if (nodeStack.isEmpty()) {
296 document.appendChild(node);
297 } else {
298 nodeStack.peek().appendChild(node);
299 }
300 }
301 private static class ChangeableEntity extends EntityImpl {
302 public ChangeableEntity(Document document, String name) {
303 super((CoreDocumentImpl)document, name);
304 flags = (short) (flags & ~READONLY);
305 }
306 }
307 }
308
309
310 public XmlNode parse(Reader reader) {
311 Document document = parseDocument(reader);
312 return createProxy(document);
313 }
314
315 public XmlNode createProxy(Node node) {
316 XmlNode proxy = nodeCache.get(node);
317 if (proxy != null) {
318 return proxy;
319 }
320
321
322 LinkedHashSet<Class<?>> interfaces = new LinkedHashSet<Class<?>>();
323 interfaces.add(XmlNode.class);
324 if (node instanceof Document) {
325 interfaces.add(RootNode.class);
326 }
327 addAllInterfaces(interfaces, node.getClass());
328
329 proxy = (XmlNode) Proxy.newProxyInstance(XmlParser.class.getClassLoader(), interfaces
330 .toArray(new Class[interfaces.size()]), new XmlNodeInvocationHandler(node));
331 nodeCache.put(node, proxy);
332 return proxy;
333 }
334
335 public void addAllInterfaces(Set<Class<?>> interfaces, Class<?> clazz) {
336 interfaces.addAll(Arrays.asList((Class<?>[]) clazz.getInterfaces()));
337 if (clazz.getSuperclass() != null) {
338 addAllInterfaces(interfaces, clazz.getSuperclass());
339 }
340 }
341
342 public class XmlNodeInvocationHandler implements InvocationHandler {
343 private final Node node;
344 private Object userData;
345
346 public XmlNodeInvocationHandler(Node node) {
347 this.node = node;
348 }
349
350 public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
351
352 if (method.getDeclaringClass().isAssignableFrom(XmlNode.class)
353 && !"java.lang.Object".equals(method.getDeclaringClass().getName())) {
354 if ("jjtGetNumChildren".equals(method.getName())) {
355 return node.hasChildNodes() ? node.getChildNodes().getLength() : 0;
356 } else if ("jjtGetChild".equals(method.getName())) {
357 return createProxy(node.getChildNodes().item(((Integer) args[0]).intValue()));
358 } else if ("getImage".equals(method.getName())) {
359 if (node instanceof Text) {
360 return ((Text) node).getData();
361 } else {
362 return null;
363 }
364 } else if ("jjtGetParent".equals(method.getName())) {
365 Node parent = node.getParentNode();
366 if (parent != null && !(parent instanceof Document)) {
367 return createProxy(parent);
368 } else {
369 return null;
370 }
371 } else if ("getAttributeIterator".equals(method.getName())) {
372 List<Iterator<Attribute>> iterators = new ArrayList<Iterator<Attribute>>();
373
374
375 final NamedNodeMap attributes = node.getAttributes();
376 iterators.add(new Iterator<Attribute>() {
377 private int index;
378
379 public boolean hasNext() {
380 return attributes != null && index < attributes.getLength();
381 }
382
383 public Attribute next() {
384 Node attributeNode = attributes.item(index++);
385 return new Attribute(createProxy(node), attributeNode.getNodeName(), attributeNode
386 .getNodeValue());
387 }
388
389 public void remove() {
390 throw new UnsupportedOperationException();
391 }
392 });
393
394
395 if (proxy instanceof Text) {
396 iterators.add(Collections.singletonList(
397 new Attribute((net.sourceforge.pmd.lang.ast.Node) proxy, "Image", ((Text) proxy)
398 .getData())).iterator());
399 }
400
401
402
403
404 return new CompoundIterator<Attribute>(iterators.toArray(new Iterator[iterators.size()]));
405 } else if ("getBeginLine".equals(method.getName())) {
406 return getUserData(LineNumberAwareSaxHandler.BEGIN_LINE);
407 } else if ("getBeginColumn".equals(method.getName())) {
408 return getUserData(LineNumberAwareSaxHandler.BEGIN_COLUMN);
409 } else if ("getEndLine".equals(method.getName())) {
410 return getUserData(LineNumberAwareSaxHandler.END_LINE);
411 } else if ("getEndColumn".equals(method.getName())) {
412 return getUserData(LineNumberAwareSaxHandler.END_COLUMN);
413 } else if ("getNode".equals(method.getName())) {
414 return node;
415 } else if ("getUserData".equals(method.getName())) {
416 return userData;
417 } else if ("setUserData".equals(method.getName())) {
418 userData = args[0];
419 return null;
420 } else if ("isFindBoundary".equals(method.getName())) {
421 return false;
422 }
423 throw new UnsupportedOperationException("Method not supported for XmlNode: " + method);
424 }
425
426 else {
427 if ("toString".equals(method.getName())) {
428 String s = node.getNodeName();
429 s = s.replace("#", "");
430 return s;
431 }
432 Object result = method.invoke(node, args);
433 return result;
434 }
435 }
436
437 private Integer getUserData(String key) {
438 if (node.getUserData(key) != null) {
439 return (Integer)node.getUserData(key);
440 }
441 return Integer.valueOf(-1);
442 }
443 }
444 }