xq
xq copied to clipboard
XPathEvalError when querying with namespace prefixes.
http get http://br-rss.jeffbr13.net/rss/ | xq '//item/itunes:duration'
# Traceback (most recent call last):
# File "/Users/ben/.virtualenvs/br-rss/bin/xq", line 11, in <module>
# sys.exit(main())
# File "/Users/ben/.virtualenvs/br-rss/lib/python3.6/site-packages/xq/__main__.py", line 54, in main
# sys.stdout.write(apply_xpath(args.file, args.xpath_query, sys.stdout.isatty()))
# File "/Users/ben/.virtualenvs/br-rss/lib/python3.6/site-packages/xq/__main__.py", line 29, in apply_xpath
# matches = parsed.xpath(xpath_query)
# File "src/lxml/etree.pyx", line 2289, in lxml.etree._ElementTree.xpath (src/lxml/etree.c:69770)
# File "src/lxml/xpath.pxi", line 359, in lxml.etree.XPathDocumentEvaluator.__call__ (src/lxml/etree.c:179350)
# File "src/lxml/xpath.pxi", line 227, in lxml.etree._XPathEvaluatorBase._handle_result (src/lxml/etree.c:177714)
# lxml.etree.XPathEvalError: Undefined namespace prefix
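It turns out that lxml's .xpath() methods do not resolve namespace prefixes on their own: any prefix used in the query must be supplied as a prefix-to-URI mapping through the namespaces argument.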
The naïve solution of searching all namespaces in the XML document is technically incorrect, but it is probably fine for command-line usage.
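I was having a play, and I'm not sure what the correct solution is, but the patch below makes xq more useful: it registers the root element's default namespace under the prefix default, so you can write default: in front of element names that belong to the default namespace.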
diff --git xq/__main__.py xq/__main__.py
index ae40ee0..79477f6 100644
--- xq/__main__.py
+++ xq/__main__.py
@@ -26,7 +26,13 @@ def apply_xpath(infile, xpath_query=None, colorize=False):
parsed = etree.parse(infile, etree.HTMLParser(remove_blank_text=True))
if xpath_query:
- matches = parsed.xpath(xpath_query)
+ default_namespace = parsed.getroot().nsmap.get(None)
+ namespaces = {}
+ if default_namespace is not None:
+ namespaces['default'] = default_namespace
+ matches = parsed.xpath(xpath_query, namespaces=namespaces)
results = wrap_in_results(matches)
output = etree.tostring(results, pretty_print=True)
else: