Posted By

cortesi on 02/06/11


Tagged


Versions (?)

Robust pretty-printing for XML-like data


 / Published in: Python
 

  1. import re
  2. TAG = r"""
  3. <\s*
  4. (?!\s*[!"])
  5. (?P<close>\s*\/)?
  6. (?P<name>\w+)
  7. (
  8. [^'"\t >]+
  9. |
  10. "[^\"]*"['\"]*
  11. |
  12. '[^']*'['\"]*
  13. |
  14. \s+
  15. )*
  16. (?P<selfcont>\s*\/\s*)?
  17. \s*>
  18. """
  19. UNI = set(["br", "hr", "img", "input", "area", "link"])
  20. INDENT = " "*4
  21. def pretty_xmlish(s):
  22. """
  23. This is a robust, general indenter for XML-ish data.
  24. Returns a list of lines.
  25. """
  26. data, offset, indent, prev = [], 0, 0, None
  27. for i in re.finditer(TAG, s, re.VERBOSE|re.MULTILINE):
  28. start, end = i.span()
  29. name = i.group("name")
  30. if start > offset:
  31. txt = []
  32. for x in textwrap.dedent(s[offset:start]).split("\n"):
  33. if x.strip():
  34. txt.append(indent*INDENT + x)
  35. data.extend(txt)
  36. if i.group("close") and not (name in UNI and name==prev):
  37. indent = max(indent - 1, 0)
  38. data.append(indent*INDENT + i.group().strip())
  39. offset = end
  40. if not any([i.group("close"), i.group("selfcont"), name in UNI]):
  41. indent += 1
  42. prev = name
  43. return data

Report this snippet  

You need to log in to post a comment.