validator.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
#!/usr/bin/env python3
# A quick&dirty Markdown Validator
# Copyright (C) 2018 Ingo Ruhnke <grumbel@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import os
import sys
from enum import Flag
from typing import List
from urllib.parse import urlsplit

import requests
# https://github.com/rtfd/CommonMark-py
from CommonMark import Parser, dumpAST
  25. def url_valid(url: str) -> bool:
  26. try:
  27. request = requests.head(url, timeout=1.0)
  28. if request.ok:
  29. return True
  30. else:
  31. return False
  32. except Exception:
  33. return False
  34. class ValidatorFlags(Flag):
  35. # Validate everything
  36. ALL = ~0
  37. # Validate image
  38. IMAGE = (1 << 1)
  39. # Validate regular text links
  40. LINK = (1 << 2)
  41. # Validate anchor links (e.g. "#FOOBAR")
  42. ANCHOR = (1 << 3)
  43. # Validate external references (e.g. "http://...", "https://...")
  44. EXTERNAL = (1 << 4)
  45. # Validate local references
  46. LOCAL = (1 << 5)
  47. class Validator:
  48. def __init__(self, filename: str) -> None:
  49. self._filename = filename
  50. self._anchors = []
  51. def set_flags(self, flags):
  52. self._flags = flags
  53. def validate(self) -> None:
  54. with open(self._filename) as fin:
  55. parser = Parser()
  56. document = parser.parse(fin.read())
  57. for node, starttag in document.walker():
  58. self.preprocess(node, starttag)
  59. for node, starttag in document.walker():
  60. self.check(node, starttag)
  61. def get_pos(self, node) -> str:
  62. cur = node.parent
  63. while cur is not None:
  64. if cur.sourcepos is not None:
  65. return "{}:{}".format(self._filename, cur.sourcepos[0][0])
  66. else:
  67. cur = cur.parent
  68. return "{}".format(self._filename)
  69. def validate_link_destination(self, node) -> None:
  70. # print("link: {}".format(link))
  71. if node.destination.startswith("http://") or node.destination.startswith("https://"):
  72. if self._flags & ValidatorFlags.EXTERNAL:
  73. if not url_valid(node.destination):
  74. print("{}: error: {} failed to load".format(self.get_pos(node),
  75. node.destination))
  76. else:
  77. print("{}: OK".format(node.destination))
  78. elif node.destination.startswith("#"):
  79. if self._flags & ValidatorFlags.ANCHOR:
  80. self.validate_anchor(node, node.destination[1:])
  81. else:
  82. page, *rest = node.destination.split("#", 1)
  83. filename = page + ".md"
  84. if not os.path.exists(filename):
  85. print("{}: error: {} does not exist".format(self.get_pos(node),
  86. filename),
  87. file=sys.stderr)
  88. if rest and self._flags & ValidatorFlags.ANCHOR:
  89. # need to read in the other document to validate this
  90. # anchor properly
  91. # self.validate_anchor(node, rest[0])
  92. pass
  93. def validate_anchor(self, node, anchor: str) -> None:
  94. if anchor not in self._anchors:
  95. print("{}: error: {} anchor does not exist".format(self.get_pos(node),
  96. anchor),
  97. file=sys.stderr)
  98. def validate_image_destination(self, node) -> None:
  99. # print("image: {}".format(link))
  100. if node.destination.startswith("http://") or node.destination.startswith("https://"):
  101. if self._flags & ValidatorFlags.EXTERNAL:
  102. if not url_valid(node.destination):
  103. print("{}: error: {} failed to load".format(self.get_pos(node),
  104. node.destination))
  105. elif not os.path.exists(node.destination):
  106. print("{}: error: {} does not exist".format(self.get_pos(node),
  107. node.destination),
  108. file=sys.stderr)
  109. def get_text(self, node) -> str:
  110. result = ""
  111. for child, starttag in node.walker():
  112. if child.t == "text":
  113. result += child.literal
  114. return result
  115. def preprocess(self, node, starttag: bool):
  116. if starttag:
  117. if node.t == "heading":
  118. title = self.get_text(node)
  119. anchor = title.lower().replace(" ", "-")
  120. self._anchors.append(anchor)
  121. def check(self, node, starttag: bool):
  122. if starttag:
  123. if node.t == "link" and self._flags & ValidatorFlags.LINK:
  124. # children = list(node.walker())
  125. # print(children[1][0].literal)
  126. # print(node.destination)
  127. # print()
  128. self.validate_link_destination(node)
  129. elif node.t == "image" and self._flags & ValidatorFlags.IMAGE:
  130. # children = list(node.walker())
  131. # print(children[1][0].literal)
  132. # print(node.destination)
  133. # print()
  134. self.validate_image_destination(node)
  135. def parse_args(argv: List[str]) -> argparse.Namespace:
  136. # Parse Arguments
  137. parser = argparse.ArgumentParser(description="Flexlay - SuperTux Editor")
  138. parser.add_argument("FILE", action="store", type=str, nargs="+",
  139. help=".md file to verify")
  140. parser.add_argument("--validate", metavar="CATEGORY", type=str, default="all")
  141. parser.add_argument("--dump-ast", action='store_true', default=False)
  142. return parser.parse_args(argv[1:])
  143. def main(argv: List[str]):
  144. args = parse_args(argv)
  145. flags = ValidatorFlags(0)
  146. categories = args.validate.split(",")
  147. for category in categories:
  148. category = category.lower()
  149. if category == "all":
  150. flags |= ValidatorFlags.ALL
  151. elif category == "external":
  152. flags |= ValidatorFlags.EXTERNAL
  153. elif category == "local":
  154. flags |= ValidatorFlags.LOCAL
  155. elif category == "link":
  156. flags |= ValidatorFlags.LINK
  157. elif category == "anchor":
  158. flags |= ValidatorFlags.ANCHOR
  159. elif category == "image":
  160. flags |= ValidatorFlags.IMAGE
  161. else:
  162. raise Exception("unknown category '{}'".format(category))
  163. for filename in args.FILE:
  164. if args.dump_ast:
  165. with open(filename) as fin:
  166. parser = Parser()
  167. document = parser.parse(fin.read())
  168. dumpAST(document)
  169. else:
  170. validator = Validator(filename)
  171. validator.set_flags(flags)
  172. validator.validate()
# Script entry point: when the file is executed directly (not imported),
# hand the unmodified process argument vector to main().
if __name__ == "__main__":
    main(sys.argv)
# EOF #