dtd.vim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. " Vim indent file
  2. " Language: DTD (Document Type Definition for XML)
  3. " Maintainer: Doug Kearns <dougkearns@gmail.com>
  4. " Previous Maintainer: Nikolai Weibull <now@bitwi.se>
  5. " Last Change: 24 Sep 2021
  6. " Only load this indent file when no other was loaded.
  if exists("b:did_indent") | finish | endif
  let b:did_indent = 1

  " Buffer-local indent configuration. b:undo_indent holds the command string
  " that resets the same three options.
  setlocal nosmartindent
  setlocal indentkeys=!^F,o,O,>
  setlocal indentexpr=GetDTDIndent()
  let b:undo_indent = "setl inde< indk< si<"

  " Everything below defines script-level helpers and GetDTDIndent(); skip it
  " when the function already exists.
  if exists("*GetDTDIndent") | finish | endif

  " Remember the current 'cpoptions' in s:cpo_save, then reset the option to
  " its Vim default.
  let s:cpo_save = &cpoptions
  set cpoptions&vim

  " Default token pattern used by the lexer: a run of non-whitespace characters.
  " TODO: Needs to be adjusted to stop at [, <, and ].
  let s:token_pattern = '^[^[:space:]]\+'
  " Read one token from a:input, starting at byte offset a:start.
  "
  " Leading whitespace (including line breaks) is skipped first. The token
  " pattern is the optional third argument, or s:token_pattern when omitted.
  " The returned text runs from the first non-whitespace offset to the end of
  " the pattern match.
  "
  " Returns [token, offset just past the token]. If either the whitespace skip
  " or the pattern match fails, returns ["", a:start] so the caller's position
  " is left unchanged.
  function s:lex1(input, start, ...)
    let l:regex = a:0 > 0 ? a:1 : s:token_pattern
    let l:nothing = ["", a:start]

    let l:from = matchend(a:input, '^\_s*', a:start)
    if l:from == -1
      return l:nothing
    endif

    let l:upto = matchend(a:input, l:regex, l:from)
    return l:upto == -1 ? l:nothing : [strpart(a:input, l:from, l:upto - l:from), l:upto]
  endfunction
  " Read the next token from a:input at offset a:start, skipping over any
  " comments delimited by a pair of "--" tokens.
  "
  " Takes the same arguments as s:lex1() and returns the same [token, offset]
  " pair. When a "--" token is read, tokens are consumed up to and including
  " the closing "--", and the token after it becomes the candidate result (which
  " may itself open another comment). If the input runs out before the closing
  " "--" is found, the empty-token result from s:lex1() is returned.
  function s:lex(input, start, ...)
    let regex = a:0 > 0 ? a:1 : s:token_pattern
    let [tok, pos] = s:lex1(a:input, a:start, regex)

    while tok == '--'
      " Inside a comment: consume tokens until it closes or the input ends.
      while 1
        let [tok, pos] = s:lex1(a:input, pos, regex)
        if tok == ""
          return [tok, pos]
        endif
        if tok == '--'
          break
        endif
      endwhile

      " The comment closed; fetch the token that follows it.
      let [tok, pos] = s:lex1(a:input, pos, regex)
    endwhile

    return [tok, pos]
  endfunction
  " Scan a parenthesized group (content model or enumeration) and work out how
  " far a continuation line should be indented.
  "
  " a:line  the declaration text; physical lines are separated by "\n"
  " a:end   byte offset just past an opening "(" that was already consumed
  "
  " Returns [indent, offset]:
  "   * [-1, offset] when the parenthesis at a:end - 1 is closed; offset is
  "     just past the closing token.
  "   * otherwise [column, offset], where column is the display width of the
  "     physical line up to and including the innermost still-open "(".
  "
  " The column is measured with strdisplaywidth() rather than as a byte
  " difference, so tabs and multibyte characters in front of the parenthesis
  " are counted as the cells they occupy on screen.
  function s:indent_to_innermost_parentheses(line, end)
    let token = '('
    let end = a:end
    " Stack of byte offsets of the currently open parentheses.
    let parentheses = [end - 1]
    while token != ""
      let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
      if token[0] == '('
        call add(parentheses, end - 1)
      elseif token[0] == ')'
        if len(parentheses) == 1
          " The outermost parenthesis was just closed.
          return [-1, end]
        endif
        call remove(parentheses, -1)
      endif
    endwhile

    " Ran out of tokens with at least one parenthesis still open: align with
    " the column just after the innermost one.
    let innermost = parentheses[-1]
    " strridx() returns -1 when there is no earlier "\n", which makes the
    " physical line start at offset 0.
    let line_start = strridx(a:line, "\n", innermost) + 1
    let prefix = strpart(a:line, line_start, innermost - line_start + 1)
    return [strdisplaywidth(prefix), end]
  endfunction
  67. " TODO: Line and end could be script global (think OO members).
  " 'indentexpr' callback: compute the indent for line v:lnum.
  "
  " The function searches backwards for the nearest "<!", joins the text from
  " that line up to (but not including) v:lnum, and tokenizes it with s:lex1()
  " and s:lex() to find out how far the declaration has progressed. The keyword
  " after "<!" selects the branch: "--" (comment), ELEMENT, ATTLIST, ENTITY or
  " NOTATION.
  "
  " Returns a number of columns, or -1 for unterminated comments and for
  " anything that is not one of the declarations above. Line 1 always gets 0.
  " The cursor is moved by the search() call.
  function GetDTDIndent()
    if v:lnum == 1
      return 0
    endif

    " Begin by searching back for a <!. From here, depending on what follows
    " immediately after, parse to where we're at to determine what to do.
    " NOTE(review): the search itself does not exclude a <! that sits inside a
    " comment.
    if search('<!', 'bceW') == 0
      return indent(v:lnum - 1)
    endif
    let lnum = line('.')
    let col = col('.')
    let indent = indent('.')
    let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

    " The 'e' flag left the cursor on the "!", so the 1-based column doubles as
    " the 0-based offset of the first character after "<!".
    let [declaration, end] = s:lex1(line, col)
    if declaration == ""
      return indent + shiftwidth()
    elseif declaration == '--'
      " We're looking at a comment. Now, simply determine if the comment is
      " terminated or not. If it isn't, let Vim take care of that using
      " 'comments' and 'autoindent'. Otherwise, indent to the first line's level.
      while declaration != ""
        let [declaration, end] = s:lex(line, end)
        if declaration == "-->"
          return indent
        endif
      endwhile
      return -1
    elseif declaration == 'ELEMENT'
      " Check for element name. If none exists, indent one level.
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif

      " Check for token following element name. This can be a specification of
      " whether the start or end tag may be omitted. If nothing is found, indent
      " one level.
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n = 0
      while token =~ '[-O]' && n < 2
        let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
        let n += 1
      endwhile
      if token == ""
        return indent + shiftwidth()
      endif

      " Next comes the content model. If the token we've found isn't a
      " parenthesis it must be either ANY, EMPTY or some random junk. Either
      " way, we're done indenting this element, so set it to that of the first
      " line so that the terminating ">" winds up having the same indentation.
      if token != '('
        return indent
      endif

      " Now go through the content model. We need to keep track of the nesting
      " of parentheses. As soon as we hit 0 we're done. If that happens we must
      " have a complete content model. Thus set indentation to be the same as
      " that of the first line so that the terminating ">" winds up having the
      " same indentation. Otherwise, we'll indent to the innermost parentheses
      " not yet matched.
      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif

      " Finally, look for any additions and/or exceptions to the content model.
      " This is defined by a "+" or "-" followed by another content model
      " declaration.
      " TODO: Can the "-" be separated by whitespace from the "("?
      let seen = { '+(': 0, '-(': 0 }
      while 1
        let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
        if additions_exceptions != '+(' && additions_exceptions != '-('
          let [token, end] = s:lex(line, end)
          if token == '>'
            return indent
          endif
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
        endif

        " If we've seen an addition or exception already and this is of the same
        " kind, the user is writing a broken DTD. Time to bail.
        if seen[additions_exceptions]
          return indent
        endif
        let seen[additions_exceptions] = 1

        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endwhile
    elseif declaration == 'ATTLIST'
      " Check for element name. If none exists, indent one level.
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif

      " Check for any number of attributes.
      while 1
        " Check for attribute name. If none exists, indent one level, unless the
        " current line is a lone ">", in which case we indent to the same level
        " as the first line. Otherwise, if the attribute name is ">", we have
        " actually hit the end of the attribute list, in which case we indent to
        " the same level as the first line.
        let [name, end] = s:lex(line, end)
        if name == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
        elseif name == ">"
          return indent
        endif

        " Check for attribute value declaration. If none exists, indent two
        " levels. Otherwise, if it's an enumerated value, check for nested
        " parentheses and indent to the innermost one if we don't reach the end
        " of the list. Otherwise, just continue with looking for the default
        " attribute value.
        " TODO: Do validation of keywords
        " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 2
        elseif value == 'NOTATION'
          " If this is an enumerated value based on notations, read another
          " token for the actual value. If it doesn't exist, indent three levels.
          " TODO: If validating according to above, value must be equal to '('.
          let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
          if value == ""
            return indent + shiftwidth() * 3
          endif
        endif

        if value == '('
          let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
          if indent_of_innermost != -1
            return indent_of_innermost
          endif
        endif

        " Finally look for the attribute's default value. If none exists, indent
        " two levels.
        let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
        if default == ""
          return indent + shiftwidth() * 2
        elseif default == '#FIXED'
          " We need to look for the fixed value. If none exists, indent three
          " levels.
          let [default, end] = s:lex(line, end, '^"\_[^"]*"')
          if default == ""
            return indent + shiftwidth() * 3
          endif
        endif
      endwhile
    elseif declaration == 'ENTITY'
      " Check for entity name. If none exists, indent one level. Otherwise, if
      " the name actually turns out to be a percent sign, "%", this is a
      " parameter entity. Read another token to determine the entity name and,
      " again, if none exists, indent one level.
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      elseif name == '%'
        let [name, end] = s:lex(line, end)
        if name == ""
          return indent + shiftwidth()
        endif
      endif

      " Now check for the entity value. If none exists, indent one level. If it
      " does exist, indent to same level as first line, as we're now done with
      " this entity.
      "
      " The entity value can be a string in single or double quotes (no escapes
      " to worry about, as entities are used instead). However, it can also be
      " that this is an external entity. In that case we have to look further
      " for (possibly) a public ID and a URI, optionally followed by the NDATA
      " keyword and the actual notation name. For the public ID and URI, indent
      " two levels if they don't exist. If neither the NDATA keyword nor the
      " closing ">" has been written yet, indent one level (or to the first
      " line's level when the current line is a lone ">"). If the NDATA keyword
      " is there but the notation name isn't, indent two levels. Otherwise
      " indent to the same level as the first line, as we're now done with this
      " entity.
      let [value, end] = s:lex(line, end)
      if value == ""
        return indent + shiftwidth()
      elseif value == 'SYSTEM' || value == 'PUBLIC'
        " NOTE(review): unlike the other patterns in this function, the
        " quoted-string patterns below are not anchored with "^".
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif

        if value == 'PUBLIC'
          let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
          if quoted_string == ""
            return indent + shiftwidth() * 2
          endif
        endif

        let [ndata, end] = s:lex(line, end)
        if ndata == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
        elseif ndata[0] == '>'
          " The declaration was closed right after the external ID, i.e. this is
          " a parsed external entity without an NDATA part. Nothing is missing,
          " so do not go looking for a notation name.
          return indent
        endif

        let [name, end] = s:lex(line, end)
        return name == "" ? (indent + shiftwidth() * 2) : indent
      else
        return indent
      endif
    elseif declaration == 'NOTATION'
      " Check for notation name. If none exists, indent one level.
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif

      " Now check for the external ID. If none exists, indent one level.
      let [id, end] = s:lex(line, end)
      if id == ""
        return indent + shiftwidth()
      elseif id == 'SYSTEM' || id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif

        if id == 'PUBLIC'
          let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
          if quoted_string == ""
            " TODO: Should use s:lex here on getline(v:lnum) and check for >.
            return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
          elseif quoted_string == '>'
            return indent
          endif
        endif
      endif

      return indent
    endif

    " TODO: Processing directives could be indented I suppose. But perhaps it's
    " just as well to let the user decide how to indent them (perhaps extending
    " this function to include proper support for whatever processing directive
    " language they want to use).

    " Conditional sections are simply passed along to let Vim decide what to do
    " (and hence the user).
    return -1
  endfunction
  " Put back the 'cpoptions' value stashed in s:cpo_save, then drop the stash.
  let &cpoptions = s:cpo_save | unlet s:cpo_save