dtd.vim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. " Vim indent file
  2. " Language: DTD (Document Type Definition for XML)
  3. " Previous Maintainer: Nikolai Weibull <now@bitwi.se>
  4. " Latest Revision: 2011-07-08
  " Buffer-local indent settings: let GetDTDIndent() compute the indent and
  " re-indent on CTRL-F, when a line is opened with o/O and when ">" is typed.
  setlocal indentexpr=GetDTDIndent()
  setlocal indentkeys=!^F,o,O,>
  setlocal nosmartindent

  " Tell Vim how to revert the options above when the buffer's filetype
  " changes. This has to come before the guard below, which stops sourcing
  " for every buffer after the first one.
  let b:undo_indent = "setlocal indentexpr< indentkeys< smartindent<"

  " The functions only have to be defined once per Vim session.
  if exists("*GetDTDIndent")
    finish
  endif

  " Source the remainder of the script with Vim-default 'cpoptions'; the
  " saved value is put back at the end of the file.
  let s:cpo_save = &cpo
  set cpo&vim

  " Default token shape used by the lexer: a run of non-whitespace characters.
  " TODO: Needs to be adjusted to stop at [, <, and ].
  let s:token_pattern = '^[^[:space:]]\+'
  " Read a single token from a:input.
  "
  " Whitespace (including newlines) following byte offset a:start is skipped,
  " then the token pattern is matched at the resulting position. The pattern
  " is the optional third argument, or s:token_pattern when it is omitted.
  "
  " Returns [token, offset-just-past-token] on success and ["", a:start] when
  " nothing could be matched.
  function s:lex1(input, start, ...)
    if a:0 > 0
      let l:token_re = a:1
    else
      let l:token_re = s:token_pattern
    endif

    " Only try to match a token when the whitespace skip itself succeeded.
    let l:token_start = matchend(a:input, '^\_s*', a:start)
    let l:token_end = l:token_start == -1
          \ ? -1
          \ : matchend(a:input, l:token_re, l:token_start)

    if l:token_end == -1
      return ["", a:start]
    endif
    return [strpart(a:input, l:token_start, l:token_end - l:token_start), l:token_end]
  endfunction
  " Read the next token from a:input that is not part of a "-- ... --"
  " comment.
  "
  " Tokens are fetched with s:lex1(), using the optional third argument as the
  " token pattern (s:token_pattern when omitted). Whenever the token read is
  " "--", everything up to and including the next "--" token is discarded and
  " the token after that is examined in turn.
  "
  " Returns the [token, offset] pair produced by the last s:lex1() call; the
  " token is "" when the input ran out, including inside an unclosed comment.
  function s:lex(input, start, ...)
    if a:0 > 0
      let l:token_re = a:1
    else
      let l:token_re = s:token_pattern
    endif

    let [l:tok, l:pos] = s:lex1(a:input, a:start, l:token_re)
    while l:tok == '--'
      " Consume tokens until the comment's closing "--" or the end of input.
      while 1
        let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:token_re)
        if l:tok == "" || l:tok == '--'
          break
        endif
      endwhile

      " Unterminated comment: report the failure as-is.
      if l:tok == ""
        break
      endif

      " Comment closed; look at whatever follows it.
      let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:token_re)
    endwhile
    return [l:tok, l:pos]
  endfunction
  " Walk through a parenthesised group whose opening "(" sits immediately
  " before byte offset a:end in a:line.
  "
  " Returns [-1, offset] as soon as the ")" matching that first "(" is read,
  " where offset is the position just past it. If the tokens run out first,
  " returns [indent, offset], where indent is the distance from the start of
  " the line holding the innermost unclosed "(" to the position right after
  " that parenthesis.
  function s:indent_to_innermost_parentheses(line, end)
    let l:item_re = '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\='
    let l:pos = a:end
    " Stack with the offsets of the parentheses that are still open.
    let l:open = [l:pos - 1]

    while 1
      let [l:item, l:pos] = s:lex(a:line, l:pos, l:item_re)
      if l:item == ""
        break
      endif

      let l:first = l:item[0]
      if l:first == '('
        call add(l:open, l:pos - 1)
      elseif l:first == ')'
        if len(l:open) == 1
          " This closes the outermost group: it is complete.
          return [-1, l:pos]
        endif
        call remove(l:open, -1)
      endif
    endwhile

    " strridx() yields the offset of the newline preceding the innermost
    " parenthesis (-1 on the first line), turning the offset into a distance
    " within its own line.
    let l:innermost = l:open[-1]
    return [l:innermost - strridx(a:line, "\n", l:innermost), l:pos]
  endfunction
  60. " TODO: Line and end could be script global (think OO members).
  " 'indentexpr' callback: compute the indent of line v:lnum in a DTD buffer.
  "
  " The nearest preceding "<!" is located and the text from there up to the
  " line being indented is tokenised again. How far the declaration has got
  " decides the result:
  "   - the indent of the line holding the "<!" when the declaration is
  "     complete (so a terminating ">" lines up with it),
  "   - one or more 'shiftwidth' levels deeper while parts are still missing,
  "   - the column after the innermost unclosed "(" inside content models and
  "     enumerated attribute values,
  "   - -1 (keep the current indent) for unterminated comments and for
  "     anything that is not an ELEMENT, ATTLIST, ENTITY or NOTATION
  "     declaration.
  "
  " All keyword tests use the case-sensitive operators (==#, !=#, =~#) and
  " \C in patterns, so the result does not depend on the user's 'ignorecase'
  " setting.
  function GetDTDIndent()
    " The first line of the buffer always gets no indent.
    if v:lnum == 1
      return 0
    endif

    " Search backwards for the nearest "<!" (a match under the cursor counts,
    " no wrapping around the start of the buffer) and put the cursor on the
    " "!". The search does not check whether the match lies inside a comment.
    " With no declaration to parse, reuse the indent of the previous line.
    if search('<!', 'bceW') == 0
      return indent(v:lnum - 1)
    endif
    let lnum = line('.')
    let col = col('.')
    let indent = indent('.')
    " Text to parse: the declaration's lines up to, but excluding, the line
    " being indented, or that line itself when the "<!" was found on it.
    let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

    " col('.') is 1-based, so used as a 0-based offset it points just past
    " the "!".
    let [declaration, end] = s:lex1(line, col)
    if declaration ==# ""
      return indent + shiftwidth()
    elseif declaration ==# '--'
      " A comment. If it is terminated, indent like its first line. If not,
      " return -1 and leave the line to 'comments' and 'autoindent'.
      while declaration !=# ""
        let [declaration, end] = s:lex(line, end)
        if declaration ==# "-->"
          return indent
        endif
      endwhile
      return -1
    elseif declaration ==# 'ELEMENT'
      " Element name; indent one level when it is still missing.
      let [name, end] = s:lex(line, end)
      if name ==# ""
        return indent + shiftwidth()
      endif

      " Up to two tag-omission flags ("-" or "O") may precede the content
      " model. Indent one level when nothing follows the name.
      let model_start = '^\C\%([-O(]\|ANY\|EMPTY\)'
      let [token, end] = s:lex(line, end, model_start)
      let n = 0
      while token =~# '[-O]' && n < 2
        let [token, end] = s:lex(line, end, model_start)
        let n += 1
      endwhile
      if token ==# ""
        return indent + shiftwidth()
      endif

      " Anything but "(" (ANY, EMPTY or a leftover flag) means there is no
      " content model to track: use the indent of the first line so the
      " terminating ">" lines up with it.
      if token !=# '('
        return indent
      endif

      " Inside the content model, indent to the innermost parenthesis that
      " is still open. -1 means the model is complete.
      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif

      " Finally look for additions "+(" and exceptions "-(", each of which
      " introduces another content model and may appear at most once.
      " TODO: Can the "-" be separated by whitespace from the "("?
      let seen = { '+(': 0, '-(': 0 }
      while 1
        let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
        if additions_exceptions !=# '+(' && additions_exceptions !=# '-('
          let [token, end] = s:lex(line, end)
          if token ==# '>'
            return indent
          endif
          " Indent one level unless the current line starts with ">" or both
          " an addition and an exception have already been read.
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~# '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
        endif

        " A second group of the same kind is a broken DTD; stop here.
        if seen[additions_exceptions]
          return indent
        endif
        let seen[additions_exceptions] = 1

        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endwhile
    elseif declaration ==# 'ATTLIST'
      " Element name; indent one level when it is still missing.
      let [name, end] = s:lex(line, end)
      if name ==# ""
        return indent + shiftwidth()
      endif

      " Any number of attribute definitions: name, type, default.
      while 1
        " Attribute name. When none is left, indent one level unless the
        " current line starts with ">". A ">" read as the name ends the list.
        let [name, end] = s:lex(line, end)
        if name ==# ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~# '^\s*>' ? indent : (indent + shiftwidth())
        elseif name ==# ">"
          return indent
        endif

        " Attribute type; indent two levels when it is missing.
        " TODO: Do validation of keywords
        " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value ==# ""
          return indent + shiftwidth() * 2
        elseif value ==# 'NOTATION'
          " NOTATION is followed by the actual enumeration; indent three
          " levels when that is missing.
          " TODO: If validating according to above, value must be equal to '('.
          let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
          if value ==# ""
            return indent + shiftwidth() * 3
          endif
        endif

        " Enumerated type: indent to the innermost open parenthesis until
        " the list has been closed.
        if value ==# '('
          let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
          if indent_of_innermost != -1
            return indent_of_innermost
          endif
        endif

        " Default value; indent two levels when it is missing.
        let [default, end] = s:lex(line, end, '^\C\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
        if default ==# ""
          return indent + shiftwidth() * 2
        elseif default ==# '#FIXED'
          " #FIXED must be followed by the fixed value; indent three levels
          " when that is missing.
          let [default, end] = s:lex(line, end, '^"\_[^"]*"')
          if default ==# ""
            return indent + shiftwidth() * 3
          endif
        endif
      endwhile
    elseif declaration ==# 'ENTITY'
      " Entity name, optionally preceded by "%" for a parameter entity.
      " Indent one level while the name is missing.
      let [name, end] = s:lex(line, end)
      if name ==# ""
        return indent + shiftwidth()
      elseif name ==# '%'
        let [name, end] = s:lex(line, end)
        if name ==# ""
          return indent + shiftwidth()
        endif
      endif

      " Entity value; indent one level when it is missing. After SYSTEM or
      " PUBLIC, one respectively two quoted strings are expected (two levels
      " when missing), then one more token (one level when missing) and the
      " name following it (two levels when missing). Any other value
      " completes the entity, so the indent of the first line is used.
      let [value, end] = s:lex(line, end)
      if value ==# ""
        return indent + shiftwidth()
      elseif value ==# 'SYSTEM' || value ==# 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string ==# ""
          return indent + shiftwidth() * 2
        endif

        if value ==# 'PUBLIC'
          let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
          if quoted_string ==# ""
            return indent + shiftwidth() * 2
          endif
        endif

        " The token read here is not checked to actually be "NDATA".
        let [ndata, end] = s:lex(line, end)
        if ndata ==# ""
          return indent + shiftwidth()
        endif

        let [name, end] = s:lex(line, end)
        return name ==# "" ? (indent + shiftwidth() * 2) : indent
      else
        return indent
      endif
    elseif declaration ==# 'NOTATION'
      " Notation name; indent one level when it is still missing.
      let [name, end] = s:lex(line, end)
      if name ==# ""
        return indent + shiftwidth()
      endif

      " External ID; indent one level when it is missing.
      let [id, end] = s:lex(line, end)
      if id ==# ""
        return indent + shiftwidth()
      elseif id ==# 'SYSTEM' || id ==# 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string ==# ""
          return indent + shiftwidth() * 2
        endif

        if id ==# 'PUBLIC'
          " The second string may be absent, in which case ">" follows.
          let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
          if quoted_string ==# ""
            " TODO: Should use s:lex here on getline(v:lnum) and check for >.
            return getline(v:lnum) =~# '^\s*>' ? indent : (indent + shiftwidth() * 2)
          elseif quoted_string ==# '>'
            return indent
          endif
        endif
      endif

      return indent
    endif

    " TODO: Processing directives could be indented I suppose. But perhaps
    " it's just as well to let the user decide how to indent them (perhaps
    " extending this function to include proper support for whatever
    " processing directive language they want to use).

    " Conditional sections are simply passed along to let Vim decide what to
    " do (and hence the user).
    return -1
  endfunction
  " Put back the 'cpoptions' value that was saved before the functions were
  " defined, then discard the temporary script variable.
  let &cpo = s:cpo_save | unlet s:cpo_save