hilite.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. /* hilite.c, generic syntax highlighting, Ait Emacs, Kevin Bloom, BSD 3-Clause, 2023-2025 */
  2. #include "header.h"
  3. #include "util.h"
  /* Highlight state that parse_text returns for the character it last examined. */
  4. int state = ID_DEFAULT;
  /* State that parse_text copies into state at the start of its next non-skipped call. */
  5. int next_state = ID_DEFAULT;
  /* While positive, parse_text returns the current state without examining the text;
     it is decremented on every parse_text call and by dec_skip. */
  6. int skip_count = 0;
  /* State that parse_text switches to during skipping once exclude_count has run down
     to zero; ID_DEFAULT means no switch is pending. */
  7. int exclude_state = ID_DEFAULT;
  /* Number of skipped characters still to pass before exclude_state takes effect. */
  8. int exclude_count = 0;
  9. char_t get_at(buffer_t *bp, point_t pt)
  10. {
  11. return (*ptr(bp, pt));
  12. }
  13. void set_parse_state(buffer_t *bp, point_t pt, window_t *wp, int loop)
  14. {
  15. register point_t po;
  16. state = ID_DEFAULT;
  17. next_state = ID_DEFAULT;
  18. skip_count = 0;
  19. if(bp->b_mode != NULL && loop) {
  20. for (po =0; po < pt; po++)
  21. parse_text(bp, po);
  22. wp->w_hilite = state;
  23. }
  24. }
  25. void write_parse_state(window_t *wp)
  26. {
  27. state = wp->w_hilite;
  28. next_state = wp->w_hilite;
  29. skip_count = 0;
  30. }
  31. /* we don't bother running the syntax logic on space chars
  32. therefore we must manually decrease the skip_count if we are
  33. in a syntax highlight that includes space chars.
  34. */
  35. void dec_skip()
  36. {
  37. skip_count--;
  38. }
  /*
   * parse_text: compute the highlight state for the character at point pt of
   * buffer bp and return it.
   *
   * The function is driven by the file-level variables state, next_state,
   * skip_count, exclude_state and exclude_count, and updates them as a side
   * effect.  The checks run in this order: pending skip, block comment start,
   * block comment end, line comment start, line comment end, string start and
   * end handling, and finally the keyword patterns of the buffer mode.
   *
   * NOTE(review): several character literals in the keyword matcher appear
   * empty in this copy of the file; presumably they were control-character
   * markers lost in extraction.  Confirm against the original source before
   * changing any of that logic.
   */
  39. int parse_text(buffer_t *bp, point_t pt)
  40. {
  41. // if(bp->b_mode == NULL)
  42. // return state;
  /* A skip is pending: return the current state without looking at the text.
     skip_count is decremented on every call, even when it is not positive. */
  43. if (skip_count-- > 0) {
  44. if(exclude_count != 0)
  45. exclude_count--;
  /* Once the excluded run has been consumed, switch to the deferred state. */
  46. if(exclude_state != ID_DEFAULT &&
  47. exclude_count == 0) {
  48. state = exclude_state;
  49. exclude_state = ID_DEFAULT;
  50. }
  51. return state;
  52. }
  53. char_t c_now = get_at(bp, pt);
  /* NOTE(review): with pt == 0 this reads point -1; confirm ptr() tolerates that. */
  54. char_t c_prev = get_at(bp, pt-1);
  55. char_t next = c_now;
  56. int valid = TRUE, k = 0;
  57. state = next_state;
  /* Block comment start: text at pt must match every character of mode->mlc. */
  58. if (state == ID_DEFAULT &&
  59. bp->b_mode != NULL &&
  60. bp->b_mode->mlc != NULL) {
  61. next = c_now;
  62. for(int i = 0; bp->b_mode->mlc[i] != '\0'; i++) {
  63. next = get_at(bp, pt + i);
  64. if(next != bp->b_mode->mlc[i]) {
  65. valid = FALSE;
  66. break;
  67. }
  68. }
  69. if(valid) {
  70. skip_count = 1;
  71. return (next_state = state = ID_BLOCK_COMMENT);
  72. }
  73. valid = TRUE;
  74. }
  /* Block comment end: text at pt must match mode->emlc.  The remaining
     characters of the terminator are skipped and the state after them is
     ID_DEFAULT. */
  75. if (state == ID_BLOCK_COMMENT &&
  76. bp->b_mode != NULL &&
  77. bp->b_mode->emlc != NULL) {
  78. next = c_now;
  79. for(int i = 0; bp->b_mode->emlc[i] != '\0'; i++) {
  80. next = get_at(bp, pt + i);
  81. if(next != bp->b_mode->emlc[i]) {
  82. valid = FALSE;
  83. break;
  84. }
  85. }
  86. if(valid) {
  87. skip_count = strlen(bp->b_mode->emlc) - 1;
  88. next_state = ID_DEFAULT;
  89. return ID_BLOCK_COMMENT;
  90. }
  91. valid = TRUE;
  92. }
  /* Line comment start: text at pt must match the non-empty mode->slc. */
  93. if (state == ID_DEFAULT &&
  94. bp->b_mode != NULL &&
  95. bp->b_mode->slc != NULL &&
  96. bp->b_mode->slc[0] != '\0') {
  97. next = c_now;
  98. for(int i = 0; bp->b_mode->slc[i] != '\0'; i++) {
  99. next = get_at(bp, pt + i);
  100. if(next != bp->b_mode->slc[i]) {
  101. valid = FALSE;
  102. break;
  103. }
  104. }
  105. if(valid) {
  106. skip_count = 1;
  107. return (next_state = state = ID_LINE_COMMENT);
  108. }
  109. valid = TRUE;
  110. }
  /* A line comment ends at the newline; the newline itself is returned as ID_DEFAULT. */
  111. if (state == ID_LINE_COMMENT && c_now == '\n')
  112. return (next_state = ID_DEFAULT);
  /* Double-quoted string start: scan forward from pt+1 up to the newline or
     the end of the buffer.  The string is accepted when a closing quote is
     found, when the mode has bmls unset, or when a backslash is directly
     followed by a newline. */
  113. if (state == ID_DEFAULT && c_now == '"') {
  114. int enable = FALSE;
  115. char_t z = get_at(bp, pt+1);
  116. point_t end = pos(bp, bp->b_ebuf);
  117. for(point_t i = pt+1; z != '\n' && i <= end; i++, z = get_at(bp, i)) {
  118. if(z == '"') {
  119. enable = TRUE;
  120. break;
  121. }
  122. if((bp->b_mode != NULL && !bp->b_mode->bmls) || (z == '\\' && get_at(bp, i+1) == '\n')) {
  123. enable = TRUE;
  124. break;
  125. }
  126. }
  127. if(enable)
  128. return (next_state = ID_DOUBLE_STRING);
  129. }
  /* Backtick string start, only when the mode sets bqas. */
  130. if (state == ID_DEFAULT &&
  131. bp->b_mode != NULL &&
  132. bp->b_mode->bqas &&
  133. c_now == '`')
  134. return (next_state = ID_BACK_STRING);
  /* Single-quoted string start, only when the mode sets sqas and a closing
     quote exists before the newline or the end of the buffer. */
  135. if (state == ID_DEFAULT &&
  136. bp->b_mode != NULL &&
  137. bp->b_mode->sqas &&
  138. c_now == '\'') {
  139. int enable = FALSE;
  140. char_t z = get_at(bp, pt+1);
  141. point_t end = pos(bp, bp->b_ebuf);
  142. for(point_t i = pt+1; z != '\n' && i <= end; i++, z = get_at(bp, i)) {
  143. if(z == '\'') {
  144. enable = TRUE;
  145. break;
  146. }
  147. }
  148. if(enable)
  149. return (next_state = ID_SINGLE_STRING);
  150. }
  /* Inside a string a backslash makes the following character skipped, so an
     escaped quote does not close the string. */
  151. if (state == ID_DOUBLE_STRING && c_now == '\\') {
  152. skip_count = 1;
  153. return (next_state = ID_DOUBLE_STRING);
  154. }
  /* Closing double quote: still coloured as string, next character is default. */
  155. if (state == ID_DOUBLE_STRING && c_now == '"') {
  156. next_state = ID_DEFAULT;
  157. return ID_DOUBLE_STRING;
  158. }
  159. if (state == ID_SINGLE_STRING && c_now == '\\') {
  160. skip_count = 1;
  161. return (next_state = ID_SINGLE_STRING);
  162. }
  /* NOTE(review): this repeats the backtick check made earlier in this
     function; that earlier check returns on the same condition, so this copy
     looks unreachable. */
  163. if (state == ID_DEFAULT &&
  164. bp->b_mode != NULL &&
  165. bp->b_mode->bqas &&
  166. c_now == '`')
  167. return (next_state = ID_BACK_STRING);
  168. if (state == ID_BACK_STRING && c_now == '\\') {
  169. skip_count = 1;
  170. return (next_state = ID_BACK_STRING);
  171. }
  /* Closing single quote and closing backtick. */
  172. if (state == ID_SINGLE_STRING && c_now == '\'') {
  173. next_state = ID_DEFAULT;
  174. return ID_SINGLE_STRING;
  175. }
  176. if (state == ID_BACK_STRING && c_now == '`') {
  177. next_state = ID_DEFAULT;
  178. return ID_BACK_STRING;
  179. }
  /* Keyword patterns.  ep is the point of the end of the buffer; sub is
     subtracted from the matched length when skip_count is set below. */
  180. point_t ep = pos(bp, bp->b_ebuf);
  181. int sub = 1;
  182. if(bp->b_mode != NULL &&
  183. bp->b_mode->keywords != NULL &&
  184. state == ID_DEFAULT) {
  /* Try each pattern in turn; the list ends at the entry whose word is NULL. */
  185. for(int i = 0; bp->b_mode->keywords[i].word != NULL; i++) {
  /* l indexes the pattern, k counts buffer characters consumed from pt, and t
     records how an inner wildcard scan ended. */
  186. int l = 0, t = 0;
  187. k = 0;
  188. sub = 1;
  189. exclude_count = 0;
  190. exclude_state = ID_DEFAULT;
  /* Start boundary: a pattern that does not begin with the marker only
     matches at pt == 0, after whitespace, or after a symbol other than '-'
     and '_'.  Otherwise the function returns ID_DEFAULT immediately, without
     trying the remaining patterns.
     NOTE(review): the marker literal is empty in this copy. */
  191. if(bp->b_mode->keywords[i].word[l] != '' && (pt == 0 ||
  192. (is_symbol(c_prev) &&
  193. (c_prev != '-' && c_prev != '_'))
  194. || isspace(c_prev))) {
  195. // do nothing
  196. } else if(bp->b_mode->keywords[i].word[l] == '') {
  197. l++;
  198. } else {
  199. return (state = ID_DEFAULT);
  200. }
  /* A further marker requires the previous character to be a newline or pt to
     be 0.  NOTE(review): presumably a start-of-line marker; the literal is
     empty in this copy. */
  201. if(bp->b_mode->keywords[i].word[l] == '') {
  202. if(c_prev != '\n' && pt != 0)
  203. return (state = ID_DEFAULT);
  204. l++;
  205. }
  /* Walk the pattern and the buffer together.  Leaving the loop with k == 0
     means the pattern did not match. */
  206. for(k = 0; bp->b_mode->keywords[i].word[l] != '\0'; k++, l++) {
  207. c_now = get_at(bp, pt+k);
  208. /* at the end */
  209. if(bp->b_mode->keywords[i].word[l] == '') {
  210. l++;
  /* Marker is the last pattern character: consume up to the newline or the
     end of the buffer. */
  211. if(bp->b_mode->keywords[i].word[l] == '\0') {
  212. for(; c_now != '\n' && pt+k != ep; k++) {
  213. c_now = get_at(bp, pt+k);
  214. }
  215. break;
  216. } else if(bp->b_mode->keywords[i].word[l] > 32) {
  /* Marker followed by a printable character: consume until that character.
     NOTE(review): this scan has no end-of-buffer bound. */
  217. for(; c_now != bp->b_mode->keywords[i].word[l]; k++) {
  218. c_now = get_at(bp, pt+k);
  219. }
  220. k--;
  221. } else if(bp->b_mode->keywords[i].word[l] == '') {
  222. for(; c_now != '\n' && pt+k != ep ; k++) {
  223. c_now = get_at(bp, pt+k);
  224. if(bp->b_mode->keywords[i].word[l] == '' &&
  225. bp->b_mode->keywords[i].word[l+1] == c_now) {
  226. t = 2;
  227. break;
  228. }
  229. }
  230. if(t == 0) {
  231. k = 0;
  232. break;
  233. }
  234. if(t == 2) {
  235. l++;
  236. k--;
  237. sub++;
  238. continue;
  239. }
  240. }
  241. }
  /* Word-style wildcards: consume non-space characters.  The flag all
     distinguishes the two markers compared here; when it is unset the scan
     also stops at symbols, as judged by is_symbolis or is_symboli.
     NOTE(review): both marker literals are empty in this copy. */
  242. if(bp->b_mode->keywords[i].word[l] == '' ||
  243. bp->b_mode->keywords[i].word[l] == '') {
  244. int all = bp->b_mode->keywords[i].word[l] == '';
  245. if(bp->b_mode->keywords[i].word[l+1] == '\0') {
  246. for(; !isspace(c_now) &&
  247. (all ? TRUE : !is_symbolis(
  248. c_now,
  249. bp->b_mode->saiv
  250. ));
  251. k++) {
  252. c_now = get_at(bp, pt+k);
  253. }
  254. k--;
  255. break;
  256. } else {
  257. l++;
  /* The wildcard is followed by more pattern: scan until the next pattern
     character is seen (t = 1), a marker followed by the current character is
     seen (t = 2), or a marker with whitespace at pt+k is seen (t = 3). */
  258. if(all) {
  259. for(; !isspace(c_now); k++) {
  260. if(bp->b_mode->keywords[i].word[l] == c_now) {
  261. t = 1;
  262. break;
  263. }
  264. if(bp->b_mode->keywords[i].word[l] == '' &&
  265. bp->b_mode->keywords[i].word[l+1] == c_now) {
  266. t = 2;
  267. break;
  268. }
  269. if(bp->b_mode->keywords[i].word[l] == '' &&
  270. isspace(get_at(bp, pt+k))) {
  271. t = 3;
  272. break;
  273. }
  274. if(pt+k == ep)
  275. break;
  276. c_now = get_at(bp, pt+k);
  277. }
  278. } else {
  279. for(; !isspace(c_now) &&
  280. (bp->b_mode->keywords[i].word[l] < 32 ||
  281. !is_symboli(
  282. c_now,
  283. bp->b_mode->keywords[i].word[l]
  284. ));
  285. k++) {
  286. if(bp->b_mode->keywords[i].word[l] == c_now) {
  287. t = 1;
  288. break;
  289. }
  290. if(bp->b_mode->keywords[i].word[l] == '' &&
  291. bp->b_mode->keywords[i].word[l+1] == c_now) {
  292. t = 2;
  293. break;
  294. }
  295. if(bp->b_mode->keywords[i].word[l] == '' &&
  296. isspace(get_at(bp, pt+k))) {
  297. t = 3;
  298. break;
  299. }
  300. if(pt+k == ep)
  301. break;
  302. c_now = get_at(bp, pt+k);
  303. }
  304. }
  /* t == 0: the scan ended without finding what follows the wildcard, so the
     pattern fails. */
  305. if(t == 0) {
  306. k = 0;
  307. break;
  308. }
  309. if(t == 1) {
  310. k--;
  311. continue;
  312. }
  313. if(t == 2) {
  314. l++;
  315. k--;
  316. sub++;
  317. continue;
  318. }
  319. if(t == 3) {
  320. // do nothing
  321. }
  322. }
  323. }
  /* Whitespace-run marker: consume consecutive whitespace in the buffer.
     NOTE(review): the marker literal is empty in this copy. */
  324. if(bp->b_mode->keywords[i].word[l] == '') {
  325. l++;
  326. c_now = get_at(bp, pt+k);
  327. for(; isspace(c_now) && pt+k != ep; k++) {
  328. c_now = get_at(bp, pt+k);
  329. }
  330. k--;
  331. c_now = get_at(bp, pt+k);
  332. }
  /* Marker followed by a literal that must equal the current character; on a
     match sub grows by one, which shortens the skip set after the loop. */
  333. if(bp->b_mode->keywords[i].word[l] == '') {
  334. if(bp->b_mode->keywords[i].word[l+1] == c_now) {
  335. sub++;
  336. l++;
  337. continue;
  338. } else {
  339. k = 0;
  340. exclude_count = 0;
  341. break;
  342. }
  343. }
  /* Exclusion marker: on a match the keyword colour is stored in
     exclude_state and exclude_count grows, so the colour is applied later by
     the skip branch at the top of this function. */
  344. if(bp->b_mode->keywords[i].word[l] == '') {
  345. if(bp->b_mode->keywords[i].word[l+1] == c_now) {
  346. exclude_state = bp->b_mode->keywords[i].color;
  347. exclude_count++;
  348. k--;
  349. continue;
  350. } else {
  351. k = 0;
  352. exclude_count = 0;
  353. break;
  354. }
  355. }
  /* Plain pattern character: it must equal the buffer character. */
  356. if(bp->b_mode->keywords[i].word[l] != c_now) {
  357. k = 0;
  358. break;
  359. }
  360. }
  /* Accept the match only when something was consumed, the pattern is
     exhausted (or one character from its end), and the character after the
     match is whitespace or a symbol.  A following '-' never qualifies, and a
     following '_' qualifies only when the pattern character at l is '_'. */
  361. c_now = get_at(bp, pt+k);
  362. if(k > 0 && (isspace(c_now) ||
  363. (is_symbol(c_now) &&
  364. (c_now != '-' && (
  365. bp->b_mode->keywords[i].word[l] == '_' || c_now != '_')))) &&
  366. (bp->b_mode->keywords[i].word[l] == '\0' ||
  367. bp->b_mode->keywords[i].word[l+1] == '\0')) {
  368. skip_count = k-sub;
  369. next_state = ID_DEFAULT;
  /* With an exclusion pending the current character stays ID_DEFAULT. */
  370. if(exclude_state != ID_DEFAULT) {
  371. return (state = ID_DEFAULT);
  372. }
  373. return (state = bp->b_mode->keywords[i].color);
  374. }
  375. }
  376. }
  /* No rule fired: remain in the current state. */
  377. if (state != ID_DEFAULT)
  378. return (next_state = state);
  379. // if (state == ID_DEFAULT && c_now >= '0' && c_now <= '9') {
  380. // next_state = ID_DEFAULT;
  381. // return (state = ID_DIGITS);
  382. // }
  383. // if (state == ID_DEFAULT && 1 == is_symbol(c_now)) {
  384. // next_state = ID_DEFAULT;
  385. // return (state = ID_SYMBOL);
  386. // }
  387. return (next_state = state);
  388. }