markdown.c 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462
  1. /* markdown: a C implementation of John Gruber's Markdown markup language.
  2. *
  3. * Copyright (C) 2007 David L Parsons.
  4. * The redistribution terms are provided in the COPYRIGHT file that must
  5. * be distributed with this source code.
  6. */
  7. #include "config.h"
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <stdarg.h>
  11. #include <stdlib.h>
  12. #include <time.h>
  13. #include <ctype.h>
  14. #include "cstring.h"
  15. #include "markdown.h"
  16. #include "amalloc.h"
  17. #include "tags.h"
  18. typedef int (*stfu)(const void*,const void*);
  19. typedef ANCHOR(Paragraph) ParagraphRoot;
  20. static Paragraph *Pp(ParagraphRoot *, Line *, int);
  21. static Paragraph *compile(Line *, int, MMIOT *);
  22. /* case insensitive string sort for Footnote tags.
  23. */
  24. int
  25. __mkd_footsort(Footnote *a, Footnote *b)
  26. {
  27. int i;
  28. char ac, bc;
  29. if ( S(a->tag) != S(b->tag) )
  30. return S(a->tag) - S(b->tag);
  31. for ( i=0; i < S(a->tag); i++) {
  32. ac = tolower(T(a->tag)[i]);
  33. bc = tolower(T(b->tag)[i]);
  34. if ( isspace(ac) && isspace(bc) )
  35. continue;
  36. if ( ac != bc )
  37. return ac - bc;
  38. }
  39. return 0;
  40. }
  41. /* find the first blank character after position <i>
  42. */
  43. static int
  44. nextblank(Line *t, int i)
  45. {
  46. while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
  47. ++i;
  48. return i;
  49. }
  50. /* find the next nonblank character after position <i>
  51. */
  52. static int
  53. nextnonblank(Line *t, int i)
  54. {
  55. while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
  56. ++i;
  57. return i;
  58. }
  59. /* find the first nonblank character on the Line.
  60. */
  61. int
  62. mkd_firstnonblank(Line *p)
  63. {
  64. return nextnonblank(p,0);
  65. }
  66. static inline int
  67. blankline(Line *p)
  68. {
  69. return ! (p && (S(p->text) > p->dle) );
  70. }
  71. static Line *
  72. skipempty(Line *p)
  73. {
  74. while ( p && (p->dle == S(p->text)) )
  75. p = p->next;
  76. return p;
  77. }
  78. void
  79. ___mkd_tidy(Cstring *t)
  80. {
  81. while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
  82. --S(*t);
  83. }
  84. static struct kw comment = { "!--", 3, 0 };
  85. static struct kw *
  86. isopentag(Line *p)
  87. {
  88. int i=0, len;
  89. char *line;
  90. if ( !p ) return 0;
  91. line = T(p->text);
  92. len = S(p->text);
  93. if ( len < 3 || line[0] != '<' )
  94. return 0;
  95. if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
  96. /* comments need special case handling, because
  97. * the !-- doesn't need to end in a whitespace
  98. */
  99. return &comment;
  100. /* find how long the tag is so we can check to see if
  101. * it's a block-level tag
  102. */
  103. for ( i=1; i < len && T(p->text)[i] != '>'
  104. && T(p->text)[i] != '/'
  105. && !isspace(T(p->text)[i]); ++i )
  106. ;
  107. return mkd_search_tags(T(p->text)+1, i-1);
  108. }
  109. typedef struct _flo {
  110. Line *t;
  111. int i;
  112. } FLO;
  113. #define floindex(x) (x.i)
  114. static unsigned int
  115. flogetc(FLO *f)
  116. {
  117. if ( f && f->t ) {
  118. if ( f->i < S(f->t->text) )
  119. return (unsigned char)T(f->t->text)[f->i++];
  120. f->t = f->t->next;
  121. f->i = 0;
  122. return flogetc(f);
  123. }
  124. return EOF;
  125. }
  126. static void
  127. splitline(Line *t, int cutpoint)
  128. {
  129. if ( t && (cutpoint < S(t->text)) ) {
  130. Line *tmp = calloc(1, sizeof *tmp);
  131. tmp->next = t->next;
  132. t->next = tmp;
  133. SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
  134. EXPAND(tmp->text) = 0;
  135. S(tmp->text)--;
  136. S(t->text) = cutpoint;
  137. }
  138. }
  139. #define UNCHECK(l) ((l)->line_flags &= ~CHECKED)
  140. #define UNLESS_FENCED(t) if (fenced) { \
  141. other = 1; l->count += (c == ' ' ? 0 : -1); \
  142. } else { t; }
  143. /*
  144. * walk a line, seeing if it's any of half a dozen interesting regular
  145. * types.
  146. */
  147. static void
  148. checkline(Line *l, mkd_flag_t *flags)
  149. {
  150. int eol, i;
  151. int dashes = 0, spaces = 0,
  152. equals = 0, underscores = 0,
  153. stars = 0, tildes = 0, other = 0,
  154. backticks = 0, fenced = 0;
  155. l->line_flags |= CHECKED;
  156. l->kind = chk_text;
  157. l->count = 0;
  158. if (l->dle >= 4) { l->kind=chk_code; return; }
  159. for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
  160. ;
  161. for (i=l->dle; i<eol; i++) {
  162. register int c = T(l->text)[i];
  163. int is_fence_char = 0;
  164. if ( c != ' ' ) l->count++;
  165. switch (c) {
  166. case '-': UNLESS_FENCED(dashes = 1); break;
  167. case ' ': UNLESS_FENCED(spaces = 1); break;
  168. case '=': equals = 1; break;
  169. case '_': UNLESS_FENCED(underscores = 1); break;
  170. case '*': stars = 1; break;
  171. default:
  172. if ( is_flag_set(flags, MKD_FENCEDCODE) ) {
  173. switch (c) {
  174. case '~': if (other) return; is_fence_char = 1; tildes = 1; break;
  175. case '`': if (other) return; is_fence_char = 1; backticks = 1; break;
  176. }
  177. if (is_fence_char) {
  178. fenced = 1;
  179. break;
  180. }
  181. }
  182. other = 1;
  183. l->count--;
  184. if (!fenced) return;
  185. }
  186. }
  187. if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
  188. return;
  189. if ( spaces ) {
  190. if ( (underscores || stars || dashes) )
  191. l->kind = chk_hr;
  192. return;
  193. }
  194. if ( stars || underscores ) { l->kind = chk_hr; }
  195. else if ( dashes ) { l->kind = chk_dash; }
  196. else if ( equals ) { l->kind = chk_equal; }
  197. else if ( tildes ) { l->kind = chk_tilde; }
  198. else if ( backticks ) { l->kind = chk_backtick; }
  199. }
  200. /* markdown only does special handling of comments if the comment end
  201. * is at the end of a line
  202. */
  203. static Line *
  204. commentblock(Paragraph *p, int *unclosed)
  205. {
  206. Line *t, *ret;
  207. char *end;
  208. for ( t = p->text; t ; t = t->next) {
  209. if ( end = strstr(T(t->text), "-->") ) {
  210. if ( nextnonblank(t, 3 + (end - T(t->text))) < S(t->text) )
  211. continue;
  212. /*splitline(t, 3 + (end - T(t->text)) );*/
  213. ret = t->next;
  214. t->next = 0;
  215. return ret;
  216. }
  217. }
  218. *unclosed = 1;
  219. return t;
  220. }
  221. static Line *
  222. htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
  223. {
  224. Line *ret;
  225. FLO f = { p->text, 0 };
  226. int c;
  227. int i, closing, depth=0;
  228. *unclosed = 0;
  229. if ( tag == &comment )
  230. return commentblock(p, unclosed);
  231. if ( tag->selfclose ) {
  232. ret = f.t->next;
  233. f.t->next = 0;
  234. return ret;
  235. }
  236. while ( (c = flogetc(&f)) != EOF ) {
  237. if ( c == '<' ) {
  238. /* tag? */
  239. c = flogetc(&f);
  240. if ( c == '!' ) { /* comment? */
  241. if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
  242. /* yes */
  243. while ( (c = flogetc(&f)) != EOF ) {
  244. if ( c == '-' && flogetc(&f) == '-'
  245. && flogetc(&f) == '>')
  246. /* consumed whole comment */
  247. break;
  248. }
  249. }
  250. }
  251. else {
  252. if ( closing = (c == '/') ) c = flogetc(&f);
  253. for ( i=0; i < tag->size; i++, c=flogetc(&f) ) {
  254. if ( tag->id[i] != toupper(c) )
  255. break;
  256. }
  257. if ( (i == tag->size) && !isalnum(c) ) {
  258. depth = depth + (closing ? -1 : 1);
  259. if ( depth == 0 ) {
  260. while ( c != EOF && c != '>' ) {
  261. /* consume trailing gunk in close tag */
  262. c = flogetc(&f);
  263. }
  264. if ( c == EOF )
  265. break;
  266. if ( !f.t )
  267. return 0;
  268. splitline(f.t, floindex(f));
  269. ret = f.t->next;
  270. f.t->next = 0;
  271. return ret;
  272. }
  273. }
  274. }
  275. }
  276. }
  277. *unclosed = 1;
  278. return 0;
  279. }
  280. /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
  281. */
  282. static int
  283. isfootnote(Line *t)
  284. {
  285. int i;
  286. if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
  287. return 0;
  288. for ( ++i; i < S(t->text) ; ++i ) {
  289. if ( T(t->text)[i] == '[' )
  290. return 0;
  291. else if ( T(t->text)[i] == ']' )
  292. return ( T(t->text)[i+1] == ':' ) ;
  293. }
  294. return 0;
  295. }
  296. static inline int
  297. isquote(Line *t)
  298. {
  299. return (t->dle < 4 && T(t->text)[t->dle] == '>');
  300. }
  301. static inline int
  302. iscode(Line *t)
  303. {
  304. return (t->dle >= 4);
  305. }
  306. static inline int
  307. ishr(Line *t, mkd_flag_t *flags)
  308. {
  309. if ( ! (t->line_flags & CHECKED) )
  310. checkline(t, flags);
  311. if ( t->count > 2 )
  312. return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
  313. return 0;
  314. }
  315. static int
  316. issetext(Line *t, int *htyp, mkd_flag_t *flags)
  317. {
  318. Line *n;
  319. /* check for setext-style HEADER
  320. * ======
  321. */
  322. if ( (n = t->next) ) {
  323. if ( !(n->line_flags & CHECKED) )
  324. checkline(n, flags);
  325. if ( n->kind == chk_dash || n->kind == chk_equal ) {
  326. *htyp = SETEXT;
  327. return 1;
  328. }
  329. }
  330. return 0;
  331. }
  332. static int
  333. ishdr(Line *t, int *htyp, mkd_flag_t *flags)
  334. {
  335. /* ANY leading `#`'s make this into an ETX header
  336. */
  337. if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
  338. *htyp = ETX;
  339. return 1;
  340. }
  341. /* And if not, maybe it's a SETEXT header instead
  342. */
  343. return issetext(t, htyp, flags);
  344. }
  345. static inline int
  346. end_of_block(Line *t, mkd_flag_t *flags)
  347. {
  348. int dummy;
  349. if ( !t )
  350. return 0;
  351. return ( (S(t->text) <= t->dle) || ishr(t, flags) || ishdr(t, &dummy, flags) );
  352. }
  353. static Line*
  354. is_discount_dt(Line *t, int *clip, mkd_flag_t *flags)
  355. {
  356. if ( is_flag_set(flags, MKD_DLDISCOUNT)
  357. && t
  358. && t->next
  359. && (S(t->text) > 2)
  360. && (t->dle == 0)
  361. && (T(t->text)[0] == '=')
  362. && (T(t->text)[S(t->text)-1] == '=') ) {
  363. if ( t->next->dle >= 4 ) {
  364. *clip = 4;
  365. return t;
  366. }
  367. else
  368. return is_discount_dt(t->next, clip, flags);
  369. }
  370. return 0;
  371. }
  372. static int
  373. is_extra_dd(Line *t)
  374. {
  375. return (t->dle < 4) && (T(t->text)[t->dle] == ':')
  376. && isspace(T(t->text)[t->dle+1]);
  377. }
  378. static Line*
  379. is_extra_dt(Line *t, int *clip, mkd_flag_t* flags)
  380. {
  381. if ( is_flag_set(flags, MKD_DLEXTRA)
  382. && t
  383. && t->next && S(t->text) && T(t->text)[0] != '='
  384. && T(t->text)[S(t->text)-1] != '=') {
  385. Line *x;
  386. if ( iscode(t) || end_of_block(t, flags) )
  387. return 0;
  388. if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
  389. *clip = x->dle+2;
  390. return t;
  391. }
  392. if ( x=is_extra_dt(t->next, clip, flags) )
  393. return x;
  394. }
  395. return 0;
  396. }
  397. static Line*
  398. isdefinition(Line *t, int *clip, int *kind, mkd_flag_t *flags)
  399. {
  400. Line *ret;
  401. *kind = 1;
  402. if ( ret = is_discount_dt(t,clip,flags) )
  403. return ret;
  404. *kind=2;
  405. return is_extra_dt(t,clip,flags);
  406. }
  407. static int
  408. islist(Line *t, int *clip, mkd_flag_t *flags, int *list_type)
  409. {
  410. int i, j;
  411. char *q;
  412. if ( end_of_block(t, flags) )
  413. return 0;
  414. if ( (is_flag_set(flags, MKD_DLEXTRA)||is_flag_set(flags, MKD_DLDISCOUNT)) && isdefinition(t,clip,list_type,flags) )
  415. return DL;
  416. if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
  417. i = nextnonblank(t, t->dle+1);
  418. *clip = (i > 4) ? 4 : i;
  419. *list_type = UL;
  420. return is_flag_set(flags, MKD_EXPLICITLIST) ? UL : AL;
  421. }
  422. if ( (j = nextblank(t,t->dle)) > t->dle ) {
  423. if ( T(t->text)[j-1] == '.' ) {
  424. if ( !is_flag_set(flags, MKD_NOALPHALIST)
  425. && (j == t->dle + 2)
  426. && isalpha(T(t->text)[t->dle]) ) {
  427. j = nextnonblank(t,j);
  428. *clip = (j > 4) ? 4 : j;
  429. *list_type = AL;
  430. return AL;
  431. }
  432. strtoul(T(t->text)+t->dle, &q, 10);
  433. if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
  434. j = nextnonblank(t,j);
  435. *clip = j;
  436. *list_type = OL;
  437. return AL;
  438. }
  439. }
  440. }
  441. return 0;
  442. }
  443. static Line *
  444. headerblock(Paragraph *pp, int htyp)
  445. {
  446. Line *ret = 0;
  447. Line *p = pp->text;
  448. int i, j;
  449. switch (htyp) {
  450. case SETEXT:
  451. /* p->text is header, p->next->text is -'s or ='s
  452. */
  453. pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
  454. ret = p->next->next;
  455. ___mkd_freeLine(p->next);
  456. p->next = 0;
  457. break;
  458. case ETX:
  459. /* p->text is ###header###, so we need to trim off
  460. * the leading and trailing `#`'s
  461. */
  462. for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
  463. && (i < 6); i++)
  464. ;
  465. pp->hnumber = i;
  466. while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
  467. ++i;
  468. CLIP(p->text, 0, i);
  469. UNCHECK(p);
  470. for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
  471. ;
  472. while ( j && isspace(T(p->text)[j-1]) )
  473. --j;
  474. S(p->text) = j;
  475. ret = p->next;
  476. p->next = 0;
  477. break;
  478. }
  479. return ret;
  480. }
  481. static Line *
  482. codeblock(Paragraph *p)
  483. {
  484. Line *t = p->text, *r;
  485. for ( ; t; t = r ) {
  486. __mkd_trim_line(t,4);
  487. if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
  488. ___mkd_freeLineRange(t,r);
  489. t->next = 0;
  490. return r;
  491. }
  492. }
  493. return t;
  494. }
  495. static int
  496. iscodefence(Line *r, int size, line_type kind, mkd_flag_t *flags)
  497. {
  498. if ( !is_flag_set(flags, MKD_FENCEDCODE) )
  499. return 0;
  500. if ( !(r->line_flags & CHECKED) )
  501. checkline(r, flags);
  502. if ( kind )
  503. return (r->kind == kind) && (r->count >= size);
  504. else
  505. return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
  506. }
  507. static Paragraph *
  508. fencedcodeblock(ParagraphRoot *d, Line **ptr, mkd_flag_t *flags)
  509. {
  510. Line *first, *r;
  511. Paragraph *ret;
  512. first = (*ptr);
  513. /* don't allow zero-length code fences
  514. */
  515. if ( (first->next == 0) || iscodefence(first->next, first->count, 0, flags) )
  516. return 0;
  517. /* find the closing fence, discard the fences,
  518. * return a Paragraph with the contents
  519. */
  520. for ( r = first; r && r->next; r = r->next )
  521. if ( iscodefence(r->next, first->count, first->kind, flags) ) {
  522. (*ptr) = r->next->next;
  523. ret = Pp(d, first->next, CODE);
  524. if (S(first->text) - first->count > 0) {
  525. char *lang_attr = T(first->text) + first->count;
  526. while ( *lang_attr != 0 && *lang_attr == ' ' ) lang_attr++;
  527. ret->lang = strdup(lang_attr);
  528. }
  529. else {
  530. ret->lang = 0;
  531. }
  532. ___mkd_freeLine(first);
  533. ___mkd_freeLine(r->next);
  534. r->next = 0;
  535. return ret;
  536. }
  537. return 0;
  538. }
  539. static int
  540. centered(Line *first, Line *last)
  541. {
  542. if ( first&&last ) {
  543. int len = S(last->text);
  544. if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
  545. && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
  546. CLIP(first->text, 0, 2);
  547. S(last->text) -= 2;
  548. return CENTER;
  549. }
  550. }
  551. return 0;
  552. }
  553. static int
  554. endoftextblock(Line *t, int toplevelblock, mkd_flag_t *flags)
  555. {
  556. int z;
  557. if ( end_of_block(t, flags) || isquote(t) )
  558. return 1;
  559. /* HORRIBLE STANDARDS KLUDGES:
  560. * 1. non-toplevel paragraphs absorb adjacent code blocks
  561. * 2. Toplevel paragraphs eat absorb adjacent list items,
  562. * but sublevel blocks behave properly.
  563. * (What this means is that we only need to check for code
  564. * blocks at toplevel, and only check for list items at
  565. * nested levels.)
  566. */
  567. return toplevelblock ? 0 : islist(t,&z,flags,&z);
  568. }
  569. static Line *
  570. textblock(Paragraph *p, int toplevel, mkd_flag_t *flags)
  571. {
  572. Line *t, *next;
  573. for ( t = p->text; t ; t = next ) {
  574. if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
  575. p->align = centered(p->text, t);
  576. t->next = 0;
  577. return next;
  578. }
  579. }
  580. return t;
  581. }
  582. /* length of the id: or class: kind in a special div-not-quote block
  583. */
  584. static int
  585. szmarkerclass(char *p)
  586. {
  587. if ( strncasecmp(p, "id:", 3) == 0 )
  588. return 3;
  589. if ( strncasecmp(p, "class:", 6) == 0 )
  590. return 6;
  591. return 0;
  592. }
  593. /*
  594. * check if the first line of a quoted block is the special div-not-quote
  595. * marker %[kind:]name%
  596. */
  597. #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
  598. static int
  599. isdivmarker(Line *p, int start, mkd_flag_t *flags)
  600. {
  601. char *s;
  602. int last, i;
  603. if ( is_flag_set(flags, MKD_NODIVQUOTE) )
  604. return 0;
  605. start = nextnonblank(p, start);
  606. last= S(p->text) - (1 + start);
  607. s = T(p->text) + start;
  608. if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
  609. return 0;
  610. i = szmarkerclass(s+1);
  611. if ( !iscsschar(s[i+1]) )
  612. return 0;
  613. while ( ++i < last )
  614. if ( !(isdigit(s[i]) || iscsschar(s[i])) )
  615. return 0;
  616. return 1;
  617. }
  618. /*
  619. * accumulate a blockquote.
  620. *
  621. * one sick horrible thing about blockquotes is that even though
  622. * it just takes ^> to start a quote, following lines, if quoted,
  623. * assume that the prefix is ``> ''. This means that code needs
  624. * to be indented *5* spaces from the leading '>', but *4* spaces
  625. * from the start of the line. This does not appear to be
  626. * documented in the reference implementation, but it's the
  627. * way the markdown sample web form at Daring Fireball works.
  628. */
  629. static Line *
  630. quoteblock(Paragraph *p, mkd_flag_t *flags)
  631. {
  632. Line *t, *q;
  633. int qp;
  634. for ( t = p->text; t ; t = q ) {
  635. if ( isquote(t) ) {
  636. /* clip leading spaces */
  637. for (qp = 0; T(t->text)[qp] != '>'; qp ++)
  638. /* assert: the first nonblank character on this line
  639. * will be a >
  640. */;
  641. /* clip '>' */
  642. qp++;
  643. /* clip next space, if any */
  644. if ( T(t->text)[qp] == ' ' )
  645. qp++;
  646. __mkd_trim_line(t,qp);
  647. UNCHECK(t);
  648. }
  649. q = skipempty(t->next);
  650. if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
  651. ___mkd_freeLineRange(t, q);
  652. t = q;
  653. break;
  654. }
  655. }
  656. if ( isdivmarker(p->text,0,flags) ) {
  657. char *prefix = "class";
  658. int i;
  659. q = p->text;
  660. p->text = p->text->next;
  661. if ( (i = szmarkerclass(1+T(q->text))) == 3 )
  662. /* and this would be an "%id:" prefix */
  663. prefix="id";
  664. if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
  665. sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
  666. T(q->text)+(i+1) );
  667. ___mkd_freeLine(q);
  668. }
  669. return t;
  670. }
  671. typedef int (*linefn)(Line *);
  672. /*
  673. * pull in a list block. A list block starts with a list marker and
  674. * runs until the next list marker, the next non-indented paragraph,
  675. * or EOF. You do not have to indent nonblank lines after the list
  676. * marker, but multiple paragraphs need to start with a 4-space indent.
  677. */
  678. static Line *
  679. listitem(Paragraph *p, int indent, mkd_flag_t *flags, linefn check)
  680. {
  681. Line *t, *q;
  682. int clip = indent;
  683. int z;
  684. int firstpara = 1;
  685. int ischeck;
  686. #define CHECK_NOT 0
  687. #define CHECK_NO 1
  688. #define CHECK_YES 2
  689. for ( t = p->text; t ; t = q) {
  690. UNCHECK(t);
  691. __mkd_trim_line(t, clip);
  692. if ( firstpara && !is_flag_set(flags, MKD_NORMAL_LISTITEM) ) {
  693. ischeck = CHECK_NOT;
  694. if ( strncmp(T(t->text)+t->dle, "[ ]", 3) == 0 )
  695. ischeck = CHECK_NO;
  696. else if ( strncasecmp(T(t->text)+t->dle, "[x]", 3) == 0 )
  697. ischeck = CHECK_YES;
  698. if ( ischeck != CHECK_NOT ) {
  699. __mkd_trim_line(t, 3);
  700. p->para_flags |= GITHUB_CHECK;
  701. if ( ischeck == CHECK_YES )
  702. p->para_flags |= IS_CHECKED;
  703. }
  704. firstpara = 0;
  705. }
  706. /* even though we had to trim a long leader off this item,
  707. * the indent for trailing paragraphs is still 4...
  708. */
  709. if (indent > 4) {
  710. indent = 4;
  711. }
  712. if ( (q = skipempty(t->next)) == 0 ) {
  713. ___mkd_freeLineRange(t,q);
  714. return 0;
  715. }
  716. /* after a blank line, the next block needs to start with a line
  717. * that's indented 4(? -- reference implementation allows a 1
  718. * character indent, but that has unfortunate side effects here)
  719. * spaces, but after that the line doesn't need any indentation
  720. */
  721. if ( q != t->next ) {
  722. if (q->dle < indent) {
  723. q = t->next;
  724. t->next = 0;
  725. return q;
  726. }
  727. /* indent at least 2, and at most as
  728. * as far as the initial line was indented. */
  729. indent = clip ? clip : 2;
  730. }
  731. if ( (q->dle < indent) && (ishr(q,flags) || islist(q,&z,flags,&z)
  732. || (check && (*check)(q)))
  733. && !issetext(q,&z,flags) ) {
  734. q = t->next;
  735. t->next = 0;
  736. return q;
  737. }
  738. clip = (q->dle > indent) ? indent : q->dle;
  739. }
  740. return t;
  741. }
  742. static Line *
  743. definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
  744. {
  745. ParagraphRoot d = { 0, 0 };
  746. Paragraph *p;
  747. Line *q = top->text, *text = 0, *labels;
  748. int z, para;
  749. while (( labels = q )) {
  750. if ( (q = isdefinition(labels, &z, &kind, &(f->flags))) == 0 )
  751. break;
  752. if ( (text = skipempty(q->next)) == 0 )
  753. break;
  754. if ( para = (text != q->next) )
  755. ___mkd_freeLineRange(q, text);
  756. q->next = 0;
  757. if ( kind == 1 /* discount dl */ )
  758. for ( q = labels; q; q = q->next ) {
  759. CLIP(q->text, 0, 1);
  760. UNCHECK(q);
  761. S(q->text)--;
  762. }
  763. do {
  764. p = Pp(&d, text, LISTITEM);
  765. text = listitem(p, clip, &(f->flags), (kind==2) ? is_extra_dd : 0);
  766. p->down = compile(p->text, 0, f);
  767. p->text = labels; labels = 0;
  768. if ( para && p->down ) p->down->align = PARA;
  769. if ( (q = skipempty(text)) == 0 )
  770. goto flee;
  771. if ( para = (q != text) ) {
  772. Line anchor;
  773. anchor.next = text;
  774. ___mkd_freeLineRange(&anchor,q);
  775. text = q;
  776. }
  777. } while ( kind == 2 && is_extra_dd(q) );
  778. }
  779. flee:
  780. top->text = 0;
  781. top->down = T(d);
  782. return text;
  783. }
  784. static Line *
  785. enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
  786. {
  787. ParagraphRoot d = { 0, 0 };
  788. Paragraph *p;
  789. Line *q = top->text, *text;
  790. int para = 0, z;
  791. while (( text = q )) {
  792. p = Pp(&d, text, LISTITEM);
  793. text = listitem(p, clip, &(f->flags), 0);
  794. p->down = compile(p->text, 0, f);
  795. p->text = 0;
  796. if ( para && p->down ) p->down->align = PARA;
  797. if ( (q = skipempty(text)) == 0
  798. || islist(q, &clip, &(f->flags), &z) != list_class )
  799. break;
  800. if ( para = (q != text) ) {
  801. Line anchor;
  802. anchor.next = text;
  803. ___mkd_freeLineRange(&anchor, q);
  804. if ( p->down ) p->down->align = PARA;
  805. }
  806. }
  807. top->text = 0;
  808. top->down = T(d);
  809. return text;
  810. }
  811. static int
  812. tgood(char c)
  813. {
  814. switch (c) {
  815. case '\'':
  816. case '"': return c;
  817. case '(': return ')';
  818. }
  819. return 0;
  820. }
  821. /*
  822. * eat lines for a markdown extra footnote
  823. */
  824. static Line *
  825. extrablock(Line *p)
  826. {
  827. Line *np;
  828. while ( p && p->next ) {
  829. np = p->next;
  830. if ( np->dle < 4 && np->dle < S(np->text) ) {
  831. p->next = 0;
  832. return np;
  833. }
  834. __mkd_trim_line(np,4);
  835. p = np;
  836. }
  837. return 0;
  838. }
  839. /*
  840. * add a new (image or link) footnote to the footnote table
  841. */
  842. static Line*
  843. addfootnote(Line *p, MMIOT* f)
  844. {
  845. int j, i;
  846. int c;
  847. Line *np = p->next;
  848. Footnote *foot = &EXPAND(f->footnotes->note);
  849. CREATE(foot->tag);
  850. CREATE(foot->link);
  851. CREATE(foot->title);
  852. foot->text = 0;
  853. foot->fn_flags = foot->height = foot->width = 0;
  854. /* keep the footnote label */
  855. for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
  856. EXPAND(foot->tag) = T(p->text)[j];
  857. EXPAND(foot->tag) = 0;
  858. S(foot->tag)--;
  859. /* consume the closing ]: */
  860. j = nextnonblank(p, j+2);
  861. if ( is_flag_set(&(f->flags), MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
  862. /* markdown extra footnote: All indented lines past this point;
  863. * the first line includes the footnote reference, so we need to
  864. * snip that out as we go.
  865. */
  866. foot->fn_flags |= EXTRA_FOOTNOTE;
  867. __mkd_trim_line(p,j);
  868. np = extrablock(p);
  869. foot->text = compile(p, 0, f);
  870. return np;
  871. }
  872. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  873. EXPAND(foot->link) = T(p->text)[j++];
  874. EXPAND(foot->link) = 0;
  875. S(foot->link)--;
  876. j = nextnonblank(p,j);
  877. if ( T(p->text)[j] == '=' ) {
  878. sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
  879. j = nextblank(p, j);
  880. j = nextnonblank(p,j);
  881. }
  882. if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
  883. ___mkd_freeLine(p);
  884. p = np;
  885. np = p->next;
  886. j = p->dle;
  887. }
  888. if ( (c = tgood(T(p->text)[j])) ) {
  889. /* Try to take the rest of the line as a comment; read to
  890. * EOL, then shrink the string back to before the final
  891. * quote.
  892. */
  893. ++j; /* skip leading quote */
  894. while ( j < S(p->text) )
  895. EXPAND(foot->title) = T(p->text)[j++];
  896. while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
  897. --S(foot->title);
  898. if ( S(foot->title) ) /* skip trailing quote */
  899. --S(foot->title);
  900. EXPAND(foot->title) = 0;
  901. --S(foot->title);
  902. }
  903. ___mkd_freeLine(p);
  904. return np;
  905. }
  906. /*
  907. * allocate a paragraph header, link it to the
  908. * tail of the current document
  909. */
  910. static Paragraph *
  911. Pp(ParagraphRoot *d, Line *ptr, int typ)
  912. {
  913. Paragraph *ret = calloc(sizeof *ret, 1);
  914. ret->text = ptr;
  915. ret->typ = typ;
  916. return ATTACH(*d, ret);
  917. }
  918. static Line*
  919. consume(Line *ptr, int *eaten)
  920. {
  921. Line *next;
  922. int blanks=0;
  923. for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
  924. next = ptr->next;
  925. ___mkd_freeLine(ptr);
  926. }
  927. if ( ptr ) *eaten = blanks;
  928. return ptr;
  929. }
  930. typedef ANCHOR(Line) Cache;
  931. static void
  932. uncache(Cache *cache, ParagraphRoot *d, MMIOT *f)
  933. {
  934. Paragraph *p;
  935. if ( T(*cache) ) {
  936. E(*cache)->next = 0;
  937. p = Pp(d, 0, SOURCE);
  938. p->down = compile(T(*cache), 1, f);
  939. T(*cache) = E(*cache) = 0;
  940. }
  941. }
  942. /*
  943. * top-level compilation; break the document into
  944. * style, html, and source blocks with footnote links
  945. * weeded out.
  946. */
  947. static Paragraph *
  948. compile_document(Line *ptr, MMIOT *f)
  949. {
  950. ParagraphRoot d = { 0, 0 };
  951. Cache source = { 0, 0 };
  952. Paragraph *p = 0;
  953. struct kw *tag;
  954. int eaten, unclosed;
  955. int previous_was_break = 1;
  956. while ( ptr ) {
  957. if ( !is_flag_set(&(f->flags), MKD_NOHTML) && (tag = isopentag(ptr)) ) {
  958. int blocktype;
  959. /* If we encounter a html/style block, compile and save all
  960. * of the cached source BEFORE processing the html/style.
  961. */
  962. uncache(&source, &d, f);
  963. if (is_flag_set(&(f->flags), MKD_NOSTYLE) )
  964. blocktype = HTML;
  965. else
  966. blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
  967. p = Pp(&d, ptr, blocktype);
  968. ptr = htmlblock(p, tag, &unclosed);
  969. if ( unclosed ) {
  970. p->typ = SOURCE;
  971. p->down = compile(p->text, 1, f);
  972. p->text = 0;
  973. }
  974. previous_was_break = 1;
  975. }
  976. else if ( isfootnote(ptr) ) {
  977. /* footnotes, like cats, sleep anywhere; pull them
  978. * out of the input stream and file them away for
  979. * later processing
  980. */
  981. ptr = consume(addfootnote(ptr, f), &eaten);
  982. previous_was_break = 1;
  983. }
  984. else if ( previous_was_break && iscodefence(ptr,3,0,&(f->flags)) ) {
  985. uncache(&source, &d, f);
  986. if ( !fencedcodeblock(&d, &ptr, &(f->flags)) ) /* just source */
  987. goto attach;
  988. }
  989. else {
  990. attach:
  991. /* source; cache it up to wait for eof or the
  992. * next html/style block
  993. */
  994. ATTACH(source,ptr);
  995. previous_was_break = blankline(ptr);
  996. ptr = ptr->next;
  997. }
  998. }
  999. /* if there's any cached source at EOF, compile
  1000. * it now.
  1001. */
  1002. uncache(&source, &d, f);
  1003. return T(d);
  1004. }
  1005. static int
  1006. first_nonblank_before(Line *j, int dle)
  1007. {
  1008. return (j->dle < dle) ? j->dle : dle;
  1009. }
  1010. static int
  1011. actually_a_table(MMIOT *f, Line *pp)
  1012. {
  1013. Line *r;
  1014. int j;
  1015. int c;
  1016. /* tables need to be turned on */
  1017. if ( is_flag_set(&(f->flags), MKD_NOTABLES) )
  1018. return 0;
  1019. /* tables need three lines */
  1020. if ( !(pp && pp->next && pp->next->next) ) {
  1021. return 0;
  1022. }
  1023. /* all lines must contain |'s */
  1024. for (r = pp; r; r = r->next )
  1025. if ( !(r->line_flags & PIPECHAR) ) {
  1026. return 0;
  1027. }
  1028. /* if the header has a leading |, all lines must have leading |'s */
  1029. if ( T(pp->text)[pp->dle] == '|' ) {
  1030. for ( r = pp; r; r = r->next )
  1031. if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
  1032. return 0;
  1033. }
  1034. }
  1035. /* second line must be only whitespace, -, |, or : */
  1036. r = pp->next;
  1037. for ( j=r->dle; j < S(r->text); ++j ) {
  1038. c = T(r->text)[j];
  1039. if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
  1040. return 0;
  1041. }
  1042. }
  1043. return 1;
  1044. }
  1045. /*
  1046. * break a collection of markdown input into
  1047. * blocks of lists, code, html, and text to
  1048. * be marked up.
  1049. */
  1050. static Paragraph *
  1051. compile(Line *ptr, int toplevel, MMIOT *f)
  1052. {
  1053. ParagraphRoot d = { 0, 0 };
  1054. Paragraph *p = 0;
  1055. Line *r;
  1056. int para = toplevel;
  1057. int blocks = 0;
  1058. int hdr_type, list_type, list_class, indent;
  1059. ptr = consume(ptr, &para);
  1060. while ( ptr ) {
  1061. if ( iscode(ptr) ) {
  1062. p = Pp(&d, ptr, CODE);
  1063. if ( is_flag_set(&(f->flags), MKD_1_COMPAT) ) {
  1064. /* HORRIBLE STANDARDS KLUDGE: the first line of every block
  1065. * has trailing whitespace trimmed off.
  1066. */
  1067. ___mkd_tidy(&p->text->text);
  1068. }
  1069. ptr = codeblock(p);
  1070. }
  1071. else if ( iscodefence(ptr,3,0,&(f->flags)) && (p=fencedcodeblock(&d, &ptr, &(f->flags))) )
  1072. /* yay, it's already done */ ;
  1073. else if ( ishr(ptr, &(f->flags)) ) {
  1074. p = Pp(&d, 0, HR);
  1075. r = ptr;
  1076. ptr = ptr->next;
  1077. ___mkd_freeLine(r);
  1078. }
  1079. else if ( list_class = islist(ptr, &indent, &(f->flags), &list_type) ) {
  1080. if ( list_class == DL ) {
  1081. p = Pp(&d, ptr, DL);
  1082. ptr = definition_block(p, indent, f, list_type);
  1083. }
  1084. else {
  1085. p = Pp(&d, ptr, list_type);
  1086. ptr = enumerated_block(p, indent, f, list_class);
  1087. }
  1088. }
  1089. else if ( isquote(ptr) ) {
  1090. p = Pp(&d, ptr, QUOTE);
  1091. ptr = quoteblock(p, &(f->flags) );
  1092. p->down = compile(p->text, 1, f);
  1093. p->text = 0;
  1094. }
  1095. else if ( ishdr(ptr, &hdr_type, &(f->flags) ) ) {
  1096. p = Pp(&d, ptr, HDR);
  1097. ptr = headerblock(p, hdr_type);
  1098. }
  1099. else {
  1100. /* either markup or an html block element
  1101. */
  1102. struct kw *tag;
  1103. int unclosed = 1;
  1104. p = Pp(&d, ptr, MARKUP); /* default to regular markup,
  1105. * then check if it's an html
  1106. * block. If it IS an html
  1107. * block, htmlblock() will
  1108. * populate this paragraph &
  1109. * all we need to do is reset
  1110. * the paragraph type to HTML,
  1111. * otherwise the paragraph
  1112. * remains empty and ready for
  1113. * processing with textblock()
  1114. */
  1115. if ( !is_flag_set(&(f->flags), MKD_NOHTML) && (tag = isopentag(ptr)) ) {
  1116. /* possibly an html block
  1117. */
  1118. ptr = htmlblock(p, tag, &unclosed);
  1119. if ( ! unclosed ) {
  1120. p->typ = HTML;
  1121. }
  1122. }
  1123. if ( unclosed ) {
  1124. ptr = textblock(p, toplevel, &(f->flags) );
  1125. /* tables are a special kind of paragraph */
  1126. if ( actually_a_table(f, p->text) )
  1127. p->typ = TABLE;
  1128. }
  1129. }
  1130. if ( (para||toplevel) && !p->align )
  1131. p->align = PARA;
  1132. blocks++;
  1133. para = toplevel || (blocks > 1);
  1134. ptr = consume(ptr, &para);
  1135. if ( para && !p->align )
  1136. p->align = PARA;
  1137. }
  1138. return T(d);
  1139. }
  1140. /*
  1141. * the guts of the markdown() function, ripped out so I can do
  1142. * debugging.
  1143. */
  1144. /*
  1145. * prepare and compile `text`, returning a Paragraph tree.
  1146. */
  1147. int
  1148. mkd_compile(Document *doc, mkd_flag_t* flags)
  1149. {
  1150. if ( !doc )
  1151. return 0;
  1152. if ( doc->compiled ) {
  1153. if ( doc->dirty || DIFFERENT(flags, &doc->ctx->flags) ) {
  1154. doc->compiled = doc->dirty = 0;
  1155. if ( doc->code)
  1156. ___mkd_freeParagraph(doc->code);
  1157. if ( doc->ctx->footnotes )
  1158. ___mkd_freefootnotes(doc->ctx);
  1159. }
  1160. else
  1161. return 1;
  1162. }
  1163. doc->compiled = 1;
  1164. memset(doc->ctx, 0, sizeof(MMIOT) );
  1165. doc->ctx->ref_prefix= doc->ref_prefix;
  1166. doc->ctx->cb = &(doc->cb);
  1167. if (flags)
  1168. COPY_FLAGS(doc->ctx->flags, *flags);
  1169. else
  1170. mkd_init_flags(&doc->ctx->flags);
  1171. CREATE(doc->ctx->in);
  1172. doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
  1173. doc->ctx->footnotes->reference = 0;
  1174. CREATE(doc->ctx->footnotes->note);
  1175. mkd_initialize();
  1176. doc->code = compile_document(T(doc->content), doc->ctx);
  1177. qsort(T(doc->ctx->footnotes->note), S(doc->ctx->footnotes->note),
  1178. sizeof T(doc->ctx->footnotes->note)[0],
  1179. (stfu)__mkd_footsort);
  1180. memset(&doc->content, 0, sizeof doc->content);
  1181. return 1;
  1182. }