md4c.c source code [qtbase/src/3rdparty/md4c/md4c.c]

1	/*
2	* MD4C: Markdown parser for C
3	* (http://github.com/mity/md4c)
4	*
5	* Copyright (c) 2016-2024 Martin Mitáš
6	*
7	* Permission is hereby granted, free of charge, to any person obtaining a
8	* copy of this software and associated documentation files (the "Software"),
9	* to deal in the Software without restriction, including without limitation
10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
11	* and/or sell copies of the Software, and to permit persons to whom the
12	* Software is furnished to do so, subject to the following conditions:
13	*
14	* The above copyright notice and this permission notice shall be included in
15	* all copies or substantial portions of the Software.
16	*
17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23	* IN THE SOFTWARE.
24	*/
25
26	#include "md4c.h"
27
28	#include <limits.h>
29	#include <stdio.h>
30	#include <stdlib.h>
31	#include <string.h>
32
33
34	/*****************************
35	* Miscellaneous Stuff *
36	*****************************/
37
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    /* "inline" only became a keyword with C99 (__STDC_VERSION__ == 199901L).
     * C89/C90 and C95 (199409L) compilers may not understand it, so map it
     * to the compiler-specific spelling, or drop it altogether. */
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif
48
/* Make the UTF-8 support the default. */
#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
    #define MD4C_USE_UTF8
#endif

/* Magic for making wide literals with MD4C_USE_UTF16. */
#ifdef _T
    #undef _T
#endif
#if defined MD4C_USE_UTF16
    #define _T(x)           L##x
#else
    #define _T(x)           x
#endif

/* Misc. macros. */

/* Count of elements of a true array (not of a pointer). The argument is
 * parenthesized before indexing so that an expression like a dereferenced
 * pointer-to-array cast can be passed in safely. */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))

/* Two-step stringification, so that macro arguments (e.g. __LINE__) get
 * expanded before they are turned into a string literal. */
#define STRINGIZE_(x)       #x
#define STRINGIZE(x)        STRINGIZE_(x)

/* Beware: These evaluate their arguments more than once. */
#define MAX(a,b)            ((a) > (b) ? (a) : (b))
#define MIN(a,b)            ((a) < (b) ? (a) : (b))

#ifndef TRUE
    #define TRUE            1
    #define FALSE           0
#endif
77
/* Pass a message to the debug_log() callback, if the application has
 * provided one. The macro expects a variable named 'ctx' in the scope of
 * its use, with accessible members parser.debug_log and userdata. */
#define MD_LOG(msg)                                                     \
    do {                                                                \
        if(ctx->parser.debug_log != NULL)                               \
            ctx->parser.debug_log((msg), ctx->userdata);                \
    } while(0)

#if defined DEBUG
    /* Debug build: A failed assertion is logged via MD_LOG() and then the
     * process is terminated with exit(1). */
    #define MD_ASSERT(cond)                                             \
        do {                                                            \
            if(!(cond)) {                                               \
                MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "            \
                       "Assertion '" STRINGIZE(cond) "' failed.");      \
                exit(1);                                                \
            }                                                           \
        } while(0)

    #define MD_UNREACHABLE()    MD_ASSERT(1 == 0)
#else
    /* Non-debug build: The assertions are only turned into hints for the
     * compiler (where we know how to spell them), or into no-ops. */
    #if defined __GNUC__
        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
    #elif defined _MSC_VER  &&  _MSC_VER > 120
        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
    #else
        #define MD_ASSERT(cond)     do {} while(0)
        #define MD_UNREACHABLE()    do {} while(0)
    #endif
#endif
107
/* For falling through case labels in switch statements. */
#if defined __clang__ && __clang_major__ >= 12
    #define MD_FALLTHROUGH()    __attribute__((fallthrough))
#elif defined __GNUC__ && __GNUC__ >= 7
    #define MD_FALLTHROUGH()    __attribute__((fallthrough))
#else
    #define MD_FALLTHROUGH()    ((void)0)
#endif

/* Suppress "unused parameter" warnings. The argument is parenthesized so
 * that the cast applies to the whole expression passed in. */
#define MD_UNUSED(x)            ((void)(x))
119
120
121	/******************************
122	* Some internal limits *
123	******************************/
124
/* Code span marks are limited to fewer than 32 backticks. This defuses the
 * pathological input with too many openers, each of a different length,
 * whose resolving would otherwise be O(n^2). */
#define CODESPAN_MARK_MAXLEN    32

/* Column count of tables is limited to prevent a quadratic explosion of
 * output. Otherwise a pathological table with thousands of columns and
 * thousands of rows, each row given by as little as a single character per
 * line, would make us "helpfully" fill in all the missing "<td></td>". */
#define TABLE_MAXCOLCOUNT       128
135
136
137	/************************
138	* Internal Types *
139	************************/
140
/* Shorthands for the omnipresent types, to save some typing. */
#define CHAR    MD_CHAR
#define SZ      MD_SIZE
#define OFF     MD_OFFSET

/* Forward declarations of the structures. */
typedef struct MD_MARK_tag      MD_MARK;
typedef struct MD_BLOCK_tag     MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
typedef struct MD_REF_DEF_tag   MD_REF_DEF;
150
151
/* A stack of unresolved openers of one given type, as needed during the
 * analysis of inline marks.
 *
 * The marks on the stack are chained together via MD_MARK::next.
 */
typedef struct MD_MARKSTACK_tag MD_MARKSTACK;
struct MD_MARKSTACK_tag {
    int top;        /* -1 if empty. */
};
160
161	/ Context propagated through all the parsing. /
162	typedef struct MD_CTX_tag MD_CTX;
163	struct MD_CTX_tag {
164	/ Immutable stuff (parameters of md_parse()). /
165	const CHAR* text;
166	SZ size;
167	MD_PARSER parser;
168	void* userdata;
169
170	/ When this is true, it allows some optimizations. /
171	int doc_ends_with_newline;
172
173	/ Helper temporary growing buffer. /
174	CHAR* buffer;
175	unsigned alloc_buffer;
176
177	/ Reference definitions. /
178	MD_REF_DEF* ref_defs;
179	int n_ref_defs;
180	int alloc_ref_defs;
181	void** ref_def_hashtable;
182	int ref_def_hashtable_size;
183
184	/ Stack of inline/span markers.*
185	* This is only used for parsing a single block contents but by storing it
186	* here we may reuse the stack for subsequent blocks; i.e. we have fewer
187	* (re)allocations. */
188	MD_MARK* marks;
189	int n_marks;
190	int alloc_marks;
191
192	#if defined MD4C_USE_UTF16
193	char mark_char_map[`128`];
194	#else
195	char mark_char_map[`256`];
196	#endif
197
198	/ For resolving of inline spans. /
199	MD_MARKSTACK opener_stacks[`16`];
200	#define ASTERISK_OPENERS_oo_mod3_0 (ctx->opener_stacks[0]) /* Opener-only */
201	#define ASTERISK_OPENERS_oo_mod3_1 (ctx->opener_stacks[1])
202	#define ASTERISK_OPENERS_oo_mod3_2 (ctx->opener_stacks[2])
203	#define ASTERISK_OPENERS_oc_mod3_0 (ctx->opener_stacks[3]) /* Both opener and closer candidate */
204	#define ASTERISK_OPENERS_oc_mod3_1 (ctx->opener_stacks[4])
205	#define ASTERISK_OPENERS_oc_mod3_2 (ctx->opener_stacks[5])
206	#define UNDERSCORE_OPENERS_oo_mod3_0 (ctx->opener_stacks[6]) /* Opener-only */
207	#define UNDERSCORE_OPENERS_oo_mod3_1 (ctx->opener_stacks[7])
208	#define UNDERSCORE_OPENERS_oo_mod3_2 (ctx->opener_stacks[8])
209	#define UNDERSCORE_OPENERS_oc_mod3_0 (ctx->opener_stacks[9]) /* Both opener and closer candidate */
210	#define UNDERSCORE_OPENERS_oc_mod3_1 (ctx->opener_stacks[10])
211	#define UNDERSCORE_OPENERS_oc_mod3_2 (ctx->opener_stacks[11])
212	#define TILDE_OPENERS_1 (ctx->opener_stacks[12])
213	#define TILDE_OPENERS_2 (ctx->opener_stacks[13])
214	#define BRACKET_OPENERS (ctx->opener_stacks[14])
215	#define DOLLAR_OPENERS (ctx->opener_stacks[15])
216
217	/ Stack of dummies which need to call free() for pointers stored in them.*
218	* These are constructed during inline parsing and freed after all the block
219	* is processed (i.e. all callbacks referring those strings are called). */
220	MD_MARKSTACK ptr_stack;
221
222	/ For resolving table rows. /
223	int n_table_cell_boundaries;
224	int table_cell_boundaries_head;
225	int table_cell_boundaries_tail;
226
227	/ For resolving links. /
228	int unresolved_link_head;
229	int unresolved_link_tail;
230
231	/ For resolving raw HTML. /
232	OFF html_comment_horizon;
233	OFF html_proc_instr_horizon;
234	OFF html_decl_horizon;
235	OFF html_cdata_horizon;
236
237	/ For block analysis.*
238	* Notes:
239	* -- It holds MD_BLOCK as well as MD_LINE structures. After each
240	* MD_BLOCK, its (multiple) MD_LINE(s) follow.
241	* -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
242	* instead of MD_LINE(s).
243	*/
244	void* block_bytes;
245	MD_BLOCK* current_block;
246	int n_block_bytes;
247	int alloc_block_bytes;
248
249	/ For container block analysis. /
250	MD_CONTAINER* containers;
251	int n_containers;
252	int alloc_containers;
253
254	/ Minimal indentation to call the block "indented code block". /
255	unsigned code_indent_offset;
256
257	/ Contextual info for line analysis. /
258	SZ code_fence_length; / For checking closing fence length. /
259	int html_block_type; / For checking closing raw HTML condition. /
260	int last_line_has_list_loosening_effect;
261	int last_list_item_starts_with_two_blank_lines;
262	};
263
/* Kinds of lines, as classified by the line analysis. */
typedef enum MD_LINETYPE_tag MD_LINETYPE;
enum MD_LINETYPE_tag {
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
};
278
279	typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
280	struct MD_LINE_ANALYSIS_tag {
281	MD_LINETYPE type;
282	unsigned data;
283	int enforce_new_block;
284	OFF beg;
285	OFF end;
286	unsigned indent; / Indentation level. /
287	};
288
289	typedef struct MD_LINE_tag MD_LINE;
290	struct MD_LINE_tag {
291	OFF beg;
292	OFF end;
293	};
294
295	typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
296	struct MD_VERBATIMLINE_tag {
297	OFF beg;
298	OFF end;
299	OFF indent;
300	};
301
302
303	/*****************
304	* Helpers *
305	*****************/
306
/* Character accessors. Both expect a variable 'ctx' in the scope. */
#define CH(off)                 (ctx->text[(off)])
#define STR(off)                (ctx->text + (off))

/* The strchr() flavor corresponding to the character type in use. */
#if defined MD4C_USE_UTF16
    #define md_strchr wcschr
#else
    #define md_strchr strchr
#endif

/* Character classification.
 *
 * The macros with the trailing underscore take a character value.
 * Note we assume ASCII compatibility of code points < 128 here. */
#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
#define ISANYOF3_(ch, ch1, ch2, ch3)    (ISANYOF2_((ch), (ch1), (ch2)) || (ch) == (ch3))
#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))

/* The same classification, but for the character at the given offset of
 * the document text. */
#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
#define ISASCII(off)                    ISASCII_(CH(off))
#define ISBLANK(off)                    ISBLANK_(CH(off))
#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
#define ISCNTRL(off)                    ISCNTRL_(CH(off))
#define ISPUNCT(off)                    ISPUNCT_(CH(off))
#define ISUPPER(off)                    ISUPPER_(CH(off))
#define ISLOWER(off)                    ISLOWER_(CH(off))
#define ISALPHA(off)                    ISALPHA_(CH(off))
#define ISDIGIT(off)                    ISDIGIT_(CH(off))
#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
#define ISALNUM(off)                    ISALNUM_(CH(off))
352
353
354	/ Case insensitive check of string equality. /
355	static inline int
356	md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
357	{
358	OFF i;
359	for(i = `0`; i < n; i++) {
360	CHAR ch1 = s1[i];
361	CHAR ch2 = s2[i];
362
363	if(ISLOWER_(ch1))
364	ch1 += (`'A'`-`'a'`);
365	if(ISLOWER_(ch2))
366	ch2 += (`'A'`-`'a'`);
367	if(ch1 != ch2)
368	return FALSE;
369	}
370	return TRUE;
371	}
372
373	static inline int
374	md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
375	{
376	return memcmp(s1: s1, s2: s2, n: n * sizeof(CHAR)) == `0`;
377	}
378
379	static int
380	md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
381	{
382	OFF off = `0`;
383	int ret = `0`;
384
385	while(`1`) {
386	while(off < size && str[off] != _T(`'\0'`))
387	off++;
388
389	if(off > `0`) {
390	ret = ctx->parser.text(type, str, off, ctx->userdata);
391	if(ret != `0`)
392	return ret;
393
394	str += off;
395	size -= off;
396	off = `0`;
397	}
398
399	if(off >= size)
400	return `0`;
401
402	ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), `1`, ctx->userdata);
403	if(ret != `0`)
404	return ret;
405	off++;
406	}
407	}
408
409
/* The helper macros below share these requirements on the scope where they
 * are used: a variable 'ret' of type int, and a label 'abort' to jump to on
 * a failure. All but MD_CHECK() also expect a variable 'ctx'.
 *
 * All the macro arguments are parenthesized in the expansions, so that any
 * expression (e.g. a conditional one) can be safely passed in. */

/* Evaluate the expression, store its value into 'ret' and abort if it is
 * negative. */
#define MD_CHECK(func)                                                      \
    do {                                                                    \
        ret = (func);                                                       \
        if(ret < 0)                                                         \
            goto abort;                                                     \
    } while(0)


/* Make sure that ctx->buffer is allocated with at least the given size,
 * growing it via realloc() if needed. On an allocation failure, the buffer
 * is left intact, 'ret' is set to -1 and we abort. */
#define MD_TEMP_BUFFER(sz)                                                  \
    do {                                                                    \
        if((sz) > ctx->alloc_buffer) {                                      \
            CHAR* new_buffer;                                               \
            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
                                                                            \
            new_buffer = realloc(ctx->buffer, new_size);                    \
            if(new_buffer == NULL) {                                        \
                MD_LOG("realloc() failed.");                                \
                ret = -1;                                                   \
                goto abort;                                                 \
            }                                                               \
                                                                            \
            ctx->buffer = new_buffer;                                       \
            ctx->alloc_buffer = new_size;                                   \
        }                                                                   \
    } while(0)


/* Wrappers of the parser callbacks. If the callback returns non-zero, the
 * value is kept in 'ret', a message is logged and we abort. */

#define MD_ENTER_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_ENTER_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

/* Call the text() callback, unless the text is empty. */
#define MD_TEXT(type, str, size)                                            \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)

/* The same as MD_TEXT(), but the text goes through
 * md_text_with_null_replacement(), which reports NUL characters separately
 * as MD_TEXT_NULLCHAR. */
#define MD_TEXT_INSECURE(type, str, size)                                   \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)
494
495
496	/ If the offset falls into a gap between line, we return the following*
497	* line. */
498	static const MD_LINE*
499	md_lookup_line(OFF off, const MD_LINE* lines, MD_SIZE n_lines, MD_SIZE* p_line_index)
500	{
501	MD_SIZE lo, hi;
502	MD_SIZE pivot;
503	const MD_LINE* line;
504
505	lo = `0`;
506	hi = n_lines - `1`;
507	while(lo <= hi) {
508	pivot = (lo + hi) / `2`;
509	line = &lines[pivot];
510
511	if(off < line->beg) {
512	if(hi == `0` \|\| lines[hi-`1`].end < off) {
513	if(p_line_index != NULL)
514	*p_line_index = pivot;
515	return line;
516	}
517	hi = pivot - `1`;
518	} else if(off > line->end) {
519	lo = pivot + `1`;
520	} else {
521	if(p_line_index != NULL)
522	*p_line_index = pivot;
523	return line;
524	}
525	}
526
527	return NULL;
528	}
529
530
531	/*************************
532	* Unicode Support *
533	*************************/
534
/* Holder of up to three codepoints. */
typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
struct MD_UNICODE_FOLD_INFO_tag {
    unsigned codepoints[3];
    unsigned n_codepoints;      /* NOTE(review): presumably the count of valid items in codepoints[]; confirm against the users. */
};
540
541
542	#if defined MD4C_USE_UTF16 \|\| defined MD4C_USE_UTF8
/* Binary search in a sorted "map" of codepoints.
 *
 * A map record is either a single codepoint, or a range of consecutive
 * codepoints. The range takes two items in the map: (MIN_CODEPOINT | 0x40000000)
 * followed by (MAX_CODEPOINT | 0x80000000).
 *
 * Returns the index of the matching record in the map (for a range, it is
 * the index of its minimal value); or -1 if the codepoint is not found. */
static int
md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
{
    int lo = 0;
    int hi = (int) map_size - 1;

    while(lo <= hi) {
        const int mid = (lo + hi) / 2;
        int first = mid;
        int last = mid;

        /* If we have hit one end of a range, extend the pivot so that it
         * covers both items of the range. */
        if(map[mid] & 0x40000000)
            last = mid + 1;
        if(map[mid] & 0x80000000)
            first = mid - 1;

        if(codepoint < (map[first] & 0x00ffffff))
            hi = first - 1;
        else if(codepoint > (map[last] & 0x00ffffff))
            lo = last + 1;
        else
            return first;
    }

    return -1;
}
575
/* Check whether the codepoint is a Unicode whitespace.
 *
 * Returns non-zero if it is, zero otherwise. */
static int
md_is_unicode_whitespace__(unsigned codepoint)
{
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py) */
    static const unsigned WHITESPACE_MAP[] = {
        S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
    };
#undef R
#undef S

    /* Only the codepoints above the ASCII range need the lookup in the map. */
    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);

    /* The ASCII codepoints are the most frequently used ones, and the
     * CommonMark specification requests a few more in this range. */
    return ISWHITESPACE_(codepoint);
}
596
/* Check whether the codepoint is a Unicode punctuation character.
 *
 * Returns non-zero if it is, zero otherwise. */
static int
md_is_unicode_punct__(unsigned codepoint)
{
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    /* Unicode general "P" and "S" categories.
     * (generated by scripts/build_punct_map.py) */
    static const unsigned PUNCT_MAP[] = {
        R(0x0021,0x002f), R(0x003a,0x0040), R(0x005b,0x0060), R(0x007b,0x007e), R(0x00a1,0x00a9),
        R(0x00ab,0x00ac), R(0x00ae,0x00b1), S(0x00b4), R(0x00b6,0x00b8), S(0x00bb), S(0x00bf), S(0x00d7),
        S(0x00f7), R(0x02c2,0x02c5), R(0x02d2,0x02df), R(0x02e5,0x02eb), S(0x02ed), R(0x02ef,0x02ff), S(0x0375),
        S(0x037e), R(0x0384,0x0385), S(0x0387), S(0x03f6), S(0x0482), R(0x055a,0x055f), R(0x0589,0x058a),
        R(0x058d,0x058f), S(0x05be), S(0x05c0), S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0606,0x060f),
        S(0x061b), R(0x061d,0x061f), R(0x066a,0x066d), S(0x06d4), S(0x06de), S(0x06e9), R(0x06fd,0x06fe),
        R(0x0700,0x070d), R(0x07f6,0x07f9), R(0x07fe,0x07ff), R(0x0830,0x083e), S(0x085e), S(0x0888),
        R(0x0964,0x0965), S(0x0970), R(0x09f2,0x09f3), R(0x09fa,0x09fb), S(0x09fd), S(0x0a76), R(0x0af0,0x0af1),
        S(0x0b70), R(0x0bf3,0x0bfa), S(0x0c77), S(0x0c7f), S(0x0c84), S(0x0d4f), S(0x0d79), S(0x0df4), S(0x0e3f),
        S(0x0e4f), R(0x0e5a,0x0e5b), R(0x0f01,0x0f17), R(0x0f1a,0x0f1f), S(0x0f34), S(0x0f36), S(0x0f38),
        R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fbe,0x0fc5), R(0x0fc7,0x0fcc), R(0x0fce,0x0fda), R(0x104a,0x104f),
        R(0x109e,0x109f), S(0x10fb), R(0x1360,0x1368), R(0x1390,0x1399), S(0x1400), R(0x166d,0x166e),
        R(0x169b,0x169c), R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17db),
        R(0x1800,0x180a), S(0x1940), R(0x1944,0x1945), R(0x19de,0x19ff), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6),
        R(0x1aa8,0x1aad), R(0x1b5a,0x1b6a), R(0x1b74,0x1b7e), R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f),
        R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), S(0x1fbd), R(0x1fbf,0x1fc1), R(0x1fcd,0x1fcf),
        R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), R(0x2010,0x2027), R(0x2030,0x205e),
        R(0x207a,0x207e), R(0x208a,0x208e), R(0x20a0,0x20c0), R(0x2100,0x2101), R(0x2103,0x2106),
        R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129),
        S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b),
        R(0x2190,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9), R(0x2500,0x2775), R(0x2794,0x2b73),
        R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
        R(0x2e00,0x2e2e), R(0x2e30,0x2e5d), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5),
        R(0x2ff0,0x2fff), R(0x3001,0x3004), R(0x3008,0x3020), S(0x3030), R(0x3036,0x3037), R(0x303d,0x303f),
        R(0x309b,0x309c), S(0x30a0), S(0x30fb), R(0x3190,0x3191), R(0x3196,0x319f), R(0x31c0,0x31e3), S(0x31ef),
        R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), R(0x3260,0x327f), R(0x328a,0x32b0), R(0x32c0,0x33ff),
        R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
        R(0xa6f2,0xa6f7), R(0xa700,0xa716), R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b),
        R(0xa836,0xa839), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
        S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaa77,0xaa79), R(0xaade,0xaadf),
        R(0xaaf0,0xaaf1), S(0xab5b), R(0xab6a,0xab6b), S(0xabeb), S(0xfb29), R(0xfbb2,0xfbc2), R(0xfd3e,0xfd4f),
        S(0xfdcf), R(0xfdfc,0xfdff), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe66), R(0xfe68,0xfe6b),
        R(0xff01,0xff0f), R(0xff1a,0xff20), R(0xff3b,0xff40), R(0xff5b,0xff65), R(0xffe0,0xffe6),
        R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10100,0x10102), R(0x10137,0x1013f), R(0x10179,0x10189),
        R(0x1018c,0x1018e), R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), S(0x1039f), S(0x103d0),
        S(0x1056f), S(0x10857), R(0x10877,0x10878), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
        S(0x10ac8), R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
        R(0x10f86,0x10f89), R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143),
        R(0x11174,0x11175), R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d),
        S(0x112a9), R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7),
        R(0x11641,0x11643), R(0x11660,0x1166c), S(0x116b9), R(0x1173c,0x1173f), S(0x1183b), R(0x11944,0x11946),
        S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11b00,0x11b09),
        R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), R(0x11f43,0x11f4f), R(0x11fd5,0x11ff1),
        S(0x11fff), R(0x12470,0x12474), R(0x12ff1,0x12ff2), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3f),
        R(0x16b44,0x16b45), R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9c), S(0x1bc9f), R(0x1cf50,0x1cfc3),
        R(0x1d000,0x1d0f5), R(0x1d100,0x1d126), R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184),
        R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1ea), R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1),
        S(0x1d6db), S(0x1d6fb), S(0x1d715), S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9),
        S(0x1d7c3), R(0x1d800,0x1d9ff), R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83),
        R(0x1da85,0x1da8b), S(0x1e14f), S(0x1e2ff), R(0x1e95e,0x1e95f), S(0x1ecac), S(0x1ecb0), S(0x1ed2e),
        R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093), R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf),
        R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad), R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b),
        R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265), R(0x1f300,0x1f6d7), R(0x1f6dc,0x1f6ec),
        R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f776), R(0x1f77b,0x1f7d9), R(0x1f7e0,0x1f7eb), S(0x1f7f0),
        R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887), R(0x1f890,0x1f8ad),
        R(0x1f8b0,0x1f8b1), R(0x1f900,0x1fa53), R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa7c), R(0x1fa80,0x1fa88),
        R(0x1fa90,0x1fabd), R(0x1fabf,0x1fac5), R(0x1face,0x1fadb), R(0x1fae0,0x1fae8), R(0x1faf0,0x1faf8),
        R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca)
    };
#undef R
#undef S

    /* Only the codepoints above the ASCII range need the lookup in the map. */
    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);

    /* The ASCII codepoints are the most frequently used ones, and the
     * CommonMark specification requests a few more in this range. */
    return ISPUNCT_(codepoint);
}
674
675	static void
676	md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
677	{
678	#define R(cp_min, cp_max) ((cp_min) \| 0x40000000), ((cp_max) \| 0x80000000)
679	#define S(cp) (cp)
680	/ Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.*
681	* (generated by scripts/build_folding_map.py) */
682	static const unsigned FOLD_MAP_1[] = {
683	R(`0x0041`,`0x005a`), S(`0x00b5`), R(`0x00c0`,`0x00d6`), R(`0x00d8`,`0x00de`), R(`0x0100`,`0x012e`), R(`0x0132`,`0x0136`),
684	R(`0x0139`,`0x0147`), R(`0x014a`,`0x0176`), S(`0x0178`), R(`0x0179`,`0x017d`), S(`0x017f`), S(`0x0181`), S(`0x0182`),
685	S(`0x0184`), S(`0x0186`), S(`0x0187`), S(`0x0189`), S(`0x018a`), S(`0x018b`), S(`0x018e`), S(`0x018f`), S(`0x0190`),
686	S(`0x0191`), S(`0x0193`), S(`0x0194`), S(`0x0196`), S(`0x0197`), S(`0x0198`), S(`0x019c`), S(`0x019d`), S(`0x019f`),
687	R(`0x01a0`,`0x01a4`), S(`0x01a6`), S(`0x01a7`), S(`0x01a9`), S(`0x01ac`), S(`0x01ae`), S(`0x01af`), S(`0x01b1`), S(`0x01b2`),
688	S(`0x01b3`), S(`0x01b5`), S(`0x01b7`), S(`0x01b8`), S(`0x01bc`), S(`0x01c4`), S(`0x01c5`), S(`0x01c7`), S(`0x01c8`),
689	S(`0x01ca`), R(`0x01cb`,`0x01db`), R(`0x01de`,`0x01ee`), S(`0x01f1`), S(`0x01f2`), S(`0x01f4`), S(`0x01f6`), S(`0x01f7`),
690	R(`0x01f8`,`0x021e`), S(`0x0220`), R(`0x0222`,`0x0232`), S(`0x023a`), S(`0x023b`), S(`0x023d`), S(`0x023e`), S(`0x0241`),
691	S(`0x0243`), S(`0x0244`), S(`0x0245`), R(`0x0246`,`0x024e`), S(`0x0345`), S(`0x0370`), S(`0x0372`), S(`0x0376`), S(`0x037f`),
692	S(`0x0386`), R(`0x0388`,`0x038a`), S(`0x038c`), S(`0x038e`), S(`0x038f`), R(`0x0391`,`0x03a1`), R(`0x03a3`,`0x03ab`),
693	S(`0x03c2`), S(`0x03cf`), S(`0x03d0`), S(`0x03d1`), S(`0x03d5`), S(`0x03d6`), R(`0x03d8`,`0x03ee`), S(`0x03f0`), S(`0x03f1`),
694	S(`0x03f4`), S(`0x03f5`), S(`0x03f7`), S(`0x03f9`), S(`0x03fa`), R(`0x03fd`,`0x03ff`), R(`0x0400`,`0x040f`),
695	R(`0x0410`,`0x042f`), R(`0x0460`,`0x0480`), R(`0x048a`,`0x04be`), S(`0x04c0`), R(`0x04c1`,`0x04cd`), R(`0x04d0`,`0x052e`),
696	R(`0x0531`,`0x0556`), R(`0x10a0`,`0x10c5`), S(`0x10c7`), S(`0x10cd`), R(`0x13f8`,`0x13fd`), S(`0x1c80`), S(`0x1c81`),
697	S(`0x1c82`), S(`0x1c83`), S(`0x1c84`), S(`0x1c85`), S(`0x1c86`), S(`0x1c87`), S(`0x1c88`), R(`0x1c90`,`0x1cba`),
698	R(`0x1cbd`,`0x1cbf`), R(`0x1e00`,`0x1e94`), S(`0x1e9b`), R(`0x1ea0`,`0x1efe`), R(`0x1f08`,`0x1f0f`), R(`0x1f18`,`0x1f1d`),
699	R(`0x1f28`,`0x1f2f`), R(`0x1f38`,`0x1f3f`), R(`0x1f48`,`0x1f4d`), S(`0x1f59`), S(`0x1f5b`), S(`0x1f5d`), S(`0x1f5f`),
700	R(`0x1f68`,`0x1f6f`), S(`0x1fb8`), S(`0x1fb9`), S(`0x1fba`), S(`0x1fbb`), S(`0x1fbe`), R(`0x1fc8`,`0x1fcb`), S(`0x1fd8`),
701	S(`0x1fd9`), S(`0x1fda`), S(`0x1fdb`), S(`0x1fe8`), S(`0x1fe9`), S(`0x1fea`), S(`0x1feb`), S(`0x1fec`), S(`0x1ff8`),
702	S(`0x1ff9`), S(`0x1ffa`), S(`0x1ffb`), S(`0x2126`), S(`0x212a`), S(`0x212b`), S(`0x2132`), R(`0x2160`,`0x216f`), S(`0x2183`),
703	R(`0x24b6`,`0x24cf`), R(`0x2c00`,`0x2c2f`), S(`0x2c60`), S(`0x2c62`), S(`0x2c63`), S(`0x2c64`), R(`0x2c67`,`0x2c6b`),
704	S(`0x2c6d`), S(`0x2c6e`), S(`0x2c6f`), S(`0x2c70`), S(`0x2c72`), S(`0x2c75`), S(`0x2c7e`), S(`0x2c7f`), R(`0x2c80`,`0x2ce2`),
705	S(`0x2ceb`), S(`0x2ced`), S(`0x2cf2`), R(`0xa640`,`0xa66c`), R(`0xa680`,`0xa69a`), R(`0xa722`,`0xa72e`), R(`0xa732`,`0xa76e`),
706	S(`0xa779`), S(`0xa77b`), S(`0xa77d`), R(`0xa77e`,`0xa786`), S(`0xa78b`), S(`0xa78d`), S(`0xa790`), S(`0xa792`),
707	R(`0xa796`,`0xa7a8`), S(`0xa7aa`), S(`0xa7ab`), S(`0xa7ac`), S(`0xa7ad`), S(`0xa7ae`), S(`0xa7b0`), S(`0xa7b1`), S(`0xa7b2`),
708	S(`0xa7b3`), R(`0xa7b4`,`0xa7c2`), S(`0xa7c4`), S(`0xa7c5`), S(`0xa7c6`), S(`0xa7c7`), S(`0xa7c9`), S(`0xa7d0`), S(`0xa7d6`),
709	S(`0xa7d8`), S(`0xa7f5`), R(`0xab70`,`0xabbf`), R(`0xff21`,`0xff3a`), R(`0x10400`,`0x10427`), R(`0x104b0`,`0x104d3`),
710	R(`0x10570`,`0x1057a`), R(`0x1057c`,`0x1058a`), R(`0x1058c`,`0x10592`), S(`0x10594`), S(`0x10595`), R(`0x10c80`,`0x10cb2`),
711	R(`0x118a0`,`0x118bf`), R(`0x16e40`,`0x16e5f`), R(`0x1e900`,`0x1e921`)
712	};
713	static const unsigned FOLD_MAP_1_DATA[] = {
714	`0x0061`, `0x007a`, `0x03bc`, `0x00e0`, `0x00f6`, `0x00f8`, `0x00fe`, `0x0101`, `0x012f`, `0x0133`, `0x0137`, `0x013a`, `0x0148`,
715	`0x014b`, `0x0177`, `0x00ff`, `0x017a`, `0x017e`, `0x0073`, `0x0253`, `0x0183`, `0x0185`, `0x0254`, `0x0188`, `0x0256`, `0x0257`,
716	`0x018c`, `0x01dd`, `0x0259`, `0x025b`, `0x0192`, `0x0260`, `0x0263`, `0x0269`, `0x0268`, `0x0199`, `0x026f`, `0x0272`, `0x0275`,
717	`0x01a1`, `0x01a5`, `0x0280`, `0x01a8`, `0x0283`, `0x01ad`, `0x0288`, `0x01b0`, `0x028a`, `0x028b`, `0x01b4`, `0x01b6`, `0x0292`,
718	`0x01b9`, `0x01bd`, `0x01c6`, `0x01c6`, `0x01c9`, `0x01c9`, `0x01cc`, `0x01cc`, `0x01dc`, `0x01df`, `0x01ef`, `0x01f3`, `0x01f3`,
719	`0x01f5`, `0x0195`, `0x01bf`, `0x01f9`, `0x021f`, `0x019e`, `0x0223`, `0x0233`, `0x2c65`, `0x023c`, `0x019a`, `0x2c66`, `0x0242`,
720	`0x0180`, `0x0289`, `0x028c`, `0x0247`, `0x024f`, `0x03b9`, `0x0371`, `0x0373`, `0x0377`, `0x03f3`, `0x03ac`, `0x03ad`, `0x03af`,
721	`0x03cc`, `0x03cd`, `0x03ce`, `0x03b1`, `0x03c1`, `0x03c3`, `0x03cb`, `0x03c3`, `0x03d7`, `0x03b2`, `0x03b8`, `0x03c6`, `0x03c0`,
722	`0x03d9`, `0x03ef`, `0x03ba`, `0x03c1`, `0x03b8`, `0x03b5`, `0x03f8`, `0x03f2`, `0x03fb`, `0x037b`, `0x037d`, `0x0450`, `0x045f`,
723	`0x0430`, `0x044f`, `0x0461`, `0x0481`, `0x048b`, `0x04bf`, `0x04cf`, `0x04c2`, `0x04ce`, `0x04d1`, `0x052f`, `0x0561`, `0x0586`,
724	`0x2d00`, `0x2d25`, `0x2d27`, `0x2d2d`, `0x13f0`, `0x13f5`, `0x0432`, `0x0434`, `0x043e`, `0x0441`, `0x0442`, `0x0442`, `0x044a`,
725	`0x0463`, `0xa64b`, `0x10d0`, `0x10fa`, `0x10fd`, `0x10ff`, `0x1e01`, `0x1e95`, `0x1e61`, `0x1ea1`, `0x1eff`, `0x1f00`, `0x1f07`,
726	`0x1f10`, `0x1f15`, `0x1f20`, `0x1f27`, `0x1f30`, `0x1f37`, `0x1f40`, `0x1f45`, `0x1f51`, `0x1f53`, `0x1f55`, `0x1f57`, `0x1f60`,
727	`0x1f67`, `0x1fb0`, `0x1fb1`, `0x1f70`, `0x1f71`, `0x03b9`, `0x1f72`, `0x1f75`, `0x1fd0`, `0x1fd1`, `0x1f76`, `0x1f77`, `0x1fe0`,
728	`0x1fe1`, `0x1f7a`, `0x1f7b`, `0x1fe5`, `0x1f78`, `0x1f79`, `0x1f7c`, `0x1f7d`, `0x03c9`, `0x006b`, `0x00e5`, `0x214e`, `0x2170`,
729	`0x217f`, `0x2184`, `0x24d0`, `0x24e9`, `0x2c30`, `0x2c5f`, `0x2c61`, `0x026b`, `0x1d7d`, `0x027d`, `0x2c68`, `0x2c6c`, `0x0251`,
730	`0x0271`, `0x0250`, `0x0252`, `0x2c73`, `0x2c76`, `0x023f`, `0x0240`, `0x2c81`, `0x2ce3`, `0x2cec`, `0x2cee`, `0x2cf3`, `0xa641`,
731	`0xa66d`, `0xa681`, `0xa69b`, `0xa723`, `0xa72f`, `0xa733`, `0xa76f`, `0xa77a`, `0xa77c`, `0x1d79`, `0xa77f`, `0xa787`, `0xa78c`,
732	`0x0265`, `0xa791`, `0xa793`, `0xa797`, `0xa7a9`, `0x0266`, `0x025c`, `0x0261`, `0x026c`, `0x026a`, `0x029e`, `0x0287`, `0x029d`,
733	`0xab53`, `0xa7b5`, `0xa7c3`, `0xa794`, `0x0282`, `0x1d8e`, `0xa7c8`, `0xa7ca`, `0xa7d1`, `0xa7d7`, `0xa7d9`, `0xa7f6`, `0x13a0`,
734	`0x13ef`, `0xff41`, `0xff5a`, `0x10428`, `0x1044f`, `0x104d8`, `0x104fb`, `0x10597`, `0x105a1`, `0x105a3`, `0x105b1`, `0x105b3`,
735	`0x105b9`, `0x105bb`, `0x105bc`, `0x10cc0`, `0x10cf2`, `0x118c0`, `0x118df`, `0x16e60`, `0x16e7f`, `0x1e922`, `0x1e943`
736	};
737	static const unsigned FOLD_MAP_2[] = {
738	S(`0x00df`), S(`0x0130`), S(`0x0149`), S(`0x01f0`), S(`0x0587`), S(`0x1e96`), S(`0x1e97`), S(`0x1e98`), S(`0x1e99`),
739	S(`0x1e9a`), S(`0x1e9e`), S(`0x1f50`), R(`0x1f80`,`0x1f87`), R(`0x1f88`,`0x1f8f`), R(`0x1f90`,`0x1f97`), R(`0x1f98`,`0x1f9f`),
740	R(`0x1fa0`,`0x1fa7`), R(`0x1fa8`,`0x1faf`), S(`0x1fb2`), S(`0x1fb3`), S(`0x1fb4`), S(`0x1fb6`), S(`0x1fbc`), S(`0x1fc2`),
741	S(`0x1fc3`), S(`0x1fc4`), S(`0x1fc6`), S(`0x1fcc`), S(`0x1fd6`), S(`0x1fe4`), S(`0x1fe6`), S(`0x1ff2`), S(`0x1ff3`),
742	S(`0x1ff4`), S(`0x1ff6`), S(`0x1ffc`), S(`0xfb00`), S(`0xfb01`), S(`0xfb02`), S(`0xfb05`), S(`0xfb06`), S(`0xfb13`),
743	S(`0xfb14`), S(`0xfb15`), S(`0xfb16`), S(`0xfb17`)
744	};
745	static const unsigned FOLD_MAP_2_DATA[] = {
746	`0x0073`,`0x0073`, `0x0069`,`0x0307`, `0x02bc`,`0x006e`, `0x006a`,`0x030c`, `0x0565`,`0x0582`, `0x0068`,`0x0331`, `0x0074`,`0x0308`,
747	`0x0077`,`0x030a`, `0x0079`,`0x030a`, `0x0061`,`0x02be`, `0x0073`,`0x0073`, `0x03c5`,`0x0313`, `0x1f00`,`0x03b9`, `0x1f07`,`0x03b9`,
748	`0x1f00`,`0x03b9`, `0x1f07`,`0x03b9`, `0x1f20`,`0x03b9`, `0x1f27`,`0x03b9`, `0x1f20`,`0x03b9`, `0x1f27`,`0x03b9`, `0x1f60`,`0x03b9`,
749	`0x1f67`,`0x03b9`, `0x1f60`,`0x03b9`, `0x1f67`,`0x03b9`, `0x1f70`,`0x03b9`, `0x03b1`,`0x03b9`, `0x03ac`,`0x03b9`, `0x03b1`,`0x0342`,
750	`0x03b1`,`0x03b9`, `0x1f74`,`0x03b9`, `0x03b7`,`0x03b9`, `0x03ae`,`0x03b9`, `0x03b7`,`0x0342`, `0x03b7`,`0x03b9`, `0x03b9`,`0x0342`,
751	`0x03c1`,`0x0313`, `0x03c5`,`0x0342`, `0x1f7c`,`0x03b9`, `0x03c9`,`0x03b9`, `0x03ce`,`0x03b9`, `0x03c9`,`0x0342`, `0x03c9`,`0x03b9`,
752	`0x0066`,`0x0066`, `0x0066`,`0x0069`, `0x0066`,`0x006c`, `0x0073`,`0x0074`, `0x0073`,`0x0074`, `0x0574`,`0x0576`, `0x0574`,`0x0565`,
753	`0x0574`,`0x056b`, `0x057e`,`0x0576`, `0x0574`,`0x056d`
754	};
755	static const unsigned FOLD_MAP_3[] = {
756	S(`0x0390`), S(`0x03b0`), S(`0x1f52`), S(`0x1f54`), S(`0x1f56`), S(`0x1fb7`), S(`0x1fc7`), S(`0x1fd2`), S(`0x1fd3`),
757	S(`0x1fd7`), S(`0x1fe2`), S(`0x1fe3`), S(`0x1fe7`), S(`0x1ff7`), S(`0xfb03`), S(`0xfb04`)
758	};
759	static const unsigned FOLD_MAP_3_DATA[] = {
760	`0x03b9`,`0x0308`,`0x0301`, `0x03c5`,`0x0308`,`0x0301`, `0x03c5`,`0x0313`,`0x0300`, `0x03c5`,`0x0313`,`0x0301`,
761	`0x03c5`,`0x0313`,`0x0342`, `0x03b1`,`0x0342`,`0x03b9`, `0x03b7`,`0x0342`,`0x03b9`, `0x03b9`,`0x0308`,`0x0300`,
762	`0x03b9`,`0x0308`,`0x0301`, `0x03b9`,`0x0308`,`0x0342`, `0x03c5`,`0x0308`,`0x0300`, `0x03c5`,`0x0308`,`0x0301`,
763	`0x03c5`,`0x0308`,`0x0342`, `0x03c9`,`0x0342`,`0x03b9`, `0x0066`,`0x0066`,`0x0069`, `0x0066`,`0x0066`,`0x006c`
764	};
765	#undef R
766	#undef S
767	static const struct {
768	const unsigned* map;
769	const unsigned* data;
770	size_t map_size;
771	unsigned n_codepoints;
772	} FOLD_MAP_LIST[] = {
773	{ FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), `1` },
774	{ FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), `2` },
775	{ FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), `3` }
776	};
777
778	int i;
779
780	/ Fast path for ASCII characters. /
781	if(codepoint <= `0x7f`) {
782	info->codepoints[`0`] = codepoint;
783	if(ISUPPER_(codepoint))
784	info->codepoints[`0`] += `'a'` - `'A'`;
785	info->n_codepoints = `1`;
786	return;
787	}
788
789	/ Try to locate the codepoint in any of the maps. /
790	for(i = `0`; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
791	int index;
792
793	index = md_unicode_bsearch__(codepoint, map: FOLD_MAP_LIST[i].map, map_size: FOLD_MAP_LIST[i].map_size);
794	if(index >= `0`) {
795	/ Found the mapping. /
796	unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
797	const unsigned* map = FOLD_MAP_LIST[i].map;
798	const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
799
800	memcpy(dest: info->codepoints, src: codepoints, n: sizeof(unsigned) * n_codepoints);
801	info->n_codepoints = n_codepoints;
802
803	if(FOLD_MAP_LIST[i].map[index] != codepoint) {
804	/ The found mapping maps whole range of codepoints,*
805	* i.e. we have to offset info->codepoints[0] accordingly. */
806	if((map[index] & `0x00ffffff`)+`1` == codepoints[`0`]) {
807	/ Alternating type of the range. /
808	info->codepoints[`0`] = codepoint + ((codepoint & `0x1`) == (map[index] & `0x1`) ? `1` : `0`);
809	} else {
810	/ Range to range kind of mapping. /
811	info->codepoints[`0`] += (codepoint - (map[index] & `0x00ffffff`));
812	}
813	}
814
815	return;
816	}
817	}
818
819	/ No mapping found. Map the codepoint to itself. /
820	info->codepoints[`0`] = codepoint;
821	info->n_codepoints = `1`;
822	}
823	#endif
824
825
826	#if defined MD4C_USE_UTF16
827	#define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800)
828	#define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00)
829	#define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) \| (((unsigned)(lo) & 0x3ff) << 0)))
830
831	static unsigned
832	md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
833	{
834	if(IS_UTF16_SURROGATE_HI(str[`0`])) {
835	if(`1` < str_size && IS_UTF16_SURROGATE_LO(str[`1`])) {
836	if(p_size != NULL)
837	*p_size = `2`;
838	return UTF16_DECODE_SURROGATE(str[`0`], str[`1`]);
839	}
840	}
841
842	if(p_size != NULL)
843	*p_size = `1`;
844	return str[`0`];
845	}
846
847	static unsigned
848	md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
849	{
850	if(off > `2` && IS_UTF16_SURROGATE_HI(CH(off-`2`)) && IS_UTF16_SURROGATE_LO(CH(off-`1`)))
851	return UTF16_DECODE_SURROGATE(CH(off-`2`), CH(off-`1`));
852
853	return CH(off);
854	}
855
856	/ No whitespace uses surrogates, so no decoding needed here. /
857	#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
858	#define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(CH(off))
859	#define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(CH((off)-1))
860
861	#define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
862	#define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
863
864	static inline int
865	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
866	{
867	return md_decode_utf16le__(str+off, str_size-off, p_char_size);
868	}
869	#elif defined MD4C_USE_UTF8
870	#define IS_UTF8_LEAD1(byte) ((unsigned char)(byte) <= 0x7f)
871	#define IS_UTF8_LEAD2(byte) (((unsigned char)(byte) & 0xe0) == 0xc0)
872	#define IS_UTF8_LEAD3(byte) (((unsigned char)(byte) & 0xf0) == 0xe0)
873	#define IS_UTF8_LEAD4(byte) (((unsigned char)(byte) & 0xf8) == 0xf0)
874	#define IS_UTF8_TAIL(byte) (((unsigned char)(byte) & 0xc0) == 0x80)
875
876	static unsigned
877	md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
878	{
879	if(!IS_UTF8_LEAD1(str[`0`])) {
880	if(IS_UTF8_LEAD2(str[`0`])) {
881	if(`1` < str_size && IS_UTF8_TAIL(str[`1`])) {
882	if(p_size != NULL)
883	*p_size = `2`;
884
885	return (((unsigned int)str[`0`] & `0x1f`) << `6`) \|
886	(((unsigned int)str[`1`] & `0x3f`) << `0`);
887	}
888	} else if(IS_UTF8_LEAD3(str[`0`])) {
889	if(`2` < str_size && IS_UTF8_TAIL(str[`1`]) && IS_UTF8_TAIL(str[`2`])) {
890	if(p_size != NULL)
891	*p_size = `3`;
892
893	return (((unsigned int)str[`0`] & `0x0f`) << `12`) \|
894	(((unsigned int)str[`1`] & `0x3f`) << `6`) \|
895	(((unsigned int)str[`2`] & `0x3f`) << `0`);
896	}
897	} else if(IS_UTF8_LEAD4(str[`0`])) {
898	if(`3` < str_size && IS_UTF8_TAIL(str[`1`]) && IS_UTF8_TAIL(str[`2`]) && IS_UTF8_TAIL(str[`3`])) {
899	if(p_size != NULL)
900	*p_size = `4`;
901
902	return (((unsigned int)str[`0`] & `0x07`) << `18`) \|
903	(((unsigned int)str[`1`] & `0x3f`) << `12`) \|
904	(((unsigned int)str[`2`] & `0x3f`) << `6`) \|
905	(((unsigned int)str[`3`] & `0x3f`) << `0`);
906	}
907	}
908	}
909
910	if(p_size != NULL)
911	*p_size = `1`;
912	return (unsigned) str[`0`];
913	}
914
915	static unsigned
916	md_decode_utf8_before__(MD_CTX* ctx, OFF off)
917	{
918	if(!IS_UTF8_LEAD1(CH(off-`1`))) {
919	if(off > `1` && IS_UTF8_LEAD2(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
920	return (((unsigned int)CH(off-`2`) & `0x1f`) << `6`) \|
921	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
922
923	if(off > `2` && IS_UTF8_LEAD3(CH(off-`3`)) && IS_UTF8_TAIL(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
924	return (((unsigned int)CH(off-`3`) & `0x0f`) << `12`) \|
925	(((unsigned int)CH(off-`2`) & `0x3f`) << `6`) \|
926	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
927
928	if(off > `3` && IS_UTF8_LEAD4(CH(off-`4`)) && IS_UTF8_TAIL(CH(off-`3`)) && IS_UTF8_TAIL(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
929	return (((unsigned int)CH(off-`4`) & `0x07`) << `18`) \|
930	(((unsigned int)CH(off-`3`) & `0x3f`) << `12`) \|
931	(((unsigned int)CH(off-`2`) & `0x3f`) << `6`) \|
932	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
933	}
934
935	return (unsigned) CH(off-`1`);
936	}
937
938	#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
939	#define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
940	#define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
941
942	#define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
943	#define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
944
945	static inline unsigned
946	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
947	{
948	return md_decode_utf8__(str: str+off, str_size: str_size-off, p_size: p_char_size);
949	}
950	#else
951	#define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
952	#define ISUNICODEWHITESPACE(off) ISWHITESPACE(off)
953	#define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1)
954
955	#define ISUNICODEPUNCT(off) ISPUNCT(off)
956	#define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1)
957
958	static inline void
959	md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
960	{
961	info->codepoints[`0`] = codepoint;
962	if(ISUPPER_(codepoint))
963	info->codepoints[`0`] += `'a'` - `'A'`;
964	info->n_codepoints = `1`;
965	}
966
967	static inline unsigned
968	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
969	{
970	*p_size = `1`;
971	return (unsigned) str[off];
972	}
973	#endif
974
975
976	/*************************************
977	* Helper string manipulations *
978	*************************************/
979
980	/ Fill buffer with copy of the string between 'beg' and 'end' but replace any*
981	* line breaks with given replacement character.
982	*
983	* NOTE: Caller is responsible to make sure the buffer is large enough.
984	* (Given the output is always shorter then input, (end - beg) is good idea
985	* what the caller should allocate.)
986	*/
987	static void
988	md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, MD_SIZE n_lines,
989	CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
990	{
991	CHAR* ptr = buffer;
992	int line_index = `0`;
993	OFF off = beg;
994
995	MD_UNUSED(n_lines);
996
997	while(`1`) {
998	const MD_LINE* line = &lines[line_index];
999	OFF line_end = line->end;
1000	if(end < line_end)
1001	line_end = end;
1002
1003	while(off < line_end) {
1004	*ptr = CH(off);
1005	ptr++;
1006	off++;
1007	}
1008
1009	if(off >= end) {
1010	*p_size = (MD_SIZE)(ptr - buffer);
1011	return;
1012	}
1013
1014	*ptr = line_break_replacement_char;
1015	ptr++;
1016
1017	line_index++;
1018	off = lines[line_index].beg;
1019	}
1020	}
1021
1022	/ Wrapper of md_merge_lines() which allocates new buffer for the output string.*
1023	*/
1024	static int
1025	md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, MD_SIZE n_lines,
1026	CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
1027	{
1028	CHAR* buffer;
1029
1030	buffer = (CHAR) malloc(size: sizeof(CHAR) (end - beg));
1031	if(buffer == NULL) {
1032	MD_LOG("malloc() failed.");
1033	return -`1`;
1034	}
1035
1036	md_merge_lines(ctx, beg, end, lines, n_lines,
1037	line_break_replacement_char, buffer, p_size);
1038
1039	*p_str = buffer;
1040	return `0`;
1041	}
1042
1043	static OFF
1044	md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
1045	{
1046	SZ char_size;
1047	unsigned codepoint;
1048
1049	while(off < size) {
1050	codepoint = md_decode_unicode(str: label, off, str_size: size, p_char_size: &char_size);
1051	if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
1052	break;
1053	off += char_size;
1054	}
1055
1056	return off;
1057	}
1058
1059
1060	/******************************
1061	* Recognizing raw HTML *
1062	******************************/
1063
1064	/ md_is_html_tag() may be called when processing inlines (inline raw HTML)*
1065	* or when breaking document to blocks (checking for start of HTML block type 7).
1066	*
1067	* When breaking document to blocks, we do not yet know line boundaries, but
1068	* in that case the whole tag has to live on a single line. We distinguish this
1069	* by n_lines == 0.
1070	*/
1071	static int
1072	md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1073	{
1074	int attr_state;
1075	OFF off = beg;
1076	OFF line_end = (n_lines > `0`) ? lines[`0`].end : ctx->size;
1077	MD_SIZE line_index = `0`;
1078
1079	MD_ASSERT(CH(beg) == _T(`'<'`));
1080
1081	if(off + `1` >= line_end)
1082	return FALSE;
1083	off++;
1084
1085	/ For parsing attributes, we need a little state automaton below.*
1086	* State -1: no attributes are allowed.
1087	* State 0: attribute could follow after some whitespace.
1088	* State 1: after a whitespace (attribute name may follow).
1089	* State 2: after attribute name ('=' MAY follow).
1090	* State 3: after '=' (value specification MUST follow).
1091	* State 41: in middle of unquoted attribute value.
1092	* State 42: in middle of single-quoted attribute value.
1093	* State 43: in middle of double-quoted attribute value.
1094	*/
1095	attr_state = `0`;
1096
1097	if(CH(off) == _T(`'/'`)) {
1098	/ Closer tag "</ ... >". No attributes may be present. /
1099	attr_state = -`1`;
1100	off++;
1101	}
1102
1103	/ Tag name /
1104	if(off >= line_end \|\| !ISALPHA(off))
1105	return FALSE;
1106	off++;
1107	while(off < line_end && (ISALNUM(off) \|\| CH(off) == _T(`'-'`)))
1108	off++;
1109
1110	/ (Optional) attributes (if not closer), (optional) '/' (if not closer)*
1111	* and final '>'. */
1112	while(`1`) {
1113	while(off < line_end && !ISNEWLINE(off)) {
1114	if(attr_state > `40`) {
1115	if(attr_state == `41` && (ISBLANK(off) \|\| ISANYOF(off, _T("\"'=<>`")))) {
1116	attr_state = `0`;
1117	off--; / Put the char back for re-inspection in the new state. /
1118	} else if(attr_state == `42` && CH(off) == _T(`'\''`)) {
1119	attr_state = `0`;
1120	} else if(attr_state == `43` && CH(off) == _T(`'"'`)) {
1121	attr_state = `0`;
1122	}
1123	off++;
1124	} else if(ISWHITESPACE(off)) {
1125	if(attr_state == `0`)
1126	attr_state = `1`;
1127	off++;
1128	} else if(attr_state <= `2` && CH(off) == _T(`'>'`)) {
1129	/ End. /
1130	goto done;
1131	} else if(attr_state <= `2` && CH(off) == _T(`'/'`) && off+`1` < line_end && CH(off+`1`) == _T(`'>'`)) {
1132	/ End with digraph '/>' /
1133	off++;
1134	goto done;
1135	} else if((attr_state == `1` \|\| attr_state == `2`) && (ISALPHA(off) \|\| CH(off) == _T(`'_'`) \|\| CH(off) == _T(`':'`))) {
1136	off++;
1137	/ Attribute name /
1138	while(off < line_end && (ISALNUM(off) \|\| ISANYOF(off, _T("_.:-"))))
1139	off++;
1140	attr_state = `2`;
1141	} else if(attr_state == `2` && CH(off) == _T(`'='`)) {
1142	/ Attribute assignment sign /
1143	off++;
1144	attr_state = `3`;
1145	} else if(attr_state == `3`) {
1146	/ Expecting start of attribute value. /
1147	if(CH(off) == _T(`'"'`))
1148	attr_state = `43`;
1149	else if(CH(off) == _T(`'\''`))
1150	attr_state = `42`;
1151	else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off))
1152	attr_state = `41`;
1153	else
1154	return FALSE;
1155	off++;
1156	} else {
1157	/ Anything unexpected. /
1158	return FALSE;
1159	}
1160	}
1161
1162	/ We have to be on a single line. See definition of start condition*
1163	* of HTML block, type 7. */
1164	if(n_lines == `0`)
1165	return FALSE;
1166
1167	line_index++;
1168	if(line_index >= n_lines)
1169	return FALSE;
1170
1171	off = lines[line_index].beg;
1172	line_end = lines[line_index].end;
1173
1174	if(attr_state == `0` \|\| attr_state == `41`)
1175	attr_state = `1`;
1176
1177	if(off >= max_end)
1178	return FALSE;
1179	}
1180
1181	done:
1182	if(off >= max_end)
1183	return FALSE;
1184
1185	*p_end = off+`1`;
1186	return TRUE;
1187	}
1188
1189	static int
1190	md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1191	const MD_LINE* lines, MD_SIZE n_lines,
1192	OFF beg, OFF max_end, OFF* p_end,
1193	OFF* p_scan_horizon)
1194	{
1195	OFF off = beg;
1196	MD_SIZE line_index = `0`;
1197
1198	if(off < p_scan_horizon && p_scan_horizon >= max_end - len) {
1199	/ We have already scanned the range up to the max_end so we know*
1200	* there is nothing to see. */
1201	return FALSE;
1202	}
1203
1204	while(TRUE) {
1205	while(off + len <= lines[line_index].end && off + len <= max_end) {
1206	if(md_ascii_eq(STR(off), s2: str, n: len)) {
1207	/ Success. /
1208	*p_end = off + len;
1209	return TRUE;
1210	}
1211	off++;
1212	}
1213
1214	line_index++;
1215	if(off >= max_end \|\| line_index >= n_lines) {
1216	/ Failure. /
1217	*p_scan_horizon = off;
1218	return FALSE;
1219	}
1220
1221	off = lines[line_index].beg;
1222	}
1223	}
1224
1225	static int
1226	md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1227	{
1228	OFF off = beg;
1229
1230	MD_ASSERT(CH(beg) == _T(`'<'`));
1231
1232	if(off + `4` >= lines[`0`].end)
1233	return FALSE;
1234	if(CH(off+`1`) != _T(`'!'`) \|\| CH(off+`2`) != _T(`'-'`) \|\| CH(off+`3`) != _T(`'-'`))
1235	return FALSE;
1236
1237	/ Skip only "<!" so that we accept also "<!-->" or "<!--->" /
1238	off += `2`;
1239
1240	/ Scan for ordinary comment closer "-->". /
1241	return md_scan_for_html_closer(ctx, _T("-->"), len: `3`,
1242	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_comment_horizon);
1243	}
1244
1245	static int
1246	md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1247	{
1248	OFF off = beg;
1249
1250	if(off + `2` >= lines[`0`].end)
1251	return FALSE;
1252	if(CH(off+`1`) != _T(`'?'`))
1253	return FALSE;
1254	off += `2`;
1255
1256	return md_scan_for_html_closer(ctx, _T("?>"), len: `2`,
1257	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_proc_instr_horizon);
1258	}
1259
1260	static int
1261	md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1262	{
1263	OFF off = beg;
1264
1265	if(off + `2` >= lines[`0`].end)
1266	return FALSE;
1267	if(CH(off+`1`) != _T(`'!'`))
1268	return FALSE;
1269	off += `2`;
1270
1271	/ Declaration name. /
1272	if(off >= lines[`0`].end \|\| !ISALPHA(off))
1273	return FALSE;
1274	off++;
1275	while(off < lines[`0`].end && ISALPHA(off))
1276	off++;
1277
1278	return md_scan_for_html_closer(ctx, _T(">"), len: `1`,
1279	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_decl_horizon);
1280	}
1281
1282	static int
1283	md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1284	{
1285	static const CHAR open_str[] = _T("<![CDATA[");
1286	static const SZ open_size = SIZEOF_ARRAY(open_str) - `1`;
1287
1288	OFF off = beg;
1289
1290	if(off + open_size >= lines[`0`].end)
1291	return FALSE;
1292	if(memcmp(STR(off), s2: open_str, n: open_size) != `0`)
1293	return FALSE;
1294	off += open_size;
1295
1296	return md_scan_for_html_closer(ctx, _T("]]>"), len: `3`,
1297	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_cdata_horizon);
1298	}
1299
1300	static int
1301	md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1302	{
1303	MD_ASSERT(CH(beg) == _T(`'<'`));
1304	return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1305	md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1306	md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1307	md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1308	md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1309	}
1310
1311
1312	/****************************
1313	* Recognizing Entity *
1314	****************************/
1315
1316	static int
1317	md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1318	{
1319	OFF off = beg;
1320	MD_UNUSED(ctx);
1321
1322	while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= `8`)
1323	off++;
1324
1325	if(`1` <= off - beg && off - beg <= `6`) {
1326	*p_end = off;
1327	return TRUE;
1328	} else {
1329	return FALSE;
1330	}
1331	}
1332
1333	static int
1334	md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1335	{
1336	OFF off = beg;
1337	MD_UNUSED(ctx);
1338
1339	while(off < max_end && ISDIGIT_(text[off]) && off - beg <= `8`)
1340	off++;
1341
1342	if(`1` <= off - beg && off - beg <= `7`) {
1343	*p_end = off;
1344	return TRUE;
1345	} else {
1346	return FALSE;
1347	}
1348	}
1349
1350	static int
1351	md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1352	{
1353	OFF off = beg;
1354	MD_UNUSED(ctx);
1355
1356	if(off < max_end && ISALPHA_(text[off]))
1357	off++;
1358	else
1359	return FALSE;
1360
1361	while(off < max_end && ISALNUM_(text[off]) && off - beg <= `48`)
1362	off++;
1363
1364	if(`2` <= off - beg && off - beg <= `48`) {
1365	*p_end = off;
1366	return TRUE;
1367	} else {
1368	return FALSE;
1369	}
1370	}
1371
1372	static int
1373	md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1374	{
1375	int is_contents;
1376	OFF off = beg;
1377
1378	MD_ASSERT(text[off] == _T(`'&'`));
1379	off++;
1380
1381	if(off+`2` < max_end && text[off] == _T(`'#'`) && (text[off+`1`] == _T(`'x'`) \|\| text[off+`1`] == _T(`'X'`)))
1382	is_contents = md_is_hex_entity_contents(ctx, text, beg: off+`2`, max_end, p_end: &off);
1383	else if(off+`1` < max_end && text[off] == _T(`'#'`))
1384	is_contents = md_is_dec_entity_contents(ctx, text, beg: off+`1`, max_end, p_end: &off);
1385	else
1386	is_contents = md_is_named_entity_contents(ctx, text, beg: off, max_end, p_end: &off);
1387
1388	if(is_contents && off < max_end && text[off] == _T(`';'`)) {
1389	*p_end = off+`1`;
1390	return TRUE;
1391	} else {
1392	return FALSE;
1393	}
1394	}
1395
1396	static inline int
1397	md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1398	{
1399	return md_is_entity_str(ctx, text: ctx->text, beg, max_end, p_end);
1400	}
1401
1402
1403	/******************************
1404	* Attribute Management *
1405	******************************/
1406
1407	typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
1408	struct MD_ATTRIBUTE_BUILD_tag {
1409	CHAR* text;
1410	MD_TEXTTYPE* substr_types;
1411	OFF* substr_offsets;
1412	int substr_count;
1413	int substr_alloc;
1414	MD_TEXTTYPE trivial_types[`1`];
1415	OFF trivial_offsets[`2`];
1416	};
1417
1418
1419	#define MD_BUILD_ATTR_NO_ESCAPES 0x0001
1420
1421	static int
1422	md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1423	MD_TEXTTYPE type, OFF off)
1424	{
1425	if(build->substr_count >= build->substr_alloc) {
1426	MD_TEXTTYPE* new_substr_types;
1427	OFF* new_substr_offsets;
1428
1429	build->substr_alloc = (build->substr_alloc > `0`
1430	? build->substr_alloc + build->substr_alloc / `2`
1431	: `8`);
1432	new_substr_types = (MD_TEXTTYPE*) realloc(ptr: build->substr_types,
1433	size: build->substr_alloc * sizeof(MD_TEXTTYPE));
1434	if(new_substr_types == NULL) {
1435	MD_LOG("realloc() failed.");
1436	return -`1`;
1437	}
1438	/ Note +1 to reserve space for final offset (== raw_size). /
1439	new_substr_offsets = (OFF*) realloc(ptr: build->substr_offsets,
1440	size: (build->substr_alloc+`1`) * sizeof(OFF));
1441	if(new_substr_offsets == NULL) {
1442	MD_LOG("realloc() failed.");
1443	free(ptr: new_substr_types);
1444	return -`1`;
1445	}
1446
1447	build->substr_types = new_substr_types;
1448	build->substr_offsets = new_substr_offsets;
1449	}
1450
1451	build->substr_types[build->substr_count] = type;
1452	build->substr_offsets[build->substr_count] = off;
1453	build->substr_count++;
1454	return `0`;
1455	}
1456
1457	static void
1458	md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1459	{
1460	MD_UNUSED(ctx);
1461
1462	if(build->substr_alloc > `0`) {
1463	free(ptr: build->text);
1464	free(ptr: build->substr_types);
1465	free(ptr: build->substr_offsets);
1466	}
1467	}
1468
1469	static int
1470	md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1471	unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1472	{
1473	OFF raw_off, off;
1474	int is_trivial;
1475	int ret = `0`;
1476
1477	memset(s: build, c: `0`, n: sizeof(MD_ATTRIBUTE_BUILD));
1478
1479	/ If there is no backslash and no ampersand, build trivial attribute*
1480	* without any malloc(). */
1481	is_trivial = TRUE;
1482	for(raw_off = `0`; raw_off < raw_size; raw_off++) {
1483	if(ISANYOF3_(raw_text[raw_off], _T(`'\\'`), _T(`'&'`), _T(`'\0'`))) {
1484	is_trivial = FALSE;
1485	break;
1486	}
1487	}
1488
1489	if(is_trivial) {
1490	build->text = (CHAR*) (raw_size ? raw_text : NULL);
1491	build->substr_types = build->trivial_types;
1492	build->substr_offsets = build->trivial_offsets;
1493	build->substr_count = `1`;
1494	build->substr_alloc = `0`;
1495	build->trivial_types[`0`] = MD_TEXT_NORMAL;
1496	build->trivial_offsets[`0`] = `0`;
1497	build->trivial_offsets[`1`] = raw_size;
1498	off = raw_size;
1499	} else {
1500	build->text = (CHAR) malloc(size: raw_size sizeof(CHAR));
1501	if(build->text == NULL) {
1502	MD_LOG("malloc() failed.");
1503	goto abort;
1504	}
1505
1506	raw_off = `0`;
1507	off = `0`;
1508
1509	while(raw_off < raw_size) {
1510	if(raw_text[raw_off] == _T(`'\0'`)) {
1511	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1512	memcpy(dest: build->text + off, src: raw_text + raw_off, n: `1`);
1513	off++;
1514	raw_off++;
1515	continue;
1516	}
1517
1518	if(raw_text[raw_off] == _T(`'&'`)) {
1519	OFF ent_end;
1520
1521	if(md_is_entity_str(ctx, text: raw_text, beg: raw_off, max_end: raw_size, p_end: &ent_end)) {
1522	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1523	memcpy(dest: build->text + off, src: raw_text + raw_off, n: ent_end - raw_off);
1524	off += ent_end - raw_off;
1525	raw_off = ent_end;
1526	continue;
1527	}
1528	}
1529
1530	if(build->substr_count == `0` \|\| build->substr_types[build->substr_count-`1`] != MD_TEXT_NORMAL)
1531	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1532
1533	if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
1534	raw_text[raw_off] == _T(`'\\'`) && raw_off+`1` < raw_size &&
1535	(ISPUNCT_(raw_text[raw_off+`1`]) \|\| ISNEWLINE_(raw_text[raw_off+`1`])))
1536	raw_off++;
1537
1538	build->text[off++] = raw_text[raw_off++];
1539	}
1540	build->substr_offsets[build->substr_count] = off;
1541	}
1542
1543	attr->text = build->text;
1544	attr->size = off;
1545	attr->substr_offsets = build->substr_offsets;
1546	attr->substr_types = build->substr_types;
1547	return `0`;
1548
1549	abort:
1550	md_free_attribute(ctx, build);
1551	return -`1`;
1552	}
1553
1554
1555	/*********************************************
1556	* Dictionary of Reference Definitions *
1557	*********************************************/
1558
/* Offset basis and prime of the 32-bit FNV-1a hash. */
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* Compute FNV-1a hash of n bytes of data. To hash data incrementally, pass
 * MD_FNV1A_BASE as 'base' in the first call and the previous return value
 * in every following one. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* byte = (const unsigned char*) data;
    const unsigned char* const end = byte + n;
    unsigned hash = base;

    while(byte != end) {
        hash ^= *byte++;
        hash *= MD_FNV1A_PRIME;
    }

    return hash;
}
1576
1577
1578	struct MD_REF_DEF_tag {
1579	CHAR* label;
1580	CHAR* title;
1581	unsigned hash;
1582	SZ label_size;
1583	SZ title_size;
1584	OFF dest_beg;
1585	OFF dest_end;
1586	unsigned char label_needs_free : `1`;
1587	unsigned char title_needs_free : `1`;
1588	};
1589
1590	/ Label equivalence is quite complicated with regards to whitespace and case*
1591	* folding. This complicates computing a hash of it as well as direct comparison
1592	* of two labels. */
1593
1594	static unsigned
1595	md_link_label_hash(const CHAR* label, SZ size)
1596	{
1597	unsigned hash = MD_FNV1A_BASE;
1598	OFF off;
1599	unsigned codepoint;
1600	int is_whitespace = FALSE;
1601
1602	off = md_skip_unicode_whitespace(label, off: `0`, size);
1603	while(off < size) {
1604	SZ char_size;
1605
1606	codepoint = md_decode_unicode(str: label, off, str_size: size, p_char_size: &char_size);
1607	is_whitespace = ISUNICODEWHITESPACE_(codepoint) \|\| ISNEWLINE_(label[off]);
1608
1609	if(is_whitespace) {
1610	codepoint = `' '`;
1611	hash = md_fnv1a(base: hash, data: &codepoint, n: sizeof(unsigned));
1612	off = md_skip_unicode_whitespace(label, off, size);
1613	} else {
1614	MD_UNICODE_FOLD_INFO fold_info;
1615
1616	md_get_unicode_fold_info(codepoint, info: &fold_info);
1617	hash = md_fnv1a(base: hash, data: fold_info.codepoints, n: fold_info.n_codepoints * sizeof(unsigned));
1618	off += char_size;
1619	}
1620	}
1621
1622	return hash;
1623	}
1624
1625	static OFF
1626	md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1627	MD_UNICODE_FOLD_INFO* fold_info)
1628	{
1629	unsigned codepoint;
1630	SZ char_size;
1631
1632	if(off >= size) {
1633	/ Treat end of a link label as a whitespace. /
1634	goto whitespace;
1635	}
1636
1637	codepoint = md_decode_unicode(str: label, off, str_size: size, p_char_size: &char_size);
1638	off += char_size;
1639	if(ISUNICODEWHITESPACE_(codepoint)) {
1640	/ Treat all whitespace as equivalent /
1641	goto whitespace;
1642	}
1643
1644	/ Get real folding info. /
1645	md_get_unicode_fold_info(codepoint, info: fold_info);
1646	return off;
1647
1648	whitespace:
1649	fold_info->codepoints[`0`] = _T(`' '`);
1650	fold_info->n_codepoints = `1`;
1651	return md_skip_unicode_whitespace(label, off, size);
1652	}
1653
1654	static int
1655	md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1656	{
1657	OFF a_off;
1658	OFF b_off;
1659	MD_UNICODE_FOLD_INFO a_fi = { { `0` }, `0` };
1660	MD_UNICODE_FOLD_INFO b_fi = { { `0` }, `0` };
1661	OFF a_fi_off = `0`;
1662	OFF b_fi_off = `0`;
1663	int cmp;
1664
1665	a_off = md_skip_unicode_whitespace(label: a_label, off: `0`, size: a_size);
1666	b_off = md_skip_unicode_whitespace(label: b_label, off: `0`, size: b_size);
1667	while(a_off < a_size \|\| a_fi_off < a_fi.n_codepoints \|\|
1668	b_off < b_size \|\| b_fi_off < b_fi.n_codepoints)
1669	{
1670	/ If needed, load fold info for next char. /
1671	if(a_fi_off >= a_fi.n_codepoints) {
1672	a_fi_off = `0`;
1673	a_off = md_link_label_cmp_load_fold_info(label: a_label, off: a_off, size: a_size, fold_info: &a_fi);
1674	}
1675	if(b_fi_off >= b_fi.n_codepoints) {
1676	b_fi_off = `0`;
1677	b_off = md_link_label_cmp_load_fold_info(label: b_label, off: b_off, size: b_size, fold_info: &b_fi);
1678	}
1679
1680	cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1681	if(cmp != `0`)
1682	return cmp;
1683
1684	a_fi_off++;
1685	b_fi_off++;
1686	}
1687
1688	return `0`;
1689	}
1690
1691	typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1692	struct MD_REF_DEF_LIST_tag {
1693	int n_ref_defs;
1694	int alloc_ref_defs;
1695	MD_REF_DEF* ref_defs[]; / Valid items always point into ctx->ref_defs[] /
1696	};
1697
1698	static int
1699	md_ref_def_cmp(const void* a, const void* b)
1700	{
1701	const MD_REF_DEF* a_ref = (const* MD_REF_DEF**)a;
1702	const MD_REF_DEF* b_ref = (const* MD_REF_DEF**)b;
1703
1704	if(a_ref->hash < b_ref->hash)
1705	return -`1`;
1706	else if(a_ref->hash > b_ref->hash)
1707	return +`1`;
1708	else
1709	return md_link_label_cmp(a_label: a_ref->label, a_size: a_ref->label_size, b_label: b_ref->label, b_size: b_ref->label_size);
1710	}
1711
1712	static int
1713	md_ref_def_cmp_for_sort(const void* a, const void* b)
1714	{
1715	int cmp;
1716
1717	cmp = md_ref_def_cmp(a, b);
1718
1719	/ Ensure stability of the sorting. /
1720	if(cmp == `0`) {
1721	const MD_REF_DEF* a_ref = (const* MD_REF_DEF**)a;
1722	const MD_REF_DEF* b_ref = (const* MD_REF_DEF**)b;
1723
1724	if(a_ref < b_ref)
1725	cmp = -`1`;
1726	else if(a_ref > b_ref)
1727	cmp = +`1`;
1728	else
1729	cmp = `0`;
1730	}
1731
1732	return cmp;
1733	}
1734
1735	static int
1736	md_build_ref_def_hashtable(MD_CTX* ctx)
1737	{
1738	int i, j;
1739
1740	if(ctx->n_ref_defs == `0`)
1741	return `0`;
1742
1743	ctx->ref_def_hashtable_size = (ctx->n_ref_defs * `5`) / `4`;
1744	ctx->ref_def_hashtable = malloc(size: ctx->ref_def_hashtable_size * sizeof(void*));
1745	if(ctx->ref_def_hashtable == NULL) {
1746	MD_LOG("malloc() failed.");
1747	goto abort;
1748	}
1749	memset(s: ctx->ref_def_hashtable, c: `0`, n: ctx->ref_def_hashtable_size * sizeof(void*));
1750
1751	/ Each member of ctx->ref_def_hashtable[] can be:*
1752	* -- NULL,
1753	* -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
1754	* -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
1755	* such MD_REF_DEFs.
1756	*/
1757	for(i = `0`; i < ctx->n_ref_defs; i++) {
1758	MD_REF_DEF* def = &ctx->ref_defs[i];
1759	void* bucket;
1760	MD_REF_DEF_LIST* list;
1761
1762	def->hash = md_link_label_hash(label: def->label, size: def->label_size);
1763	bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1764
1765	if(bucket == NULL) {
1766	/ The bucket is empty. Make it just point to the def. /
1767	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1768	continue;
1769	}
1770
1771	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1772	/ The bucket already contains one ref. def. Lets see whether it*
1773	* is the same label (ref. def. duplicate) or different one
1774	* (hash conflict). */
1775	MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1776
1777	if(md_link_label_cmp(a_label: def->label, a_size: def->label_size, b_label: old_def->label, b_size: old_def->label_size) == `0`) {
1778	/ Duplicate label: Ignore this ref. def. /
1779	continue;
1780	}
1781
1782	/ Make the bucket complex, i.e. able to hold more ref. defs. /
1783	list = (MD_REF_DEF_LIST) malloc(size: sizeof(MD_REF_DEF_LIST) + `2` sizeof(MD_REF_DEF*));
1784	if(list == NULL) {
1785	MD_LOG("malloc() failed.");
1786	goto abort;
1787	}
1788	list->ref_defs[`0`] = old_def;
1789	list->ref_defs[`1`] = def;
1790	list->n_ref_defs = `2`;
1791	list->alloc_ref_defs = `2`;
1792	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1793	continue;
1794	}
1795
1796	/ Append the def to the complex bucket list.*
1797	*
1798	* Note in this case we ignore potential duplicates to avoid expensive
1799	* iterating over the complex bucket. Below, we revisit all the complex
1800	* buckets and handle it more cheaply after the complex bucket contents
1801	* is sorted. */
1802	list = (MD_REF_DEF_LIST*) bucket;
1803	if(list->n_ref_defs >= list->alloc_ref_defs) {
1804	int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / `2`;
1805	MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(ptr: list,
1806	size: sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1807	if(list_tmp == NULL) {
1808	MD_LOG("realloc() failed.");
1809	goto abort;
1810	}
1811	list = list_tmp;
1812	list->alloc_ref_defs = alloc_ref_defs;
1813	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1814	}
1815
1816	list->ref_defs[list->n_ref_defs] = def;
1817	list->n_ref_defs++;
1818	}
1819
1820	/ Sort the complex buckets so we can use bsearch() with them. /
1821	for(i = `0`; i < ctx->ref_def_hashtable_size; i++) {
1822	void* bucket = ctx->ref_def_hashtable[i];
1823	MD_REF_DEF_LIST* list;
1824
1825	if(bucket == NULL)
1826	continue;
1827	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs)
1828	continue;
1829
1830	list = (MD_REF_DEF_LIST*) bucket;
1831	qsort(base: list->ref_defs, nmemb: list->n_ref_defs, size: sizeof(MD_REF_DEF*), compar: md_ref_def_cmp_for_sort);
1832
1833	/ Disable all duplicates in the complex bucket by forcing all such*
1834	* records to point to the 1st such ref. def. I.e. no matter which
1835	* record is found during the lookup, it will always point to the right
1836	* ref. def. in ctx->ref_defs[]. */
1837	for(j = `1`; j < list->n_ref_defs; j++) {
1838	if(md_ref_def_cmp(a: &list->ref_defs[j-`1`], b: &list->ref_defs[j]) == `0`)
1839	list->ref_defs[j] = list->ref_defs[j-`1`];
1840	}
1841	}
1842
1843	return `0`;
1844
1845	abort:
1846	return -`1`;
1847	}
1848
1849	static void
1850	md_free_ref_def_hashtable(MD_CTX* ctx)
1851	{
1852	if(ctx->ref_def_hashtable != NULL) {
1853	int i;
1854
1855	for(i = `0`; i < ctx->ref_def_hashtable_size; i++) {
1856	void* bucket = ctx->ref_def_hashtable[i];
1857	if(bucket == NULL)
1858	continue;
1859	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs)
1860	continue;
1861	free(ptr: bucket);
1862	}
1863
1864	free(ptr: ctx->ref_def_hashtable);
1865	}
1866	}
1867
1868	static const MD_REF_DEF*
1869	md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1870	{
1871	unsigned hash;
1872	void* bucket;
1873
1874	if(ctx->ref_def_hashtable_size == `0`)
1875	return NULL;
1876
1877	hash = md_link_label_hash(label, size: label_size);
1878	bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1879
1880	if(bucket == NULL) {
1881	return NULL;
1882	} else if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1883	const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1884
1885	if(md_link_label_cmp(a_label: def->label, a_size: def->label_size, b_label: label, b_size: label_size) == `0`)
1886	return def;
1887	else
1888	return NULL;
1889	} else {
1890	MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1891	MD_REF_DEF key_buf;
1892	const MD_REF_DEF* key = &key_buf;
1893	const MD_REF_DEF** ret;
1894
1895	key_buf.label = (CHAR*) label;
1896	key_buf.label_size = label_size;
1897	key_buf.hash = md_link_label_hash(label: key_buf.label, size: key_buf.label_size);
1898
1899	ret = (const MD_REF_DEF**) bsearch(key: &key, base: list->ref_defs,
1900	nmemb: list->n_ref_defs, size: sizeof(MD_REF_DEF*), compar: md_ref_def_cmp);
1901	if(ret != NULL)
1902	return *ret;
1903	else
1904	return NULL;
1905	}
1906	}
1907
1908
1909	/***************************
1910	* Recognizing Links *
1911	***************************/
1912
/* Note that this code is partially shared between the processing of inlines
 * and of blocks, because reference definitions and links share some helper
 * parser functions.
 */
1916
1917	typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
1918	struct MD_LINK_ATTR_tag {
1919	OFF dest_beg;
1920	OFF dest_end;
1921
1922	CHAR* title;
1923	SZ title_size;
1924	int title_needs_free;
1925	};
1926
1927
1928	static int
1929	md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
1930	OFF* p_end, MD_SIZE* p_beg_line_index, MD_SIZE* p_end_line_index,
1931	OFF* p_contents_beg, OFF* p_contents_end)
1932	{
1933	OFF off = beg;
1934	OFF contents_beg = `0`;
1935	OFF contents_end = `0`;
1936	MD_SIZE line_index = `0`;
1937	int len = `0`;
1938
1939	if(CH(off) != _T(`'['`))
1940	return FALSE;
1941	off++;
1942
1943	while(`1`) {
1944	OFF line_end = lines[line_index].end;
1945
1946	while(off < line_end) {
1947	if(CH(off) == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
1948	if(contents_end == `0`) {
1949	contents_beg = off;
1950	*p_beg_line_index = line_index;
1951	}
1952	contents_end = off + `2`;
1953	off += `2`;
1954	} else if(CH(off) == _T(`'['`)) {
1955	return FALSE;
1956	} else if(CH(off) == _T(`']'`)) {
1957	if(contents_beg < contents_end) {
1958	/ Success. /
1959	*p_contents_beg = contents_beg;
1960	*p_contents_end = contents_end;
1961	*p_end = off+`1`;
1962	*p_end_line_index = line_index;
1963	return TRUE;
1964	} else {
1965	/ Link label must have some non-whitespace contents. /
1966	return FALSE;
1967	}
1968	} else {
1969	unsigned codepoint;
1970	SZ char_size;
1971
1972	codepoint = md_decode_unicode(str: ctx->text, off, str_size: ctx->size, p_char_size: &char_size);
1973	if(!ISUNICODEWHITESPACE_(codepoint)) {
1974	if(contents_end == `0`) {
1975	contents_beg = off;
1976	*p_beg_line_index = line_index;
1977	}
1978	contents_end = off + char_size;
1979	}
1980
1981	off += char_size;
1982	}
1983
1984	len++;
1985	if(len > `999`)
1986	return FALSE;
1987	}
1988
1989	line_index++;
1990	len++;
1991	if(line_index < n_lines)
1992	off = lines[line_index].beg;
1993	else
1994	break;
1995	}
1996
1997	return FALSE;
1998	}
1999
2000	static int
2001	md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2002	OFF* p_contents_beg, OFF* p_contents_end)
2003	{
2004	OFF off = beg;
2005
2006	if(off >= max_end \|\| CH(off) != _T(`'<'`))
2007	return FALSE;
2008	off++;
2009
2010	while(off < max_end) {
2011	if(CH(off) == _T(`'\\'`) && off+`1` < max_end && ISPUNCT(off+`1`)) {
2012	off += `2`;
2013	continue;
2014	}
2015
2016	if(ISNEWLINE(off) \|\| CH(off) == _T(`'<'`))
2017	return FALSE;
2018
2019	if(CH(off) == _T(`'>'`)) {
2020	/ Success. /
2021	*p_contents_beg = beg+`1`;
2022	*p_contents_end = off;
2023	*p_end = off+`1`;
2024	return TRUE;
2025	}
2026
2027	off++;
2028	}
2029
2030	return FALSE;
2031	}
2032
2033	static int
2034	md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2035	OFF* p_contents_beg, OFF* p_contents_end)
2036	{
2037	OFF off = beg;
2038	int parenthesis_level = `0`;
2039
2040	while(off < max_end) {
2041	if(CH(off) == _T(`'\\'`) && off+`1` < max_end && ISPUNCT(off+`1`)) {
2042	off += `2`;
2043	continue;
2044	}
2045
2046	if(ISWHITESPACE(off) \|\| ISCNTRL(off))
2047	break;
2048
2049	/ Link destination may include balanced pairs of unescaped '(' ')'.*
2050	* Note we limit the maximal nesting level by 32 to protect us from
2051	* https://github.com/jgm/cmark/issues/214 */
2052	if(CH(off) == _T(`'('`)) {
2053	parenthesis_level++;
2054	if(parenthesis_level > `32`)
2055	return FALSE;
2056	} else if(CH(off) == _T(`')'`)) {
2057	if(parenthesis_level == `0`)
2058	break;
2059	parenthesis_level--;
2060	}
2061
2062	off++;
2063	}
2064
2065	if(parenthesis_level != `0` \|\| off == beg)
2066	return FALSE;
2067
2068	/ Success. /
2069	*p_contents_beg = beg;
2070	*p_contents_end = off;
2071	*p_end = off;
2072	return TRUE;
2073	}
2074
2075	static inline int
2076	md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2077	OFF* p_contents_beg, OFF* p_contents_end)
2078	{
2079	if(CH(beg) == _T(`'<'`))
2080	return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2081	else
2082	return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2083	}
2084
2085	static int
2086	md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
2087	OFF* p_end, MD_SIZE* p_beg_line_index, MD_SIZE* p_end_line_index,
2088	OFF* p_contents_beg, OFF* p_contents_end)
2089	{
2090	OFF off = beg;
2091	CHAR closer_char;
2092	MD_SIZE line_index = `0`;
2093
2094	/ White space with up to one line break. /
2095	while(off < lines[line_index].end && ISWHITESPACE(off))
2096	off++;
2097	if(off >= lines[line_index].end) {
2098	line_index++;
2099	if(line_index >= n_lines)
2100	return FALSE;
2101	off = lines[line_index].beg;
2102	}
2103	if(off == beg)
2104	return FALSE;
2105
2106	*p_beg_line_index = line_index;
2107
2108	/ First char determines how to detect end of it. /
2109	switch(CH(off)) {
2110	case _T(`'"'`): closer_char = _T(`'"'`); break;
2111	case _T(`'\''`): closer_char = _T(`'\''`); break;
2112	case _T(`'('`): closer_char = _T(`')'`); break;
2113	default: return FALSE;
2114	}
2115	off++;
2116
2117	*p_contents_beg = off;
2118
2119	while(line_index < n_lines) {
2120	OFF line_end = lines[line_index].end;
2121
2122	while(off < line_end) {
2123	if(CH(off) == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
2124	off++;
2125	} else if(CH(off) == closer_char) {
2126	/ Success. /
2127	*p_contents_end = off;
2128	*p_end = off+`1`;
2129	*p_end_line_index = line_index;
2130	return TRUE;
2131	} else if(closer_char == _T(`')'`) && CH(off) == _T(`'('`)) {
2132	/ ()-style title cannot contain (unescaped '(')) /
2133	return FALSE;
2134	}
2135
2136	off++;
2137	}
2138
2139	line_index++;
2140	}
2141
2142	return FALSE;
2143	}
2144
2145	/ Returns 0 if it is not a reference definition.*
2146	*
2147	* Returns N > 0 if it is a reference definition. N then corresponds to the
2148	* number of lines forming it). In this case the definition is stored for
2149	* resolving any links referring to it.
2150	*
2151	* Returns -1 in case of an error (out of memory).
2152	*/
2153	static int
2154	md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
2155	{
2156	OFF label_contents_beg;
2157	OFF label_contents_end;
2158	MD_SIZE label_contents_line_index;
2159	int label_is_multiline = FALSE;
2160	OFF dest_contents_beg;
2161	OFF dest_contents_end;
2162	OFF title_contents_beg;
2163	OFF title_contents_end;
2164	MD_SIZE title_contents_line_index;
2165	int title_is_multiline = FALSE;
2166	OFF off;
2167	MD_SIZE line_index = `0`;
2168	MD_SIZE tmp_line_index;
2169	MD_REF_DEF* def = NULL;
2170	int ret = `0`;
2171
2172	/ Link label. /
2173	if(!md_is_link_label(ctx, lines, n_lines, beg: lines[`0`].beg,
2174	p_end: &off, p_beg_line_index: &label_contents_line_index, p_end_line_index: &line_index,
2175	p_contents_beg: &label_contents_beg, p_contents_end: &label_contents_end))
2176	return FALSE;
2177	label_is_multiline = (label_contents_line_index != line_index);
2178
2179	/ Colon. /
2180	if(off >= lines[line_index].end \|\| CH(off) != _T(`':'`))
2181	return FALSE;
2182	off++;
2183
2184	/ Optional white space with up to one line break. /
2185	while(off < lines[line_index].end && ISWHITESPACE(off))
2186	off++;
2187	if(off >= lines[line_index].end) {
2188	line_index++;
2189	if(line_index >= n_lines)
2190	return FALSE;
2191	off = lines[line_index].beg;
2192	}
2193
2194	/ Link destination. /
2195	if(!md_is_link_destination(ctx, beg: off, max_end: lines[line_index].end,
2196	p_end: &off, p_contents_beg: &dest_contents_beg, p_contents_end: &dest_contents_end))
2197	return FALSE;
2198
2199	/ (Optional) title. Note we interpret it as an title only if nothing*
2200	* more follows on its last line. */
2201	if(md_is_link_title(ctx, lines: lines + line_index, n_lines: n_lines - line_index, beg: off,
2202	p_end: &off, p_beg_line_index: &title_contents_line_index, p_end_line_index: &tmp_line_index,
2203	p_contents_beg: &title_contents_beg, p_contents_end: &title_contents_end)
2204	&& off >= lines[line_index + tmp_line_index].end)
2205	{
2206	title_is_multiline = (tmp_line_index != title_contents_line_index);
2207	title_contents_line_index += line_index;
2208	line_index += tmp_line_index;
2209	} else {
2210	/ Not a title. /
2211	title_is_multiline = FALSE;
2212	title_contents_beg = off;
2213	title_contents_end = off;
2214	title_contents_line_index = `0`;
2215	}
2216
2217	/ Nothing more can follow on the last line. /
2218	if(off < lines[line_index].end)
2219	return FALSE;
2220
2221	/ So, it _is_ a reference definition. Remember it. /
2222	if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2223	MD_REF_DEF* new_defs;
2224
2225	ctx->alloc_ref_defs = (ctx->alloc_ref_defs > `0`
2226	? ctx->alloc_ref_defs + ctx->alloc_ref_defs / `2`
2227	: `16`);
2228	new_defs = (MD_REF_DEF) realloc(ptr: ctx->ref_defs, size: ctx->alloc_ref_defs sizeof(MD_REF_DEF));
2229	if(new_defs == NULL) {
2230	MD_LOG("realloc() failed.");
2231	goto abort;
2232	}
2233
2234	ctx->ref_defs = new_defs;
2235	}
2236	def = &ctx->ref_defs[ctx->n_ref_defs];
2237	memset(s: def, c: `0`, n: sizeof(MD_REF_DEF));
2238
2239	if(label_is_multiline) {
2240	MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2241	lines + label_contents_line_index, n_lines - label_contents_line_index,
2242	_T(`' '`), &def->label, &def->label_size));
2243	def->label_needs_free = TRUE;
2244	} else {
2245	def->label = (CHAR*) STR(label_contents_beg);
2246	def->label_size = label_contents_end - label_contents_beg;
2247	}
2248
2249	if(title_is_multiline) {
2250	MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2251	lines + title_contents_line_index, n_lines - title_contents_line_index,
2252	_T(`'\n'`), &def->title, &def->title_size));
2253	def->title_needs_free = TRUE;
2254	} else {
2255	def->title = (CHAR*) STR(title_contents_beg);
2256	def->title_size = title_contents_end - title_contents_beg;
2257	}
2258
2259	def->dest_beg = dest_contents_beg;
2260	def->dest_end = dest_contents_end;
2261
2262	/ Success. /
2263	ctx->n_ref_defs++;
2264	return line_index + `1`;
2265
2266	abort:
2267	/ Failure. /
2268	if(def != NULL && def->label_needs_free)
2269	free(ptr: def->label);
2270	if(def != NULL && def->title_needs_free)
2271	free(ptr: def->title);
2272	return ret;
2273	}
2274
2275	static int
2276	md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2277	OFF beg, OFF end, MD_LINK_ATTR* attr)
2278	{
2279	const MD_REF_DEF* def;
2280	const MD_LINE* beg_line;
2281	int is_multiline;
2282	CHAR* label;
2283	SZ label_size;
2284	int ret;
2285
2286	MD_ASSERT(CH(beg) == _T(`'['`) \|\| CH(beg) == _T(`'!'`));
2287	MD_ASSERT(CH(end-`1`) == _T(`']'`));
2288
2289	beg += (CH(beg) == _T(`'!'`) ? `2` : `1`);
2290	end--;
2291
2292	/ Find lines corresponding to the beg and end positions. /
2293	beg_line = md_lookup_line(off: beg, lines, n_lines, NULL);
2294	is_multiline = (end > beg_line->end);
2295
2296	if(is_multiline) {
2297	MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2298	(int)(n_lines - (beg_line - lines)), _T(`' '`), &label, &label_size));
2299	} else {
2300	label = (CHAR*) STR(beg);
2301	label_size = end - beg;
2302	}
2303
2304	def = md_lookup_ref_def(ctx, label, label_size);
2305	if(def != NULL) {
2306	attr->dest_beg = def->dest_beg;
2307	attr->dest_end = def->dest_end;
2308	attr->title = def->title;
2309	attr->title_size = def->title_size;
2310	attr->title_needs_free = FALSE;
2311	}
2312
2313	if(is_multiline)
2314	free(ptr: label);
2315
2316	ret = (def != NULL);
2317
2318	abort:
2319	return ret;
2320	}
2321
2322	static int
2323	md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2324	OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2325	{
2326	MD_SIZE line_index = `0`;
2327	MD_SIZE tmp_line_index;
2328	OFF title_contents_beg;
2329	OFF title_contents_end;
2330	MD_SIZE title_contents_line_index;
2331	int title_is_multiline;
2332	OFF off = beg;
2333	int ret = FALSE;
2334
2335	while(off >= lines[line_index].end)
2336	line_index++;
2337
2338	MD_ASSERT(CH(off) == _T(`'('`));
2339	off++;
2340
2341	/ Optional white space with up to one line break. /
2342	while(off < lines[line_index].end && ISWHITESPACE(off))
2343	off++;
2344	if(off >= lines[line_index].end && (off >= ctx->size \|\| ISNEWLINE(off))) {
2345	line_index++;
2346	if(line_index >= n_lines)
2347	return FALSE;
2348	off = lines[line_index].beg;
2349	}
2350
2351	/ Link destination may be omitted, but only when not also having a title. /
2352	if(off < ctx->size && CH(off) == _T(`')'`)) {
2353	attr->dest_beg = off;
2354	attr->dest_end = off;
2355	attr->title = NULL;
2356	attr->title_size = `0`;
2357	attr->title_needs_free = FALSE;
2358	off++;
2359	*p_end = off;
2360	return TRUE;
2361	}
2362
2363	/ Link destination. /
2364	if(!md_is_link_destination(ctx, beg: off, max_end: lines[line_index].end,
2365	p_end: &off, p_contents_beg: &attr->dest_beg, p_contents_end: &attr->dest_end))
2366	return FALSE;
2367
2368	/ (Optional) title. /
2369	if(md_is_link_title(ctx, lines: lines + line_index, n_lines: n_lines - line_index, beg: off,
2370	p_end: &off, p_beg_line_index: &title_contents_line_index, p_end_line_index: &tmp_line_index,
2371	p_contents_beg: &title_contents_beg, p_contents_end: &title_contents_end))
2372	{
2373	title_is_multiline = (tmp_line_index != title_contents_line_index);
2374	title_contents_line_index += line_index;
2375	line_index += tmp_line_index;
2376	} else {
2377	/ Not a title. /
2378	title_is_multiline = FALSE;
2379	title_contents_beg = off;
2380	title_contents_end = off;
2381	title_contents_line_index = `0`;
2382	}
2383
2384	/ Optional whitespace followed with final ')'. /
2385	while(off < lines[line_index].end && ISWHITESPACE(off))
2386	off++;
2387	if(off >= lines[line_index].end) {
2388	line_index++;
2389	if(line_index >= n_lines)
2390	return FALSE;
2391	off = lines[line_index].beg;
2392	}
2393	if(CH(off) != _T(`')'`))
2394	goto abort;
2395	off++;
2396
2397	if(title_contents_beg >= title_contents_end) {
2398	attr->title = NULL;
2399	attr->title_size = `0`;
2400	attr->title_needs_free = FALSE;
2401	} else if(!title_is_multiline) {
2402	attr->title = (CHAR*) STR(title_contents_beg);
2403	attr->title_size = title_contents_end - title_contents_beg;
2404	attr->title_needs_free = FALSE;
2405	} else {
2406	MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2407	lines + title_contents_line_index, n_lines - title_contents_line_index,
2408	_T(`'\n'`), &attr->title, &attr->title_size));
2409	attr->title_needs_free = TRUE;
2410	}
2411
2412	*p_end = off;
2413	ret = TRUE;
2414
2415	abort:
2416	return ret;
2417	}
2418
2419	static void
2420	md_free_ref_defs(MD_CTX* ctx)
2421	{
2422	int i;
2423
2424	for(i = `0`; i < ctx->n_ref_defs; i++) {
2425	MD_REF_DEF* def = &ctx->ref_defs[i];
2426
2427	if(def->label_needs_free)
2428	free(ptr: def->label);
2429	if(def->title_needs_free)
2430	free(ptr: def->title);
2431	}
2432
2433	free(ptr: ctx->ref_defs);
2434	}
2435
2436
2437	/******************************************
2438	* Processing Inlines (a.k.a Spans) *
2439	******************************************/
2440
/* We process inlines in a few phases:
 *
 * (1) We go through the block text and collect all significant characters
 *     which may start/end a span, or mark some other significant position,
 *     into ctx->marks[]. The core of this is what md_collect_marks() does.
 *
 *     We also do a very brief preliminary context-less analysis of whether
 *     a mark might be an opener or a closer (e.g. of an emphasis span).
 *
 *     This speeds up the other steps as we do not need to re-iterate over
 *     all characters anymore.
 *
 * (2) We analyze each potential mark type, in the order of their precedence.
 *
 *     In each md_analyze_XXX() function, we re-iterate the list of the marks,
 *     skipping regions already resolved (by the preceding precedences), and
 *     try to resolve them.
 *
 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
 *       them as resolved.
 *
 * (2.2) For range-type marks, we analyze whether the mark could be a closer
 *       and, if yes, whether there is some preceding opener it could satisfy.
 *
 *       If not, we check whether it could really be an opener and, if yes,
 *       we remember it so that subsequent closers may resolve it.
 *
 * (3) Finally, when all marks have been analyzed, we render the block contents
 *     by calling the MD_RENDERER::text() callback, interrupted by
 *     ::enter_span() or ::close_span() whenever we reach a resolved mark.
 */
2472
2473
2474	/ The mark structure.*
2475	*
2476	* '\\': Maybe escape sequence.
2477	* '\0': NULL char.
2478	* '*': Maybe (strong) emphasis start/end.
2479	* '_': Maybe (strong) emphasis start/end.
2480	* '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
2481	* '`': Maybe code span start/end.
2482	* '&': Maybe start of entity.
2483	* ';': Maybe end of entity.
2484	* '<': Maybe start of raw HTML or autolink.
2485	* '>': Maybe end of raw HTML or autolink.
2486	* '[': Maybe start of link label or link text.
2487	* '!': Equivalent of '[' for image.
2488	* ']': Maybe end of link label or link text.
2489	* '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
2490	* ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
2491	* '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
2492	* 'D': Dummy mark, it reserves a space for splitting a previous mark
2493	* (e.g. emphasis) or to make more space for storing some special data
2494	* related to the preceding mark (e.g. link).
2495	*
2496	* Note that not all instances of these chars in the text imply creation of the
2497	* structure. Only those which have (or may have, after we see more context)
2498	* the special meaning.
2499	*
2500	* (Keep this struct as small as possible to fit as much of them into CPU
2501	* cache line.)
2502	*/
2503	struct MD_MARK_tag {
2504	OFF beg;
2505	OFF end;
2506
2507	/ For unresolved openers, 'next' may be used to form a stack of*
2508	* unresolved open openers.
2509	*
2510	* When resolved with MD_MARK_OPENER/CLOSER flag, next/prev is index of the
2511	* respective closer/opener.
2512	*/
2513	int prev;
2514	int next;
2515	CHAR ch;
2516	unsigned char flags;
2517	};
2518
2519	/ Mark flags (these apply to ALL mark types). /
2520	#define MD_MARK_POTENTIAL_OPENER 0x01 /* Maybe opener. */
2521	#define MD_MARK_POTENTIAL_CLOSER 0x02 /* Maybe closer. */
2522	#define MD_MARK_OPENER 0x04 /* Definitely opener. */
2523	#define MD_MARK_CLOSER 0x08 /* Definitely closer. */
2524	#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */
2525
2526	/ Mark flags specific for various mark types (so they can share bits). /
2527	#define MD_MARK_EMPH_OC 0x20 /* Opener/closer mixed candidate. Helper for the "rule of 3". */
2528	#define MD_MARK_EMPH_MOD3_0 0x40
2529	#define MD_MARK_EMPH_MOD3_1 0x80
2530	#define MD_MARK_EMPH_MOD3_2 (0x40 \| 0x80)
2531	#define MD_MARK_EMPH_MOD3_MASK (0x40 \| 0x80)
2532	#define MD_MARK_AUTOLINK 0x20 /* Distinguisher for '<', '>'. */
2533	#define MD_MARK_AUTOLINK_MISSING_MAILTO 0x40
2534	#define MD_MARK_VALIDPERMISSIVEAUTOLINK 0x20 /* For permissive autolinks. */
2535	#define MD_MARK_HASNESTEDBRACKETS 0x20 /* For '[' to rule out invalid link labels early */
2536
2537	static MD_MARKSTACK*
2538	md_emph_stack(MD_CTX* ctx, MD_CHAR ch, unsigned flags)
2539	{
2540	MD_MARKSTACK* stack;
2541
2542	switch(ch) {
2543	case `''`: stack = &ASTERISK_OPENERS_oo_mod3_0; break*;
2544	case `'_'`: stack = &UNDERSCORE_OPENERS_oo_mod3_0; break;
2545	default: MD_UNREACHABLE();
2546	}
2547
2548	if(flags & MD_MARK_EMPH_OC)
2549	stack += `3`;
2550
2551	switch(flags & MD_MARK_EMPH_MOD3_MASK) {
2552	case MD_MARK_EMPH_MOD3_0: stack += `0`; break;
2553	case MD_MARK_EMPH_MOD3_1: stack += `1`; break;
2554	case MD_MARK_EMPH_MOD3_2: stack += `2`; break;
2555	default: MD_UNREACHABLE();
2556	}
2557
2558	return stack;
2559	}
2560
2561	static MD_MARKSTACK*
2562	md_opener_stack(MD_CTX* ctx, int mark_index)
2563	{
2564	MD_MARK* mark = &ctx->marks[mark_index];
2565
2566	switch(mark->ch) {
2567	case _T(`'*'`):
2568	case _T(`'_'`): return md_emph_stack(ctx, ch: mark->ch, flags: mark->flags);
2569
2570	case _T(`'~'`): return (mark->end - mark->beg == `1`) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2571
2572	case _T(`'!'`):
2573	case _T(`'['`): return &BRACKET_OPENERS;
2574
2575	default: MD_UNREACHABLE();
2576	}
2577	}
2578
2579	static MD_MARK*
2580	md_add_mark(MD_CTX* ctx)
2581	{
2582	if(ctx->n_marks >= ctx->alloc_marks) {
2583	MD_MARK* new_marks;
2584
2585	ctx->alloc_marks = (ctx->alloc_marks > `0`
2586	? ctx->alloc_marks + ctx->alloc_marks / `2`
2587	: `64`);
2588	new_marks = realloc(ptr: ctx->marks, size: ctx->alloc_marks * sizeof(MD_MARK));
2589	if(new_marks == NULL) {
2590	MD_LOG("realloc() failed.");
2591	return NULL;
2592	}
2593
2594	ctx->marks = new_marks;
2595	}
2596
2597	return &ctx->marks[ctx->n_marks++];
2598	}
2599
/* Append a new mark and make the local variable 'mark' point to it.
 *
 * Requires the variables 'ctx', 'mark' and 'ret' and the label 'abort'
 * in the scope of use: if the mark cannot be added, 'ret' is set to -1
 * and the control jumps to 'abort'.
 */
#define ADD_MARK_()                                                     \
    do {                                                                \
        mark = md_add_mark(ctx);                                        \
        if(mark == NULL) {                                              \
            ret = -1;                                                   \
            goto abort;                                                 \
        }                                                               \
    } while(0)

/* The same as ADD_MARK_(), and moreover initialize the new mark with the
 * given character, offsets and flags, leaving it unlinked (prev and next
 * are set to -1). */
#define ADD_MARK(ch_, beg_, end_, flags_)                               \
    do {                                                                \
        ADD_MARK_();                                                    \
        mark->beg = (beg_);                                             \
        mark->end = (end_);                                             \
        mark->prev = -1;                                                \
        mark->next = -1;                                                \
        mark->ch = (char)(ch_);                                         \
        mark->flags = (flags_);                                         \
    } while(0)
2619
2620
2621	static inline void
2622	md_mark_stack_push(MD_CTX* ctx, MD_MARKSTACK* stack, int mark_index)
2623	{
2624	ctx->marks[mark_index].next = stack->top;
2625	stack->top = mark_index;
2626	}
2627
2628	static inline int
2629	md_mark_stack_pop(MD_CTX* ctx, MD_MARKSTACK* stack)
2630	{
2631	int top = stack->top;
2632	if(top >= `0`)
2633	stack->top = ctx->marks[top].next;
2634	return top;
2635	}
2636
2637	/ Sometimes, we need to store a pointer into the mark. It is quite rare*
2638	* so we do not bother to make MD_MARK use union, and it can only happen
2639	* for dummy marks. */
2640	static inline void
2641	md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2642	{
2643	MD_MARK* mark = &ctx->marks[mark_index];
2644	MD_ASSERT(mark->ch == `'D'`);
2645
2646	/ Check only members beg and end are misused for this. /
2647	MD_ASSERT(sizeof(void) <= `2` sizeof(OFF));
2648	memcpy(dest: mark, src: &ptr, n: sizeof(void*));
2649	}
2650
2651	static inline void*
2652	md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2653	{
2654	void* ptr;
2655	MD_MARK* mark = &ctx->marks[mark_index];
2656	MD_ASSERT(mark->ch == `'D'`);
2657	memcpy(dest: &ptr, src: mark, n: sizeof(void*));
2658	return ptr;
2659	}
2660
2661	static inline void
2662	md_resolve_range(MD_CTX* ctx, int opener_index, int closer_index)
2663	{
2664	MD_MARK* opener = &ctx->marks[opener_index];
2665	MD_MARK* closer = &ctx->marks[closer_index];
2666
2667	/ Interconnect opener and closer and mark both as resolved. /
2668	opener->next = closer_index;
2669	closer->prev = opener_index;
2670
2671	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
2672	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
2673	}
2674
2675
2676	#define MD_ROLLBACK_CROSSING 0
2677	#define MD_ROLLBACK_ALL 1
2678
2679	/ In the range ctx->marks[opener_index] ... [closer_index], undo some or all*
2680	* resolvings accordingly to these rules:
2681	*
2682	* (1) All stacks of openers are cut so that any pending potential openers
2683	* are discarded from future consideration.
2684	*
2685	* (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
2686	* are thrown away and turned into dummy marks ('D').
2687	*
2688	* WARNING: Do not call for arbitrary range of opener and closer.
2689	* This must form (potentially) valid range not crossing nesting boundaries
2690	* of already resolved ranges.
2691	*/
2692	static void
2693	md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2694	{
2695	int i;
2696
2697	for(i = `0`; i < (int) SIZEOF_ARRAY(ctx->opener_stacks); i++) {
2698	MD_MARKSTACK* stack = &ctx->opener_stacks[i];
2699	while(stack->top >= opener_index)
2700	md_mark_stack_pop(ctx, stack);
2701	}
2702
2703	if(how == MD_ROLLBACK_ALL) {
2704	for(i = opener_index + `1`; i < closer_index; i++) {
2705	ctx->marks[i].ch = `'D'`;
2706	ctx->marks[i].flags = `0`;
2707	}
2708	}
2709	}
2710
2711	static void
2712	md_build_mark_char_map(MD_CTX* ctx)
2713	{
2714	memset(s: ctx->mark_char_map, c: `0`, n: sizeof(ctx->mark_char_map));
2715
2716	ctx->mark_char_map[`'\\'`] = `1`;
2717	ctx->mark_char_map[`'*'`] = `1`;
2718	ctx->mark_char_map[`'_'`] = `1`;
2719	ctx->mark_char_map['`'] = `1`;
2720	ctx->mark_char_map[`'&'`] = `1`;
2721	ctx->mark_char_map[`';'`] = `1`;
2722	ctx->mark_char_map[`'<'`] = `1`;
2723	ctx->mark_char_map[`'>'`] = `1`;
2724	ctx->mark_char_map[`'['`] = `1`;
2725	ctx->mark_char_map[`'!'`] = `1`;
2726	ctx->mark_char_map[`']'`] = `1`;
2727	ctx->mark_char_map[`'\0'`] = `1`;
2728
2729	if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2730	ctx->mark_char_map[`'~'`] = `1`;
2731
2732	if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2733	ctx->mark_char_map[`'$'`] = `1`;
2734
2735	if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2736	ctx->mark_char_map[`'@'`] = `1`;
2737
2738	if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2739	ctx->mark_char_map[`':'`] = `1`;
2740
2741	if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2742	ctx->mark_char_map[`'.'`] = `1`;
2743
2744	if((ctx->parser.flags & MD_FLAG_TABLES) \|\| (ctx->parser.flags & MD_FLAG_WIKILINKS))
2745	ctx->mark_char_map[`'\|'`] = `1`;
2746
2747	if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2748	int i;
2749
2750	for(i = `0`; i < (int) sizeof(ctx->mark_char_map); i++) {
2751	if(ISWHITESPACE_(i))
2752	ctx->mark_char_map[i] = `1`;
2753	}
2754	}
2755	}
2756
2757	static int
2758	md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
2759	MD_MARK* opener, MD_MARK* closer,
2760	OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2761	int* p_reached_paragraph_end)
2762	{
2763	OFF opener_beg = beg;
2764	OFF opener_end;
2765	OFF closer_beg;
2766	OFF closer_end;
2767	SZ mark_len;
2768	OFF line_end;
2769	int has_space_after_opener = FALSE;
2770	int has_eol_after_opener = FALSE;
2771	int has_space_before_closer = FALSE;
2772	int has_eol_before_closer = FALSE;
2773	int has_only_space = TRUE;
2774	MD_SIZE line_index = `0`;
2775
2776	line_end = lines[`0`].end;
2777	opener_end = opener_beg;
2778	while(opener_end < line_end && CH(opener_end) == _T('`'))
2779	opener_end++;
2780	has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(`' '`));
2781	has_eol_after_opener = (opener_end == line_end);
2782
2783	/ The caller needs to know end of the opening mark even if we fail. /
2784	opener->end = opener_end;
2785
2786	mark_len = opener_end - opener_beg;
2787	if(mark_len > CODESPAN_MARK_MAXLEN)
2788	return FALSE;
2789
2790	/ Check whether we already know there is no closer of this length.*
2791	* If so, re-scan does no sense. This fixes issue #59. */
2792	if(last_potential_closers[mark_len-`1`] >= lines[n_lines-`1`].end \|\|
2793	(*p_reached_paragraph_end && last_potential_closers[mark_len-`1`] < opener_end))
2794	return FALSE;
2795
2796	closer_beg = opener_end;
2797	closer_end = opener_end;
2798
2799	/ Find closer mark. /
2800	while(TRUE) {
2801	while(closer_beg < line_end && CH(closer_beg) != _T('`')) {
2802	if(CH(closer_beg) != _T(`' '`))
2803	has_only_space = FALSE;
2804	closer_beg++;
2805	}
2806	closer_end = closer_beg;
2807	while(closer_end < line_end && CH(closer_end) == _T('`'))
2808	closer_end++;
2809
2810	if(closer_end - closer_beg == mark_len) {
2811	/ Success. /
2812	has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-`1`) == _T(`' '`));
2813	has_eol_before_closer = (closer_beg == lines[line_index].beg);
2814	break;
2815	}
2816
2817	if(closer_end - closer_beg > `0`) {
2818	/ We have found a back-tick which is not part of the closer. /
2819	has_only_space = FALSE;
2820
2821	/ But if we eventually fail, remember it as a potential closer*
2822	* of its own length for future attempts. This mitigates needs for
2823	* rescans. */
2824	if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2825	if(closer_beg > last_potential_closers[closer_end - closer_beg - `1`])
2826	last_potential_closers[closer_end - closer_beg - `1`] = closer_beg;
2827	}
2828	}
2829
2830	if(closer_end >= line_end) {
2831	line_index++;
2832	if(line_index >= n_lines) {
2833	/ Reached end of the paragraph and still nothing. /
2834	*p_reached_paragraph_end = TRUE;
2835	return FALSE;
2836	}
2837	/ Try on the next line. /
2838	line_end = lines[line_index].end;
2839	closer_beg = lines[line_index].beg;
2840	} else {
2841	closer_beg = closer_end;
2842	}
2843	}
2844
2845	/ If there is a space or a new line both after and before the opener*
2846	* (and if the code span is not made of spaces only), consume one initial
2847	* and one trailing space as part of the marks. */
2848	if(!has_only_space &&
2849	(has_space_after_opener \|\| has_eol_after_opener) &&
2850	(has_space_before_closer \|\| has_eol_before_closer))
2851	{
2852	if(has_space_after_opener)
2853	opener_end++;
2854	else
2855	opener_end = lines[`1`].beg;
2856
2857	if(has_space_before_closer)
2858	closer_beg--;
2859	else {
2860	/ Go back to the end of prev line /
2861	closer_beg = lines[line_index-`1`].end;
2862	/ But restore any trailing whitespace /
2863	while(closer_beg < ctx->size && ISBLANK(closer_beg))
2864	closer_beg++;
2865	}
2866	}
2867
2868	opener->ch = _T('`');
2869	opener->beg = opener_beg;
2870	opener->end = opener_end;
2871	opener->flags = MD_MARK_POTENTIAL_OPENER;
2872	closer->ch = _T('`');
2873	closer->beg = closer_beg;
2874	closer->end = closer_end;
2875	closer->flags = MD_MARK_POTENTIAL_CLOSER;
2876	return TRUE;
2877	}
2878
2879	static int
2880	md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2881	{
2882	OFF off = beg+`1`;
2883
2884	MD_ASSERT(CH(beg) == _T(`'<'`));
2885
2886	/ Check for scheme. /
2887	if(off >= max_end \|\| !ISASCII(off))
2888	return FALSE;
2889	off++;
2890	while(`1`) {
2891	if(off >= max_end)
2892	return FALSE;
2893	if(off - beg > `32`)
2894	return FALSE;
2895	if(CH(off) == _T(`':'`) && off - beg >= `3`)
2896	break;
2897	if(!ISALNUM(off) && CH(off) != _T(`'+'`) && CH(off) != _T(`'-'`) && CH(off) != _T(`'.'`))
2898	return FALSE;
2899	off++;
2900	}
2901
2902	/ Check the path after the scheme. /
2903	while(off < max_end && CH(off) != _T(`'>'`)) {
2904	if(ISWHITESPACE(off) \|\| ISCNTRL(off) \|\| CH(off) == _T(`'<'`))
2905	return FALSE;
2906	off++;
2907	}
2908
2909	if(off >= max_end)
2910	return FALSE;
2911
2912	MD_ASSERT(CH(off) == _T(`'>'`));
2913	*p_end = off+`1`;
2914	return TRUE;
2915	}
2916
2917	static int
2918	md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2919	{
2920	OFF off = beg + `1`;
2921	int label_len;
2922
2923	MD_ASSERT(CH(beg) == _T(`'<'`));
2924
2925	/ The code should correspond to this regexp:*
2926	/^[a-zA-Z0-9.!#$%&'+\/=?^_`{\|}~-]+*
2927	@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2928	(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)$/*
2929	*/
2930
2931	/ Username (before '@'). /
2932	while(off < max_end && (ISALNUM(off) \|\| ISANYOF(off, _T(".!#$%&'*+/=?^_`{\|}~-"))))
2933	off++;
2934	if(off <= beg+`1`)
2935	return FALSE;
2936
2937	/ '@' /
2938	if(off >= max_end \|\| CH(off) != _T(`'@'`))
2939	return FALSE;
2940	off++;
2941
2942	/ Labels delimited with '.'; each label is sequence of 1 - 63 alnum*
2943	* characters or '-', but '-' is not allowed as first or last char. */
2944	label_len = `0`;
2945	while(off < max_end) {
2946	if(ISALNUM(off))
2947	label_len++;
2948	else if(CH(off) == _T(`'-'`) && label_len > `0`)
2949	label_len++;
2950	else if(CH(off) == _T(`'.'`) && label_len > `0` && CH(off-`1`) != _T(`'-'`))
2951	label_len = `0`;
2952	else
2953	break;
2954
2955	if(label_len > `63`)
2956	return FALSE;
2957
2958	off++;
2959	}
2960
2961	if(label_len <= `0` \|\| off >= max_end \|\| CH(off) != _T(`'>'`) \|\| CH(off-`1`) == _T(`'-'`))
2962	return FALSE;
2963
2964	*p_end = off+`1`;
2965	return TRUE;
2966	}
2967
2968	static int
2969	md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2970	{
2971	if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2972	*p_missing_mailto = FALSE;
2973	return TRUE;
2974	}
2975
2976	if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2977	*p_missing_mailto = TRUE;
2978	return TRUE;
2979	}
2980
2981	return FALSE;
2982	}
2983
2984	static int
2985	md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_mode)
2986	{
2987	MD_SIZE line_index;
2988	int ret = `0`;
2989	MD_MARK* mark;
2990	OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { `0` };
2991	int codespan_scanned_till_paragraph_end = FALSE;
2992
2993	for(line_index = `0`; line_index < n_lines; line_index++) {
2994	const MD_LINE* line = &lines[line_index];
2995	OFF off = line->beg;
2996
2997	while(TRUE) {
2998	CHAR ch;
2999
3000	#ifdef MD4C_USE_UTF16
3001	/ For UTF-16, mark_char_map[] covers only ASCII. /
3002	#define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
3003	(ctx->mark_char_map[(unsigned char) CH(off)]))
3004	#else
3005	/ For 8-bit encodings, mark_char_map[] covers all 256 elements. /
3006	#define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
3007	#endif
3008
3009	/ Optimization: Use some loop unrolling. /
3010	while(off + `3` < line->end && !IS_MARK_CHAR(off+`0`) && !IS_MARK_CHAR(off+`1`)
3011	&& !IS_MARK_CHAR(off+`2`) && !IS_MARK_CHAR(off+`3`))
3012	off += `4`;
3013	while(off < line->end && !IS_MARK_CHAR(off+`0`))
3014	off++;
3015
3016	if(off >= line->end)
3017	break;
3018
3019	ch = CH(off);
3020
3021	/ A backslash escape.*
3022	* It can go beyond line->end as it may involve escaped new
3023	* line to form a hard break. */
3024	if(ch == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
3025	/ Hard-break cannot be on the last line of the block. /
3026	if(!ISNEWLINE(off+`1`) \|\| line_index+`1` < n_lines)
3027	ADD_MARK(ch, off, off+`2`, MD_MARK_RESOLVED);
3028	off += `2`;
3029	continue;
3030	}
3031
3032	/ A potential (string) emphasis start/end. /
3033	if(ch == _T(`'*'`) \|\| ch == _T(`'_'`)) {
3034	OFF tmp = off+`1`;
3035	int left_level; / What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. /
3036	int right_level; / What follows: 0 = whitespace; 1 = punctuation; 2 = other char. /
3037
3038	while(tmp < line->end && CH(tmp) == ch)
3039	tmp++;
3040
3041	if(off == line->beg \|\| ISUNICODEWHITESPACEBEFORE(off))
3042	left_level = `0`;
3043	else if(ISUNICODEPUNCTBEFORE(off))
3044	left_level = `1`;
3045	else
3046	left_level = `2`;
3047
3048	if(tmp == line->end \|\| ISUNICODEWHITESPACE(tmp))
3049	right_level = `0`;
3050	else if(ISUNICODEPUNCT(tmp))
3051	right_level = `1`;
3052	else
3053	right_level = `2`;
3054
3055	/ Intra-word underscore doesn't have special meaning. /
3056	if(ch == _T(`'_'`) && left_level == `2` && right_level == `2`) {
3057	left_level = `0`;
3058	right_level = `0`;
3059	}
3060
3061	if(left_level != `0` \|\| right_level != `0`) {
3062	unsigned flags = `0`;
3063
3064	if(left_level > `0` && left_level >= right_level)
3065	flags \|= MD_MARK_POTENTIAL_CLOSER;
3066	if(right_level > `0` && right_level >= left_level)
3067	flags \|= MD_MARK_POTENTIAL_OPENER;
3068	if(flags == (MD_MARK_POTENTIAL_OPENER \| MD_MARK_POTENTIAL_CLOSER))
3069	flags \|= MD_MARK_EMPH_OC;
3070
3071	/ For "the rule of three" we need to remember the original*
3072	* size of the mark (modulo three), before we potentially
3073	* split the mark when being later resolved partially by some
3074	* shorter closer. */
3075	switch((tmp - off) % `3`) {
3076	case `0`: flags \|= MD_MARK_EMPH_MOD3_0; break;
3077	case `1`: flags \|= MD_MARK_EMPH_MOD3_1; break;
3078	case `2`: flags \|= MD_MARK_EMPH_MOD3_2; break;
3079	}
3080
3081	ADD_MARK(ch, off, tmp, flags);
3082
3083	/ During resolving, multiple asterisks may have to be*
3084	* split into independent span start/ends. Consider e.g.
3085	* "*foo bar*". Therefore we push also some empty dummy
3086	* marks to have enough space for that. */
3087	off++;
3088	while(off < tmp) {
3089	ADD_MARK(`'D'`, off, off, `0`);
3090	off++;
3091	}
3092	continue;
3093	}
3094
3095	off = tmp;
3096	continue;
3097	}
3098
3099	/ A potential code span start/end. /
3100	if(ch == _T('`')) {
3101	MD_MARK opener;
3102	MD_MARK closer;
3103	int is_code_span;
3104
3105	is_code_span = md_is_code_span(ctx, lines: line, n_lines: n_lines - line_index, beg: off,
3106	opener: &opener, closer: &closer, last_potential_closers: codespan_last_potential_closers,
3107	p_reached_paragraph_end: &codespan_scanned_till_paragraph_end);
3108	if(is_code_span) {
3109	ADD_MARK(opener.ch, opener.beg, opener.end, opener.flags);
3110	ADD_MARK(closer.ch, closer.beg, closer.end, closer.flags);
3111	md_resolve_range(ctx, opener_index: ctx->n_marks-`2`, closer_index: ctx->n_marks-`1`);
3112	off = closer.end;
3113
3114	/ Advance the current line accordingly. /
3115	if(off > line->end)
3116	line = md_lookup_line(off, lines, n_lines, p_line_index: &line_index);
3117	continue;
3118	}
3119
3120	off = opener.end;
3121	continue;
3122	}
3123
3124	/ A potential entity start. /
3125	if(ch == _T(`'&'`)) {
3126	ADD_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_OPENER);
3127	off++;
3128	continue;
3129	}
3130
3131	/ A potential entity end. /
3132	if(ch == _T(`';'`)) {
3133	/ We surely cannot be entity unless the previous mark is '&'. /
3134	if(ctx->n_marks > `0` && ctx->marks[ctx->n_marks-`1`].ch == _T(`'&'`))
3135	ADD_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_CLOSER);
3136
3137	off++;
3138	continue;
3139	}
3140
3141	/ A potential autolink or raw HTML start/end. /
3142	if(ch == _T(`'<'`)) {
3143	int is_autolink;
3144	OFF autolink_end;
3145	int missing_mailto;
3146
3147	if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3148	int is_html;
3149	OFF html_end;
3150
3151	/ Given the nature of the raw HTML, we have to recognize*
3152	* it here. Doing so later in md_analyze_lt_gt() could
3153	* open can of worms of quadratic complexity. */
3154	is_html = md_is_html_any(ctx, lines: line, n_lines: n_lines - line_index, beg: off,
3155	max_end: lines[n_lines-`1`].end, p_end: &html_end);
3156	if(is_html) {
3157	ADD_MARK(_T(`'<'`), off, off, MD_MARK_OPENER \| MD_MARK_RESOLVED);
3158	ADD_MARK(_T(`'>'`), html_end, html_end, MD_MARK_CLOSER \| MD_MARK_RESOLVED);
3159	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3160	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3161	off = html_end;
3162
3163	/ Advance the current line accordingly. /
3164	if(off > line->end)
3165	line = md_lookup_line(off, lines, n_lines, p_line_index: &line_index);
3166	continue;
3167	}
3168	}
3169
3170	is_autolink = md_is_autolink(ctx, beg: off, max_end: lines[n_lines-`1`].end,
3171	p_end: &autolink_end, p_missing_mailto: &missing_mailto);
3172	if(is_autolink) {
3173	unsigned flags = MD_MARK_RESOLVED \| MD_MARK_AUTOLINK;
3174	if(missing_mailto)
3175	flags \|= MD_MARK_AUTOLINK_MISSING_MAILTO;
3176
3177	ADD_MARK(_T(`'<'`), off, off+`1`, MD_MARK_OPENER \| flags);
3178	ADD_MARK(_T(`'>'`), autolink_end-`1`, autolink_end, MD_MARK_CLOSER \| flags);
3179	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3180	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3181	off = autolink_end;
3182	continue;
3183	}
3184
3185	off++;
3186	continue;
3187	}
3188
3189	/ A potential link or its part. /
3190	if(ch == _T(`'['`) \|\| (ch == _T(`'!'`) && off+`1` < line->end && CH(off+`1`) == _T(`'['`))) {
3191	OFF tmp = (ch == _T(`'['`) ? off+`1` : off+`2`);
3192	ADD_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
3193	off = tmp;
3194	/ Two dummies to make enough place for data we need if it is*
3195	* a link. */
3196	ADD_MARK(`'D'`, off, off, `0`);
3197	ADD_MARK(`'D'`, off, off, `0`);
3198	continue;
3199	}
3200	if(ch == _T(`']'`)) {
3201	ADD_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_CLOSER);
3202	off++;
3203	continue;
3204	}
3205
3206	/ A potential permissive e-mail autolink. /
3207	if(ch == _T(`'@'`)) {
3208	if(line->beg + `1` <= off && ISALNUM(off-`1`) &&
3209	off + `3` < line->end && ISALNUM(off+`1`))
3210	{
3211	ADD_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_OPENER);
3212	/ Push a dummy as a reserve for a closer. /
3213	ADD_MARK(`'D'`, line->beg, line->end, `0`);
3214	}
3215
3216	off++;
3217	continue;
3218	}
3219
3220	/ A potential permissive URL autolink. /
3221	if(ch == _T(`':'`)) {
3222	static struct {
3223	const CHAR* scheme;
3224	SZ scheme_size;
3225	const CHAR* suffix;
3226	SZ suffix_size;
3227	} scheme_map[] = {
3228	/ In the order from the most frequently used, arguably. /
3229	{ _T("http"), `4`, _T("//"), `2` },
3230	{ _T("https"), `5`, _T("//"), `2` },
3231	{ _T("ftp"), `3`, _T("//"), `2` }
3232	};
3233	int scheme_index;
3234
3235	for(scheme_index = `0`; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3236	const CHAR* scheme = scheme_map[scheme_index].scheme;
3237	const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3238	const CHAR* suffix = scheme_map[scheme_index].suffix;
3239	const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3240
3241	if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), s2: scheme, n: scheme_size) &&
3242	off + `1` + suffix_size < line->end && md_ascii_eq(STR(off+`1`), s2: suffix, n: suffix_size))
3243	{
3244	ADD_MARK(ch, off-scheme_size, off+`1`+suffix_size, MD_MARK_POTENTIAL_OPENER);
3245	/ Push a dummy as a reserve for a closer. /
3246	ADD_MARK(`'D'`, line->beg, line->end, `0`);
3247	off += `1` + suffix_size;
3248	break;
3249	}
3250	}
3251
3252	off++;
3253	continue;
3254	}
3255
3256	/ A potential permissive WWW autolink. /
3257	if(ch == _T(`'.'`)) {
3258	if(line->beg + `3` <= off && md_ascii_eq(STR(off-`3`), _T("www"), n: `3`) &&
3259	(off-`3` == line->beg \|\| ISUNICODEWHITESPACEBEFORE(off-`3`) \|\| ISUNICODEPUNCTBEFORE(off-`3`)))
3260	{
3261	ADD_MARK(ch, off-`3`, off+`1`, MD_MARK_POTENTIAL_OPENER);
3262	/ Push a dummy as a reserve for a closer. /
3263	ADD_MARK(`'D'`, line->beg, line->end, `0`);
3264	off++;
3265	continue;
3266	}
3267
3268	off++;
3269	continue;
3270	}
3271
3272	/ A potential table cell boundary or wiki link label delimiter. /
3273	if((table_mode \|\| ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T(`'\|'`)) {
3274	ADD_MARK(ch, off, off+`1`, `0`);
3275	off++;
3276	continue;
3277	}
3278
3279	/ A potential strikethrough start/end. /
3280	if(ch == _T(`'~'`)) {
3281	OFF tmp = off+`1`;
3282
3283	while(tmp < line->end && CH(tmp) == _T(`'~'`))
3284	tmp++;
3285
3286	if(tmp - off < `3`) {
3287	unsigned flags = `0`;
3288
3289	if(tmp < line->end && !ISUNICODEWHITESPACE(tmp))
3290	flags \|= MD_MARK_POTENTIAL_OPENER;
3291	if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
3292	flags \|= MD_MARK_POTENTIAL_CLOSER;
3293	if(flags != `0`)
3294	ADD_MARK(ch, off, tmp, flags);
3295	}
3296
3297	off = tmp;
3298	continue;
3299	}
3300
3301	/ A potential equation start/end /
3302	if(ch == _T(`'$'`)) {
3303	/ We can have at most two consecutive $ signs,*
3304	* where two dollar signs signify a display equation. */
3305	OFF tmp = off+`1`;
3306
3307	while(tmp < line->end && CH(tmp) == _T(`'$'`))
3308	tmp++;
3309
3310	if(tmp - off <= `2`) {
3311	unsigned flags = MD_MARK_POTENTIAL_OPENER \| MD_MARK_POTENTIAL_CLOSER;
3312
3313	if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off) && !ISUNICODEPUNCTBEFORE(off))
3314	flags &= ~MD_MARK_POTENTIAL_OPENER;
3315	if(tmp < line->end && !ISUNICODEWHITESPACE(tmp) && !ISUNICODEPUNCT(tmp))
3316	flags &= ~MD_MARK_POTENTIAL_CLOSER;
3317	if(flags != `0`)
3318	ADD_MARK(ch, off, tmp, flags);
3319	}
3320
3321	off = tmp;
3322	continue;
3323	}
3324
3325	/ Turn non-trivial whitespace into single space. /
3326	if(ISWHITESPACE_(ch)) {
3327	OFF tmp = off+`1`;
3328
3329	while(tmp < line->end && ISWHITESPACE(tmp))
3330	tmp++;
3331
3332	if(tmp - off > `1` \|\| ch != _T(`' '`))
3333	ADD_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3334
3335	off = tmp;
3336	continue;
3337	}
3338
3339	/ NULL character. /
3340	if(ch == _T(`'\0'`)) {
3341	ADD_MARK(ch, off, off+`1`, MD_MARK_RESOLVED);
3342	off++;
3343	continue;
3344	}
3345
3346	off++;
3347	}
3348	}
3349
3350	/ Add a dummy mark at the end of the mark vector to simplify*
3351	* process_inlines(). */
3352	ADD_MARK(`127`, ctx->size, ctx->size, MD_MARK_RESOLVED);
3353
3354	abort:
3355	return ret;
3356	}
3357
3358	static void
3359	md_analyze_bracket(MD_CTX* ctx, int mark_index)
3360	{
3361	/ We cannot really resolve links here as for that we would need*
3362	* more context. E.g. a following pair of brackets (reference link),
3363	* or enclosing pair of brackets (if the inner is the link, the outer
3364	* one cannot be.)
3365	*
3366	* Therefore we here only construct a list of '[' ']' pairs ordered by
3367	* position of the closer. This allows us to analyze what is or is not
3368	* link in the right order, from inside to outside in case of nested
3369	* brackets.
3370	*
3371	* The resolving itself is deferred to md_resolve_links().
3372	*/
3373
3374	MD_MARK* mark = &ctx->marks[mark_index];
3375
3376	if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
3377	if(BRACKET_OPENERS.top >= `0`)
3378	ctx->marks[BRACKET_OPENERS.top].flags \|= MD_MARK_HASNESTEDBRACKETS;
3379
3380	md_mark_stack_push(ctx, stack: &BRACKET_OPENERS, mark_index);
3381	return;
3382	}
3383
3384	if(BRACKET_OPENERS.top >= `0`) {
3385	int opener_index = md_mark_stack_pop(ctx, stack: &BRACKET_OPENERS);
3386	MD_MARK* opener = &ctx->marks[opener_index];
3387
3388	/ Interconnect the opener and closer. /
3389	opener->next = mark_index;
3390	mark->prev = opener_index;
3391
3392	/ Add the pair into a list of potential links for md_resolve_links().*
3393	* Note we misuse opener->prev for this as opener->next points to its
3394	* closer. */
3395	if(ctx->unresolved_link_tail >= `0`)
3396	ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
3397	else
3398	ctx->unresolved_link_head = opener_index;
3399	ctx->unresolved_link_tail = opener_index;
3400	opener->prev = -`1`;
3401	}
3402	}
3403
3404	/ Forward declaration. /
3405	static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
3406	int mark_beg, int mark_end);
3407
3408	static int
3409	md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
3410	{
3411	int opener_index = ctx->unresolved_link_head;
3412	OFF last_link_beg = `0`;
3413	OFF last_link_end = `0`;
3414	OFF last_img_beg = `0`;
3415	OFF last_img_end = `0`;
3416
3417	while(opener_index >= `0`) {
3418	MD_MARK* opener = &ctx->marks[opener_index];
3419	int closer_index = opener->next;
3420	MD_MARK* closer = &ctx->marks[closer_index];
3421	int next_index = opener->prev;
3422	MD_MARK* next_opener;
3423	MD_MARK* next_closer;
3424	MD_LINK_ATTR attr;
3425	int is_link = FALSE;
3426
3427	if(next_index >= `0`) {
3428	next_opener = &ctx->marks[next_index];
3429	next_closer = &ctx->marks[next_opener->next];
3430	} else {
3431	next_opener = NULL;
3432	next_closer = NULL;
3433	}
3434
3435	/ If nested ("[ [ ] ]"), we need to make sure that:*
3436	* - The outer does not end inside of (...) belonging to the inner.
3437	* - The outer cannot be link if the inner is link (i.e. not image).
3438	*
3439	* (Note we here analyze from inner to outer as the marks are ordered
3440	* by closer->beg.)
3441	*/
3442	if((opener->beg < last_link_beg && closer->end < last_link_end) \|\|
3443	(opener->beg < last_img_beg && closer->end < last_img_end) \|\|
3444	(opener->beg < last_link_end && opener->ch == `'['`))
3445	{
3446	opener_index = next_index;
3447	continue;
3448	}
3449
3450	/ Recognize and resolve wiki links.*
3451	* Wiki-links maybe '[[destination]]' or '[[destination\|label]]'.
3452	*/
3453	if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3454	(opener->end - opener->beg == `1`) && / not image /
3455	next_opener != NULL && / double '[' opener /
3456	next_opener->ch == `'['` &&
3457	(next_opener->beg == opener->beg - `1`) &&
3458	(next_opener->end - next_opener->beg == `1`) &&
3459	next_closer != NULL && / double ']' closer /
3460	next_closer->ch == `']'` &&
3461	(next_closer->beg == closer->beg + `1`) &&
3462	(next_closer->end - next_closer->beg == `1`))
3463	{
3464	MD_MARK* delim = NULL;
3465	int delim_index;
3466	OFF dest_beg, dest_end;
3467
3468	is_link = TRUE;
3469
3470	/ We don't allow destination to be longer than 100 characters.*
3471	* Lets scan to see whether there is '\|'. (If not then the whole
3472	* wiki-link has to be below the 100 characters.) */
3473	delim_index = opener_index + `1`;
3474	while(delim_index < closer_index) {
3475	MD_MARK* m = &ctx->marks[delim_index];
3476	if(m->ch == `'\|'`) {
3477	delim = m;
3478	break;
3479	}
3480	if(m->ch != `'D'`) {
3481	if(m->beg - opener->end > `100`)
3482	break;
3483	if(m->ch != `'D'` && (m->flags & MD_MARK_OPENER))
3484	delim_index = m->next;
3485	}
3486	delim_index++;
3487	}
3488
3489	dest_beg = opener->end;
3490	dest_end = (delim != NULL) ? delim->beg : closer->beg;
3491	if(dest_end - dest_beg == `0` \|\| dest_end - dest_beg > `100`)
3492	is_link = FALSE;
3493
3494	/ There may not be any new line in the destination. /
3495	if(is_link) {
3496	OFF off;
3497	for(off = dest_beg; off < dest_end; off++) {
3498	if(ISNEWLINE(off)) {
3499	is_link = FALSE;
3500	break;
3501	}
3502	}
3503	}
3504
3505	if(is_link) {
3506	if(delim != NULL) {
3507	if(delim->end < closer->beg) {
3508	md_rollback(ctx, opener_index, closer_index: delim_index, MD_ROLLBACK_ALL);
3509	md_rollback(ctx, opener_index: delim_index, closer_index, MD_ROLLBACK_CROSSING);
3510	delim->flags \|= MD_MARK_RESOLVED;
3511	opener->end = delim->beg;
3512	} else {
3513	/ The pipe is just before the closer: [[foo\|]] /
3514	md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3515	closer->beg = delim->beg;
3516	delim = NULL;
3517	}
3518	}
3519
3520	opener->beg = next_opener->beg;
3521	opener->next = closer_index;
3522	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
3523
3524	closer->end = next_closer->end;
3525	closer->prev = opener_index;
3526	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
3527
3528	last_link_beg = opener->beg;
3529	last_link_end = closer->end;
3530
3531	if(delim != NULL)
3532	md_analyze_link_contents(ctx, lines, n_lines, mark_beg: delim_index+`1`, mark_end: closer_index);
3533
3534	opener_index = next_opener->prev;
3535	continue;
3536	}
3537	}
3538
3539	if(next_opener != NULL && next_opener->beg == closer->end) {
3540	if(next_closer->beg > closer->end + `1`) {
3541	/ Might be full reference link. /
3542	if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS))
3543	is_link = md_is_link_reference(ctx, lines, n_lines, beg: next_opener->beg, end: next_closer->end, attr: &attr);
3544	} else {
3545	/ Might be shortcut reference link. /
3546	if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
3547	is_link = md_is_link_reference(ctx, lines, n_lines, beg: opener->beg, end: closer->end, attr: &attr);
3548	}
3549
3550	if(is_link < `0`)
3551	return -`1`;
3552
3553	if(is_link) {
3554	/ Eat the 2nd "[...]". /
3555	closer->end = next_closer->end;
3556
3557	/ Do not analyze the label as a standalone link in the next*
3558	* iteration. */
3559	next_index = ctx->marks[next_index].prev;
3560	}
3561	} else {
3562	if(closer->end < ctx->size && CH(closer->end) == _T(`'('`)) {
3563	/ Might be inline link. /
3564	OFF inline_link_end = UINT_MAX;
3565
3566	is_link = md_is_inline_link_spec(ctx, lines, n_lines, beg: closer->end, p_end: &inline_link_end, attr: &attr);
3567	if(is_link < `0`)
3568	return -`1`;
3569
3570	/ Check the closing ')' is not inside an already resolved range*
3571	* (i.e. a range with a higher priority), e.g. a code span. */
3572	if(is_link) {
3573	int i = closer_index + `1`;
3574
3575	while(i < ctx->n_marks) {
3576	MD_MARK* mark = &ctx->marks[i];
3577
3578	if(mark->beg >= inline_link_end)
3579	break;
3580	if((mark->flags & (MD_MARK_OPENER \| MD_MARK_RESOLVED)) == (MD_MARK_OPENER \| MD_MARK_RESOLVED)) {
3581	if(ctx->marks[mark->next].beg >= inline_link_end) {
3582	/ Cancel the link status. /
3583	if(attr.title_needs_free)
3584	free(ptr: attr.title);
3585	is_link = FALSE;
3586	break;
3587	}
3588
3589	i = mark->next + `1`;
3590	} else {
3591	i++;
3592	}
3593	}
3594	}
3595
3596	if(is_link) {
3597	/ Eat the "(...)" /
3598	closer->end = inline_link_end;
3599	}
3600	}
3601
3602	if(!is_link) {
3603	/ Might be collapsed reference link. /
3604	if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
3605	is_link = md_is_link_reference(ctx, lines, n_lines, beg: opener->beg, end: closer->end, attr: &attr);
3606	if(is_link < `0`)
3607	return -`1`;
3608	}
3609	}
3610
3611	if(is_link) {
3612	/ Resolve the brackets as a link. /
3613	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
3614	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
3615
3616	/ If it is a link, we store the destination and title in the two*
3617	* dummy marks after the opener. */
3618	MD_ASSERT(ctx->marks[opener_index+`1`].ch == `'D'`);
3619	ctx->marks[opener_index+`1`].beg = attr.dest_beg;
3620	ctx->marks[opener_index+`1`].end = attr.dest_end;
3621
3622	MD_ASSERT(ctx->marks[opener_index+`2`].ch == `'D'`);
3623	md_mark_store_ptr(ctx, mark_index: opener_index+`2`, ptr: attr.title);
3624	/ The title might or might not have been allocated for us. /
3625	if(attr.title_needs_free)
3626	md_mark_stack_push(ctx, stack: &ctx->ptr_stack, mark_index: opener_index+`2`);
3627	ctx->marks[opener_index+`2`].prev = attr.title_size;
3628
3629	if(opener->ch == `'['`) {
3630	last_link_beg = opener->beg;
3631	last_link_end = closer->end;
3632	} else {
3633	last_img_beg = opener->beg;
3634	last_img_end = closer->end;
3635	}
3636
3637	md_analyze_link_contents(ctx, lines, n_lines, mark_beg: opener_index+`1`, mark_end: closer_index);
3638
3639	/ If the link text is formed by nothing but permissive autolink,*
3640	* suppress the autolink.
3641	* See https://github.com/mity/md4c/issues/152 for more info. */
3642	if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) {
3643	MD_MARK* first_nested;
3644	MD_MARK* last_nested;
3645
3646	first_nested = opener + `1`;
3647	while(first_nested->ch == _T(`'D'`) && first_nested < closer)
3648	first_nested++;
3649
3650	last_nested = closer - `1`;
3651	while(first_nested->ch == _T(`'D'`) && last_nested > opener)
3652	last_nested--;
3653
3654	if((first_nested->flags & MD_MARK_RESOLVED) &&
3655	first_nested->beg == opener->end &&
3656	ISANYOF_(first_nested->ch, _T("@:.")) &&
3657	first_nested->next == (last_nested - ctx->marks) &&
3658	last_nested->end == closer->beg)
3659	{
3660	first_nested->ch = _T(`'D'`);
3661	first_nested->flags &= ~MD_MARK_RESOLVED;
3662	last_nested->ch = _T(`'D'`);
3663	last_nested->flags &= ~MD_MARK_RESOLVED;
3664	}
3665	}
3666	}
3667
3668	opener_index = next_index;
3669	}
3670
3671	return `0`;
3672	}
3673
3674	/ Analyze whether the mark '&' starts a HTML entity.*
3675	* If so, update its flags as well as flags of corresponding closer ';'. */
3676	static void
3677	md_analyze_entity(MD_CTX* ctx, int mark_index)
3678	{
3679	MD_MARK* opener = &ctx->marks[mark_index];
3680	MD_MARK* closer;
3681	OFF off;
3682
3683	/ Cannot be entity if there is no closer as the next mark.*
3684	* (Any other mark between would mean strange character which cannot be
3685	* part of the entity.
3686	*
3687	* So we can do all the work on '&' and do not call this later for the
3688	* closing mark ';'.
3689	*/
3690	if(mark_index + `1` >= ctx->n_marks)
3691	return;
3692	closer = &ctx->marks[mark_index+`1`];
3693	if(closer->ch != `';'`)
3694	return;
3695
3696	if(md_is_entity(ctx, beg: opener->beg, max_end: closer->end, p_end: &off)) {
3697	MD_ASSERT(off == closer->end);
3698
3699	md_resolve_range(ctx, opener_index: mark_index, closer_index: mark_index+`1`);
3700	opener->end = closer->end;
3701	}
3702	}
3703
3704	static void
3705	md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3706	{
3707	MD_MARK* mark = &ctx->marks[mark_index];
3708	mark->flags \|= MD_MARK_RESOLVED;
3709	mark->next = -`1`;
3710
3711	if(ctx->table_cell_boundaries_head < `0`)
3712	ctx->table_cell_boundaries_head = mark_index;
3713	else
3714	ctx->marks[ctx->table_cell_boundaries_tail].next = mark_index;
3715	ctx->table_cell_boundaries_tail = mark_index;
3716	ctx->n_table_cell_boundaries++;
3717	}
3718
3719	/ Split a longer mark into two. The new mark takes the given count of*
3720	* characters. May only be called if an adequate number of dummy 'D' marks
3721	* follows.
3722	*/
3723	static int
3724	md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3725	{
3726	MD_MARK* mark = &ctx->marks[mark_index];
3727	int new_mark_index = mark_index + (mark->end - mark->beg - n);
3728	MD_MARK* dummy = &ctx->marks[new_mark_index];
3729
3730	MD_ASSERT(mark->end - mark->beg > n);
3731	MD_ASSERT(dummy->ch == `'D'`);
3732
3733	memcpy(dest: dummy, src: mark, n: sizeof(MD_MARK));
3734	mark->end -= n;
3735	dummy->beg = mark->end;
3736
3737	return new_mark_index;
3738	}
3739
3740	static void
3741	md_analyze_emph(MD_CTX* ctx, int mark_index)
3742	{
3743	MD_MARK* mark = &ctx->marks[mark_index];
3744
3745	/ If we can be a closer, try to resolve with the preceding opener. /
3746	if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3747	MD_MARK* opener = NULL;
3748	int opener_index = `0`;
3749	MD_MARKSTACK* opener_stacks[`6`];
3750	int i, n_opener_stacks;
3751	unsigned flags = mark->flags;
3752
3753	n_opener_stacks = `0`;
3754
3755	/ Apply the rule of 3 /
3756	opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, ch: mark->ch, MD_MARK_EMPH_MOD3_0 \| MD_MARK_EMPH_OC);
3757	if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3758	opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, ch: mark->ch, MD_MARK_EMPH_MOD3_1 \| MD_MARK_EMPH_OC);
3759	if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3760	opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, ch: mark->ch, MD_MARK_EMPH_MOD3_2 \| MD_MARK_EMPH_OC);
3761	opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, ch: mark->ch, MD_MARK_EMPH_MOD3_0);
3762	if(!(flags & MD_MARK_EMPH_OC) \|\| (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3763	opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, ch: mark->ch, MD_MARK_EMPH_MOD3_1);
3764	if(!(flags & MD_MARK_EMPH_OC) \|\| (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3765	opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, ch: mark->ch, MD_MARK_EMPH_MOD3_2);
3766
3767	/ Opener is the most recent mark from the allowed stacks. /
3768	for(i = `0`; i < n_opener_stacks; i++) {
3769	if(opener_stacks[i]->top >= `0`) {
3770	int m_index = opener_stacks[i]->top;
3771	MD_MARK* m = &ctx->marks[m_index];
3772
3773	if(opener == NULL \|\| m->end > opener->end) {
3774	opener_index = m_index;
3775	opener = m;
3776	}
3777	}
3778	}
3779
3780	/ Resolve, if we have found matching opener. /
3781	if(opener != NULL) {
3782	SZ opener_size = opener->end - opener->beg;
3783	SZ closer_size = mark->end - mark->beg;
3784	MD_MARKSTACK* stack = md_opener_stack(ctx, mark_index: opener_index);
3785
3786	if(opener_size > closer_size) {
3787	opener_index = md_split_emph_mark(ctx, mark_index: opener_index, n: closer_size);
3788	md_mark_stack_push(ctx, stack, mark_index: opener_index);
3789	} else if(opener_size < closer_size) {
3790	md_split_emph_mark(ctx, mark_index, n: closer_size - opener_size);
3791	}
3792
3793	/ Above we were only peeking. /
3794	md_mark_stack_pop(ctx, stack);
3795
3796	md_rollback(ctx, opener_index, closer_index: mark_index, MD_ROLLBACK_CROSSING);
3797	md_resolve_range(ctx, opener_index, closer_index: mark_index);
3798	return;
3799	}
3800	}
3801
3802	/ If we could not resolve as closer, we may be yet be an opener. /
3803	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3804	md_mark_stack_push(ctx, stack: md_emph_stack(ctx, ch: mark->ch, flags: mark->flags), mark_index);
3805	}
3806
3807	static void
3808	md_analyze_tilde(MD_CTX* ctx, int mark_index)
3809	{
3810	MD_MARK* mark = &ctx->marks[mark_index];
3811	MD_MARKSTACK* stack = md_opener_stack(ctx, mark_index);
3812
3813	/ We attempt to be Github Flavored Markdown compatible here. GFM accepts*
3814	* only tildes sequences of length 1 and 2, and the length of the opener
3815	* and closer has to match. */
3816
3817	if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && stack->top >= `0`) {
3818	int opener_index = stack->top;
3819
3820	md_mark_stack_pop(ctx, stack);
3821	md_rollback(ctx, opener_index, closer_index: mark_index, MD_ROLLBACK_CROSSING);
3822	md_resolve_range(ctx, opener_index, closer_index: mark_index);
3823	return;
3824	}
3825
3826	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3827	md_mark_stack_push(ctx, stack, mark_index);
3828	}
3829
3830	static void
3831	md_analyze_dollar(MD_CTX* ctx, int mark_index)
3832	{
3833	MD_MARK* mark = &ctx->marks[mark_index];
3834
3835	if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && DOLLAR_OPENERS.top >= `0`) {
3836	/ If the potential closer has a non-matching number of $, discard /
3837	MD_MARK* opener = &ctx->marks[DOLLAR_OPENERS.top];
3838	int opener_index = DOLLAR_OPENERS.top;
3839	MD_MARK* closer = mark;
3840	int closer_index = mark_index;
3841
3842	if(opener->end - opener->beg == closer->end - closer->beg) {
3843	/ We are the matching closer /
3844	md_mark_stack_pop(ctx, stack: &DOLLAR_OPENERS);
3845	md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3846	md_resolve_range(ctx, opener_index, closer_index);
3847
3848	/ Discard all pending openers: Latex math span do not allow*
3849	* nesting. */
3850	DOLLAR_OPENERS.top = -`1`;
3851	return;
3852	}
3853	}
3854
3855	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3856	md_mark_stack_push(ctx, stack: &DOLLAR_OPENERS, mark_index);
3857	}
3858
3859	static MD_MARK*
3860	md_scan_left_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor)
3861	{
3862	MD_MARK* mark;
3863
3864	for(mark = mark_from; mark >= ctx->marks; mark--) {
3865	if(mark->ch == `'D'` \|\| mark->beg > off)
3866	continue;
3867	if(mark->beg <= off && off < mark->end && (mark->flags & MD_MARK_RESOLVED)) {
3868	if(p_cursor != NULL)
3869	*p_cursor = mark;
3870	return mark;
3871	}
3872	if(mark->end <= off)
3873	break;
3874	}
3875
3876	if(p_cursor != NULL)
3877	*p_cursor = mark;
3878	return NULL;
3879	}
3880
3881	static MD_MARK*
3882	md_scan_right_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor)
3883	{
3884	MD_MARK* mark;
3885
3886	for(mark = mark_from; mark < ctx->marks + ctx->n_marks; mark++) {
3887	if(mark->ch == `'D'` \|\| mark->end <= off)
3888	continue;
3889	if(mark->beg <= off && off < mark->end && (mark->flags & MD_MARK_RESOLVED)) {
3890	if(p_cursor != NULL)
3891	*p_cursor = mark;
3892	return mark;
3893	}
3894	if(mark->beg > off)
3895	break;
3896	}
3897
3898	if(p_cursor != NULL)
3899	*p_cursor = mark;
3900	return NULL;
3901	}
3902
3903	static void
3904	md_analyze_permissive_autolink(MD_CTX* ctx, int mark_index)
3905	{
3906	static const struct {
3907	const MD_CHAR start_char;
3908	const MD_CHAR delim_char;
3909	const MD_CHAR* allowed_nonalnum_chars;
3910	int min_components;
3911	const MD_CHAR optional_end_char;
3912	} URL_MAP[] = {
3913	{ _T(`'\0'`), _T(`'.'`), _T(".-_"), `2`, _T(`'\0'`) }, / host, mandatory /
3914	{ _T(`'/'`), _T(`'/'`), _T("/.-_"), `0`, _T(`'/'`) }, / path /
3915	{ _T(`'?'`), _T(`'&'`), _T("&.-+_=()"), `1`, _T(`'\0'`) }, / query /
3916	{ _T(`'#'`), _T(`'\0'`), _T(".-+_") , `1`, _T(`'\0'`) } / fragment /
3917	};
3918
3919	MD_MARK* opener = &ctx->marks[mark_index];
3920	MD_MARK* closer = &ctx->marks[mark_index + `1`]; / The dummy. /
3921	OFF line_beg = closer->beg; / md_collect_mark() set this for us /
3922	OFF line_end = closer->end; / ditto /
3923	OFF beg = opener->beg;
3924	OFF end = opener->end;
3925	MD_MARK* left_cursor = opener;
3926	int left_boundary_ok = FALSE;
3927	MD_MARK* right_cursor = opener;
3928	int right_boundary_ok = FALSE;
3929	unsigned i;
3930
3931	MD_ASSERT(closer->ch == `'D'`);
3932
3933	if(opener->ch == `'@'`) {
3934	MD_ASSERT(CH(opener->beg) == _T(`'@'`));
3935
3936	/ Scan backwards for the user name (before '@'). /
3937	while(beg > line_beg) {
3938	if(ISALNUM(beg-`1`))
3939	beg--;
3940	else if(beg >= line_beg+`2` && ISALNUM(beg-`2`) &&
3941	ISANYOF(beg-`1`, _T(".-_+")) &&
3942	md_scan_left_for_resolved_mark(ctx, mark_from: left_cursor, off: beg-`1`, p_cursor: &left_cursor) == NULL &&
3943	ISALNUM(beg))
3944	beg--;
3945	else
3946	break;
3947	}
3948	if(beg == opener->beg) / empty user name /
3949	return;
3950	}
3951
3952	/ Verify there's line boundary, whitespace, allowed punctuation or*
3953	* resolved emphasis mark just before the suspected autolink. */
3954	if(beg == line_beg \|\| ISUNICODEWHITESPACEBEFORE(beg) \|\| ISANYOF(beg-`1`, _T("({["))) {
3955	left_boundary_ok = TRUE;
3956	} else if(ISANYOF(beg-`1`, _T("*_~"))) {
3957	MD_MARK* left_mark;
3958
3959	left_mark = md_scan_left_for_resolved_mark(ctx, mark_from: left_cursor, off: beg-`1`, p_cursor: &left_cursor);
3960	if(left_mark != NULL && (left_mark->flags & MD_MARK_OPENER))
3961	left_boundary_ok = TRUE;
3962	}
3963	if(!left_boundary_ok)
3964	return;
3965
3966	for(i = `0`; i < SIZEOF_ARRAY(URL_MAP); i++) {
3967	int n_components = `0`;
3968	int n_open_brackets = `0`;
3969
3970	if(URL_MAP[i].start_char != _T(`'\0'`)) {
3971	if(end >= line_end \|\| CH(end) != URL_MAP[i].start_char)
3972	continue;
3973	if(URL_MAP[i].min_components > `0` && (end+`1` >= line_end \|\| !ISALNUM(end+`1`)))
3974	continue;
3975	end++;
3976	}
3977
3978	while(end < line_end) {
3979	if(ISALNUM(end)) {
3980	if(n_components == `0`)
3981	n_components++;
3982	end++;
3983	} else if(end < line_end &&
3984	ISANYOF(end, URL_MAP[i].allowed_nonalnum_chars) &&
3985	md_scan_right_for_resolved_mark(ctx, mark_from: right_cursor, off: end, p_cursor: &right_cursor) == NULL &&
3986	((end > line_beg && (ISALNUM(end-`1`) \|\| CH(end-`1`) == _T(`')'`))) \|\| CH(end) == _T(`'('`)) &&
3987	((end+`1` < line_end && (ISALNUM(end+`1`) \|\| CH(end+`1`) == _T(`'('`))) \|\| CH(end) == _T(`')'`)))
3988	{
3989	if(CH(end) == URL_MAP[i].delim_char)
3990	n_components++;
3991
3992	/ brackets have to be balanced. /
3993	if(CH(end) == _T(`'('`)) {
3994	n_open_brackets++;
3995	} else if(CH(end) == _T(`')'`)) {
3996	if(n_open_brackets <= `0`)
3997	break;
3998	n_open_brackets--;
3999	}
4000
4001	end++;
4002	} else {
4003	break;
4004	}
4005	}
4006
4007	if(end < line_end && URL_MAP[i].optional_end_char != _T(`'\0'`) &&
4008	CH(end) == URL_MAP[i].optional_end_char)
4009	end++;
4010
4011	if(n_components < URL_MAP[i].min_components \|\| n_open_brackets != `0`)
4012	return;
4013
4014	if(opener->ch == `'@'`) / E-mail autolinks wants only the host. /
4015	break;
4016	}
4017
4018	/ Verify there's line boundary, whitespace, allowed punctuation or*
4019	* resolved emphasis mark just after the suspected autolink. */
4020	if(end == line_end \|\| ISUNICODEWHITESPACE(end) \|\| ISANYOF(end, _T(")}].!?,;"))) {
4021	right_boundary_ok = TRUE;
4022	} else {
4023	MD_MARK* right_mark;
4024
4025	right_mark = md_scan_right_for_resolved_mark(ctx, mark_from: right_cursor, off: end, p_cursor: &right_cursor);
4026	if(right_mark != NULL && (right_mark->flags & MD_MARK_CLOSER))
4027	right_boundary_ok = TRUE;
4028	}
4029	if(!right_boundary_ok)
4030	return;
4031
4032	/ Success, we are an autolink. /
4033	opener->beg = beg;
4034	opener->end = beg;
4035	closer->beg = end;
4036	closer->end = end;
4037	closer->ch = opener->ch;
4038	md_resolve_range(ctx, opener_index: mark_index, closer_index: mark_index + `1`);
4039	}
4040
4041	#define MD_ANALYZE_NOSKIP_EMPH 0x01
4042
4043	static inline void
4044	md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
4045	int mark_beg, int mark_end, const CHAR* mark_chars, unsigned flags)
4046	{
4047	int i = mark_beg;
4048	OFF last_end = lines[`0`].beg;
4049
4050	MD_UNUSED(lines);
4051	MD_UNUSED(n_lines);
4052
4053	while(i < mark_end) {
4054	MD_MARK* mark = &ctx->marks[i];
4055
4056	/ Skip resolved spans. /
4057	if(mark->flags & MD_MARK_RESOLVED) {
4058	if((mark->flags & MD_MARK_OPENER) &&
4059	!((flags & MD_ANALYZE_NOSKIP_EMPH) && ISANYOF_(mark->ch, "*_~")))
4060	{
4061	MD_ASSERT(i < mark->next);
4062	i = mark->next + `1`;
4063	} else {
4064	i++;
4065	}
4066	continue;
4067	}
4068
4069	/ Skip marks we do not want to deal with. /
4070	if(!ISANYOF_(mark->ch, mark_chars)) {
4071	i++;
4072	continue;
4073	}
4074
4075	/ The resolving in previous step could have expanded a mark. /
4076	if(mark->beg < last_end) {
4077	i++;
4078	continue;
4079	}
4080
4081	/ Analyze the mark. /
4082	switch(mark->ch) {
4083	case `'['`: / Pass through. /
4084	case `'!'`: / Pass through. /
4085	case `']'`: md_analyze_bracket(ctx, mark_index: i); break;
4086	case `'&'`: md_analyze_entity(ctx, mark_index: i); break;
4087	case `'\|'`: md_analyze_table_cell_boundary(ctx, mark_index: i); break;
4088	case `'_'`: / Pass through. /
4089	case `''`: md_analyze_emph(ctx, mark_index: i); break*;
4090	case `'~'`: md_analyze_tilde(ctx, mark_index: i); break;
4091	case `'$'`: md_analyze_dollar(ctx, mark_index: i); break;
4092	case `'.'`: / Pass through. /
4093	case `':'`: / Pass through. /
4094	case `'@'`: md_analyze_permissive_autolink(ctx, mark_index: i); break;
4095	}
4096
4097	if(mark->flags & MD_MARK_RESOLVED) {
4098	if(mark->flags & MD_MARK_OPENER)
4099	last_end = ctx->marks[mark->next].end;
4100	else
4101	last_end = mark->end;
4102	}
4103
4104	i++;
4105	}
4106	}
4107
4108	/ Analyze marks (build ctx->marks). /
4109	static int
4110	md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_mode)
4111	{
4112	int ret;
4113
4114	/ Reset the previously collected stack of marks. /
4115	ctx->n_marks = `0`;
4116
4117	/ Collect all marks. /
4118	MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
4119
4120	/ (1) Links. /
4121	md_analyze_marks(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks, _T("[]!"), flags: `0`);
4122	MD_CHECK(md_resolve_links(ctx, lines, n_lines));
4123	BRACKET_OPENERS.top = -`1`;
4124	ctx->unresolved_link_head = -`1`;
4125	ctx->unresolved_link_tail = -`1`;
4126
4127	if(table_mode) {
4128	/ (2) Analyze table cell boundaries. /
4129	MD_ASSERT(n_lines == `1`);
4130	ctx->n_table_cell_boundaries = `0`;
4131	md_analyze_marks(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks, _T("\|"), flags: `0`);
4132	return ret;
4133	}
4134
4135	/ (3) Emphasis and strong emphasis; permissive autolinks. /
4136	md_analyze_link_contents(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks);
4137
4138	abort:
4139	return ret;
4140	}
4141
4142	static void
4143	md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
4144	int mark_beg, int mark_end)
4145	{
4146	int i;
4147
4148	md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("&"), flags: `0`);
4149	md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$"), flags: `0`);
4150
4151	if((ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) != `0`) {
4152	/ These have to be processed last, as they may be greedy and expand*
4153	* from their original mark. Also their implementation must be careful
4154	* not to cross any (previously) resolved marks when doing so. */
4155	md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("@:."), MD_ANALYZE_NOSKIP_EMPH);
4156	}
4157
4158	for(i = `0`; i < (int) SIZEOF_ARRAY(ctx->opener_stacks); i++)
4159	ctx->opener_stacks[i].top = -`1`;
4160	}
4161
4162	static int
4163	md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
4164	const CHAR* dest, SZ dest_size, int is_autolink,
4165	const CHAR* title, SZ title_size)
4166	{
4167	MD_ATTRIBUTE_BUILD href_build = { `0` };
4168	MD_ATTRIBUTE_BUILD title_build = { `0` };
4169	MD_SPAN_A_DETAIL det;
4170	int ret = `0`;
4171
4172	/ Note we here rely on fact that MD_SPAN_A_DETAIL and*
4173	* MD_SPAN_IMG_DETAIL are binary-compatible. */
4174	memset(s: &det, c: `0`, n: sizeof(MD_SPAN_A_DETAIL));
4175	MD_CHECK(md_build_attribute(ctx, dest, dest_size,
4176	(is_autolink ? MD_BUILD_ATTR_NO_ESCAPES : `0`),
4177	&det.href, &href_build));
4178	MD_CHECK(md_build_attribute(ctx, title, title_size, `0`, &det.title, &title_build));
4179	det.is_autolink = is_autolink;
4180	if(enter)
4181	MD_ENTER_SPAN(type, &det);
4182	else
4183	MD_LEAVE_SPAN(type, &det);
4184
4185	abort:
4186	md_free_attribute(ctx, build: &href_build);
4187	md_free_attribute(ctx, build: &title_build);
4188	return ret;
4189	}
4190
4191	static int
4192	md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
4193	{
4194	MD_ATTRIBUTE_BUILD target_build = { `0` };
4195	MD_SPAN_WIKILINK_DETAIL det;
4196	int ret = `0`;
4197
4198	memset(s: &det, c: `0`, n: sizeof(MD_SPAN_WIKILINK_DETAIL));
4199	MD_CHECK(md_build_attribute(ctx, target, target_size, `0`, &det.target, &target_build));
4200
4201	if (enter)
4202	MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
4203	else
4204	MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
4205
4206	abort:
4207	md_free_attribute(ctx, build: &target_build);
4208	return ret;
4209	}
4210
4211
4212	/ Render the output, accordingly to the analyzed ctx->marks. /
4213	static int
4214	md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
4215	{
4216	MD_TEXTTYPE text_type;
4217	const MD_LINE* line = lines;
4218	MD_MARK* prev_mark = NULL;
4219	MD_MARK* mark;
4220	OFF off = lines[`0`].beg;
4221	OFF end = lines[n_lines-`1`].end;
4222	OFF tmp;
4223	int enforce_hardbreak = `0`;
4224	int ret = `0`;
4225
4226	/ Find first resolved mark. Note there is always at least one resolved*
4227	* mark, the dummy last one after the end of the latest line we actually
4228	* never really reach. This saves us of a lot of special checks and cases
4229	* in this function. */
4230	mark = ctx->marks;
4231	while(!(mark->flags & MD_MARK_RESOLVED))
4232	mark++;
4233
4234	text_type = MD_TEXT_NORMAL;
4235
4236	while(`1`) {
4237	/ Process the text up to the next mark or end-of-line. /
4238	tmp = (line->end < mark->beg ? line->end : mark->beg);
4239	if(tmp > off) {
4240	MD_TEXT(text_type, STR(off), tmp - off);
4241	off = tmp;
4242	}
4243
4244	/ If reached the mark, process it and move to next one. /
4245	if(off >= mark->beg) {
4246	switch(mark->ch) {
4247	case `'\\'`: / Backslash escape. /
4248	if(ISNEWLINE(mark->beg+`1`))
4249	enforce_hardbreak = `1`;
4250	else
4251	MD_TEXT(text_type, STR(mark->beg+`1`), `1`);
4252	break;
4253
4254	case `' '`: / Non-trivial space. /
4255	MD_TEXT(text_type, _T(" "), `1`);
4256	break;
4257
4258	case '`': / Code span. /
4259	if(mark->flags & MD_MARK_OPENER) {
4260	MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
4261	text_type = MD_TEXT_CODE;
4262	} else {
4263	MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
4264	text_type = MD_TEXT_NORMAL;
4265	}
4266	break;
4267
4268	case `'_'`: / Underline (or emphasis if we fall through). /
4269	if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
4270	if(mark->flags & MD_MARK_OPENER) {
4271	while(off < mark->end) {
4272	MD_ENTER_SPAN(MD_SPAN_U, NULL);
4273	off++;
4274	}
4275	} else {
4276	while(off < mark->end) {
4277	MD_LEAVE_SPAN(MD_SPAN_U, NULL);
4278	off++;
4279	}
4280	}
4281	break;
4282	}
4283	MD_FALLTHROUGH();
4284
4285	case `''`: /* Emphasis, strong emphasis. /
4286	if(mark->flags & MD_MARK_OPENER) {
4287	if((mark->end - off) % `2`) {
4288	MD_ENTER_SPAN(MD_SPAN_EM, NULL);
4289	off++;
4290	}
4291	while(off + `1` < mark->end) {
4292	MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
4293	off += `2`;
4294	}
4295	} else {
4296	while(off + `1` < mark->end) {
4297	MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
4298	off += `2`;
4299	}
4300	if((mark->end - off) % `2`) {
4301	MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
4302	off++;
4303	}
4304	}
4305	break;
4306
4307	case `'~'`:
4308	if(mark->flags & MD_MARK_OPENER)
4309	MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
4310	else
4311	MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
4312	break;
4313
4314	case `'$'`:
4315	if(mark->flags & MD_MARK_OPENER) {
4316	MD_ENTER_SPAN((mark->end - off) % `2` ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4317	text_type = MD_TEXT_LATEXMATH;
4318	} else {
4319	MD_LEAVE_SPAN((mark->end - off) % `2` ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4320	text_type = MD_TEXT_NORMAL;
4321	}
4322	break;
4323
4324	case `'['`: / Link, wiki link, image. /
4325	case `'!'`:
4326	case `']'`:
4327	{
4328	const MD_MARK* opener = (mark->ch != `']'` ? mark : &ctx->marks[mark->prev]);
4329	const MD_MARK* closer = &ctx->marks[opener->next];
4330	const MD_MARK* dest_mark;
4331	const MD_MARK* title_mark;
4332
4333	if ((opener->ch == `'['` && closer->ch == `']'`) &&
4334	opener->end - opener->beg >= `2` &&
4335	closer->end - closer->beg >= `2`)
4336	{
4337	int has_label = (opener->end - opener->beg > `2`);
4338	SZ target_sz;
4339
4340	if(has_label)
4341	target_sz = opener->end - (opener->beg+`2`);
4342	else
4343	target_sz = closer->beg - opener->end;
4344
4345	MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != `']'`),
4346	has_label ? STR(opener->beg+`2`) : STR(opener->end),
4347	target_sz));
4348
4349	break;
4350	}
4351
4352	dest_mark = opener+`1`;
4353	MD_ASSERT(dest_mark->ch == `'D'`);
4354	title_mark = opener+`2`;
4355	MD_ASSERT(title_mark->ch == `'D'`);
4356
4357	MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != `']'`),
4358	(opener->ch == `'!'` ? MD_SPAN_IMG : MD_SPAN_A),
4359	STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
4360	md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)),
4361	title_mark->prev));
4362
4363	/ link/image closer may span multiple lines. /
4364	if(mark->ch == `']'`) {
4365	while(mark->end > line->end)
4366	line++;
4367	}
4368
4369	break;
4370	}
4371
4372	case `'<'`:
4373	case `'>'`: / Autolink or raw HTML. /
4374	if(!(mark->flags & MD_MARK_AUTOLINK)) {
4375	/ Raw HTML. /
4376	if(mark->flags & MD_MARK_OPENER)
4377	text_type = MD_TEXT_HTML;
4378	else
4379	text_type = MD_TEXT_NORMAL;
4380	break;
4381	}
4382	/ Pass through, if auto-link. /
4383	MD_FALLTHROUGH();
4384
4385	case `'@'`: / Permissive e-mail autolink. /
4386	case `':'`: / Permissive URL autolink. /
4387	case `'.'`: / Permissive WWW autolink. /
4388	{
4389	MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
4390	MD_MARK* closer = &ctx->marks[opener->next];
4391	const CHAR* dest = STR(opener->end);
4392	SZ dest_size = closer->beg - opener->end;
4393
4394	/ For permissive auto-links we do not know closer mark*
4395	* position at the time of md_collect_marks(), therefore
4396	* it can be out-of-order in ctx->marks[].
4397	*
4398	* With this flag, we make sure that we output the closer
4399	* only if we processed the opener. */
4400	if(mark->flags & MD_MARK_OPENER)
4401	closer->flags \|= MD_MARK_VALIDPERMISSIVEAUTOLINK;
4402
4403	if(opener->ch == `'@'` \|\| opener->ch == `'.'` \|\|
4404	(opener->ch == `'<'` && (opener->flags & MD_MARK_AUTOLINK_MISSING_MAILTO)))
4405	{
4406	dest_size += `7`;
4407	MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
4408	memcpy(dest: ctx->buffer,
4409	src: (opener->ch == `'.'` ? _T("http://") : _T("mailto:")),
4410	n: `7` * sizeof(CHAR));
4411	memcpy(dest: ctx->buffer + `7`, src: dest, n: (dest_size-`7`) * sizeof(CHAR));
4412	dest = ctx->buffer;
4413	}
4414
4415	if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
4416	MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
4417	MD_SPAN_A, dest, dest_size, TRUE, NULL, `0`));
4418	break;
4419	}
4420
4421	case `'&'`: / Entity. /
4422	MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
4423	break;
4424
4425	case `'\0'`:
4426	MD_TEXT(MD_TEXT_NULLCHAR, _T(""), `1`);
4427	break;
4428
4429	case `127`:
4430	goto abort;
4431	}
4432
4433	off = mark->end;
4434
4435	/ Move to next resolved mark. /
4436	prev_mark = mark;
4437	mark++;
4438	while(!(mark->flags & MD_MARK_RESOLVED) \|\| mark->beg < off)
4439	mark++;
4440	}
4441
4442	/ If reached end of line, move to next one. /
4443	if(off >= line->end) {
4444	/ If it is the last line, we are done. /
4445	if(off >= end)
4446	break;
4447
4448	if(text_type == MD_TEXT_CODE \|\| text_type == MD_TEXT_LATEXMATH) {
4449	MD_ASSERT(prev_mark != NULL);
4450	MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', `'$'`) && (prev_mark->flags & MD_MARK_OPENER));
4451	MD_ASSERT(ISANYOF2_(mark->ch, '`', `'$'`) && (mark->flags & MD_MARK_CLOSER));
4452
4453	/ Inside a code span, trailing line whitespace has to be*
4454	* outputted. */
4455	tmp = off;
4456	while(off < ctx->size && ISBLANK(off))
4457	off++;
4458	if(off > tmp)
4459	MD_TEXT(text_type, STR(tmp), off-tmp);
4460
4461	/ and new lines are transformed into single spaces. /
4462	if(off == line->end)
4463	MD_TEXT(text_type, _T(" "), `1`);
4464	} else if(text_type == MD_TEXT_HTML) {
4465	/ Inside raw HTML, we output the new line verbatim, including*
4466	* any trailing spaces. */
4467	tmp = off;
4468	while(tmp < end && ISBLANK(tmp))
4469	tmp++;
4470	if(tmp > off)
4471	MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
4472	MD_TEXT(MD_TEXT_HTML, _T("\n"), `1`);
4473	} else {
4474	/ Output soft or hard line break. /
4475	MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
4476
4477	if(text_type == MD_TEXT_NORMAL) {
4478	if(ctx->parser.flags & MD_FLAG_HARD_SOFT_BREAKS)
4479	break_type = MD_TEXT_BR;
4480	else if(enforce_hardbreak)
4481	break_type = MD_TEXT_BR;
4482	else if((CH(line->end) == _T(`' '`) && CH(line->end+`1`) == _T(`' '`)))
4483	break_type = MD_TEXT_BR;
4484	}
4485
4486	MD_TEXT(break_type, _T("\n"), `1`);
4487	}
4488
4489	/ Move to the next line. /
4490	line++;
4491	off = line->beg;
4492
4493	enforce_hardbreak = `0`;
4494	}
4495	}
4496
4497	abort:
4498	return ret;
4499	}
4500
4501
4502	/***************************
4503	* Processing Tables *
4504	***************************/
4505
4506	static void
4507	md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
4508	{
4509	static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
4510	OFF off = beg;
4511
4512	while(n_align > `0`) {
4513	int index = `0`; / index into align_map[] /
4514
4515	while(CH(off) != _T(`'-'`))
4516	off++;
4517	if(off > beg && CH(off-`1`) == _T(`':'`))
4518	index \|= `1`;
4519	while(off < end && CH(off) == _T(`'-'`))
4520	off++;
4521	if(off < end && CH(off) == _T(`':'`))
4522	index \|= `2`;
4523
4524	*align = align_map[index];
4525	align++;
4526	n_align--;
4527	}
4528
4529	}
4530
4531	/ Forward declaration. /
4532	static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines);
4533
4534	static int
4535	md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4536	{
4537	MD_LINE line;
4538	MD_BLOCK_TD_DETAIL det;
4539	int ret = `0`;
4540
4541	while(beg < end && ISWHITESPACE(beg))
4542	beg++;
4543	while(end > beg && ISWHITESPACE(end-`1`))
4544	end--;
4545
4546	det.align = align;
4547	line.beg = beg;
4548	line.end = end;
4549
4550	MD_ENTER_BLOCK(cell_type, &det);
4551	MD_CHECK(md_process_normal_block_contents(ctx, &line, `1`));
4552	MD_LEAVE_BLOCK(cell_type, &det);
4553
4554	abort:
4555	return ret;
4556	}
4557
4558	static int
4559	md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4560	const MD_ALIGN* align, int col_count)
4561	{
4562	MD_LINE line;
4563	OFF* pipe_offs = NULL;
4564	int i, j, k, n;
4565	int ret = `0`;
4566
4567	line.beg = beg;
4568	line.end = end;
4569
4570	/ Break the line into table cells by identifying pipe characters who*
4571	* form the cell boundary. */
4572	MD_CHECK(md_analyze_inlines(ctx, &line, `1`, TRUE));
4573
4574	/ We have to remember the cell boundaries in local buffer because*
4575	* ctx->marks[] shall be reused during cell contents processing. */
4576	n = ctx->n_table_cell_boundaries + `2`;
4577	pipe_offs = (OFF) malloc(size: n sizeof(OFF));
4578	if(pipe_offs == NULL) {
4579	MD_LOG("malloc() failed.");
4580	ret = -`1`;
4581	goto abort;
4582	}
4583	j = `0`;
4584	pipe_offs[j++] = beg;
4585	for(i = ctx->table_cell_boundaries_head; i >= `0`; i = ctx->marks[i].next) {
4586	MD_MARK* mark = &ctx->marks[i];
4587	pipe_offs[j++] = mark->end;
4588	}
4589	pipe_offs[j++] = end+`1`;
4590
4591	/ Process cells. /
4592	MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4593	k = `0`;
4594	for(i = `0`; i < j-`1` && k < col_count; i++) {
4595	if(pipe_offs[i] < pipe_offs[i+`1`]-`1`)
4596	MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+`1`]-`1`));
4597	}
4598	/ Make sure we call enough table cells even if the current table contains*
4599	* too few of them. */
4600	while(k < col_count)
4601	MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], `0`, `0`));
4602	MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4603
4604	abort:
4605	free(ptr: pipe_offs);
4606
4607	ctx->table_cell_boundaries_head = -`1`;
4608	ctx->table_cell_boundaries_tail = -`1`;
4609
4610	return ret;
4611	}
4612
4613	static int
4614	md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, MD_SIZE n_lines)
4615	{
4616	MD_ALIGN* align;
4617	MD_SIZE line_index;
4618	int ret = `0`;
4619
4620	/ At least two lines have to be present: The column headers and the line*
4621	* with the underlines. */
4622	MD_ASSERT(n_lines >= `2`);
4623
4624	align = malloc(size: col_count * sizeof(MD_ALIGN));
4625	if(align == NULL) {
4626	MD_LOG("malloc() failed.");
4627	ret = -`1`;
4628	goto abort;
4629	}
4630
4631	md_analyze_table_alignment(ctx, beg: lines[`1`].beg, end: lines[`1`].end, align, n_align: col_count);
4632
4633	MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4634	MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4635	lines[`0`].beg, lines[`0`].end, align, col_count));
4636	MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4637
4638	if(n_lines > `2`) {
4639	MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4640	for(line_index = `2`; line_index < n_lines; line_index++) {
4641	MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4642	lines[line_index].beg, lines[line_index].end, align, col_count));
4643	}
4644	MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4645	}
4646
4647	abort:
4648	free(ptr: align);
4649	return ret;
4650	}
4651
4652
4653	/**************************
4654	* Processing Block *
4655	**************************/
4656
/* Block flag bits. MD_BLOCK_CONTAINER is the mask covering both the
 * container opener and the container closer bit.
 * NOTE(review): presumably these are the values kept in
 * MD_BLOCK_tag::flags below -- confirm against the code using them. */
#define MD_BLOCK_CONTAINER_OPENER   0x01
#define MD_BLOCK_CONTAINER_CLOSER   0x02
#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
#define MD_BLOCK_LOOSE_LIST         0x04
#define MD_BLOCK_SETEXT_HEADER      0x08

/* Compact description of one block. The first three members are bit-fields
 * sharing 32 bits in total (8 + 8 + 16). */
struct MD_BLOCK_tag {
    MD_BLOCKTYPE type  :  8;
    unsigned flags     :  8;

    /* MD_BLOCK_H:      Header level (1 - 6)
     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
     */
    unsigned data      : 16;

    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
     * MD_BLOCK_LI:     Task mark offset in the input doc.
     * MD_BLOCK_OL:     Start item number.
     */
    MD_SIZE n_lines;
};
4680
/* Description of one container.
 * NOTE(review): the member notes below are inferred from the member names
 * only; the code using them is outside of this part of the file -- confirm
 * before relying on them. */
struct MD_CONTAINER_tag {
    CHAR ch;                    /* presumably the character introducing the container */
    unsigned is_loose    : 8;   /* presumably non-zero for a loose list */
    unsigned is_task     : 8;   /* presumably non-zero for a task list item */
    unsigned start;             /* presumably the start number of an ordered list */
    unsigned mark_indent;
    unsigned contents_indent;
    OFF block_byte_off;
    OFF task_mark_off;          /* presumably the offset of the task mark */
};
4691
4692
4693	static int
4694	md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
4695	{
4696	int i;
4697	int ret;
4698
4699	MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4700	MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4701
4702	abort:
4703	/ Free any temporary memory blocks stored within some dummy marks. /
4704	for(i = ctx->ptr_stack.top; i >= `0`; i = ctx->marks[i].next)
4705	free(ptr: md_mark_get_ptr(ctx, mark_index: i));
4706	ctx->ptr_stack.top = -`1`;
4707
4708	return ret;
4709	}
4710
4711	static int
4712	md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, MD_SIZE n_lines)
4713	{
4714	static const CHAR indent_chunk_str[] = _T(" ");
4715	static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - `1`;
4716
4717	MD_SIZE line_index;
4718	int ret = `0`;
4719
4720	for(line_index = `0`; line_index < n_lines; line_index++) {
4721	const MD_VERBATIMLINE* line = &lines[line_index];
4722	int indent = line->indent;
4723
4724	MD_ASSERT(indent >= `0`);
4725
4726	/ Output code indentation. /
4727	while(indent > (int) indent_chunk_size) {
4728	MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4729	indent -= indent_chunk_size;
4730	}
4731	if(indent > `0`)
4732	MD_TEXT(text_type, indent_chunk_str, indent);
4733
4734	/ Output the code line itself. /
4735	MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4736
4737	/ Enforce end-of-line. /
4738	MD_TEXT(text_type, _T("\n"), `1`);
4739	}
4740
4741	abort:
4742	return ret;
4743	}
4744
4745	static int
4746	md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, MD_SIZE n_lines)
4747	{
4748	if(is_fenced) {
4749	/ Skip the first line in case of fenced code: It is the fence.*
4750	* (Only the starting fence is present due to logic in md_analyze_line().) */
4751	lines++;
4752	n_lines--;
4753	} else {
4754	/ Ignore blank lines at start/end of indented code block. /
4755	while(n_lines > `0` && lines[`0`].beg == lines[`0`].end) {
4756	lines++;
4757	n_lines--;
4758	}
4759	while(n_lines > `0` && lines[n_lines-`1`].beg == lines[n_lines-`1`].end) {
4760	n_lines--;
4761	}
4762	}
4763
4764	if(n_lines == `0`)
4765	return `0`;
4766
4767	return md_process_verbatim_block_contents(ctx, text_type: MD_TEXT_CODE, lines, n_lines);
4768	}
4769
4770	static int
4771	md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4772	MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4773	{
4774	const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + `1`);
4775	OFF beg = fence_line->beg;
4776	OFF end = fence_line->end;
4777	OFF lang_end;
4778	CHAR fence_ch = CH(fence_line->beg);
4779	int ret = `0`;
4780
4781	/ Skip the fence itself. /
4782	while(beg < ctx->size && CH(beg) == fence_ch)
4783	beg++;
4784	/ Trim initial spaces. /
4785	while(beg < ctx->size && CH(beg) == _T(`' '`))
4786	beg++;
4787
4788	/ Trim trailing spaces. /
4789	while(end > beg && CH(end-`1`) == _T(`' '`))
4790	end--;
4791
4792	/ Build info string attribute. /
4793	MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, `0`, &det->info, info_build));
4794
4795	/ Build info string attribute. /
4796	lang_end = beg;
4797	while(lang_end < end && !ISWHITESPACE(lang_end))
4798	lang_end++;
4799	MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, `0`, &det->lang, lang_build));
4800
4801	det->fence_char = fence_ch;
4802
4803	abort:
4804	return ret;
4805	}
4806
4807	static int
4808	md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4809	{
4810	union {
4811	MD_BLOCK_H_DETAIL header;
4812	MD_BLOCK_CODE_DETAIL code;
4813	MD_BLOCK_TABLE_DETAIL table;
4814	} det;
4815	MD_ATTRIBUTE_BUILD info_build;
4816	MD_ATTRIBUTE_BUILD lang_build;
4817	int is_in_tight_list;
4818	int clean_fence_code_detail = FALSE;
4819	int ret = `0`;
4820
4821	memset(s: &det, c: `0`, n: sizeof(det));
4822
4823	if(ctx->n_containers == `0`)
4824	is_in_tight_list = FALSE;
4825	else
4826	is_in_tight_list = !ctx->containers[ctx->n_containers-`1`].is_loose;
4827
4828	switch(block->type) {
4829	case MD_BLOCK_H:
4830	det.header.level = block->data;
4831	break;
4832
4833	case MD_BLOCK_CODE:
4834	/ For fenced code block, we may need to set the info string. /
4835	if(block->data != `0`) {
4836	memset(s: &det.code, c: `0`, n: sizeof(MD_BLOCK_CODE_DETAIL));
4837	clean_fence_code_detail = TRUE;
4838	MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4839	}
4840	break;
4841
4842	case MD_BLOCK_TABLE:
4843	det.table.col_count = block->data;
4844	det.table.head_row_count = `1`;
4845	det.table.body_row_count = block->n_lines - `2`;
4846	break;
4847
4848	default:
4849	/ Noop. /
4850	break;
4851	}
4852
4853	if(!is_in_tight_list \|\| block->type != MD_BLOCK_P)
4854	MD_ENTER_BLOCK(block->type, (void*) &det);
4855
4856	/ Process the block contents accordingly to is type. /
4857	switch(block->type) {
4858	case MD_BLOCK_HR:
4859	/ noop /
4860	break;
4861
4862	case MD_BLOCK_CODE:
4863	MD_CHECK(md_process_code_block_contents(ctx, (block->data != `0`),
4864	(const MD_VERBATIMLINE*)(block + `1`), block->n_lines));
4865	break;
4866
4867	case MD_BLOCK_HTML:
4868	MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4869	(const MD_VERBATIMLINE*)(block + `1`), block->n_lines));
4870	break;
4871
4872	case MD_BLOCK_TABLE:
4873	MD_CHECK(md_process_table_block_contents(ctx, block->data,
4874	(const MD_LINE*)(block + `1`), block->n_lines));
4875	break;
4876
4877	default:
4878	MD_CHECK(md_process_normal_block_contents(ctx,
4879	(const MD_LINE*)(block + `1`), block->n_lines));
4880	break;
4881	}
4882
4883	if(!is_in_tight_list \|\| block->type != MD_BLOCK_P)
4884	MD_LEAVE_BLOCK(block->type, (void*) &det);
4885
4886	abort:
4887	if(clean_fence_code_detail) {
4888	md_free_attribute(ctx, build: &info_build);
4889	md_free_attribute(ctx, build: &lang_build);
4890	}
4891	return ret;
4892	}
4893
4894	static int
4895	md_process_all_blocks(MD_CTX* ctx)
4896	{
4897	int byte_off = `0`;
4898	int ret = `0`;
4899
4900	/ ctx->containers now is not needed for detection of lists and list items*
4901	* so we reuse it for tracking what lists are loose or tight. We rely
4902	* on the fact the vector is large enough to hold the deepest nesting
4903	* level of lists. */
4904	ctx->n_containers = `0`;
4905
4906	while(byte_off < ctx->n_block_bytes) {
4907	MD_BLOCK* block = (MD_BLOCK)((char**)ctx->block_bytes + byte_off);
4908	union {
4909	MD_BLOCK_UL_DETAIL ul;
4910	MD_BLOCK_OL_DETAIL ol;
4911	MD_BLOCK_LI_DETAIL li;
4912	} det;
4913
4914	switch(block->type) {
4915	case MD_BLOCK_UL:
4916	det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4917	det.ul.mark = (CHAR) block->data;
4918	break;
4919
4920	case MD_BLOCK_OL:
4921	det.ol.start = block->n_lines;
4922	det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4923	det.ol.mark_delimiter = (CHAR) block->data;
4924	break;
4925
4926	case MD_BLOCK_LI:
4927	det.li.is_task = (block->data != `0`);
4928	det.li.task_mark = (CHAR) block->data;
4929	det.li.task_mark_offset = (OFF) block->n_lines;
4930	break;
4931
4932	default:
4933	/ noop /
4934	break;
4935	}
4936
4937	if(block->flags & MD_BLOCK_CONTAINER) {
4938	if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4939	MD_LEAVE_BLOCK(block->type, &det);
4940
4941	if(block->type == MD_BLOCK_UL \|\| block->type == MD_BLOCK_OL \|\| block->type == MD_BLOCK_QUOTE)
4942	ctx->n_containers--;
4943	}
4944
4945	if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4946	MD_ENTER_BLOCK(block->type, &det);
4947
4948	if(block->type == MD_BLOCK_UL \|\| block->type == MD_BLOCK_OL) {
4949	ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4950	ctx->n_containers++;
4951	} else if(block->type == MD_BLOCK_QUOTE) {
4952	/ This causes that any text in a block quote, even if*
4953	* nested inside a tight list item, is wrapped with
4954	* <p>...</p>. */
4955	ctx->containers[ctx->n_containers].is_loose = TRUE;
4956	ctx->n_containers++;
4957	}
4958	}
4959	} else {
4960	MD_CHECK(md_process_leaf_block(ctx, block));
4961
4962	if(block->type == MD_BLOCK_CODE \|\| block->type == MD_BLOCK_HTML)
4963	byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4964	else
4965	byte_off += block->n_lines * sizeof(MD_LINE);
4966	}
4967
4968	byte_off += sizeof(MD_BLOCK);
4969	}
4970
4971	ctx->n_block_bytes = `0`;
4972
4973	abort:
4974	return ret;
4975	}
4976
4977
4978	/************************************
4979	* Grouping Lines into Blocks *
4980	************************************/
4981
4982	static void*
4983	md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4984	{
4985	void* ptr;
4986
4987	if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4988	void* new_block_bytes;
4989
4990	ctx->alloc_block_bytes = (ctx->alloc_block_bytes > `0`
4991	? ctx->alloc_block_bytes + ctx->alloc_block_bytes / `2`
4992	: `512`);
4993	new_block_bytes = realloc(ptr: ctx->block_bytes, size: ctx->alloc_block_bytes);
4994	if(new_block_bytes == NULL) {
4995	MD_LOG("realloc() failed.");
4996	return NULL;
4997	}
4998
4999	/ Fix the ->current_block after the reallocation. /
5000	if(ctx->current_block != NULL) {
5001	OFF off_current_block = (OFF) ((char) ctx->current_block - (char**) ctx->block_bytes);
5002	ctx->current_block = (MD_BLOCK) ((char**) new_block_bytes + off_current_block);
5003	}
5004
5005	ctx->block_bytes = new_block_bytes;
5006	}
5007
5008	ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
5009	ctx->n_block_bytes += n_bytes;
5010	return ptr;
5011	}
5012
5013	static int
5014	md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
5015	{
5016	MD_BLOCK* block;
5017
5018	MD_ASSERT(ctx->current_block == NULL);
5019
5020	block = (MD_BLOCK) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_BLOCK));
5021	if(block == NULL)
5022	return -`1`;
5023
5024	switch(line->type) {
5025	case MD_LINE_HR:
5026	block->type = MD_BLOCK_HR;
5027	break;
5028
5029	case MD_LINE_ATXHEADER:
5030	case MD_LINE_SETEXTHEADER:
5031	block->type = MD_BLOCK_H;
5032	break;
5033
5034	case MD_LINE_FENCEDCODE:
5035	case MD_LINE_INDENTEDCODE:
5036	block->type = MD_BLOCK_CODE;
5037	break;
5038
5039	case MD_LINE_TEXT:
5040	block->type = MD_BLOCK_P;
5041	break;
5042
5043	case MD_LINE_HTML:
5044	block->type = MD_BLOCK_HTML;
5045	break;
5046
5047	case MD_LINE_BLANK:
5048	case MD_LINE_SETEXTUNDERLINE:
5049	case MD_LINE_TABLEUNDERLINE:
5050	default:
5051	MD_UNREACHABLE();
5052	break;
5053	}
5054
5055	block->flags = `0`;
5056	block->data = line->data;
5057	block->n_lines = `0`;
5058
5059	ctx->current_block = block;
5060	return `0`;
5061	}
5062
5063	/ Eat from start of current (textual) block any reference definitions and*
5064	* remember them so we can resolve any links referring to them.
5065	*
5066	* (Reference definitions can only be at start of it as they cannot break
5067	* a paragraph.)
5068	*/
5069	static int
5070	md_consume_link_reference_definitions(MD_CTX* ctx)
5071	{
5072	MD_LINE* lines = (MD_LINE*) (ctx->current_block + `1`);
5073	MD_SIZE n_lines = ctx->current_block->n_lines;
5074	MD_SIZE n = `0`;
5075
5076	/ Compute how many lines at the start of the block form one or more*
5077	* reference definitions. */
5078	while(n < n_lines) {
5079	int n_link_ref_lines;
5080
5081	n_link_ref_lines = md_is_link_reference_definition(ctx,
5082	lines: lines + n, n_lines: n_lines - n);
5083	/ Not a reference definition? /
5084	if(n_link_ref_lines == `0`)
5085	break;
5086
5087	/ We fail if it is the ref. def. but it could not be stored due*
5088	* a memory allocation error. */
5089	if(n_link_ref_lines < `0`)
5090	return -`1`;
5091
5092	n += n_link_ref_lines;
5093	}
5094
5095	/ If there was at least one reference definition, we need to remove*
5096	* its lines from the block, or perhaps even the whole block. */
5097	if(n > `0`) {
5098	if(n == n_lines) {
5099	/ Remove complete block. /
5100	ctx->n_block_bytes -= n * sizeof(MD_LINE);
5101	ctx->n_block_bytes -= sizeof(MD_BLOCK);
5102	ctx->current_block = NULL;
5103	} else {
5104	/ Remove just some initial lines from the block. /
5105	memmove(dest: lines, src: lines + n, n: (n_lines - n) * sizeof(MD_LINE));
5106	ctx->current_block->n_lines -= n;
5107	ctx->n_block_bytes -= n * sizeof(MD_LINE);
5108	}
5109	}
5110
5111	return `0`;
5112	}
5113
5114	static int
5115	md_end_current_block(MD_CTX* ctx)
5116	{
5117	int ret = `0`;
5118
5119	if(ctx->current_block == NULL)
5120	return ret;
5121
5122	/ Check whether there is a reference definition. (We do this here instead*
5123	* of in md_analyze_line() because reference definition can take multiple
5124	* lines.) */
5125	if(ctx->current_block->type == MD_BLOCK_P \|\|
5126	(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
5127	{
5128	MD_LINE* lines = (MD_LINE*) (ctx->current_block + `1`);
5129	if(lines[`0`].beg < ctx->size && CH(lines[`0`].beg) == _T(`'['`)) {
5130	MD_CHECK(md_consume_link_reference_definitions(ctx));
5131	if(ctx->current_block == NULL)
5132	return ret;
5133	}
5134	}
5135
5136	if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
5137	MD_SIZE n_lines = ctx->current_block->n_lines;
5138
5139	if(n_lines > `1`) {
5140	/ Get rid of the underline. /
5141	ctx->current_block->n_lines--;
5142	ctx->n_block_bytes -= sizeof(MD_LINE);
5143	} else {
5144	/ Only the underline has left after eating the ref. defs.*
5145	* Keep the line as beginning of a new ordinary paragraph. */
5146	ctx->current_block->type = MD_BLOCK_P;
5147	return `0`;
5148	}
5149	}
5150
5151	/ Mark we are not building any block anymore. /
5152	ctx->current_block = NULL;
5153
5154	abort:
5155	return ret;
5156	}
5157
5158	static int
5159	md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
5160	{
5161	MD_ASSERT(ctx->current_block != NULL);
5162
5163	if(ctx->current_block->type == MD_BLOCK_CODE \|\| ctx->current_block->type == MD_BLOCK_HTML) {
5164	MD_VERBATIMLINE* line;
5165
5166	line = (MD_VERBATIMLINE) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_VERBATIMLINE));
5167	if(line == NULL)
5168	return -`1`;
5169
5170	line->indent = analysis->indent;
5171	line->beg = analysis->beg;
5172	line->end = analysis->end;
5173	} else {
5174	MD_LINE* line;
5175
5176	line = (MD_LINE) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_LINE));
5177	if(line == NULL)
5178	return -`1`;
5179
5180	line->beg = analysis->beg;
5181	line->end = analysis->end;
5182	}
5183	ctx->current_block->n_lines++;
5184
5185	return `0`;
5186	}
5187
5188	static int
5189	md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
5190	unsigned data, unsigned flags)
5191	{
5192	MD_BLOCK* block;
5193	int ret = `0`;
5194
5195	MD_CHECK(md_end_current_block(ctx));
5196
5197	block = (MD_BLOCK) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_BLOCK));
5198	if(block == NULL)
5199	return -`1`;
5200
5201	block->type = type;
5202	block->flags = flags;
5203	block->data = data;
5204	block->n_lines = start;
5205
5206	abort:
5207	return ret;
5208	}
5209
5210
5211
5212	/***********************
5213	* Line Analysis *
5214	***********************/
5215
5216	static int
5217	md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
5218	{
5219	OFF off = beg + `1`;
5220	int n = `1`;
5221
5222	while(off < ctx->size && (CH(off) == CH(beg) \|\| CH(off) == _T(`' '`) \|\| CH(off) == _T(`'\t'`))) {
5223	if(CH(off) == CH(beg))
5224	n++;
5225	off++;
5226	}
5227
5228	if(n < `3`) {
5229	*p_killer = off;
5230	return FALSE;
5231	}
5232
5233	/ Nothing else can be present on the line. /
5234	if(off < ctx->size && !ISNEWLINE(off)) {
5235	*p_killer = off;
5236	return FALSE;
5237	}
5238
5239	*p_end = off;
5240	return TRUE;
5241	}
5242
5243	static int
5244	md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
5245	{
5246	int n;
5247	OFF off = beg + `1`;
5248
5249	while(off < ctx->size && CH(off) == _T(`'#'`) && off - beg < `7`)
5250	off++;
5251	n = off - beg;
5252
5253	if(n > `6`)
5254	return FALSE;
5255	*p_level = n;
5256
5257	if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
5258	CH(off) != _T(`' '`) && CH(off) != _T(`'\t'`) && !ISNEWLINE(off))
5259	return FALSE;
5260
5261	while(off < ctx->size && CH(off) == _T(`' '`))
5262	off++;
5263	*p_beg = off;
5264	*p_end = off;
5265	return TRUE;
5266	}
5267
5268	static int
5269	md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
5270	{
5271	OFF off = beg + `1`;
5272
5273	while(off < ctx->size && CH(off) == CH(beg))
5274	off++;
5275
5276	/ Optionally, space(s) or tabs can follow. /
5277	while(off < ctx->size && ISBLANK(off))
5278	off++;
5279
5280	/ But nothing more is allowed on the line. /
5281	if(off < ctx->size && !ISNEWLINE(off))
5282	return FALSE;
5283
5284	*p_level = (CH(beg) == _T(`'='`) ? `1` : `2`);
5285	*p_end = off;
5286	return TRUE;
5287	}
5288
5289	static int
5290	md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
5291	{
5292	OFF off = beg;
5293	int found_pipe = FALSE;
5294	unsigned col_count = `0`;
5295
5296	if(off < ctx->size && CH(off) == _T(`'\|'`)) {
5297	found_pipe = TRUE;
5298	off++;
5299	while(off < ctx->size && ISWHITESPACE(off))
5300	off++;
5301	}
5302
5303	while(`1`) {
5304	int delimited = FALSE;
5305
5306	/ Cell underline ("-----", ":----", "----:" or ":----:") /
5307	if(off < ctx->size && CH(off) == _T(`':'`))
5308	off++;
5309	if(off >= ctx->size \|\| CH(off) != _T(`'-'`))
5310	return FALSE;
5311	while(off < ctx->size && CH(off) == _T(`'-'`))
5312	off++;
5313	if(off < ctx->size && CH(off) == _T(`':'`))
5314	off++;
5315
5316	col_count++;
5317	if(col_count > TABLE_MAXCOLCOUNT) {
5318	MD_LOG("Suppressing table (column_count >" STRINGIZE(TABLE_MAXCOLCOUNT) ")");
5319	return FALSE;
5320	}
5321
5322	/ Pipe delimiter (optional at the end of line). /
5323	while(off < ctx->size && ISWHITESPACE(off))
5324	off++;
5325	if(off < ctx->size && CH(off) == _T(`'\|'`)) {
5326	delimited = TRUE;
5327	found_pipe = TRUE;
5328	off++;
5329	while(off < ctx->size && ISWHITESPACE(off))
5330	off++;
5331	}
5332
5333	/ Success, if we reach end of line. /
5334	if(off >= ctx->size \|\| ISNEWLINE(off))
5335	break;
5336
5337	if(!delimited)
5338	return FALSE;
5339	}
5340
5341	if(!found_pipe)
5342	return FALSE;
5343
5344	*p_end = off;
5345	*p_col_count = col_count;
5346	return TRUE;
5347	}
5348
5349	static int
5350	md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
5351	{
5352	OFF off = beg;
5353
5354	while(off < ctx->size && CH(off) == CH(beg))
5355	off++;
5356
5357	/ Fence must have at least three characters. /
5358	if(off - beg < `3`)
5359	return FALSE;
5360
5361	ctx->code_fence_length = off - beg;
5362
5363	/ Optionally, space(s) can follow. /
5364	while(off < ctx->size && CH(off) == _T(`' '`))
5365	off++;
5366
5367	/ Optionally, an info string can follow. /
5368	while(off < ctx->size && !ISNEWLINE(off)) {
5369	/ Backtick-based fence must not contain '`' in the info string. /
5370	if(CH(beg) == _T('`') && CH(off) == _T('`'))
5371	return FALSE;
5372	off++;
5373	}
5374
5375	*p_end = off;
5376	return TRUE;
5377	}
5378
5379	static int
5380	md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
5381	{
5382	OFF off = beg;
5383	int ret = FALSE;
5384
5385	/ Closing fence must have at least the same length and use same char as*
5386	* opening one. */
5387	while(off < ctx->size && CH(off) == ch)
5388	off++;
5389	if(off - beg < ctx->code_fence_length)
5390	goto out;
5391
5392	/ Optionally, space(s) can follow /
5393	while(off < ctx->size && CH(off) == _T(`' '`))
5394	off++;
5395
5396	/ But nothing more is allowed on the line. /
5397	if(off < ctx->size && !ISNEWLINE(off))
5398	goto out;
5399
5400	ret = TRUE;
5401
5402	out:
5403	/ Note we set p_end even on failure: If we are not closing fence, caller
5404	* would eat the line anyway without any parsing. */
5405	*p_end = off;
5406	return ret;
5407	}
5408
5409
5410	/ Helper data for md_is_html_block_start_condition() and*
5411	* md_is_html_block_end_condition() */
5412	typedef struct TAG_tag TAG;
5413	struct TAG_tag {
5414	const CHAR* name;
5415	unsigned len : `8`;
5416	};
5417
5418	#ifdef X
5419	#undef X
5420	#endif
5421	#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
5422	#define Xend { NULL, 0 }
5423
5424	static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend };
5425
5426	static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
5427	static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
5428	static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
5429	static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
5430	X("div"), X("dl"), X("dt"), Xend };
5431	static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
5432	X("form"), X("frame"), X("frameset"), Xend };
5433	static const TAG h6[] = { X("h1"), X("h2"), X("h3"), X("h4"), X("h5"), X("h6"),
5434	X("head"), X("header"), X("hr"), X("html"), Xend };
5435	static const TAG i6[] = { X("iframe"), Xend };
5436	static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
5437	static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
5438	static const TAG n6[] = { X("nav"), X("noframes"), Xend };
5439	static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
5440	static const TAG p6[] = { X("p"), X("param"), Xend };
5441	static const TAG s6[] = { X("search"), X("section"), X("summary"), Xend };
5442	static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
5443	X("thead"), X("title"), X("tr"), X("track"), Xend };
5444	static const TAG u6[] = { X("ul"), Xend };
5445	static const TAG xx[] = { Xend };
5446
5447	#undef X
5448	#undef Xend
5449
5450	/ Returns type of the raw HTML block, or FALSE if it is not HTML block.*
5451	* (Refer to CommonMark specification for details about the types.)
5452	*/
5453	static int
5454	md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
5455	{
5456	/ Type 6 is started by a long list of allowed tags. We use two-level*
5457	* tree to speed-up the search. */
5458	static const TAG* map6[`26`] = {
5459	a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
5460	n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
5461	};
5462	OFF off = beg + `1`;
5463	int i;
5464
5465	/ Check for type 1: <script, <pre, or <style /
5466	for(i = `0`; t1[i].name != NULL; i++) {
5467	if(off + t1[i].len <= ctx->size) {
5468	if(md_ascii_case_eq(STR(off), s2: t1[i].name, n: t1[i].len))
5469	return `1`;
5470	}
5471	}
5472
5473	/ Check for type 2: <!-- /
5474	if(off + `3` < ctx->size && CH(off) == _T(`'!'`) && CH(off+`1`) == _T(`'-'`) && CH(off+`2`) == _T(`'-'`))
5475	return `2`;
5476
5477	/ Check for type 3: <? /
5478	if(off < ctx->size && CH(off) == _T(`'?'`))
5479	return `3`;
5480
5481	/ Check for type 4 or 5: <! /
5482	if(off < ctx->size && CH(off) == _T(`'!'`)) {
5483	/ Check for type 4: <! followed by uppercase letter. /
5484	if(off + `1` < ctx->size && ISASCII(off+`1`))
5485	return `4`;
5486
5487	/ Check for type 5: <![CDATA[ /
5488	if(off + `8` < ctx->size) {
5489	if(md_ascii_eq(STR(off), _T("![CDATA["), n: `8`))
5490	return `5`;
5491	}
5492	}
5493
5494	/ Check for type 6: Many possible starting tags listed above. /
5495	if(off + `1` < ctx->size && (ISALPHA(off) \|\| (CH(off) == _T(`'/'`) && ISALPHA(off+`1`)))) {
5496	int slot;
5497	const TAG* tags;
5498
5499	if(CH(off) == _T(`'/'`))
5500	off++;
5501
5502	slot = (ISUPPER(off) ? CH(off) - `'A'` : CH(off) - `'a'`);
5503	tags = map6[slot];
5504
5505	for(i = `0`; tags[i].name != NULL; i++) {
5506	if(off + tags[i].len <= ctx->size) {
5507	if(md_ascii_case_eq(STR(off), s2: tags[i].name, n: tags[i].len)) {
5508	OFF tmp = off + tags[i].len;
5509	if(tmp >= ctx->size)
5510	return `6`;
5511	if(ISBLANK(tmp) \|\| ISNEWLINE(tmp) \|\| CH(tmp) == _T(`'>'`))
5512	return `6`;
5513	if(tmp+`1` < ctx->size && CH(tmp) == _T(`'/'`) && CH(tmp+`1`) == _T(`'>'`))
5514	return `6`;
5515	break;
5516	}
5517	}
5518	}
5519	}
5520
5521	/ Check for type 7: any COMPLETE other opening or closing tag. /
5522	if(off + `1` < ctx->size) {
5523	OFF end;
5524
5525	if(md_is_html_tag(ctx, NULL, n_lines: `0`, beg, max_end: ctx->size, p_end: &end)) {
5526	/ Only optional whitespace and new line may follow. /
5527	while(end < ctx->size && ISWHITESPACE(end))
5528	end++;
5529	if(end >= ctx->size \|\| ISNEWLINE(end))
5530	return `7`;
5531	}
5532	}
5533
5534	return FALSE;
5535	}
5536
5537	/ Case sensitive check whether there is a substring 'what' between 'beg'*
5538	* and end of line. */
5539	static int
5540	md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
5541	{
5542	OFF i;
5543	for(i = beg; i + what_len < ctx->size; i++) {
5544	if(ISNEWLINE(i))
5545	break;
5546	if(memcmp(STR(i), s2: what, n: what_len * sizeof(CHAR)) == `0`) {
5547	*p_end = i + what_len;
5548	return TRUE;
5549	}
5550	}
5551
5552	*p_end = i;
5553	return FALSE;
5554	}
5555
5556	/ Returns type of HTML block end condition or FALSE if not an end condition.*
5557	*
5558	* Note it fills p_end even when it is not end condition as the caller
5559	* does not need to analyze contents of a raw HTML block.
5560	*/
5561	static int
5562	md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
5563	{
5564	switch(ctx->html_block_type) {
5565	case `1`:
5566	{
5567	OFF off = beg;
5568	int i;
5569
5570	while(off+`1` < ctx->size && !ISNEWLINE(off)) {
5571	if(CH(off) == _T(`'<'`) && CH(off+`1`) == _T(`'/'`)) {
5572	for(i = `0`; t1[i].name != NULL; i++) {
5573	if(off + `2` + t1[i].len < ctx->size) {
5574	if(md_ascii_case_eq(STR(off+`2`), s2: t1[i].name, n: t1[i].len) &&
5575	CH(off+`2`+t1[i].len) == _T(`'>'`))
5576	{
5577	*p_end = off+`2`+t1[i].len+`1`;
5578	return TRUE;
5579	}
5580	}
5581	}
5582	}
5583	off++;
5584	}
5585	*p_end = off;
5586	return FALSE;
5587	}
5588
5589	case `2`:
5590	return (md_line_contains(ctx, beg, _T("-->"), what_len: `3`, p_end) ? `2` : FALSE);
5591
5592	case `3`:
5593	return (md_line_contains(ctx, beg, _T("?>"), what_len: `2`, p_end) ? `3` : FALSE);
5594
5595	case `4`:
5596	return (md_line_contains(ctx, beg, _T(">"), what_len: `1`, p_end) ? `4` : FALSE);
5597
5598	case `5`:
5599	return (md_line_contains(ctx, beg, _T("]]>"), what_len: `3`, p_end) ? `5` : FALSE);
5600
5601	case `6`: / Pass through /
5602	case `7`:
5603	if(beg >= ctx->size \|\| ISNEWLINE(beg)) {
5604	/ Blank line ends types 6 and 7. /
5605	*p_end = beg;
5606	return ctx->html_block_type;
5607	}
5608	return FALSE;
5609
5610	default:
5611	MD_UNREACHABLE();
5612	}
5613	return FALSE;
5614	}
5615
5616
5617	static int
5618	md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
5619	{
5620	/ Block quote has no "items" like lists. /
5621	if(container->ch == _T(`'>'`))
5622	return FALSE;
5623
5624	if(container->ch != pivot->ch)
5625	return FALSE;
5626	if(container->mark_indent > pivot->contents_indent)
5627	return FALSE;
5628
5629	return TRUE;
5630	}
5631
5632	static int
5633	md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5634	{
5635	if(ctx->n_containers >= ctx->alloc_containers) {
5636	MD_CONTAINER* new_containers;
5637
5638	ctx->alloc_containers = (ctx->alloc_containers > `0`
5639	? ctx->alloc_containers + ctx->alloc_containers / `2`
5640	: `16`);
5641	new_containers = realloc(ptr: ctx->containers, size: ctx->alloc_containers * sizeof(MD_CONTAINER));
5642	if(new_containers == NULL) {
5643	MD_LOG("realloc() failed.");
5644	return -`1`;
5645	}
5646
5647	ctx->containers = new_containers;
5648	}
5649
5650	memcpy(dest: &ctx->containers[ctx->n_containers++], src: container, n: sizeof(MD_CONTAINER));
5651	return `0`;
5652	}
5653
5654	static int
5655	md_enter_child_containers(MD_CTX* ctx, int n_children)
5656	{
5657	int i;
5658	int ret = `0`;
5659
5660	for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5661	MD_CONTAINER* c = &ctx->containers[i];
5662	int is_ordered_list = FALSE;
5663
5664	switch(c->ch) {
5665	case _T(`')'`):
5666	case _T(`'.'`):
5667	is_ordered_list = TRUE;
5668	MD_FALLTHROUGH();
5669
5670	case _T(`'-'`):
5671	case _T(`'+'`):
5672	case _T(`'*'`):
5673	/ Remember offset in ctx->block_bytes so we can revisit the*
5674	* block if we detect it is a loose list. */
5675	md_end_current_block(ctx);
5676	c->block_byte_off = ctx->n_block_bytes;
5677
5678	MD_CHECK(md_push_container_bytes(ctx,
5679	(is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5680	c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
5681	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5682	c->task_mark_off,
5683	(c->is_task ? CH(c->task_mark_off) : `0`),
5684	MD_BLOCK_CONTAINER_OPENER));
5685	break;
5686
5687	case _T(`'>'`):
5688	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, `0`, `0`, MD_BLOCK_CONTAINER_OPENER));
5689	break;
5690
5691	default:
5692	MD_UNREACHABLE();
5693	break;
5694	}
5695	}
5696
5697	abort:
5698	return ret;
5699	}
5700
5701	static int
5702	md_leave_child_containers(MD_CTX* ctx, int n_keep)
5703	{
5704	int ret = `0`;
5705
5706	while(ctx->n_containers > n_keep) {
5707	MD_CONTAINER* c = &ctx->containers[ctx->n_containers-`1`];
5708	int is_ordered_list = FALSE;
5709
5710	switch(c->ch) {
5711	case _T(`')'`):
5712	case _T(`'.'`):
5713	is_ordered_list = TRUE;
5714	MD_FALLTHROUGH();
5715
5716	case _T(`'-'`):
5717	case _T(`'+'`):
5718	case _T(`'*'`):
5719	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5720	c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : `0`),
5721	MD_BLOCK_CONTAINER_CLOSER));
5722	MD_CHECK(md_push_container_bytes(ctx,
5723	(is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), `0`,
5724	c->ch, MD_BLOCK_CONTAINER_CLOSER));
5725	break;
5726
5727	case _T(`'>'`):
5728	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, `0`,
5729	`0`, MD_BLOCK_CONTAINER_CLOSER));
5730	break;
5731
5732	default:
5733	MD_UNREACHABLE();
5734	break;
5735	}
5736
5737	ctx->n_containers--;
5738	}
5739
5740	abort:
5741	return ret;
5742	}
5743
5744	static int
5745	md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5746	{
5747	OFF off = beg;
5748	OFF max_end;
5749
5750	if(off >= ctx->size \|\| indent >= ctx->code_indent_offset)
5751	return FALSE;
5752
5753	/ Check for block quote mark. /
5754	if(CH(off) == _T(`'>'`)) {
5755	off++;
5756	p_container->ch = _T(`'>'`);
5757	p_container->is_loose = FALSE;
5758	p_container->is_task = FALSE;
5759	p_container->mark_indent = indent;
5760	p_container->contents_indent = indent + `1`;
5761	*p_end = off;
5762	return TRUE;
5763	}
5764
5765	/ Check for list item bullet mark. /
5766	if(ISANYOF(off, _T("-+*")) && (off+`1` >= ctx->size \|\| ISBLANK(off+`1`) \|\| ISNEWLINE(off+`1`))) {
5767	p_container->ch = CH(off);
5768	p_container->is_loose = FALSE;
5769	p_container->is_task = FALSE;
5770	p_container->mark_indent = indent;
5771	p_container->contents_indent = indent + `1`;
5772	*p_end = off+`1`;
5773	return TRUE;
5774	}
5775
5776	/ Check for ordered list item marks. /
5777	max_end = off + `9`;
5778	if(max_end > ctx->size)
5779	max_end = ctx->size;
5780	p_container->start = `0`;
5781	while(off < max_end && ISDIGIT(off)) {
5782	p_container->start = p_container->start * `10` + CH(off) - _T(`'0'`);
5783	off++;
5784	}
5785	if(off > beg &&
5786	off < ctx->size &&
5787	(CH(off) == _T(`'.'`) \|\| CH(off) == _T(`')'`)) &&
5788	(off+`1` >= ctx->size \|\| ISBLANK(off+`1`) \|\| ISNEWLINE(off+`1`)))
5789	{
5790	p_container->ch = CH(off);
5791	p_container->is_loose = FALSE;
5792	p_container->is_task = FALSE;
5793	p_container->mark_indent = indent;
5794	p_container->contents_indent = indent + off - beg + `1`;
5795	*p_end = off+`1`;
5796	return TRUE;
5797	}
5798
5799	return FALSE;
5800	}
5801
5802	static unsigned
5803	md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5804	{
5805	OFF off = beg;
5806	unsigned indent = total_indent;
5807
5808	while(off < ctx->size && ISBLANK(off)) {
5809	if(CH(off) == _T(`'\t'`))
5810	indent = (indent + `4`) & ~`3`;
5811	else
5812	indent++;
5813	off++;
5814	}
5815
5816	*p_end = off;
5817	return indent - total_indent;
5818	}
5819
5820	static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, `0`, `0`, `0`, `0`, `0` };
5821
5822	/ Analyze type of the line and find some its properties. This serves as a*
5823	* main input for determining type and boundaries of a block. */
5824	static int
5825	md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5826	const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5827	{
5828	unsigned total_indent = `0`;
5829	int n_parents = `0`;
5830	int n_brothers = `0`;
5831	int n_children = `0`;
5832	MD_CONTAINER container = { `0` };
5833	int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5834	OFF off = beg;
5835	OFF hr_killer = `0`;
5836	int ret = `0`;
5837
5838	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
5839	total_indent += line->indent;
5840	line->beg = off;
5841	line->enforce_new_block = FALSE;
5842
5843	/ Given the indentation and block quote marks '>', determine how many of*
5844	* the current containers are our parents. */
5845	while(n_parents < ctx->n_containers) {
5846	MD_CONTAINER* c = &ctx->containers[n_parents];
5847
5848	if(c->ch == _T(`'>'`) && line->indent < ctx->code_indent_offset &&
5849	off < ctx->size && CH(off) == _T(`'>'`))
5850	{
5851	/ Block quote mark. /
5852	off++;
5853	total_indent++;
5854	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
5855	total_indent += line->indent;
5856
5857	/ The optional 1st space after '>' is part of the block quote mark. /
5858	if(line->indent > `0`)
5859	line->indent--;
5860
5861	line->beg = off;
5862
5863	} else if(c->ch != _T(`'>'`) && line->indent >= c->contents_indent) {
5864	/ List. /
5865	line->indent -= c->contents_indent;
5866	} else {
5867	break;
5868	}
5869
5870	n_parents++;
5871	}
5872
5873	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5874	/ Blank line does not need any real indentation to be nested inside*
5875	* a list. */
5876	if(n_brothers + n_children == `0`) {
5877	while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T(`'>'`))
5878	n_parents++;
5879	}
5880	}
5881
5882	while(TRUE) {
5883	/ Check whether we are fenced code continuation. /
5884	if(pivot_line->type == MD_LINE_FENCEDCODE) {
5885	line->beg = off;
5886
5887	/ We are another MD_LINE_FENCEDCODE unless we are closing fence*
5888	* which we transform into MD_LINE_BLANK. */
5889	if(line->indent < ctx->code_indent_offset) {
5890	if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), beg: off, p_end: &off)) {
5891	line->type = MD_LINE_BLANK;
5892	ctx->last_line_has_list_loosening_effect = FALSE;
5893	break;
5894	}
5895	}
5896
5897	/ Change indentation accordingly to the initial code fence. /
5898	if(n_parents == ctx->n_containers) {
5899	if(line->indent > pivot_line->indent)
5900	line->indent -= pivot_line->indent;
5901	else
5902	line->indent = `0`;
5903
5904	line->type = MD_LINE_FENCEDCODE;
5905	break;
5906	}
5907	}
5908
5909	/ Check whether we are HTML block continuation. /
5910	if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > `0`) {
5911	if(n_parents < ctx->n_containers) {
5912	/ HTML block is implicitly ended if the enclosing container*
5913	* block ends. */
5914	ctx->html_block_type = `0`;
5915	} else {
5916	int html_block_type;
5917
5918	html_block_type = md_is_html_block_end_condition(ctx, beg: off, p_end: &off);
5919	if(html_block_type > `0`) {
5920	MD_ASSERT(html_block_type == ctx->html_block_type);
5921
5922	/ Make sure this is the last line of the block. /
5923	ctx->html_block_type = `0`;
5924
5925	/ Some end conditions serve as blank lines at the same time. /
5926	if(html_block_type == `6` \|\| html_block_type == `7`) {
5927	line->type = MD_LINE_BLANK;
5928	line->indent = `0`;
5929	break;
5930	}
5931	}
5932
5933	line->type = MD_LINE_HTML;
5934	n_parents = ctx->n_containers;
5935	break;
5936	}
5937	}
5938
5939	/ Check for blank line. /
5940	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5941	if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) {
5942	line->type = MD_LINE_INDENTEDCODE;
5943	if(line->indent > ctx->code_indent_offset)
5944	line->indent -= ctx->code_indent_offset;
5945	else
5946	line->indent = `0`;
5947	ctx->last_line_has_list_loosening_effect = FALSE;
5948	} else {
5949	line->type = MD_LINE_BLANK;
5950	ctx->last_line_has_list_loosening_effect = (n_parents > `0` &&
5951	n_brothers + n_children == `0` &&
5952	ctx->containers[n_parents-`1`].ch != _T(`'>'`));
5953
5954	#if 1
5955	/ See https://github.com/mity/md4c/issues/6*
5956	*
5957	* This ugly checking tests we are in (yet empty) list item but
5958	* not its very first line (i.e. not the line with the list
5959	* item mark).
5960	*
5961	* If we are such a blank line, then any following non-blank
5962	* line which would be part of the list item actually has to
5963	* end the list because according to the specification, "a list
5964	* item can begin with at most one blank line."
5965	*/
5966	if(n_parents > `0` && ctx->containers[n_parents-`1`].ch != _T(`'>'`) &&
5967	n_brothers + n_children == `0` && ctx->current_block == NULL &&
5968	ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5969	{
5970	MD_BLOCK* top_block = (MD_BLOCK) ((char)ctx->block_bytes + ctx->n_block_bytes - sizeof**(MD_BLOCK));
5971	if(top_block->type == MD_BLOCK_LI)
5972	ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5973	}
5974	#endif
5975	}
5976	break;
5977	} else {
5978	#if 1
5979	/ This is the 2nd half of the hack. If the flag is set (i.e. there*
5980	* was a 2nd blank line at the beginning of the list item) and if
5981	* we would otherwise still belong to the list item, we enforce
5982	* the end of the list. */
5983	if(ctx->last_list_item_starts_with_two_blank_lines) {
5984	if(n_parents > `0` && n_parents == ctx->n_containers &&
5985	ctx->containers[n_parents-`1`].ch != _T(`'>'`) &&
5986	n_brothers + n_children == `0` && ctx->current_block == NULL &&
5987	ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5988	{
5989	MD_BLOCK* top_block = (MD_BLOCK) ((char)ctx->block_bytes + ctx->n_block_bytes - sizeof**(MD_BLOCK));
5990	if(top_block->type == MD_BLOCK_LI) {
5991	n_parents--;
5992
5993	line->indent = total_indent;
5994	if(n_parents > `0`)
5995	line->indent -= MIN(line->indent, ctx->containers[n_parents-`1`].contents_indent);
5996	}
5997	}
5998
5999	ctx->last_list_item_starts_with_two_blank_lines = FALSE;
6000	}
6001	#endif
6002	ctx->last_line_has_list_loosening_effect = FALSE;
6003	}
6004
6005	/ Check whether we are Setext underline. /
6006	if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT
6007	&& off < ctx->size && ISANYOF2(off, _T(`'='`), _T(`'-'`))
6008	&& (n_parents == ctx->n_containers))
6009	{
6010	unsigned level;
6011
6012	if(md_is_setext_underline(ctx, beg: off, p_end: &off, p_level: &level)) {
6013	line->type = MD_LINE_SETEXTUNDERLINE;
6014	line->data = level;
6015	break;
6016	}
6017	}
6018
6019	/ Check for thematic break line. /
6020	if(line->indent < ctx->code_indent_offset
6021	&& off < ctx->size && off >= hr_killer
6022	&& ISANYOF(off, _T("-_*")))
6023	{
6024	if(md_is_hr_line(ctx, beg: off, p_end: &off, p_killer: &hr_killer)) {
6025	line->type = MD_LINE_HR;
6026	break;
6027	}
6028	}
6029
6030	/ Check for "brother" container. I.e. whether we are another list item*
6031	* in already started list. */
6032	if(n_parents < ctx->n_containers && n_brothers + n_children == `0`) {
6033	OFF tmp;
6034
6035	if(md_is_container_mark(ctx, indent: line->indent, beg: off, p_end: &tmp, p_container: &container) &&
6036	md_is_container_compatible(pivot: &ctx->containers[n_parents], container: &container))
6037	{
6038	pivot_line = &md_dummy_blank_line;
6039
6040	off = tmp;
6041
6042	total_indent += container.contents_indent - container.mark_indent;
6043	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
6044	total_indent += line->indent;
6045	line->beg = off;
6046
6047	/ Some of the following whitespace actually still belongs to the mark. /
6048	if(off >= ctx->size \|\| ISNEWLINE(off)) {
6049	container.contents_indent++;
6050	} else if(line->indent <= ctx->code_indent_offset) {
6051	container.contents_indent += line->indent;
6052	line->indent = `0`;
6053	} else {
6054	container.contents_indent += `1`;
6055	line->indent--;
6056	}
6057
6058	ctx->containers[n_parents].mark_indent = container.mark_indent;
6059	ctx->containers[n_parents].contents_indent = container.contents_indent;
6060
6061	n_brothers++;
6062	continue;
6063	}
6064	}
6065
6066	/ Check for indented code.*
6067	* Note indented code block cannot interrupt a paragraph. */
6068	if(line->indent >= ctx->code_indent_offset && (pivot_line->type != MD_LINE_TEXT)) {
6069	line->type = MD_LINE_INDENTEDCODE;
6070	line->indent -= ctx->code_indent_offset;
6071	line->data = `0`;
6072	break;
6073	}
6074
6075	/ Check for start of a new container block. /
6076	if(line->indent < ctx->code_indent_offset &&
6077	md_is_container_mark(ctx, indent: line->indent, beg: off, p_end: &off, p_container: &container))
6078	{
6079	if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
6080	(off >= ctx->size \|\| ISNEWLINE(off)) && container.ch != _T(`'>'`))
6081	{
6082	/ Noop. List mark followed by a blank line cannot interrupt a paragraph. /
6083	} else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
6084	ISANYOF2_(container.ch, _T(`'.'`), _T(`')'`)) && container.start != `1`)
6085	{
6086	/ Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. /
6087	} else {
6088	total_indent += container.contents_indent - container.mark_indent;
6089	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
6090	total_indent += line->indent;
6091
6092	line->beg = off;
6093	line->data = container.ch;
6094
6095	/ Some of the following whitespace actually still belongs to the mark. /
6096	if(off >= ctx->size \|\| ISNEWLINE(off)) {
6097	container.contents_indent++;
6098	} else if(line->indent <= ctx->code_indent_offset) {
6099	container.contents_indent += line->indent;
6100	line->indent = `0`;
6101	} else {
6102	container.contents_indent += `1`;
6103	line->indent--;
6104	}
6105
6106	if(n_brothers + n_children == `0`)
6107	pivot_line = &md_dummy_blank_line;
6108
6109	if(n_children == `0`)
6110	MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6111
6112	n_children++;
6113	MD_CHECK(md_push_container(ctx, &container));
6114	continue;
6115	}
6116	}
6117
6118	/ Check whether we are table continuation. /
6119	if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
6120	line->type = MD_LINE_TABLE;
6121	break;
6122	}
6123
6124	/ Check for ATX header. /
6125	if(line->indent < ctx->code_indent_offset &&
6126	off < ctx->size && CH(off) == _T(`'#'`))
6127	{
6128	unsigned level;
6129
6130	if(md_is_atxheader_line(ctx, beg: off, p_beg: &line->beg, p_end: &off, p_level: &level)) {
6131	line->type = MD_LINE_ATXHEADER;
6132	line->data = level;
6133	break;
6134	}
6135	}
6136
6137	/ Check whether we are starting code fence. /
6138	if(line->indent < ctx->code_indent_offset &&
6139	off < ctx->size && ISANYOF2(off, _T('`'), _T(`'~'`)))
6140	{
6141	if(md_is_opening_code_fence(ctx, beg: off, p_end: &off)) {
6142	line->type = MD_LINE_FENCEDCODE;
6143	line->data = `1`;
6144	line->enforce_new_block = TRUE;
6145	break;
6146	}
6147	}
6148
6149	/ Check for start of raw HTML block. /
6150	if(off < ctx->size && CH(off) == _T(`'<'`)
6151	&& !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
6152	{
6153	ctx->html_block_type = md_is_html_block_start_condition(ctx, beg: off);
6154
6155	/ HTML block type 7 cannot interrupt paragraph. /
6156	if(ctx->html_block_type == `7` && pivot_line->type == MD_LINE_TEXT)
6157	ctx->html_block_type = `0`;
6158
6159	if(ctx->html_block_type > `0`) {
6160	/ The line itself also may immediately close the block. /
6161	if(md_is_html_block_end_condition(ctx, beg: off, p_end: &off) == ctx->html_block_type) {
6162	/ Make sure this is the last line of the block. /
6163	ctx->html_block_type = `0`;
6164	}
6165
6166	line->enforce_new_block = TRUE;
6167	line->type = MD_LINE_HTML;
6168	break;
6169	}
6170	}
6171
6172	/ Check for table underline. /
6173	if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT
6174	&& off < ctx->size && ISANYOF3(off, _T(`'\|'`), _T(`'-'`), _T(`':'`))
6175	&& n_parents == ctx->n_containers)
6176	{
6177	unsigned col_count;
6178
6179	if(ctx->current_block != NULL && ctx->current_block->n_lines == `1` &&
6180	md_is_table_underline(ctx, beg: off, p_end: &off, p_col_count: &col_count))
6181	{
6182	line->data = col_count;
6183	line->type = MD_LINE_TABLEUNDERLINE;
6184	break;
6185	}
6186	}
6187
6188	/ By default, we are normal text line. /
6189	line->type = MD_LINE_TEXT;
6190	if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == `0`) {
6191	/ Lazy continuation. /
6192	n_parents = ctx->n_containers;
6193	}
6194
6195	/ Check for task mark. /
6196	if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > `0` &&
6197	ISANYOF_(ctx->containers[ctx->n_containers-`1`].ch, _T("-+*.)")))
6198	{
6199	OFF tmp = off;
6200
6201	while(tmp < ctx->size && tmp < off + `3` && ISBLANK(tmp))
6202	tmp++;
6203	if(tmp + `2` < ctx->size && CH(tmp) == _T(`'['`) &&
6204	ISANYOF(tmp+`1`, _T("xX ")) && CH(tmp+`2`) == _T(`']'`) &&
6205	(tmp + `3` == ctx->size \|\| ISBLANK(tmp+`3`) \|\| ISNEWLINE(tmp+`3`)))
6206	{
6207	MD_CONTAINER* task_container = (n_children > `0` ? &ctx->containers[ctx->n_containers-`1`] : &container);
6208	task_container->is_task = TRUE;
6209	task_container->task_mark_off = tmp + `1`;
6210	off = tmp + `3`;
6211	while(off < ctx->size && ISWHITESPACE(off))
6212	off++;
6213	line->beg = off;
6214	}
6215	}
6216
6217	break;
6218	}
6219
6220	/ Scan for end of the line.*
6221	*
6222	* Note this is quite a bottleneck of the parsing as we here iterate almost
6223	* over compete document.
6224	*/
6225	#if defined __linux__ && !defined MD4C_USE_UTF16
6226	/ Recent glibc versions have superbly optimized strcspn(), even using*
6227	* vectorization if available. */
6228	if(ctx->doc_ends_with_newline && off < ctx->size) {
6229	while(TRUE) {
6230	off += (OFF) strcspn(STR(off), reject: "\r\n");
6231
6232	/ strcspn() can stop on zero terminator; but that can appear*
6233	* anywhere in the Markfown input... */
6234	if(CH(off) == _T(`'\0'`))
6235	off++;
6236	else
6237	break;
6238	}
6239	} else
6240	#endif
6241	{
6242	/ Optimization: Use some loop unrolling. /
6243	while(off + `3` < ctx->size && !ISNEWLINE(off+`0`) && !ISNEWLINE(off+`1`)
6244	&& !ISNEWLINE(off+`2`) && !ISNEWLINE(off+`3`))
6245	off += `4`;
6246	while(off < ctx->size && !ISNEWLINE(off))
6247	off++;
6248	}
6249
6250	/ Set end of the line. /
6251	line->end = off;
6252
6253	/ But for ATX header, we should exclude the optional trailing mark. /
6254	if(line->type == MD_LINE_ATXHEADER) {
6255	OFF tmp = line->end;
6256	while(tmp > line->beg && CH(tmp-`1`) == _T(`' '`))
6257	tmp--;
6258	while(tmp > line->beg && CH(tmp-`1`) == _T(`'#'`))
6259	tmp--;
6260	if(tmp == line->beg \|\| CH(tmp-`1`) == _T(`' '`) \|\| (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
6261	line->end = tmp;
6262	}
6263
6264	/ Trim trailing spaces. /
6265	if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE && line->type != MD_LINE_HTML) {
6266	while(line->end > line->beg && CH(line->end-`1`) == _T(`' '`))
6267	line->end--;
6268	}
6269
6270	/ Eat also the new line. /
6271	if(off < ctx->size && CH(off) == _T(`'\r'`))
6272	off++;
6273	if(off < ctx->size && CH(off) == _T(`'\n'`))
6274	off++;
6275
6276	*p_end = off;
6277
6278	/ If we belong to a list after seeing a blank line, the list is loose. /
6279	if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > `0`) {
6280	MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - `1`];
6281	if(c->ch != _T(`'>'`)) {
6282	MD_BLOCK* block = (MD_BLOCK) (((char**)ctx->block_bytes) + c->block_byte_off);
6283	block->flags \|= MD_BLOCK_LOOSE_LIST;
6284	}
6285	}
6286
6287	/ Leave any containers we are not part of anymore. /
6288	if(n_children == `0` && n_parents + n_brothers < ctx->n_containers)
6289	MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6290
6291	/ Enter any container we found a mark for. /
6292	if(n_brothers > `0`) {
6293	MD_ASSERT(n_brothers == `1`);
6294	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6295	ctx->containers[n_parents].task_mark_off,
6296	(ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : `0`),
6297	MD_BLOCK_CONTAINER_CLOSER));
6298	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6299	container.task_mark_off,
6300	(container.is_task ? CH(container.task_mark_off) : `0`),
6301	MD_BLOCK_CONTAINER_OPENER));
6302	ctx->containers[n_parents].is_task = container.is_task;
6303	ctx->containers[n_parents].task_mark_off = container.task_mark_off;
6304	}
6305
6306	if(n_children > `0`)
6307	MD_CHECK(md_enter_child_containers(ctx, n_children));
6308
6309	abort:
6310	return ret;
6311	}
6312
6313	static int
6314	md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
6315	{
6316	const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
6317	int ret = `0`;
6318
6319	/ Blank line ends current leaf block. /
6320	if(line->type == MD_LINE_BLANK) {
6321	MD_CHECK(md_end_current_block(ctx));
6322	*p_pivot_line = &md_dummy_blank_line;
6323	return `0`;
6324	}
6325
6326	if(line->enforce_new_block)
6327	MD_CHECK(md_end_current_block(ctx));
6328
6329	/ Some line types form block on their own. /
6330	if(line->type == MD_LINE_HR \|\| line->type == MD_LINE_ATXHEADER) {
6331	MD_CHECK(md_end_current_block(ctx));
6332
6333	/ Add our single-line block. /
6334	MD_CHECK(md_start_new_block(ctx, line));
6335	MD_CHECK(md_add_line_into_current_block(ctx, line));
6336	MD_CHECK(md_end_current_block(ctx));
6337	*p_pivot_line = &md_dummy_blank_line;
6338	return `0`;
6339	}
6340
6341	/ MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. /
6342	if(line->type == MD_LINE_SETEXTUNDERLINE) {
6343	MD_ASSERT(ctx->current_block != NULL);
6344	ctx->current_block->type = MD_BLOCK_H;
6345	ctx->current_block->data = line->data;
6346	ctx->current_block->flags \|= MD_BLOCK_SETEXT_HEADER;
6347	MD_CHECK(md_add_line_into_current_block(ctx, line));
6348	MD_CHECK(md_end_current_block(ctx));
6349	if(ctx->current_block == NULL) {
6350	*p_pivot_line = &md_dummy_blank_line;
6351	} else {
6352	/ This happens if we have consumed all the body as link ref. defs.*
6353	* and downgraded the underline into start of a new paragraph block. */
6354	line->type = MD_LINE_TEXT;
6355	*p_pivot_line = line;
6356	}
6357	return `0`;
6358	}
6359
6360	/ MD_LINE_TABLEUNDERLINE changes meaning of the current block. /
6361	if(line->type == MD_LINE_TABLEUNDERLINE) {
6362	MD_ASSERT(ctx->current_block != NULL);
6363	MD_ASSERT(ctx->current_block->n_lines == `1`);
6364	ctx->current_block->type = MD_BLOCK_TABLE;
6365	ctx->current_block->data = line->data;
6366	MD_ASSERT(pivot_line != &md_dummy_blank_line);
6367	((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
6368	MD_CHECK(md_add_line_into_current_block(ctx, line));
6369	return `0`;
6370	}
6371
6372	/ The current block also ends if the line has different type. /
6373	if(line->type != pivot_line->type)
6374	MD_CHECK(md_end_current_block(ctx));
6375
6376	/ The current line may start a new block. /
6377	if(ctx->current_block == NULL) {
6378	MD_CHECK(md_start_new_block(ctx, line));
6379	*p_pivot_line = line;
6380	}
6381
6382	/ In all other cases the line is just a continuation of the current block. /
6383	MD_CHECK(md_add_line_into_current_block(ctx, line));
6384
6385	abort:
6386	return ret;
6387	}
6388
6389	static int
6390	md_process_doc(MD_CTX *ctx)
6391	{
6392	const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
6393	MD_LINE_ANALYSIS line_buf[`2`];
6394	MD_LINE_ANALYSIS* line = &line_buf[`0`];
6395	OFF off = `0`;
6396	int ret = `0`;
6397
6398	MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
6399
6400	while(off < ctx->size) {
6401	if(line == pivot_line)
6402	line = (line == &line_buf[`0`] ? &line_buf[`1`] : &line_buf[`0`]);
6403
6404	MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
6405	MD_CHECK(md_process_line(ctx, &pivot_line, line));
6406	}
6407
6408	md_end_current_block(ctx);
6409
6410	MD_CHECK(md_build_ref_def_hashtable(ctx));
6411
6412	/ Process all blocks. /
6413	MD_CHECK(md_leave_child_containers(ctx, `0`));
6414	MD_CHECK(md_process_all_blocks(ctx));
6415
6416	MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
6417
6418	abort:
6419
6420	#if 0
6421	/ Output some memory consumption statistics. /
6422	{
6423	char buffer[`256`];
6424	sprintf(buffer, "Alloced %u bytes for block buffer.",
6425	(unsigned)(ctx->alloc_block_bytes));
6426	MD_LOG(buffer);
6427
6428	sprintf(buffer, "Alloced %u bytes for containers buffer.",
6429	(unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
6430	MD_LOG(buffer);
6431
6432	sprintf(buffer, "Alloced %u bytes for marks buffer.",
6433	(unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
6434	MD_LOG(buffer);
6435
6436	sprintf(buffer, "Alloced %u bytes for aux. buffer.",
6437	(unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
6438	MD_LOG(buffer);
6439	}
6440	#endif
6441
6442	return ret;
6443	}
6444
6445
6446	/********************
6447	* Public API *
6448	********************/
6449
6450	int
6451	md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
6452	{
6453	MD_CTX ctx;
6454	int i;
6455	int ret;
6456
6457	if(parser->abi_version != `0`) {
6458	if(parser->debug_log != NULL)
6459	parser->debug_log("Unsupported abi_version.", userdata);
6460	return -`1`;
6461	}
6462
6463	/ Setup context structure. /
6464	memset(s: &ctx, c: `0`, n: sizeof(MD_CTX));
6465	ctx.text = text;
6466	ctx.size = size;
6467	memcpy(dest: &ctx.parser, src: parser, n: sizeof(MD_PARSER));
6468	ctx.userdata = userdata;
6469	ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-`1`) : `4`;
6470	md_build_mark_char_map(ctx: &ctx);
6471	ctx.doc_ends_with_newline = (size > `0` && ISNEWLINE_(text[size-`1`]));
6472
6473	/ Reset all mark stacks and lists. /
6474	for(i = `0`; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)
6475	ctx.opener_stacks[i].top = -`1`;
6476	ctx.ptr_stack.top = -`1`;
6477	ctx.unresolved_link_head = -`1`;
6478	ctx.unresolved_link_tail = -`1`;
6479	ctx.table_cell_boundaries_head = -`1`;
6480	ctx.table_cell_boundaries_tail = -`1`;
6481
6482	/ All the work. /
6483	ret = md_process_doc(ctx: &ctx);
6484
6485	/ Clean-up. /
6486	md_free_ref_defs(ctx: &ctx);
6487	md_free_ref_def_hashtable(ctx: &ctx);
6488	free(ptr: ctx.buffer);
6489	free(ptr: ctx.marks);
6490	free(ptr: ctx.block_bytes);
6491	free(ptr: ctx.containers);
6492
6493	return ret;
6494	}
6495

/* Source code of qtbase/src/3rdparty/md4c/md4c.c */