md4c.c source code [qtbase/src/3rdparty/md4c/md4c.c]

1	/*
2	* MD4C: Markdown parser for C
3	* (http://github.com/mity/md4c)
4	*
5	* Copyright (c) 2016-2020 Martin Mitas
6	*
7	* Permission is hereby granted, free of charge, to any person obtaining a
8	* copy of this software and associated documentation files (the "Software"),
9	* to deal in the Software without restriction, including without limitation
10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
11	* and/or sell copies of the Software, and to permit persons to whom the
12	* Software is furnished to do so, subject to the following conditions:
13	*
14	* The above copyright notice and this permission notice shall be included in
15	* all copies or substantial portions of the Software.
16	*
17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23	* IN THE SOFTWARE.
24	*/
25
26	#include "md4c.h"
27
28	#include <limits.h>
29	#include <stdio.h>
30	#include <stdlib.h>
31	#include <string.h>
32
33
34	/*****************************
35	* Miscellaneous Stuff *
36	*****************************/
37
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    /* "inline" only became a keyword in C99. C89/90 and C95 (199409L)
     * compilers in general may not understand it, so map it to the vendor
     * spelling where one is known, or drop it altogether. */
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif
48
/* Fall back to UTF-8 when the build selects no encoding explicitly. */
#if !defined(MD4C_USE_ASCII) && !defined(MD4C_USE_UTF8) && !defined(MD4C_USE_UTF16)
    #define MD4C_USE_UTF8
#endif
53
/* _T(x) turns a literal into a wide one in MD4C_USE_UTF16 builds and leaves
 * it untouched otherwise. Any previous definition of _T is discarded first
 * (#undef of an undefined name is harmless). */
#undef _T
#ifdef MD4C_USE_UTF16
    #define _T(x)           L##x
#else
    #define _T(x)           x
#endif
63
/* Misc. macros. */

/* Number of elements of an array. The argument must be a real array, not a
 * pointer. It is parenthesized so that any array-typed expression works. */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))

/* Two-level stringification: STRINGIZE() expands macros in its argument
 * before turning it into a string literal, STRINGIZE_() does not. */
#define STRINGIZE_(x)       #x
#define STRINGIZE(x)        STRINGIZE_(x)

/* Guard each constant separately so that an environment which provides only
 * one of them still ends up with both. */
#ifndef TRUE
    #define TRUE            1
#endif
#ifndef FALSE
    #define FALSE           0
#endif
74
75
76	/************************
77	* Internal Types *
78	************************/
79
/* Short aliases for the public types; they are used all over this file. */
#define CHAR    MD_CHAR
#define SZ      MD_SIZE
#define OFF     MD_OFFSET

/* Forward declarations of internal structures. */
typedef struct MD_REF_DEF_tag MD_REF_DEF;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_MARK_tag MD_MARK;
89
90
/* While analyzing inline marks we have to maintain several "mark chains" of
 * (so far unresolved) openers. This structure only records where a chain
 * starts and ends; the links between the marks themselves are kept in
 * MD_MARK::prev and MD_MARK::next.
 */
typedef struct MD_MARKCHAIN_tag {
    int head;   /* Index of the first mark in the chain; -1 when empty. */
    int tail;   /* Index of the last mark in the chain; -1 when empty. */
} MD_MARKCHAIN;
100
101	/ Context propagated through all the parsing. /
102	typedef struct MD_CTX_tag MD_CTX;
103	struct MD_CTX_tag {
104	/ Immutable stuff (parameters of md_parse()). /
105	const CHAR* text;
106	SZ size;
107	MD_PARSER parser;
108	void* userdata;
109
110	/ When this is true, it allows some optimizations. /
111	int doc_ends_with_newline;
112
113	/ Helper temporary growing buffer. /
114	CHAR* buffer;
115	unsigned alloc_buffer;
116
117	/ Reference definitions. /
118	MD_REF_DEF* ref_defs;
119	int n_ref_defs;
120	int alloc_ref_defs;
121	void** ref_def_hashtable;
122	int ref_def_hashtable_size;
123
124	/ Stack of inline/span markers.*
125	* This is only used for parsing a single block contents but by storing it
126	* here we may reuse the stack for subsequent blocks; i.e. we have fewer
127	* (re)allocations. */
128	MD_MARK* marks;
129	int n_marks;
130	int alloc_marks;
131
132	#if defined MD4C_USE_UTF16
133	char mark_char_map[`128`];
134	#else
135	char mark_char_map[`256`];
136	#endif
137
138	/ For resolving of inline spans. /
139	MD_MARKCHAIN mark_chains[`13`];
140	#define PTR_CHAIN (ctx->mark_chains[0])
141	#define TABLECELLBOUNDARIES (ctx->mark_chains[1])
142	#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
143	#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3])
144	#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4])
145	#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5])
146	#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6])
147	#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7])
148	#define UNDERSCORE_OPENERS (ctx->mark_chains[8])
149	#define TILDE_OPENERS_1 (ctx->mark_chains[9])
150	#define TILDE_OPENERS_2 (ctx->mark_chains[10])
151	#define BRACKET_OPENERS (ctx->mark_chains[11])
152	#define DOLLAR_OPENERS (ctx->mark_chains[12])
153	#define OPENERS_CHAIN_FIRST 2
154	#define OPENERS_CHAIN_LAST 12
155
156	int n_table_cell_boundaries;
157
158	/ For resolving links. /
159	int unresolved_link_head;
160	int unresolved_link_tail;
161
162	/ For resolving raw HTML. /
163	OFF html_comment_horizon;
164	OFF html_proc_instr_horizon;
165	OFF html_decl_horizon;
166	OFF html_cdata_horizon;
167
168	/ For block analysis.*
169	* Notes:
170	* -- It holds MD_BLOCK as well as MD_LINE structures. After each
171	* MD_BLOCK, its (multiple) MD_LINE(s) follow.
172	* -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
173	* instead of MD_LINE(s).
174	*/
175	void* block_bytes;
176	MD_BLOCK* current_block;
177	int n_block_bytes;
178	int alloc_block_bytes;
179
180	/ For container block analysis. /
181	MD_CONTAINER* containers;
182	int n_containers;
183	int alloc_containers;
184
185	/ Minimal indentation to call the block "indented code block". /
186	unsigned code_indent_offset;
187
188	/ Contextual info for line analysis. /
189	SZ code_fence_length; / For checking closing fence length. /
190	int html_block_type; / For checking closing raw HTML condition. /
191	int last_line_has_list_loosening_effect;
192	int last_list_item_starts_with_two_blank_lines;
193	};
194
/* Line kinds stored in MD_LINE_ANALYSIS::type. */
typedef enum MD_LINETYPE_tag {
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
} MD_LINETYPE;
209
210	typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
211	struct MD_LINE_ANALYSIS_tag {
212	MD_LINETYPE type : `16`;
213	unsigned data : `16`;
214	OFF beg;
215	OFF end;
216	unsigned indent; / Indentation level. /
217	};
218
219	typedef struct MD_LINE_tag MD_LINE;
220	struct MD_LINE_tag {
221	OFF beg;
222	OFF end;
223	};
224
225	typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
226	struct MD_VERBATIMLINE_tag {
227	OFF beg;
228	OFF end;
229	OFF indent;
230	};
231
232
233	/*******************
234	* Debugging *
235	*******************/
236
/* Pass a diagnostic message to the debug_log() callback, if the application
 * has provided one. Expects a variable "ctx" to be in scope. */
#define MD_LOG(msg)                                                     \
    do {                                                                \
        if(ctx->parser.debug_log != NULL)                               \
            ctx->parser.debug_log((msg), ctx->userdata);                \
    } while(0)

/* MD_ASSERT() and MD_UNREACHABLE():
 *  -- In DEBUG builds a failed assertion is logged and the process exits.
 *  -- Otherwise, where the compiler offers a suitable builtin, the condition
 *     is handed to it as an optimization hint.
 *  -- Elsewhere both macros expand to an empty statement.
 */
#if defined DEBUG
    #define MD_ASSERT(cond)                                             \
        do {                                                            \
            if(!(cond)) {                                               \
                MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "            \
                       "Assertion '" STRINGIZE(cond) "' failed.");      \
                exit(1);                                                \
            }                                                           \
        } while(0)

    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
#elif defined __GNUC__
    #define MD_ASSERT(cond)         do { if(!(cond)) __builtin_unreachable(); } while(0)
    #define MD_UNREACHABLE()        do { __builtin_unreachable(); } while(0)
#elif defined _MSC_VER  &&  _MSC_VER > 120
    #define MD_ASSERT(cond)         do { __assume(cond); } while(0)
    #define MD_UNREACHABLE()        do { __assume(0); } while(0)
#else
    #define MD_ASSERT(cond)         do {} while(0)
    #define MD_UNREACHABLE()        do {} while(0)
#endif
266
267
268	/*****************
269	* Helpers *
270	*****************/
271
/* String search routine matching the width of CHAR. */
#if defined MD4C_USE_UTF16
    #define md_strchr wcschr
#else
    #define md_strchr strchr
#endif

/* Character accessors; both expect a variable "ctx" to be in scope. */
#define CH(off)                 (ctx->text[(off)])
#define STR(off)                (ctx->text + (off))

/* Character classification.
 * Note we assume ASCII compatibility of code points < 128 here.
 *
 * The macros with the trailing underscore take a character value; those
 * without it (further below) take an offset into ctx->text. */

/* Generic building blocks. */
#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))

/* Character classes. */
#define ISASCII_(ch)                    ((unsigned)(ch) <= 0x7f)
#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 0x1f || (unsigned)(ch) == 0x7f)
/* The four ASCII punctuation ranges: '!'..'/', ':'..'@', '['..'`', '{'..'~'. */
#define ISPUNCT_(ch)                    (ISIN_(ch, 0x21, 0x2f) ||       \
                                         ISIN_(ch, 0x3a, 0x40) ||       \
                                         ISIN_(ch, 0x5b, 0x60) ||       \
                                         ISIN_(ch, 0x7b, 0x7e))
#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))

/* The same tests applied to the character at the given offset. */
#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
#define ISASCII(off)                    ISASCII_(CH(off))
#define ISBLANK(off)                    ISBLANK_(CH(off))
#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
#define ISCNTRL(off)                    ISCNTRL_(CH(off))
#define ISPUNCT(off)                    ISPUNCT_(CH(off))
#define ISUPPER(off)                    ISUPPER_(CH(off))
#define ISLOWER(off)                    ISLOWER_(CH(off))
#define ISALPHA(off)                    ISALPHA_(CH(off))
#define ISDIGIT(off)                    ISDIGIT_(CH(off))
#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
#define ISALNUM(off)                    ISALNUM_(CH(off))
317
318
319	/ Case insensitive check of string equality. /
320	static inline int
321	md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
322	{
323	OFF i;
324	for(i = `0`; i < n; i++) {
325	CHAR ch1 = s1[i];
326	CHAR ch2 = s2[i];
327
328	if(ISLOWER_(ch1))
329	ch1 += (`'A'`-`'a'`);
330	if(ISLOWER_(ch2))
331	ch2 += (`'A'`-`'a'`);
332	if(ch1 != ch2)
333	return FALSE;
334	}
335	return TRUE;
336	}
337
338	static inline int
339	md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
340	{
341	return memcmp(s1: s1, s2: s2, n: n * sizeof(CHAR)) == `0`;
342	}
343
344	static int
345	md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
346	{
347	OFF off = `0`;
348	int ret = `0`;
349
350	while(`1`) {
351	while(off < size && str[off] != _T(`'\0'`))
352	off++;
353
354	if(off > `0`) {
355	ret = ctx->parser.text(type, str, off, ctx->userdata);
356	if(ret != `0`)
357	return ret;
358
359	str += off;
360	size -= off;
361	off = `0`;
362	}
363
364	if(off >= size)
365	return `0`;
366
367	ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), `1`, ctx->userdata);
368	if(ret != `0`)
369	return ret;
370	off++;
371	}
372	}
373
374
/* Evaluate (func), store its result in "ret" and jump to the "abort" label
 * when the result is negative. Expects "ret" and an "abort:" label in the
 * enclosing function. */
#define MD_CHECK(func)                                                  \
    do {                                                                \
        if((ret = (func)) < 0)                                          \
            goto abort;                                                 \
    } while(0)
381
382
/* Make sure the helper buffer ctx->buffer has room for at least (sz) bytes.
 *
 * When the buffer is too small, it is grown with realloc() to 1.5 times the
 * request plus 128, rounded down to a multiple of 128. If realloc() fails,
 * the failure is logged, "ret" is set to -1 and control jumps to the "abort"
 * label; ctx->buffer and ctx->alloc_buffer are left untouched in that case.
 *
 * Expects "ctx", "ret" and an "abort:" label in the enclosing function.
 * The argument is evaluated more than once, so it must be free of side
 * effects; it is parenthesized everywhere so any expression can be passed.
 */
#define MD_TEMP_BUFFER(sz)                                              \
    do {                                                                \
        if((sz) > ctx->alloc_buffer) {                                  \
            CHAR* new_buffer;                                           \
            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;               \
                                                                        \
            new_buffer = realloc(ctx->buffer, new_size);                \
            if(new_buffer == NULL) {                                    \
                MD_LOG("realloc() failed.");                            \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
                                                                        \
            ctx->buffer = new_buffer;                                   \
            ctx->alloc_buffer = new_size;                               \
        }                                                               \
    } while(0)
400
401
/* Wrappers around the block/span callbacks of the application.
 *
 * Each of them stores the callback's return value in "ret"; when it is
 * non-zero, the event is logged and control jumps to the "abort" label.
 * They expect "ctx", "ret" and an "abort:" label in the enclosing function.
 */
#define MD_ENTER_BLOCK(type, arg)                                       \
    do {                                                                \
        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);    \
        if(ret == 0)                                                    \
            break;                                                      \
        MD_LOG("Aborted from enter_block() callback.");                 \
        goto abort;                                                     \
    } while(0)

#define MD_LEAVE_BLOCK(type, arg)                                       \
    do {                                                                \
        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);    \
        if(ret == 0)                                                    \
            break;                                                      \
        MD_LOG("Aborted from leave_block() callback.");                 \
        goto abort;                                                     \
    } while(0)

#define MD_ENTER_SPAN(type, arg)                                        \
    do {                                                                \
        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);     \
        if(ret == 0)                                                    \
            break;                                                      \
        MD_LOG("Aborted from enter_span() callback.");                  \
        goto abort;                                                     \
    } while(0)

#define MD_LEAVE_SPAN(type, arg)                                        \
    do {                                                                \
        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);     \
        if(ret == 0)                                                    \
            break;                                                      \
        MD_LOG("Aborted from leave_span() callback.");                  \
        goto abort;                                                     \
    } while(0)
437
/* Pass (size) characters starting at (str) to the text callback; nothing is
 * done for an empty text. A non-zero return value of the callback is stored
 * in "ret", logged, and control jumps to the "abort" label.
 *
 * Expects "ctx", "ret" and an "abort:" label in the enclosing function.
 * The size argument is evaluated twice; every argument is parenthesized so
 * that arbitrary expressions may be passed safely.
 */
#define MD_TEXT(type, str, size)                                        \
    do {                                                                \
        if((size) > 0) {                                                \
            ret = ctx->parser.text((type), (str), (size), ctx->userdata); \
            if(ret != 0) {                                              \
                MD_LOG("Aborted from text() callback.");                \
                goto abort;                                             \
            }                                                           \
        }                                                               \
    } while(0)

/* The same as MD_TEXT(), but the text goes through
 * md_text_with_null_replacement() instead of straight to the callback. */
#define MD_TEXT_INSECURE(type, str, size)                               \
    do {                                                                \
        if((size) > 0) {                                                \
            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
            if(ret != 0) {                                              \
                MD_LOG("Aborted from text() callback.");                \
                goto abort;                                             \
            }                                                           \
        }                                                               \
    } while(0)
459
460
461
462	/*************************
463	* Unicode Support *
464	*************************/
465
/* Result of folding a single codepoint: up to three codepoints. */
typedef struct MD_UNICODE_FOLD_INFO_tag {
    unsigned codepoints[3];
    int n_codepoints;       /* Count of valid items in codepoints[]. */
} MD_UNICODE_FOLD_INFO;
471
472
473	#if defined MD4C_USE_UTF16 \|\| defined MD4C_USE_UTF8
/* Binary search in a sorted "map" of codepoints.
 *
 * A run of consecutive codepoints may be stored in the map as just two
 * records: (MIN_CODEPOINT | 0x40000000) followed by
 * (MAX_CODEPOINT | 0x80000000).
 *
 * Returns the index of the matching record (for a range it is the index of
 * its MIN record), or -1 if the codepoint is not covered by the map.
 */
static int
md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
{
    int lo = 0;
    int hi = (int) map_size-1;

    while(lo <= hi) {
        int mid = (lo + hi) / 2;
        int first = mid;
        int last = mid;

        /* The pivot may be either end of a range record pair; widen it so
         * that [first, last] covers the whole range. */
        if(map[mid] & 0x40000000)
            last = mid + 1;
        if(map[mid] & 0x80000000)
            first = mid - 1;

        if(codepoint < (map[first] & 0x00ffffff)) {
            hi = first - 1;
            continue;
        }
        if(codepoint > (map[last] & 0x00ffffff)) {
            lo = last + 1;
            continue;
        }

        return first;
    }

    return -1;
}
506
/* Tell whether the codepoint is whitespace: non-zero if so, zero otherwise.
 * Codepoints up to 0x7f are classified by ISWHITESPACE_(), the others by
 * a lookup in the table below. */
static int
md_is_unicode_whitespace__(unsigned codepoint)
{
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py) */
    static const unsigned WHITESPACE_MAP[] = {
        S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
    };
#undef R
#undef S
    int is_whitespace;

    if(codepoint > 0x7f) {
        is_whitespace = (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);
    } else {
        /* The ASCII ones are the most frequently used ones, also CommonMark
         * specification requests few more in this range. */
        is_whitespace = ISWHITESPACE_(codepoint);
    }

    return is_whitespace;
}
527
/* Tell whether the codepoint is punctuation: non-zero if so, zero otherwise.
 * Codepoints up to 0x7f are classified by ISPUNCT_(), the others by a lookup
 * in the table below. */
static int
md_is_unicode_punct__(unsigned codepoint)
{
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
     * (generated by scripts/build_punct_map.py) */
    static const unsigned PUNCT_MAP[] = {
        R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
        R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
        S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
        S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
        R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
        R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
        R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
        R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
        R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
        R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
        R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
        R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
        R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
        R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
        R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
        S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
        R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
        S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
        S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
        R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
        R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
        S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
        R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
        R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
        R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
        R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
        R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
        R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
        S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
        R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
    };
#undef R
#undef S
    int is_punct;

    if(codepoint > 0x7f) {
        is_punct = (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
    } else {
        /* The ASCII ones are the most frequently used ones, also CommonMark
         * specification requests few more in this range. */
        is_punct = ISPUNCT_(codepoint);
    }

    return is_punct;
}
577
578	static void
579	md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
580	{
581	#define R(cp_min, cp_max) ((cp_min) \| 0x40000000), ((cp_max) \| 0x80000000)
582	#define S(cp) (cp)
583	/ Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.*
584	* (generated by scripts/build_folding_map.py) */
585	static const unsigned FOLD_MAP_1[] = {
586	R(`0x0041`,`0x005a`), S(`0x00b5`), R(`0x00c0`,`0x00d6`), R(`0x00d8`,`0x00de`), R(`0x0100`,`0x012e`), R(`0x0132`,`0x0136`),
587	R(`0x0139`,`0x0147`), R(`0x014a`,`0x0176`), S(`0x0178`), R(`0x0179`,`0x017d`), S(`0x017f`), S(`0x0181`), S(`0x0182`),
588	S(`0x0184`), S(`0x0186`), S(`0x0187`), S(`0x0189`), S(`0x018a`), S(`0x018b`), S(`0x018e`), S(`0x018f`), S(`0x0190`),
589	S(`0x0191`), S(`0x0193`), S(`0x0194`), S(`0x0196`), S(`0x0197`), S(`0x0198`), S(`0x019c`), S(`0x019d`), S(`0x019f`),
590	R(`0x01a0`,`0x01a4`), S(`0x01a6`), S(`0x01a7`), S(`0x01a9`), S(`0x01ac`), S(`0x01ae`), S(`0x01af`), S(`0x01b1`), S(`0x01b2`),
591	S(`0x01b3`), S(`0x01b5`), S(`0x01b7`), S(`0x01b8`), S(`0x01bc`), S(`0x01c4`), S(`0x01c5`), S(`0x01c7`), S(`0x01c8`),
592	S(`0x01ca`), R(`0x01cb`,`0x01db`), R(`0x01de`,`0x01ee`), S(`0x01f1`), S(`0x01f2`), S(`0x01f4`), S(`0x01f6`), S(`0x01f7`),
593	R(`0x01f8`,`0x021e`), S(`0x0220`), R(`0x0222`,`0x0232`), S(`0x023a`), S(`0x023b`), S(`0x023d`), S(`0x023e`), S(`0x0241`),
594	S(`0x0243`), S(`0x0244`), S(`0x0245`), R(`0x0246`,`0x024e`), S(`0x0345`), S(`0x0370`), S(`0x0372`), S(`0x0376`), S(`0x037f`),
595	S(`0x0386`), R(`0x0388`,`0x038a`), S(`0x038c`), S(`0x038e`), S(`0x038f`), R(`0x0391`,`0x03a1`), R(`0x03a3`,`0x03ab`),
596	S(`0x03c2`), S(`0x03cf`), S(`0x03d0`), S(`0x03d1`), S(`0x03d5`), S(`0x03d6`), R(`0x03d8`,`0x03ee`), S(`0x03f0`), S(`0x03f1`),
597	S(`0x03f4`), S(`0x03f5`), S(`0x03f7`), S(`0x03f9`), S(`0x03fa`), R(`0x03fd`,`0x03ff`), R(`0x0400`,`0x040f`),
598	R(`0x0410`,`0x042f`), R(`0x0460`,`0x0480`), R(`0x048a`,`0x04be`), S(`0x04c0`), R(`0x04c1`,`0x04cd`), R(`0x04d0`,`0x052e`),
599	R(`0x0531`,`0x0556`), R(`0x10a0`,`0x10c5`), S(`0x10c7`), S(`0x10cd`), R(`0x13f8`,`0x13fd`), S(`0x1c80`), S(`0x1c81`),
600	S(`0x1c82`), S(`0x1c83`), S(`0x1c84`), S(`0x1c85`), S(`0x1c86`), S(`0x1c87`), S(`0x1c88`), R(`0x1c90`,`0x1cba`),
601	R(`0x1cbd`,`0x1cbf`), R(`0x1e00`,`0x1e94`), S(`0x1e9b`), R(`0x1ea0`,`0x1efe`), R(`0x1f08`,`0x1f0f`), R(`0x1f18`,`0x1f1d`),
602	R(`0x1f28`,`0x1f2f`), R(`0x1f38`,`0x1f3f`), R(`0x1f48`,`0x1f4d`), S(`0x1f59`), S(`0x1f5b`), S(`0x1f5d`), S(`0x1f5f`),
603	R(`0x1f68`,`0x1f6f`), S(`0x1fb8`), S(`0x1fb9`), S(`0x1fba`), S(`0x1fbb`), S(`0x1fbe`), R(`0x1fc8`,`0x1fcb`), S(`0x1fd8`),
604	S(`0x1fd9`), S(`0x1fda`), S(`0x1fdb`), S(`0x1fe8`), S(`0x1fe9`), S(`0x1fea`), S(`0x1feb`), S(`0x1fec`), S(`0x1ff8`),
605	S(`0x1ff9`), S(`0x1ffa`), S(`0x1ffb`), S(`0x2126`), S(`0x212a`), S(`0x212b`), S(`0x2132`), R(`0x2160`,`0x216f`), S(`0x2183`),
606	R(`0x24b6`,`0x24cf`), R(`0x2c00`,`0x2c2e`), S(`0x2c60`), S(`0x2c62`), S(`0x2c63`), S(`0x2c64`), R(`0x2c67`,`0x2c6b`),
607	S(`0x2c6d`), S(`0x2c6e`), S(`0x2c6f`), S(`0x2c70`), S(`0x2c72`), S(`0x2c75`), S(`0x2c7e`), S(`0x2c7f`), R(`0x2c80`,`0x2ce2`),
608	S(`0x2ceb`), S(`0x2ced`), S(`0x2cf2`), R(`0xa640`,`0xa66c`), R(`0xa680`,`0xa69a`), R(`0xa722`,`0xa72e`), R(`0xa732`,`0xa76e`),
609	S(`0xa779`), S(`0xa77b`), S(`0xa77d`), R(`0xa77e`,`0xa786`), S(`0xa78b`), S(`0xa78d`), S(`0xa790`), S(`0xa792`),
610	R(`0xa796`,`0xa7a8`), S(`0xa7aa`), S(`0xa7ab`), S(`0xa7ac`), S(`0xa7ad`), S(`0xa7ae`), S(`0xa7b0`), S(`0xa7b1`), S(`0xa7b2`),
611	S(`0xa7b3`), R(`0xa7b4`,`0xa7be`), S(`0xa7c2`), S(`0xa7c4`), S(`0xa7c5`), S(`0xa7c6`), S(`0xa7c7`), S(`0xa7c9`), S(`0xa7f5`),
612	R(`0xab70`,`0xabbf`), R(`0xff21`,`0xff3a`), R(`0x10400`,`0x10427`), R(`0x104b0`,`0x104d3`), R(`0x10c80`,`0x10cb2`),
613	R(`0x118a0`,`0x118bf`), R(`0x16e40`,`0x16e5f`), R(`0x1e900`,`0x1e921`)
614	};
615	static const unsigned FOLD_MAP_1_DATA[] = {
616	`0x0061`, `0x007a`, `0x03bc`, `0x00e0`, `0x00f6`, `0x00f8`, `0x00fe`, `0x0101`, `0x012f`, `0x0133`, `0x0137`, `0x013a`, `0x0148`,
617	`0x014b`, `0x0177`, `0x00ff`, `0x017a`, `0x017e`, `0x0073`, `0x0253`, `0x0183`, `0x0185`, `0x0254`, `0x0188`, `0x0256`, `0x0257`,
618	`0x018c`, `0x01dd`, `0x0259`, `0x025b`, `0x0192`, `0x0260`, `0x0263`, `0x0269`, `0x0268`, `0x0199`, `0x026f`, `0x0272`, `0x0275`,
619	`0x01a1`, `0x01a5`, `0x0280`, `0x01a8`, `0x0283`, `0x01ad`, `0x0288`, `0x01b0`, `0x028a`, `0x028b`, `0x01b4`, `0x01b6`, `0x0292`,
620	`0x01b9`, `0x01bd`, `0x01c6`, `0x01c6`, `0x01c9`, `0x01c9`, `0x01cc`, `0x01cc`, `0x01dc`, `0x01df`, `0x01ef`, `0x01f3`, `0x01f3`,
621	`0x01f5`, `0x0195`, `0x01bf`, `0x01f9`, `0x021f`, `0x019e`, `0x0223`, `0x0233`, `0x2c65`, `0x023c`, `0x019a`, `0x2c66`, `0x0242`,
622	`0x0180`, `0x0289`, `0x028c`, `0x0247`, `0x024f`, `0x03b9`, `0x0371`, `0x0373`, `0x0377`, `0x03f3`, `0x03ac`, `0x03ad`, `0x03af`,
623	`0x03cc`, `0x03cd`, `0x03ce`, `0x03b1`, `0x03c1`, `0x03c3`, `0x03cb`, `0x03c3`, `0x03d7`, `0x03b2`, `0x03b8`, `0x03c6`, `0x03c0`,
624	`0x03d9`, `0x03ef`, `0x03ba`, `0x03c1`, `0x03b8`, `0x03b5`, `0x03f8`, `0x03f2`, `0x03fb`, `0x037b`, `0x037d`, `0x0450`, `0x045f`,
625	`0x0430`, `0x044f`, `0x0461`, `0x0481`, `0x048b`, `0x04bf`, `0x04cf`, `0x04c2`, `0x04ce`, `0x04d1`, `0x052f`, `0x0561`, `0x0586`,
626	`0x2d00`, `0x2d25`, `0x2d27`, `0x2d2d`, `0x13f0`, `0x13f5`, `0x0432`, `0x0434`, `0x043e`, `0x0441`, `0x0442`, `0x0442`, `0x044a`,
627	`0x0463`, `0xa64b`, `0x10d0`, `0x10fa`, `0x10fd`, `0x10ff`, `0x1e01`, `0x1e95`, `0x1e61`, `0x1ea1`, `0x1eff`, `0x1f00`, `0x1f07`,
628	`0x1f10`, `0x1f15`, `0x1f20`, `0x1f27`, `0x1f30`, `0x1f37`, `0x1f40`, `0x1f45`, `0x1f51`, `0x1f53`, `0x1f55`, `0x1f57`, `0x1f60`,
629	`0x1f67`, `0x1fb0`, `0x1fb1`, `0x1f70`, `0x1f71`, `0x03b9`, `0x1f72`, `0x1f75`, `0x1fd0`, `0x1fd1`, `0x1f76`, `0x1f77`, `0x1fe0`,
630	`0x1fe1`, `0x1f7a`, `0x1f7b`, `0x1fe5`, `0x1f78`, `0x1f79`, `0x1f7c`, `0x1f7d`, `0x03c9`, `0x006b`, `0x00e5`, `0x214e`, `0x2170`,
631	`0x217f`, `0x2184`, `0x24d0`, `0x24e9`, `0x2c30`, `0x2c5e`, `0x2c61`, `0x026b`, `0x1d7d`, `0x027d`, `0x2c68`, `0x2c6c`, `0x0251`,
632	`0x0271`, `0x0250`, `0x0252`, `0x2c73`, `0x2c76`, `0x023f`, `0x0240`, `0x2c81`, `0x2ce3`, `0x2cec`, `0x2cee`, `0x2cf3`, `0xa641`,
633	`0xa66d`, `0xa681`, `0xa69b`, `0xa723`, `0xa72f`, `0xa733`, `0xa76f`, `0xa77a`, `0xa77c`, `0x1d79`, `0xa77f`, `0xa787`, `0xa78c`,
634	`0x0265`, `0xa791`, `0xa793`, `0xa797`, `0xa7a9`, `0x0266`, `0x025c`, `0x0261`, `0x026c`, `0x026a`, `0x029e`, `0x0287`, `0x029d`,
635	`0xab53`, `0xa7b5`, `0xa7bf`, `0xa7c3`, `0xa794`, `0x0282`, `0x1d8e`, `0xa7c8`, `0xa7ca`, `0xa7f6`, `0x13a0`, `0x13ef`, `0xff41`,
636	`0xff5a`, `0x10428`, `0x1044f`, `0x104d8`, `0x104fb`, `0x10cc0`, `0x10cf2`, `0x118c0`, `0x118df`, `0x16e60`, `0x16e7f`, `0x1e922`,
637	`0x1e943`
638	};
639	static const unsigned FOLD_MAP_2[] = {
640	S(`0x00df`), S(`0x0130`), S(`0x0149`), S(`0x01f0`), S(`0x0587`), S(`0x1e96`), S(`0x1e97`), S(`0x1e98`), S(`0x1e99`),
641	S(`0x1e9a`), S(`0x1e9e`), S(`0x1f50`), R(`0x1f80`,`0x1f87`), R(`0x1f88`,`0x1f8f`), R(`0x1f90`,`0x1f97`), R(`0x1f98`,`0x1f9f`),
642	R(`0x1fa0`,`0x1fa7`), R(`0x1fa8`,`0x1faf`), S(`0x1fb2`), S(`0x1fb3`), S(`0x1fb4`), S(`0x1fb6`), S(`0x1fbc`), S(`0x1fc2`),
643	S(`0x1fc3`), S(`0x1fc4`), S(`0x1fc6`), S(`0x1fcc`), S(`0x1fd6`), S(`0x1fe4`), S(`0x1fe6`), S(`0x1ff2`), S(`0x1ff3`),
644	S(`0x1ff4`), S(`0x1ff6`), S(`0x1ffc`), S(`0xfb00`), S(`0xfb01`), S(`0xfb02`), S(`0xfb05`), S(`0xfb06`), S(`0xfb13`),
645	S(`0xfb14`), S(`0xfb15`), S(`0xfb16`), S(`0xfb17`)
646	};
647	static const unsigned FOLD_MAP_2_DATA[] = {
648	`0x0073`,`0x0073`, `0x0069`,`0x0307`, `0x02bc`,`0x006e`, `0x006a`,`0x030c`, `0x0565`,`0x0582`, `0x0068`,`0x0331`, `0x0074`,`0x0308`,
649	`0x0077`,`0x030a`, `0x0079`,`0x030a`, `0x0061`,`0x02be`, `0x0073`,`0x0073`, `0x03c5`,`0x0313`, `0x1f00`,`0x03b9`, `0x1f07`,`0x03b9`,
650	`0x1f00`,`0x03b9`, `0x1f07`,`0x03b9`, `0x1f20`,`0x03b9`, `0x1f27`,`0x03b9`, `0x1f20`,`0x03b9`, `0x1f27`,`0x03b9`, `0x1f60`,`0x03b9`,
651	`0x1f67`,`0x03b9`, `0x1f60`,`0x03b9`, `0x1f67`,`0x03b9`, `0x1f70`,`0x03b9`, `0x03b1`,`0x03b9`, `0x03ac`,`0x03b9`, `0x03b1`,`0x0342`,
652	`0x03b1`,`0x03b9`, `0x1f74`,`0x03b9`, `0x03b7`,`0x03b9`, `0x03ae`,`0x03b9`, `0x03b7`,`0x0342`, `0x03b7`,`0x03b9`, `0x03b9`,`0x0342`,
653	`0x03c1`,`0x0313`, `0x03c5`,`0x0342`, `0x1f7c`,`0x03b9`, `0x03c9`,`0x03b9`, `0x03ce`,`0x03b9`, `0x03c9`,`0x0342`, `0x03c9`,`0x03b9`,
654	`0x0066`,`0x0066`, `0x0066`,`0x0069`, `0x0066`,`0x006c`, `0x0073`,`0x0074`, `0x0073`,`0x0074`, `0x0574`,`0x0576`, `0x0574`,`0x0565`,
655	`0x0574`,`0x056b`, `0x057e`,`0x0576`, `0x0574`,`0x056d`
656	};
657	static const unsigned FOLD_MAP_3[] = {
658	S(`0x0390`), S(`0x03b0`), S(`0x1f52`), S(`0x1f54`), S(`0x1f56`), S(`0x1fb7`), S(`0x1fc7`), S(`0x1fd2`), S(`0x1fd3`),
659	S(`0x1fd7`), S(`0x1fe2`), S(`0x1fe3`), S(`0x1fe7`), S(`0x1ff7`), S(`0xfb03`), S(`0xfb04`)
660	};
661	static const unsigned FOLD_MAP_3_DATA[] = {
662	`0x03b9`,`0x0308`,`0x0301`, `0x03c5`,`0x0308`,`0x0301`, `0x03c5`,`0x0313`,`0x0300`, `0x03c5`,`0x0313`,`0x0301`,
663	`0x03c5`,`0x0313`,`0x0342`, `0x03b1`,`0x0342`,`0x03b9`, `0x03b7`,`0x0342`,`0x03b9`, `0x03b9`,`0x0308`,`0x0300`,
664	`0x03b9`,`0x0308`,`0x0301`, `0x03b9`,`0x0308`,`0x0342`, `0x03c5`,`0x0308`,`0x0300`, `0x03c5`,`0x0308`,`0x0301`,
665	`0x03c5`,`0x0308`,`0x0342`, `0x03c9`,`0x0342`,`0x03b9`, `0x0066`,`0x0066`,`0x0069`, `0x0066`,`0x0066`,`0x006c`
666	};
667	#undef R
668	#undef S
669	static const struct {
670	const unsigned* map;
671	const unsigned* data;
672	size_t map_size;
673	int n_codepoints;
674	} FOLD_MAP_LIST[] = {
675	{ FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), `1` },
676	{ FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), `2` },
677	{ FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), `3` }
678	};
679
680	int i;
681
682	/ Fast path for ASCII characters. /
683	if(codepoint <= `0x7f`) {
684	info->codepoints[`0`] = codepoint;
685	if(ISUPPER_(codepoint))
686	info->codepoints[`0`] += `'a'` - `'A'`;
687	info->n_codepoints = `1`;
688	return;
689	}
690
691	/ Try to locate the codepoint in any of the maps. /
692	for(i = `0`; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
693	int index;
694
695	index = md_unicode_bsearch__(codepoint, map: FOLD_MAP_LIST[i].map, map_size: FOLD_MAP_LIST[i].map_size);
696	if(index >= `0`) {
697	/ Found the mapping. /
698	int n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
699	const unsigned* map = FOLD_MAP_LIST[i].map;
700	const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
701
702	memcpy(dest: info->codepoints, src: codepoints, n: sizeof(unsigned) * n_codepoints);
703	info->n_codepoints = n_codepoints;
704
705	if(FOLD_MAP_LIST[i].map[index] != codepoint) {
706	/ The found mapping maps whole range of codepoints,*
707	* i.e. we have to offset info->codepoints[0] accordingly. */
708	if((map[index] & `0x00ffffff`)+`1` == codepoints[`0`]) {
709	/ Alternating type of the range. /
710	info->codepoints[`0`] = codepoint + ((codepoint & `0x1`) == (map[index] & `0x1`) ? `1` : `0`);
711	} else {
712	/ Range to range kind of mapping. /
713	info->codepoints[`0`] += (codepoint - (map[index] & `0x00ffffff`));
714	}
715	}
716
717	return;
718	}
719	}
720
721	/ No mapping found. Map the codepoint to itself. /
722	info->codepoints[`0`] = codepoint;
723	info->n_codepoints = `1`;
724	}
725	#endif
726
727
728	#if defined MD4C_USE_UTF16
/* Tests for the high (0xd800..0xdbff) and the low (0xdc00..0xdfff) half of
 * a UTF-16 surrogate pair, and the composition of the codepoint out of the
 * two halves (10 payload bits from each, offset by 0x10000). */
#define IS_UTF16_SURROGATE_HI(word)     (((WORD)(word) & 0xfc00) == 0xd800)
#define IS_UTF16_SURROGATE_LO(word)     (((WORD)(word) & 0xfc00) == 0xdc00)
#define UTF16_DECODE_SURROGATE(hi, lo)                                  \
        (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | ((unsigned)(lo) & 0x3ff)))
732
733	static unsigned
734	md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
735	{
736	if(IS_UTF16_SURROGATE_HI(str[`0`])) {
737	if(`1` < str_size && IS_UTF16_SURROGATE_LO(str[`1`])) {
738	if(p_size != NULL)
739	*p_size = `2`;
740	return UTF16_DECODE_SURROGATE(str[`0`], str[`1`]);
741	}
742	}
743
744	if(p_size != NULL)
745	*p_size = `1`;
746	return str[`0`];
747	}
748
749	static unsigned
750	md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
751	{
752	if(off > `2` && IS_UTF16_SURROGATE_HI(CH(off-`2`)) && IS_UTF16_SURROGATE_LO(CH(off-`1`)))
753	return UTF16_DECODE_SURROGATE(CH(off-`2`), CH(off-`1`));
754
755	return CH(off);
756	}
757
/* Whitespace is classified on a single code unit; no whitespace character
 * needs a surrogate pair, so no decoding is done for it. */
#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
#define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
#define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))

/* Punctuation is classified on the fully decoded codepoint (surrogate pairs
 * are combined first). These macros expect 'ctx' to be in scope. */
#define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
765
766	static inline int
767	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
768	{
769	return md_decode_utf16le__(str+off, str_size-off, p_char_size);
770	}
771	#elif defined MD4C_USE_UTF8
/* UTF-8 byte classification.
 *
 * LEADn matches the first byte of an n-byte sequence (LEAD1 being a plain
 * 7-bit byte) and TAIL matches a continuation byte (10xxxxxx). */
#define IS_UTF8_LEAD1(byte)     ((unsigned char)(byte) <= 0x7f)
#define IS_UTF8_LEAD2(byte)     (((unsigned char)(byte) & 0xe0) == 0xc0)
#define IS_UTF8_LEAD3(byte)     (((unsigned char)(byte) & 0xf0) == 0xe0)
#define IS_UTF8_LEAD4(byte)     (((unsigned char)(byte) & 0xf8) == 0xf0)
#define IS_UTF8_TAIL(byte)      (((unsigned char)(byte) & 0xc0) == 0x80)
777
778	static unsigned
779	md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
780	{
781	if(!IS_UTF8_LEAD1(str[`0`])) {
782	if(IS_UTF8_LEAD2(str[`0`])) {
783	if(`1` < str_size && IS_UTF8_TAIL(str[`1`])) {
784	if(p_size != NULL)
785	*p_size = `2`;
786
787	return (((unsigned int)str[`0`] & `0x1f`) << `6`) \|
788	(((unsigned int)str[`1`] & `0x3f`) << `0`);
789	}
790	} else if(IS_UTF8_LEAD3(str[`0`])) {
791	if(`2` < str_size && IS_UTF8_TAIL(str[`1`]) && IS_UTF8_TAIL(str[`2`])) {
792	if(p_size != NULL)
793	*p_size = `3`;
794
795	return (((unsigned int)str[`0`] & `0x0f`) << `12`) \|
796	(((unsigned int)str[`1`] & `0x3f`) << `6`) \|
797	(((unsigned int)str[`2`] & `0x3f`) << `0`);
798	}
799	} else if(IS_UTF8_LEAD4(str[`0`])) {
800	if(`3` < str_size && IS_UTF8_TAIL(str[`1`]) && IS_UTF8_TAIL(str[`2`]) && IS_UTF8_TAIL(str[`3`])) {
801	if(p_size != NULL)
802	*p_size = `4`;
803
804	return (((unsigned int)str[`0`] & `0x07`) << `18`) \|
805	(((unsigned int)str[`1`] & `0x3f`) << `12`) \|
806	(((unsigned int)str[`2`] & `0x3f`) << `6`) \|
807	(((unsigned int)str[`3`] & `0x3f`) << `0`);
808	}
809	}
810	}
811
812	if(p_size != NULL)
813	*p_size = `1`;
814	return (unsigned) str[`0`];
815	}
816
817	static unsigned
818	md_decode_utf8_before__(MD_CTX* ctx, OFF off)
819	{
820	if(!IS_UTF8_LEAD1(CH(off-`1`))) {
821	if(off > `1` && IS_UTF8_LEAD2(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
822	return (((unsigned int)CH(off-`2`) & `0x1f`) << `6`) \|
823	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
824
825	if(off > `2` && IS_UTF8_LEAD3(CH(off-`3`)) && IS_UTF8_TAIL(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
826	return (((unsigned int)CH(off-`3`) & `0x0f`) << `12`) \|
827	(((unsigned int)CH(off-`2`) & `0x3f`) << `6`) \|
828	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
829
830	if(off > `3` && IS_UTF8_LEAD4(CH(off-`4`)) && IS_UTF8_TAIL(CH(off-`3`)) && IS_UTF8_TAIL(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
831	return (((unsigned int)CH(off-`4`) & `0x07`) << `18`) \|
832	(((unsigned int)CH(off-`3`) & `0x3f`) << `12`) \|
833	(((unsigned int)CH(off-`2`) & `0x3f`) << `6`) \|
834	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
835	}
836
837	return (unsigned) CH(off-`1`);
838	}
839
/* Unicode-aware classification for the UTF-8 build. The offset-taking
 * variants decode the codepoint at (or right before) the given offset first;
 * they expect 'ctx' to be in scope. */
#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
#define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))

#define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
846
847	static inline unsigned
848	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
849	{
850	return md_decode_utf8__(str: str+off, str_size: str_size-off, p_size: p_char_size);
851	}
852	#else
/* ASCII-only build: the "Unicode" classification macros simply forward to
 * their plain counterparts. */
#define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
#define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
#define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)

#define ISUNICODEPUNCT(off)             ISPUNCT(off)
#define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)
859
860	static inline void
861	md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
862	{
863	info->codepoints[`0`] = codepoint;
864	if(ISUPPER_(codepoint))
865	info->codepoints[`0`] += `'a'` - `'A'`;
866	info->n_codepoints = `1`;
867	}
868
869	static inline unsigned
870	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
871	{
872	*p_size = `1`;
873	return (unsigned) str[off];
874	}
875	#endif
876
877
878	/*************************************
879	* Helper string manipulations *
880	*************************************/
881
882	/ Fill buffer with copy of the string between 'beg' and 'end' but replace any*
883	* line breaks with given replacement character.
884	*
885	* NOTE: Caller is responsible to make sure the buffer is large enough.
886	* (Given the output is always shorter then input, (end - beg) is good idea
887	* what the caller should allocate.)
888	*/
889	static void
890	md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
891	CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
892	{
893	CHAR* ptr = buffer;
894	int line_index = `0`;
895	OFF off = beg;
896
897	while(`1`) {
898	const MD_LINE* line = &lines[line_index];
899	OFF line_end = line->end;
900	if(end < line_end)
901	line_end = end;
902
903	while(off < line_end) {
904	*ptr = CH(off);
905	ptr++;
906	off++;
907	}
908
909	if(off >= end) {
910	*p_size = ptr - buffer;
911	return;
912	}
913
914	*ptr = line_break_replacement_char;
915	ptr++;
916
917	line_index++;
918	off = lines[line_index].beg;
919	}
920	}
921
922	/ Wrapper of md_merge_lines() which allocates new buffer for the output string.*
923	*/
924	static int
925	md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
926	CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
927	{
928	CHAR* buffer;
929
930	buffer = (CHAR) malloc(size: sizeof(CHAR) (end - beg));
931	if(buffer == NULL) {
932	MD_LOG("malloc() failed.");
933	return -`1`;
934	}
935
936	md_merge_lines(ctx, beg, end, lines, n_lines,
937	line_break_replacement_char, buffer, p_size);
938
939	*p_str = buffer;
940	return `0`;
941	}
942
943	static OFF
944	md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
945	{
946	SZ char_size;
947	unsigned codepoint;
948
949	while(off < size) {
950	codepoint = md_decode_unicode(str: label, off, str_size: size, p_char_size: &char_size);
951	if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
952	break;
953	off += char_size;
954	}
955
956	return off;
957	}
958
959
960	/******************************
961	* Recognizing raw HTML *
962	******************************/
963
964	/ md_is_html_tag() may be called when processing inlines (inline raw HTML)*
965	* or when breaking document to blocks (checking for start of HTML block type 7).
966	*
967	* When breaking document to blocks, we do not yet know line boundaries, but
968	* in that case the whole tag has to live on a single line. We distinguish this
969	* by n_lines == 0.
970	*/
971	static int
972	md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
973	{
974	int attr_state;
975	OFF off = beg;
976	OFF line_end = (n_lines > `0`) ? lines[`0`].end : ctx->size;
977	int i = `0`;
978
979	MD_ASSERT(CH(beg) == _T(`'<'`));
980
981	if(off + `1` >= line_end)
982	return FALSE;
983	off++;
984
985	/ For parsing attributes, we need a little state automaton below.*
986	* State -1: no attributes are allowed.
987	* State 0: attribute could follow after some whitespace.
988	* State 1: after a whitespace (attribute name may follow).
989	* State 2: after attribute name ('=' MAY follow).
990	* State 3: after '=' (value specification MUST follow).
991	* State 41: in middle of unquoted attribute value.
992	* State 42: in middle of single-quoted attribute value.
993	* State 43: in middle of double-quoted attribute value.
994	*/
995	attr_state = `0`;
996
997	if(CH(off) == _T(`'/'`)) {
998	/ Closer tag "</ ... >". No attributes may be present. /
999	attr_state = -`1`;
1000	off++;
1001	}
1002
1003	/ Tag name /
1004	if(off >= line_end \|\| !ISALPHA(off))
1005	return FALSE;
1006	off++;
1007	while(off < line_end && (ISALNUM(off) \|\| CH(off) == _T(`'-'`)))
1008	off++;
1009
1010	/ (Optional) attributes (if not closer), (optional) '/' (if not closer)*
1011	* and final '>'. */
1012	while(`1`) {
1013	while(off < line_end && !ISNEWLINE(off)) {
1014	if(attr_state > `40`) {
1015	if(attr_state == `41` && (ISBLANK(off) \|\| ISANYOF(off, _T("\"'=<>`")))) {
1016	attr_state = `0`;
1017	off--; / Put the char back for re-inspection in the new state. /
1018	} else if(attr_state == `42` && CH(off) == _T(`'\''`)) {
1019	attr_state = `0`;
1020	} else if(attr_state == `43` && CH(off) == _T(`'"'`)) {
1021	attr_state = `0`;
1022	}
1023	off++;
1024	} else if(ISWHITESPACE(off)) {
1025	if(attr_state == `0`)
1026	attr_state = `1`;
1027	off++;
1028	} else if(attr_state <= `2` && CH(off) == _T(`'>'`)) {
1029	/ End. /
1030	goto done;
1031	} else if(attr_state <= `2` && CH(off) == _T(`'/'`) && off+`1` < line_end && CH(off+`1`) == _T(`'>'`)) {
1032	/ End with digraph '/>' /
1033	off++;
1034	goto done;
1035	} else if((attr_state == `1` \|\| attr_state == `2`) && (ISALPHA(off) \|\| CH(off) == _T(`'_'`) \|\| CH(off) == _T(`':'`))) {
1036	off++;
1037	/ Attribute name /
1038	while(off < line_end && (ISALNUM(off) \|\| ISANYOF(off, _T("_.:-"))))
1039	off++;
1040	attr_state = `2`;
1041	} else if(attr_state == `2` && CH(off) == _T(`'='`)) {
1042	/ Attribute assignment sign /
1043	off++;
1044	attr_state = `3`;
1045	} else if(attr_state == `3`) {
1046	/ Expecting start of attribute value. /
1047	if(CH(off) == _T(`'"'`))
1048	attr_state = `43`;
1049	else if(CH(off) == _T(`'\''`))
1050	attr_state = `42`;
1051	else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off))
1052	attr_state = `41`;
1053	else
1054	return FALSE;
1055	off++;
1056	} else {
1057	/ Anything unexpected. /
1058	return FALSE;
1059	}
1060	}
1061
1062	/ We have to be on a single line. See definition of start condition*
1063	* of HTML block, type 7. */
1064	if(n_lines == `0`)
1065	return FALSE;
1066
1067	i++;
1068	if(i >= n_lines)
1069	return FALSE;
1070
1071	off = lines[i].beg;
1072	line_end = lines[i].end;
1073
1074	if(attr_state == `0` \|\| attr_state == `41`)
1075	attr_state = `1`;
1076
1077	if(off >= max_end)
1078	return FALSE;
1079	}
1080
1081	done:
1082	if(off >= max_end)
1083	return FALSE;
1084
1085	*p_end = off+`1`;
1086	return TRUE;
1087	}
1088
1089	static int
1090	md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1091	const MD_LINE* lines, int n_lines,
1092	OFF beg, OFF max_end, OFF* p_end,
1093	OFF* p_scan_horizon)
1094	{
1095	OFF off = beg;
1096	int i = `0`;
1097
1098	if(off < p_scan_horizon && p_scan_horizon >= max_end - len) {
1099	/ We have already scanned the range up to the max_end so we know*
1100	* there is nothing to see. */
1101	return FALSE;
1102	}
1103
1104	while(TRUE) {
1105	while(off + len <= lines[i].end && off + len <= max_end) {
1106	if(md_ascii_eq(STR(off), s2: str, n: len)) {
1107	/ Success. /
1108	*p_end = off + len;
1109	return TRUE;
1110	}
1111	off++;
1112	}
1113
1114	i++;
1115	if(off >= max_end \|\| i >= n_lines) {
1116	/ Failure. /
1117	*p_scan_horizon = off;
1118	return FALSE;
1119	}
1120
1121	off = lines[i].beg;
1122	}
1123	}
1124
1125	static int
1126	md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1127	{
1128	OFF off = beg;
1129
1130	MD_ASSERT(CH(beg) == _T(`'<'`));
1131
1132	if(off + `4` >= lines[`0`].end)
1133	return FALSE;
1134	if(CH(off+`1`) != _T(`'!'`) \|\| CH(off+`2`) != _T(`'-'`) \|\| CH(off+`3`) != _T(`'-'`))
1135	return FALSE;
1136	off += `4`;
1137
1138	/ ">" and "->" must not follow the opening. /
1139	if(off < lines[`0`].end && CH(off) == _T(`'>'`))
1140	return FALSE;
1141	if(off+`1` < lines[`0`].end && CH(off) == _T(`'-'`) && CH(off+`1`) == _T(`'>'`))
1142	return FALSE;
1143
1144	/ HTML comment must not contain "--", so we scan just for "--" instead*
1145	* of "-->" and verify manually that '>' follows. */
1146	if(md_scan_for_html_closer(ctx, _T("--"), len: `2`,
1147	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_comment_horizon))
1148	{
1149	if(p_end < max_end && CH(p_end) == _T(`'>'`)) {
1150	p_end = p_end + `1`;
1151	return TRUE;
1152	}
1153	}
1154
1155	return FALSE;
1156	}
1157
1158	static int
1159	md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1160	{
1161	OFF off = beg;
1162
1163	if(off + `2` >= lines[`0`].end)
1164	return FALSE;
1165	if(CH(off+`1`) != _T(`'?'`))
1166	return FALSE;
1167	off += `2`;
1168
1169	return md_scan_for_html_closer(ctx, _T("?>"), len: `2`,
1170	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_proc_instr_horizon);
1171	}
1172
1173	static int
1174	md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1175	{
1176	OFF off = beg;
1177
1178	if(off + `2` >= lines[`0`].end)
1179	return FALSE;
1180	if(CH(off+`1`) != _T(`'!'`))
1181	return FALSE;
1182	off += `2`;
1183
1184	/ Declaration name. /
1185	if(off >= lines[`0`].end \|\| !ISALPHA(off))
1186	return FALSE;
1187	off++;
1188	while(off < lines[`0`].end && ISALPHA(off))
1189	off++;
1190	if(off < lines[`0`].end && !ISWHITESPACE(off))
1191	return FALSE;
1192
1193	return md_scan_for_html_closer(ctx, _T(">"), len: `1`,
1194	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_decl_horizon);
1195	}
1196
1197	static int
1198	md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1199	{
1200	static const CHAR open_str[] = _T("<![CDATA[");
1201	static const SZ open_size = SIZEOF_ARRAY(open_str) - `1`;
1202
1203	OFF off = beg;
1204
1205	if(off + open_size >= lines[`0`].end)
1206	return FALSE;
1207	if(memcmp(STR(off), s2: open_str, n: open_size) != `0`)
1208	return FALSE;
1209	off += open_size;
1210
1211	if(lines[n_lines-`1`].end < max_end)
1212	max_end = lines[n_lines-`1`].end - `2`;
1213
1214	return md_scan_for_html_closer(ctx, _T("]]>"), len: `3`,
1215	lines, n_lines, beg: off, max_end, p_end, p_scan_horizon: &ctx->html_cdata_horizon);
1216	}
1217
1218	static int
1219	md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1220	{
1221	MD_ASSERT(CH(beg) == _T(`'<'`));
1222	return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1223	md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1224	md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1225	md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1226	md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1227	}
1228
1229
1230	/****************************
1231	* Recognizing Entity *
1232	****************************/
1233
1234	static int
1235	md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1236	{
1237	OFF off = beg;
1238
1239	while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= `8`)
1240	off++;
1241
1242	if(`1` <= off - beg && off - beg <= `6`) {
1243	*p_end = off;
1244	return TRUE;
1245	} else {
1246	return FALSE;
1247	}
1248	}
1249
1250	static int
1251	md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1252	{
1253	OFF off = beg;
1254
1255	while(off < max_end && ISDIGIT_(text[off]) && off - beg <= `8`)
1256	off++;
1257
1258	if(`1` <= off - beg && off - beg <= `7`) {
1259	*p_end = off;
1260	return TRUE;
1261	} else {
1262	return FALSE;
1263	}
1264	}
1265
1266	static int
1267	md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1268	{
1269	OFF off = beg;
1270
1271	if(off < max_end && ISALPHA_(text[off]))
1272	off++;
1273	else
1274	return FALSE;
1275
1276	while(off < max_end && ISALNUM_(text[off]) && off - beg <= `48`)
1277	off++;
1278
1279	if(`2` <= off - beg && off - beg <= `48`) {
1280	*p_end = off;
1281	return TRUE;
1282	} else {
1283	return FALSE;
1284	}
1285	}
1286
1287	static int
1288	md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1289	{
1290	int is_contents;
1291	OFF off = beg;
1292
1293	MD_ASSERT(text[off] == _T(`'&'`));
1294	off++;
1295
1296	if(off+`2` < max_end && text[off] == _T(`'#'`) && (text[off+`1`] == _T(`'x'`) \|\| text[off+`1`] == _T(`'X'`)))
1297	is_contents = md_is_hex_entity_contents(ctx, text, beg: off+`2`, max_end, p_end: &off);
1298	else if(off+`1` < max_end && text[off] == _T(`'#'`))
1299	is_contents = md_is_dec_entity_contents(ctx, text, beg: off+`1`, max_end, p_end: &off);
1300	else
1301	is_contents = md_is_named_entity_contents(ctx, text, beg: off, max_end, p_end: &off);
1302
1303	if(is_contents && off < max_end && text[off] == _T(`';'`)) {
1304	*p_end = off+`1`;
1305	return TRUE;
1306	} else {
1307	return FALSE;
1308	}
1309	}
1310
1311	static inline int
1312	md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1313	{
1314	return md_is_entity_str(ctx, text: ctx->text, beg, max_end, p_end);
1315	}
1316
1317
1318	/******************************
1319	* Attribute Management *
1320	******************************/
1321
1322	typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
1323	struct MD_ATTRIBUTE_BUILD_tag {
1324	CHAR* text;
1325	MD_TEXTTYPE* substr_types;
1326	OFF* substr_offsets;
1327	int substr_count;
1328	int substr_alloc;
1329	MD_TEXTTYPE trivial_types[`1`];
1330	OFF trivial_offsets[`2`];
1331	};
1332
1333
1334	#define MD_BUILD_ATTR_NO_ESCAPES 0x0001
1335
1336	static int
1337	md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1338	MD_TEXTTYPE type, OFF off)
1339	{
1340	if(build->substr_count >= build->substr_alloc) {
1341	MD_TEXTTYPE* new_substr_types;
1342	OFF* new_substr_offsets;
1343
1344	build->substr_alloc = (build->substr_alloc > `0`
1345	? build->substr_alloc + build->substr_alloc / `2`
1346	: `8`);
1347	new_substr_types = (MD_TEXTTYPE*) realloc(ptr: build->substr_types,
1348	size: build->substr_alloc * sizeof(MD_TEXTTYPE));
1349	if(new_substr_types == NULL) {
1350	MD_LOG("realloc() failed.");
1351	return -`1`;
1352	}
1353	/ Note +1 to reserve space for final offset (== raw_size). /
1354	new_substr_offsets = (OFF*) realloc(ptr: build->substr_offsets,
1355	size: (build->substr_alloc+`1`) * sizeof(OFF));
1356	if(new_substr_offsets == NULL) {
1357	MD_LOG("realloc() failed.");
1358	free(ptr: new_substr_types);
1359	return -`1`;
1360	}
1361
1362	build->substr_types = new_substr_types;
1363	build->substr_offsets = new_substr_offsets;
1364	}
1365
1366	build->substr_types[build->substr_count] = type;
1367	build->substr_offsets[build->substr_count] = off;
1368	build->substr_count++;
1369	return `0`;
1370	}
1371
1372	static void
1373	md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1374	{
1375	if(build->substr_alloc > `0`) {
1376	free(ptr: build->text);
1377	free(ptr: build->substr_types);
1378	free(ptr: build->substr_offsets);
1379	}
1380	}
1381
1382	static int
1383	md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1384	unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1385	{
1386	OFF raw_off, off;
1387	int is_trivial;
1388	int ret = `0`;
1389
1390	memset(s: build, c: `0`, n: sizeof(MD_ATTRIBUTE_BUILD));
1391
1392	/ If there is no backslash and no ampersand, build trivial attribute*
1393	* without any malloc(). */
1394	is_trivial = TRUE;
1395	for(raw_off = `0`; raw_off < raw_size; raw_off++) {
1396	if(ISANYOF3_(raw_text[raw_off], _T(`'\\'`), _T(`'&'`), _T(`'\0'`))) {
1397	is_trivial = FALSE;
1398	break;
1399	}
1400	}
1401
1402	if(is_trivial) {
1403	build->text = (CHAR*) (raw_size ? raw_text : NULL);
1404	build->substr_types = build->trivial_types;
1405	build->substr_offsets = build->trivial_offsets;
1406	build->substr_count = `1`;
1407	build->substr_alloc = `0`;
1408	build->trivial_types[`0`] = MD_TEXT_NORMAL;
1409	build->trivial_offsets[`0`] = `0`;
1410	build->trivial_offsets[`1`] = raw_size;
1411	off = raw_size;
1412	} else {
1413	build->text = (CHAR) malloc(size: raw_size sizeof(CHAR));
1414	if(build->text == NULL) {
1415	MD_LOG("malloc() failed.");
1416	goto abort;
1417	}
1418
1419	raw_off = `0`;
1420	off = `0`;
1421
1422	while(raw_off < raw_size) {
1423	if(raw_text[raw_off] == _T(`'\0'`)) {
1424	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1425	memcpy(dest: build->text + off, src: raw_text + raw_off, n: `1`);
1426	off++;
1427	raw_off++;
1428	continue;
1429	}
1430
1431	if(raw_text[raw_off] == _T(`'&'`)) {
1432	OFF ent_end;
1433
1434	if(md_is_entity_str(ctx, text: raw_text, beg: raw_off, max_end: raw_size, p_end: &ent_end)) {
1435	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1436	memcpy(dest: build->text + off, src: raw_text + raw_off, n: ent_end - raw_off);
1437	off += ent_end - raw_off;
1438	raw_off = ent_end;
1439	continue;
1440	}
1441	}
1442
1443	if(build->substr_count == `0` \|\| build->substr_types[build->substr_count-`1`] != MD_TEXT_NORMAL)
1444	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1445
1446	if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
1447	raw_text[raw_off] == _T(`'\\'`) && raw_off+`1` < raw_size &&
1448	(ISPUNCT_(raw_text[raw_off+`1`]) \|\| ISNEWLINE_(raw_text[raw_off+`1`])))
1449	raw_off++;
1450
1451	build->text[off++] = raw_text[raw_off++];
1452	}
1453	build->substr_offsets[build->substr_count] = off;
1454	}
1455
1456	attr->text = build->text;
1457	attr->size = off;
1458	attr->substr_offsets = build->substr_offsets;
1459	attr->substr_types = build->substr_types;
1460	return `0`;
1461
1462	abort:
1463	md_free_attribute(ctx, build);
1464	return -`1`;
1465	}
1466
1467
1468	/*********************************************
1469	* Dictionary of Reference Definitions *
1470	*********************************************/
1471
/* Parameters of the FNV-1a hash. */
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* Compute FNV-1a hash of the 'n' bytes at 'data', starting from the hash
 * value 'base'. Pass MD_FNV1A_BASE to start a new hash, or a previous result
 * to continue hashing of data coming in multiple pieces. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* byte = (const unsigned char*) data;
    unsigned hash = base;

    for(; n > 0; n--, byte++)
        hash = (hash ^ *byte) * MD_FNV1A_PRIME;

    return hash;
}
1489
1490
1491	struct MD_REF_DEF_tag {
1492	CHAR* label;
1493	CHAR* title;
1494	unsigned hash;
1495	SZ label_size;
1496	SZ title_size;
1497	OFF dest_beg;
1498	OFF dest_end;
1499	unsigned char label_needs_free : `1`;
1500	unsigned char title_needs_free : `1`;
1501	};
1502
/* Label equivalence is quite complicated with regard to whitespace and case
 * folding. This complicates computing a hash of a label as well as directly
 * comparing two labels. */
1506
1507	static unsigned
1508	md_link_label_hash(const CHAR* label, SZ size)
1509	{
1510	unsigned hash = MD_FNV1A_BASE;
1511	OFF off;
1512	unsigned codepoint;
1513	int is_whitespace = FALSE;
1514
1515	off = md_skip_unicode_whitespace(label, off: `0`, size);
1516	while(off < size) {
1517	SZ char_size;
1518
1519	codepoint = md_decode_unicode(str: label, off, str_size: size, p_char_size: &char_size);
1520	is_whitespace = ISUNICODEWHITESPACE_(codepoint) \|\| ISNEWLINE_(label[off]);
1521
1522	if(is_whitespace) {
1523	codepoint = `' '`;
1524	hash = md_fnv1a(base: hash, data: &codepoint, n: sizeof(unsigned));
1525	off = md_skip_unicode_whitespace(label, off, size);
1526	} else {
1527	MD_UNICODE_FOLD_INFO fold_info;
1528
1529	md_get_unicode_fold_info(codepoint, info: &fold_info);
1530	hash = md_fnv1a(base: hash, data: fold_info.codepoints, n: fold_info.n_codepoints * sizeof(unsigned));
1531	off += char_size;
1532	}
1533	}
1534
1535	return hash;
1536	}
1537
1538	static OFF
1539	md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1540	MD_UNICODE_FOLD_INFO* fold_info)
1541	{
1542	unsigned codepoint;
1543	SZ char_size;
1544
1545	if(off >= size) {
1546	/ Treat end of a link label as a whitespace. /
1547	goto whitespace;
1548	}
1549
1550	if(ISNEWLINE_(label[off])) {
1551	/ Treat new lines as a whitespace. /
1552	off++;
1553	goto whitespace;
1554	}
1555
1556	codepoint = md_decode_unicode(str: label, off, str_size: size, p_char_size: &char_size);
1557	off += char_size;
1558	if(ISUNICODEWHITESPACE_(codepoint)) {
1559	/ Treat all whitespace as equivalent /
1560	goto whitespace;
1561	}
1562
1563	/ Get real folding info. /
1564	md_get_unicode_fold_info(codepoint, info: fold_info);
1565	return off;
1566
1567	whitespace:
1568	fold_info->codepoints[`0`] = _T(`' '`);
1569	fold_info->n_codepoints = `1`;
1570	return md_skip_unicode_whitespace(label, off, size);
1571	}
1572
1573	static int
1574	md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1575	{
1576	OFF a_off;
1577	OFF b_off;
1578	int a_reached_end = FALSE;
1579	int b_reached_end = FALSE;
1580	MD_UNICODE_FOLD_INFO a_fi = { { `0` }, `0` };
1581	MD_UNICODE_FOLD_INFO b_fi = { { `0` }, `0` };
1582	OFF a_fi_off = `0`;
1583	OFF b_fi_off = `0`;
1584	int cmp;
1585
1586	a_off = md_skip_unicode_whitespace(label: a_label, off: `0`, size: a_size);
1587	b_off = md_skip_unicode_whitespace(label: b_label, off: `0`, size: b_size);
1588	while(!a_reached_end \|\| !b_reached_end) {
1589	/ If needed, load fold info for next char. /
1590	if(a_fi_off >= a_fi.n_codepoints) {
1591	a_fi_off = `0`;
1592	a_off = md_link_label_cmp_load_fold_info(label: a_label, off: a_off, size: a_size, fold_info: &a_fi);
1593	a_reached_end = (a_off >= a_size);
1594	}
1595	if(b_fi_off >= b_fi.n_codepoints) {
1596	b_fi_off = `0`;
1597	b_off = md_link_label_cmp_load_fold_info(label: b_label, off: b_off, size: b_size, fold_info: &b_fi);
1598	b_reached_end = (b_off >= b_size);
1599	}
1600
1601	cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1602	if(cmp != `0`)
1603	return cmp;
1604
1605	a_fi_off++;
1606	b_fi_off++;
1607	}
1608
1609	return `0`;
1610	}
1611
1612	typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1613	struct MD_REF_DEF_LIST_tag {
1614	int n_ref_defs;
1615	int alloc_ref_defs;
1616	MD_REF_DEF* ref_defs[]; / Valid items always point into ctx->ref_defs[] /
1617	};
1618
1619	static int
1620	md_ref_def_cmp(const void* a, const void* b)
1621	{
1622	const MD_REF_DEF* a_ref = (const* MD_REF_DEF**)a;
1623	const MD_REF_DEF* b_ref = (const* MD_REF_DEF**)b;
1624
1625	if(a_ref->hash < b_ref->hash)
1626	return -`1`;
1627	else if(a_ref->hash > b_ref->hash)
1628	return +`1`;
1629	else
1630	return md_link_label_cmp(a_label: a_ref->label, a_size: a_ref->label_size, b_label: b_ref->label, b_size: b_ref->label_size);
1631	}
1632
1633	static int
1634	md_ref_def_cmp_for_sort(const void* a, const void* b)
1635	{
1636	int cmp;
1637
1638	cmp = md_ref_def_cmp(a, b);
1639
1640	/ Ensure stability of the sorting. /
1641	if(cmp == `0`) {
1642	const MD_REF_DEF* a_ref = (const* MD_REF_DEF**)a;
1643	const MD_REF_DEF* b_ref = (const* MD_REF_DEF**)b;
1644
1645	if(a_ref < b_ref)
1646	cmp = -`1`;
1647	else if(a_ref > b_ref)
1648	cmp = +`1`;
1649	else
1650	cmp = `0`;
1651	}
1652
1653	return cmp;
1654	}
1655
1656	static int
1657	md_build_ref_def_hashtable(MD_CTX* ctx)
1658	{
1659	int i, j;
1660
1661	if(ctx->n_ref_defs == `0`)
1662	return `0`;
1663
1664	ctx->ref_def_hashtable_size = (ctx->n_ref_defs * `5`) / `4`;
1665	ctx->ref_def_hashtable = malloc(size: ctx->ref_def_hashtable_size * sizeof(void*));
1666	if(ctx->ref_def_hashtable == NULL) {
1667	MD_LOG("malloc() failed.");
1668	goto abort;
1669	}
1670	memset(s: ctx->ref_def_hashtable, c: `0`, n: ctx->ref_def_hashtable_size * sizeof(void*));
1671
1672	/ Each member of ctx->ref_def_hashtable[] can be:*
1673	* -- NULL,
1674	* -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
1675	* -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
1676	* such MD_REF_DEFs.
1677	*/
1678	for(i = `0`; i < ctx->n_ref_defs; i++) {
1679	MD_REF_DEF* def = &ctx->ref_defs[i];
1680	void* bucket;
1681	MD_REF_DEF_LIST* list;
1682
1683	def->hash = md_link_label_hash(label: def->label, size: def->label_size);
1684	bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1685
1686	if(bucket == NULL) {
1687	/ The bucket is empty. Make it just point to the def. /
1688	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1689	continue;
1690	}
1691
1692	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1693	/ The bucket already contains one ref. def. Lets see whether it*
1694	* is the same label (ref. def. duplicate) or different one
1695	* (hash conflict). */
1696	MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1697
1698	if(md_link_label_cmp(a_label: def->label, a_size: def->label_size, b_label: old_def->label, b_size: old_def->label_size) == `0`) {
1699	/ Duplicate label: Ignore this ref. def. /
1700	continue;
1701	}
1702
1703	/ Make the bucket complex, i.e. able to hold more ref. defs. /
1704	list = (MD_REF_DEF_LIST) malloc(size: sizeof(MD_REF_DEF_LIST) + `2` sizeof(MD_REF_DEF*));
1705	if(list == NULL) {
1706	MD_LOG("malloc() failed.");
1707	goto abort;
1708	}
1709	list->ref_defs[`0`] = old_def;
1710	list->ref_defs[`1`] = def;
1711	list->n_ref_defs = `2`;
1712	list->alloc_ref_defs = `2`;
1713	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1714	continue;
1715	}
1716
1717	/ Append the def to the complex bucket list.*
1718	*
1719	* Note in this case we ignore potential duplicates to avoid expensive
1720	* iterating over the complex bucket. Below, we revisit all the complex
1721	* buckets and handle it more cheaply after the complex bucket contents
1722	* is sorted. */
1723	list = (MD_REF_DEF_LIST*) bucket;
1724	if(list->n_ref_defs >= list->alloc_ref_defs) {
1725	int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / `2`;
1726	MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(ptr: list,
1727	size: sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1728	if(list_tmp == NULL) {
1729	MD_LOG("realloc() failed.");
1730	goto abort;
1731	}
1732	list = list_tmp;
1733	list->alloc_ref_defs = alloc_ref_defs;
1734	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1735	}
1736
1737	list->ref_defs[list->n_ref_defs] = def;
1738	list->n_ref_defs++;
1739	}
1740
1741	/ Sort the complex buckets so we can use bsearch() with them. /
1742	for(i = `0`; i < ctx->ref_def_hashtable_size; i++) {
1743	void* bucket = ctx->ref_def_hashtable[i];
1744	MD_REF_DEF_LIST* list;
1745
1746	if(bucket == NULL)
1747	continue;
1748	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs)
1749	continue;
1750
1751	list = (MD_REF_DEF_LIST*) bucket;
1752	qsort(base: list->ref_defs, nmemb: list->n_ref_defs, size: sizeof(MD_REF_DEF*), compar: md_ref_def_cmp_for_sort);
1753
1754	/ Disable all duplicates in the complex bucket by forcing all such*
1755	* records to point to the 1st such ref. def. I.e. no matter which
1756	* record is found during the lookup, it will always point to the right
1757	* ref. def. in ctx->ref_defs[]. */
1758	for(j = `1`; j < list->n_ref_defs; j++) {
1759	if(md_ref_def_cmp(a: &list->ref_defs[j-`1`], b: &list->ref_defs[j]) == `0`)
1760	list->ref_defs[j] = list->ref_defs[j-`1`];
1761	}
1762	}
1763
1764	return `0`;
1765
1766	abort:
1767	return -`1`;
1768	}
1769
1770	static void
1771	md_free_ref_def_hashtable(MD_CTX* ctx)
1772	{
1773	if(ctx->ref_def_hashtable != NULL) {
1774	int i;
1775
1776	for(i = `0`; i < ctx->ref_def_hashtable_size; i++) {
1777	void* bucket = ctx->ref_def_hashtable[i];
1778	if(bucket == NULL)
1779	continue;
1780	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs)
1781	continue;
1782	free(ptr: bucket);
1783	}
1784
1785	free(ptr: ctx->ref_def_hashtable);
1786	}
1787	}
1788
1789	static const MD_REF_DEF*
1790	md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1791	{
1792	unsigned hash;
1793	void* bucket;
1794
1795	if(ctx->ref_def_hashtable_size == `0`)
1796	return NULL;
1797
1798	hash = md_link_label_hash(label, size: label_size);
1799	bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1800
1801	if(bucket == NULL) {
1802	return NULL;
1803	} else if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1804	const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1805
1806	if(md_link_label_cmp(a_label: def->label, a_size: def->label_size, b_label: label, b_size: label_size) == `0`)
1807	return def;
1808	else
1809	return NULL;
1810	} else {
1811	MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1812	MD_REF_DEF key_buf;
1813	const MD_REF_DEF* key = &key_buf;
1814	const MD_REF_DEF** ret;
1815
1816	key_buf.label = (CHAR*) label;
1817	key_buf.label_size = label_size;
1818	key_buf.hash = md_link_label_hash(label: key_buf.label, size: key_buf.label_size);
1819
1820	ret = (const MD_REF_DEF**) bsearch(key: &key, base: list->ref_defs,
1821	nmemb: list->n_ref_defs, size: sizeof(MD_REF_DEF*), compar: md_ref_def_cmp);
1822	if(ret != NULL)
1823	return *ret;
1824	else
1825	return NULL;
1826	}
1827	}
1828
1829
1830	/***************************
1831	* Recognizing Links *
1832	***************************/
1833
/* Note this code is partially shared between the processing of inlines and
 * the processing of blocks, as reference definitions and links share some
 * helper parser functions.
 */
1837
/* Attributes of a recognized link: its destination and its optional title.
 * Filled by md_is_link_reference() and md_is_inline_link_spec(). */
typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
struct MD_LINK_ATTR_tag {
    /* Offsets delimiting the link destination. */
    OFF dest_beg;
    OFF dest_end;

    /* Link title; NULL with zero size when there is no title. */
    CHAR* title;
    SZ title_size;
    /* TRUE if title was allocated via md_merge_lines_alloc() (multi-line
     * title of an inline link); FALSE if it just points to memory owned by
     * someone else. */
    int title_needs_free;
};
1847
1848
1849	static int
1850	md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
1851	OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
1852	OFF* p_contents_beg, OFF* p_contents_end)
1853	{
1854	OFF off = beg;
1855	OFF contents_beg = `0`;
1856	OFF contents_end = `0`;
1857	int line_index = `0`;
1858	int len = `0`;
1859
1860	if(CH(off) != _T(`'['`))
1861	return FALSE;
1862	off++;
1863
1864	while(`1`) {
1865	OFF line_end = lines[line_index].end;
1866
1867	while(off < line_end) {
1868	if(CH(off) == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
1869	if(contents_end == `0`) {
1870	contents_beg = off;
1871	*p_beg_line_index = line_index;
1872	}
1873	contents_end = off + `2`;
1874	off += `2`;
1875	} else if(CH(off) == _T(`'['`)) {
1876	return FALSE;
1877	} else if(CH(off) == _T(`']'`)) {
1878	if(contents_beg < contents_end) {
1879	/ Success. /
1880	*p_contents_beg = contents_beg;
1881	*p_contents_end = contents_end;
1882	*p_end = off+`1`;
1883	*p_end_line_index = line_index;
1884	return TRUE;
1885	} else {
1886	/ Link label must have some non-whitespace contents. /
1887	return FALSE;
1888	}
1889	} else {
1890	unsigned codepoint;
1891	SZ char_size;
1892
1893	codepoint = md_decode_unicode(str: ctx->text, off, str_size: ctx->size, p_char_size: &char_size);
1894	if(!ISUNICODEWHITESPACE_(codepoint)) {
1895	if(contents_end == `0`) {
1896	contents_beg = off;
1897	*p_beg_line_index = line_index;
1898	}
1899	contents_end = off + char_size;
1900	}
1901
1902	off += char_size;
1903	}
1904
1905	len++;
1906	if(len > `999`)
1907	return FALSE;
1908	}
1909
1910	line_index++;
1911	len++;
1912	if(line_index < n_lines)
1913	off = lines[line_index].beg;
1914	else
1915	break;
1916	}
1917
1918	return FALSE;
1919	}
1920
1921	static int
1922	md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1923	OFF* p_contents_beg, OFF* p_contents_end)
1924	{
1925	OFF off = beg;
1926
1927	if(off >= max_end \|\| CH(off) != _T(`'<'`))
1928	return FALSE;
1929	off++;
1930
1931	while(off < max_end) {
1932	if(CH(off) == _T(`'\\'`) && off+`1` < max_end && ISPUNCT(off+`1`)) {
1933	off += `2`;
1934	continue;
1935	}
1936
1937	if(ISNEWLINE(off) \|\| CH(off) == _T(`'<'`))
1938	return FALSE;
1939
1940	if(CH(off) == _T(`'>'`)) {
1941	/ Success. /
1942	*p_contents_beg = beg+`1`;
1943	*p_contents_end = off;
1944	*p_end = off+`1`;
1945	return TRUE;
1946	}
1947
1948	off++;
1949	}
1950
1951	return FALSE;
1952	}
1953
1954	static int
1955	md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1956	OFF* p_contents_beg, OFF* p_contents_end)
1957	{
1958	OFF off = beg;
1959	int parenthesis_level = `0`;
1960
1961	while(off < max_end) {
1962	if(CH(off) == _T(`'\\'`) && off+`1` < max_end && ISPUNCT(off+`1`)) {
1963	off += `2`;
1964	continue;
1965	}
1966
1967	if(ISWHITESPACE(off) \|\| ISCNTRL(off))
1968	break;
1969
1970	/ Link destination may include balanced pairs of unescaped '(' ')'.*
1971	* Note we limit the maximal nesting level by 32 to protect us from
1972	* https://github.com/jgm/cmark/issues/214 */
1973	if(CH(off) == _T(`'('`)) {
1974	parenthesis_level++;
1975	if(parenthesis_level > `32`)
1976	return FALSE;
1977	} else if(CH(off) == _T(`')'`)) {
1978	if(parenthesis_level == `0`)
1979	break;
1980	parenthesis_level--;
1981	}
1982
1983	off++;
1984	}
1985
1986	if(parenthesis_level != `0` \|\| off == beg)
1987	return FALSE;
1988
1989	/ Success. /
1990	*p_contents_beg = beg;
1991	*p_contents_end = off;
1992	*p_end = off;
1993	return TRUE;
1994	}
1995
1996	static inline int
1997	md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1998	OFF* p_contents_beg, OFF* p_contents_end)
1999	{
2000	if(CH(beg) == _T(`'<'`))
2001	return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2002	else
2003	return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2004	}
2005
2006	static int
2007	md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2008	OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
2009	OFF* p_contents_beg, OFF* p_contents_end)
2010	{
2011	OFF off = beg;
2012	CHAR closer_char;
2013	int line_index = `0`;
2014
2015	/ White space with up to one line break. /
2016	while(off < lines[line_index].end && ISWHITESPACE(off))
2017	off++;
2018	if(off >= lines[line_index].end) {
2019	line_index++;
2020	if(line_index >= n_lines)
2021	return FALSE;
2022	off = lines[line_index].beg;
2023	}
2024	if(off == beg)
2025	return FALSE;
2026
2027	*p_beg_line_index = line_index;
2028
2029	/ First char determines how to detect end of it. /
2030	switch(CH(off)) {
2031	case _T(`'"'`): closer_char = _T(`'"'`); break;
2032	case _T(`'\''`): closer_char = _T(`'\''`); break;
2033	case _T(`'('`): closer_char = _T(`')'`); break;
2034	default: return FALSE;
2035	}
2036	off++;
2037
2038	*p_contents_beg = off;
2039
2040	while(line_index < n_lines) {
2041	OFF line_end = lines[line_index].end;
2042
2043	while(off < line_end) {
2044	if(CH(off) == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
2045	off++;
2046	} else if(CH(off) == closer_char) {
2047	/ Success. /
2048	*p_contents_end = off;
2049	*p_end = off+`1`;
2050	*p_end_line_index = line_index;
2051	return TRUE;
2052	} else if(closer_char == _T(`')'`) && CH(off) == _T(`'('`)) {
2053	/ ()-style title cannot contain (unescaped '(')) /
2054	return FALSE;
2055	}
2056
2057	off++;
2058	}
2059
2060	line_index++;
2061	}
2062
2063	return FALSE;
2064	}
2065
2066	/ Returns 0 if it is not a reference definition.*
2067	*
2068	* Returns N > 0 if it is a reference definition. N then corresponds to the
2069	* number of lines forming it). In this case the definition is stored for
2070	* resolving any links referring to it.
2071	*
2072	* Returns -1 in case of an error (out of memory).
2073	*/
2074	static int
2075	md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
2076	{
2077	OFF label_contents_beg;
2078	OFF label_contents_end;
2079	int label_contents_line_index = -`1`;
2080	int label_is_multiline = FALSE;
2081	OFF dest_contents_beg;
2082	OFF dest_contents_end;
2083	OFF title_contents_beg;
2084	OFF title_contents_end;
2085	int title_contents_line_index;
2086	int title_is_multiline = FALSE;
2087	OFF off;
2088	int line_index = `0`;
2089	int tmp_line_index;
2090	MD_REF_DEF* def = NULL;
2091	int ret = `0`;
2092
2093	/ Link label. /
2094	if(!md_is_link_label(ctx, lines, n_lines, beg: lines[`0`].beg,
2095	p_end: &off, p_beg_line_index: &label_contents_line_index, p_end_line_index: &line_index,
2096	p_contents_beg: &label_contents_beg, p_contents_end: &label_contents_end))
2097	return FALSE;
2098	label_is_multiline = (label_contents_line_index != line_index);
2099
2100	/ Colon. /
2101	if(off >= lines[line_index].end \|\| CH(off) != _T(`':'`))
2102	return FALSE;
2103	off++;
2104
2105	/ Optional white space with up to one line break. /
2106	while(off < lines[line_index].end && ISWHITESPACE(off))
2107	off++;
2108	if(off >= lines[line_index].end) {
2109	line_index++;
2110	if(line_index >= n_lines)
2111	return FALSE;
2112	off = lines[line_index].beg;
2113	}
2114
2115	/ Link destination. /
2116	if(!md_is_link_destination(ctx, beg: off, max_end: lines[line_index].end,
2117	p_end: &off, p_contents_beg: &dest_contents_beg, p_contents_end: &dest_contents_end))
2118	return FALSE;
2119
2120	/ (Optional) title. Note we interpret it as an title only if nothing*
2121	* more follows on its last line. */
2122	if(md_is_link_title(ctx, lines: lines + line_index, n_lines: n_lines - line_index, beg: off,
2123	p_end: &off, p_beg_line_index: &title_contents_line_index, p_end_line_index: &tmp_line_index,
2124	p_contents_beg: &title_contents_beg, p_contents_end: &title_contents_end)
2125	&& off >= lines[line_index + tmp_line_index].end)
2126	{
2127	title_is_multiline = (tmp_line_index != title_contents_line_index);
2128	title_contents_line_index += line_index;
2129	line_index += tmp_line_index;
2130	} else {
2131	/ Not a title. /
2132	title_is_multiline = FALSE;
2133	title_contents_beg = off;
2134	title_contents_end = off;
2135	title_contents_line_index = `0`;
2136	}
2137
2138	/ Nothing more can follow on the last line. /
2139	if(off < lines[line_index].end)
2140	return FALSE;
2141
2142	/ So, it _is_ a reference definition. Remember it. /
2143	if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2144	MD_REF_DEF* new_defs;
2145
2146	ctx->alloc_ref_defs = (ctx->alloc_ref_defs > `0`
2147	? ctx->alloc_ref_defs + ctx->alloc_ref_defs / `2`
2148	: `16`);
2149	new_defs = (MD_REF_DEF) realloc(ptr: ctx->ref_defs, size: ctx->alloc_ref_defs sizeof(MD_REF_DEF));
2150	if(new_defs == NULL) {
2151	MD_LOG("realloc() failed.");
2152	goto abort;
2153	}
2154
2155	ctx->ref_defs = new_defs;
2156	}
2157	def = &ctx->ref_defs[ctx->n_ref_defs];
2158	memset(s: def, c: `0`, n: sizeof(MD_REF_DEF));
2159
2160	if(label_is_multiline) {
2161	MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2162	lines + label_contents_line_index, n_lines - label_contents_line_index,
2163	_T(`' '`), &def->label, &def->label_size));
2164	def->label_needs_free = TRUE;
2165	} else {
2166	def->label = (CHAR*) STR(label_contents_beg);
2167	def->label_size = label_contents_end - label_contents_beg;
2168	}
2169
2170	if(title_is_multiline) {
2171	MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2172	lines + title_contents_line_index, n_lines - title_contents_line_index,
2173	_T(`'\n'`), &def->title, &def->title_size));
2174	def->title_needs_free = TRUE;
2175	} else {
2176	def->title = (CHAR*) STR(title_contents_beg);
2177	def->title_size = title_contents_end - title_contents_beg;
2178	}
2179
2180	def->dest_beg = dest_contents_beg;
2181	def->dest_end = dest_contents_end;
2182
2183	/ Success. /
2184	ctx->n_ref_defs++;
2185	return line_index + `1`;
2186
2187	abort:
2188	/ Failure. /
2189	if(def != NULL && def->label_needs_free)
2190	free(ptr: def->label);
2191	if(def != NULL && def->title_needs_free)
2192	free(ptr: def->title);
2193	return ret;
2194	}
2195
2196	static int
2197	md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2198	OFF beg, OFF end, MD_LINK_ATTR* attr)
2199	{
2200	const MD_REF_DEF* def;
2201	const MD_LINE* beg_line;
2202	const MD_LINE* end_line;
2203	CHAR* label;
2204	SZ label_size;
2205	int ret;
2206
2207	MD_ASSERT(CH(beg) == _T(`'['`) \|\| CH(beg) == _T(`'!'`));
2208	MD_ASSERT(CH(end-`1`) == _T(`']'`));
2209
2210	beg += (CH(beg) == _T(`'!'`) ? `2` : `1`);
2211	end--;
2212
2213	/ Find lines corresponding to the beg and end positions. /
2214	MD_ASSERT(lines[`0`].beg <= beg);
2215	beg_line = lines;
2216	while(beg >= beg_line->end)
2217	beg_line++;
2218
2219	MD_ASSERT(end <= lines[n_lines-`1`].end);
2220	end_line = beg_line;
2221	while(end >= end_line->end)
2222	end_line++;
2223
2224	if(beg_line != end_line) {
2225	MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2226	n_lines - (beg_line - lines), _T(`' '`), &label, &label_size));
2227	} else {
2228	label = (CHAR*) STR(beg);
2229	label_size = end - beg;
2230	}
2231
2232	def = md_lookup_ref_def(ctx, label, label_size);
2233	if(def != NULL) {
2234	attr->dest_beg = def->dest_beg;
2235	attr->dest_end = def->dest_end;
2236	attr->title = def->title;
2237	attr->title_size = def->title_size;
2238	attr->title_needs_free = FALSE;
2239	}
2240
2241	if(beg_line != end_line)
2242	free(ptr: label);
2243
2244	ret = (def != NULL);
2245
2246	abort:
2247	return ret;
2248	}
2249
2250	static int
2251	md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2252	OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2253	{
2254	int line_index = `0`;
2255	int tmp_line_index;
2256	OFF title_contents_beg;
2257	OFF title_contents_end;
2258	int title_contents_line_index;
2259	int title_is_multiline;
2260	OFF off = beg;
2261	int ret = FALSE;
2262
2263	while(off >= lines[line_index].end)
2264	line_index++;
2265
2266	MD_ASSERT(CH(off) == _T(`'('`));
2267	off++;
2268
2269	/ Optional white space with up to one line break. /
2270	while(off < lines[line_index].end && ISWHITESPACE(off))
2271	off++;
2272	if(off >= lines[line_index].end && ISNEWLINE(off)) {
2273	line_index++;
2274	if(line_index >= n_lines)
2275	return FALSE;
2276	off = lines[line_index].beg;
2277	}
2278
2279	/ Link destination may be omitted, but only when not also having a title. /
2280	if(off < ctx->size && CH(off) == _T(`')'`)) {
2281	attr->dest_beg = off;
2282	attr->dest_end = off;
2283	attr->title = NULL;
2284	attr->title_size = `0`;
2285	attr->title_needs_free = FALSE;
2286	off++;
2287	*p_end = off;
2288	return TRUE;
2289	}
2290
2291	/ Link destination. /
2292	if(!md_is_link_destination(ctx, beg: off, max_end: lines[line_index].end,
2293	p_end: &off, p_contents_beg: &attr->dest_beg, p_contents_end: &attr->dest_end))
2294	return FALSE;
2295
2296	/ (Optional) title. /
2297	if(md_is_link_title(ctx, lines: lines + line_index, n_lines: n_lines - line_index, beg: off,
2298	p_end: &off, p_beg_line_index: &title_contents_line_index, p_end_line_index: &tmp_line_index,
2299	p_contents_beg: &title_contents_beg, p_contents_end: &title_contents_end))
2300	{
2301	title_is_multiline = (tmp_line_index != title_contents_line_index);
2302	title_contents_line_index += line_index;
2303	line_index += tmp_line_index;
2304	} else {
2305	/ Not a title. /
2306	title_is_multiline = FALSE;
2307	title_contents_beg = off;
2308	title_contents_end = off;
2309	title_contents_line_index = `0`;
2310	}
2311
2312	/ Optional whitespace followed with final ')'. /
2313	while(off < lines[line_index].end && ISWHITESPACE(off))
2314	off++;
2315	if(off >= lines[line_index].end && ISNEWLINE(off)) {
2316	line_index++;
2317	if(line_index >= n_lines)
2318	return FALSE;
2319	off = lines[line_index].beg;
2320	}
2321	if(CH(off) != _T(`')'`))
2322	goto abort;
2323	off++;
2324
2325	if(title_contents_beg >= title_contents_end) {
2326	attr->title = NULL;
2327	attr->title_size = `0`;
2328	attr->title_needs_free = FALSE;
2329	} else if(!title_is_multiline) {
2330	attr->title = (CHAR*) STR(title_contents_beg);
2331	attr->title_size = title_contents_end - title_contents_beg;
2332	attr->title_needs_free = FALSE;
2333	} else {
2334	MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2335	lines + title_contents_line_index, n_lines - title_contents_line_index,
2336	_T(`'\n'`), &attr->title, &attr->title_size));
2337	attr->title_needs_free = TRUE;
2338	}
2339
2340	*p_end = off;
2341	ret = TRUE;
2342
2343	abort:
2344	return ret;
2345	}
2346
2347	static void
2348	md_free_ref_defs(MD_CTX* ctx)
2349	{
2350	int i;
2351
2352	for(i = `0`; i < ctx->n_ref_defs; i++) {
2353	MD_REF_DEF* def = &ctx->ref_defs[i];
2354
2355	if(def->label_needs_free)
2356	free(ptr: def->label);
2357	if(def->title_needs_free)
2358	free(ptr: def->title);
2359	}
2360
2361	free(ptr: ctx->ref_defs);
2362	}
2363
2364
2365	/******************************************
2366	* Processing Inlines (a.k.a Spans) *
2367	******************************************/
2368
/* We process inlines in a few phases:
 *
 * (1) We go through the block text and collect all significant characters
 *     which may start/end a span or some other significant position into
 *     ctx->marks[]. Core of this is what md_collect_marks() does.
 *
 *     We also do some very brief preliminary context-less analysis, whether
 *     it might be an opener or a closer (e.g. of an emphasis span).
 *
 *     This speeds up the other steps as we do not need to re-iterate over
 *     all characters anymore.
 *
 * (2) We analyze each potential mark type, in the order of their precedence.
 *
 *     In each md_analyze_XXX() function, we re-iterate the list of the marks,
 *     skipping already resolved regions (of the preceding precedences), and
 *     try to resolve them.
 *
 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
 *       them as resolved.
 *
 * (2.2) For range-type marks, we analyze whether the mark could be a closer
 *       and, if yes, whether there is some preceding opener it could satisfy.
 *
 *       If not, we check whether it could really be an opener and, if yes,
 *       we remember it so subsequent closers may resolve it.
 *
 * (3) Finally, when all marks were analyzed, we render the block contents
 *     by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
 *     or ::close_span() whenever we reach a resolved mark.
 */
2400
2401
/* The mark structure.
 *
 * '\\': Maybe escape sequence.
 * '\0': NULL char.
 *  '*': Maybe (strong) emphasis start/end.
 *  '_': Maybe (strong) emphasis start/end.
 *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
 *  '`': Maybe code span start/end.
 *  '&': Maybe start of entity.
 *  ';': Maybe end of entity.
 *  '<': Maybe start of raw HTML or autolink.
 *  '>': Maybe end of raw HTML or autolink.
 *  '[': Maybe start of link label or link text.
 *  '!': Equivalent of '[' for image.
 *  ']': Maybe end of link label or link text.
 *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
 *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
 *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
 *  'D': Dummy mark, it reserves a space for splitting a previous mark
 *       (e.g. emphasis) or to make more space for storing some special data
 *       related to the preceding mark (e.g. link).
 *
 * (md_build_mark_char_map() also enables '$' with MD_FLAG_LATEXMATHSPANS and
 * '|' with MD_FLAG_TABLES or MD_FLAG_WIKILINKS; presumably marks of those
 * chars are created too -- verify against md_collect_marks().)
 *
 * Note that not all instances of these chars in the text imply creation of the
 * structure. Only those which have (or may have, after we see more context)
 * the special meaning.
 *
 * (Keep this struct as small as possible to fit as much of them into CPU
 * cache line.)
 */
struct MD_MARK_tag {
    /* Offsets of the mark's beginning and end. For the dummy marks ('D'),
     * md_mark_store_ptr() may overwrite the leading bytes of the struct
     * with a pointer, so keep these two members first. */
    OFF beg;
    OFF end;

    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
     * of given type 'ch'.
     *
     * During resolving, we disconnect from the chain and point to the
     * corresponding counterpart so opener points to its closer and vice versa.
     */
    int prev;
    int next;
    /* The mark character; see the list above. */
    CHAR ch;
    /* Combination of the MD_MARK_xxxx bit flags. */
    unsigned char flags;
};
2446
/* Mark flags (these apply to ALL mark types). */
#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits).
 *
 * Note the value 0x20 is used three times below: each of those flags is
 * meant for marks of a different type.
 *
 * The MD_MARK_EMPH_MOD3_x values share the two-bit field
 * MD_MARK_EMPH_MOD3_MASK; md_asterisk_chain() treats the field being zero
 * as unreachable, i.e. it expects one of the three values to be set. */
#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0                 0x40
#define MD_MARK_EMPH_MOD3_1                 0x80
#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
2462
2463	static MD_MARKCHAIN*
2464	md_asterisk_chain(MD_CTX* ctx, unsigned flags)
2465	{
2466	switch(flags & (MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_MASK)) {
2467	case MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0;
2468	case MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1;
2469	case MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2;
2470	case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0;
2471	case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1;
2472	case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2;
2473	default: MD_UNREACHABLE();
2474	}
2475	return NULL;
2476	}
2477
2478	static MD_MARKCHAIN*
2479	md_mark_chain(MD_CTX* ctx, int mark_index)
2480	{
2481	MD_MARK* mark = &ctx->marks[mark_index];
2482
2483	switch(mark->ch) {
2484	case _T(`''`): return* md_asterisk_chain(ctx, flags: mark->flags);
2485	case _T(`'_'`): return &UNDERSCORE_OPENERS;
2486	case _T(`'~'`): return (mark->end - mark->beg == `1`) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2487	case _T(`'['`): return &BRACKET_OPENERS;
2488	case _T(`'\|'`): return &TABLECELLBOUNDARIES;
2489	default: return NULL;
2490	}
2491	}
2492
2493	static MD_MARK*
2494	md_push_mark(MD_CTX* ctx)
2495	{
2496	if(ctx->n_marks >= ctx->alloc_marks) {
2497	MD_MARK* new_marks;
2498
2499	ctx->alloc_marks = (ctx->alloc_marks > `0`
2500	? ctx->alloc_marks + ctx->alloc_marks / `2`
2501	: `64`);
2502	new_marks = realloc(ptr: ctx->marks, size: ctx->alloc_marks * sizeof(MD_MARK));
2503	if(new_marks == NULL) {
2504	MD_LOG("realloc() failed.");
2505	return NULL;
2506	}
2507
2508	ctx->marks = new_marks;
2509	}
2510
2511	return &ctx->marks[ctx->n_marks++];
2512	}
2513
/* Append a new (uninitialized) mark and store the pointer to it into 'mark'.
 *
 * The macro expects the enclosing function to provide the variables 'ctx',
 * 'mark' and 'ret', and the label 'abort': if md_push_mark() fails, 'ret'
 * is set to -1 and the control jumps to 'abort'. */
#define PUSH_MARK_()                                                    \
        do {                                                            \
            mark = md_push_mark(ctx);                                   \
            if(mark == NULL) {                                          \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* Append a new mark (see PUSH_MARK_() for the requirements) and initialize
 * it: it gets the given char, range and flags, and it is not linked to any
 * other mark (prev == next == -1). Note ch_ is converted to char. */
#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
        do {                                                            \
            PUSH_MARK_();                                               \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->prev = -1;                                            \
            mark->next = -1;                                            \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
2533
2534
2535	static void
2536	md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
2537	{
2538	if(chain->tail >= `0`)
2539	ctx->marks[chain->tail].next = mark_index;
2540	else
2541	chain->head = mark_index;
2542
2543	ctx->marks[mark_index].prev = chain->tail;
2544	ctx->marks[mark_index].next = -`1`;
2545	chain->tail = mark_index;
2546	}
2547
2548	/ Sometimes, we need to store a pointer into the mark. It is quite rare*
2549	* so we do not bother to make MD_MARK use union, and it can only happen
2550	* for dummy marks. */
2551	static inline void
2552	md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2553	{
2554	MD_MARK* mark = &ctx->marks[mark_index];
2555	MD_ASSERT(mark->ch == `'D'`);
2556
2557	/ Check only members beg and end are misused for this. /
2558	MD_ASSERT(sizeof(void) <= `2` sizeof(OFF));
2559	memcpy(dest: mark, src: &ptr, n: sizeof(void*));
2560	}
2561
2562	static inline void*
2563	md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2564	{
2565	void* ptr;
2566	MD_MARK* mark = &ctx->marks[mark_index];
2567	MD_ASSERT(mark->ch == `'D'`);
2568	memcpy(dest: &ptr, src: mark, n: sizeof(void*));
2569	return ptr;
2570	}
2571
2572	static void
2573	md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
2574	{
2575	MD_MARK* opener = &ctx->marks[opener_index];
2576	MD_MARK* closer = &ctx->marks[closer_index];
2577
2578	/ Remove opener from the list of openers. /
2579	if(chain != NULL) {
2580	if(opener->prev >= `0`)
2581	ctx->marks[opener->prev].next = opener->next;
2582	else
2583	chain->head = opener->next;
2584
2585	if(opener->next >= `0`)
2586	ctx->marks[opener->next].prev = opener->prev;
2587	else
2588	chain->tail = opener->prev;
2589	}
2590
2591	/ Interconnect opener and closer and mark both as resolved. /
2592	opener->next = closer_index;
2593	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
2594	closer->prev = opener_index;
2595	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
2596	}
2597
2598
2599	#define MD_ROLLBACK_ALL 0
2600	#define MD_ROLLBACK_CROSSING 1
2601
2602	/ In the range ctx->marks[opener_index] ... [closer_index], undo some or all*
2603	* resolvings accordingly to these rules:
2604	*
2605	* (1) All openers BEFORE the range corresponding to any closer inside the
2606	* range are un-resolved and they are re-added to their respective chains
2607	* of unresolved openers. This ensures we can reuse the opener for closers
2608	* AFTER the range.
2609	*
2610	* (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
2611	* are discarded.
2612	*
2613	* (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
2614	* in (1) are discarded. I.e. pairs of openers and closers which are both
2615	* inside the range are retained as well as any unpaired marks.
2616	*/
2617	static void
2618	md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2619	{
2620	int i;
2621	int mark_index;
2622
2623	/ Cut all unresolved openers at the mark index. /
2624	for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+`1`; i++) {
2625	MD_MARKCHAIN* chain = &ctx->mark_chains[i];
2626
2627	while(chain->tail >= opener_index)
2628	chain->tail = ctx->marks[chain->tail].prev;
2629
2630	if(chain->tail >= `0`)
2631	ctx->marks[chain->tail].next = -`1`;
2632	else
2633	chain->head = -`1`;
2634	}
2635
2636	/ Go backwards so that unresolved openers are re-added into their*
2637	* respective chains, in the right order. */
2638	mark_index = closer_index - `1`;
2639	while(mark_index > opener_index) {
2640	MD_MARK* mark = &ctx->marks[mark_index];
2641	int mark_flags = mark->flags;
2642	int discard_flag = (how == MD_ROLLBACK_ALL);
2643
2644	if(mark->flags & MD_MARK_CLOSER) {
2645	int mark_opener_index = mark->prev;
2646
2647	/ Undo opener BEFORE the range. /
2648	if(mark_opener_index < opener_index) {
2649	MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
2650	MD_MARKCHAIN* chain;
2651
2652	mark_opener->flags &= ~(MD_MARK_OPENER \| MD_MARK_CLOSER \| MD_MARK_RESOLVED);
2653	chain = md_mark_chain(ctx, mark_index: opener_index);
2654	if(chain != NULL) {
2655	md_mark_chain_append(ctx, chain, mark_index: mark_opener_index);
2656	discard_flag = `1`;
2657	}
2658	}
2659	}
2660
2661	/ And reset our flags. /
2662	if(discard_flag)
2663	mark->flags &= ~(MD_MARK_OPENER \| MD_MARK_CLOSER \| MD_MARK_RESOLVED);
2664
2665	/ Jump as far as we can over unresolved or non-interesting marks. /
2666	switch(how) {
2667	case MD_ROLLBACK_CROSSING:
2668	if((mark_flags & MD_MARK_CLOSER) && mark->prev > opener_index) {
2669	/ If we are closer with opener INSIDE the range, there may*
2670	* not be any other crosser inside the subrange. */
2671	mark_index = mark->prev;
2672	break;
2673	}
2674	/ Pass through. /
2675	default:
2676	mark_index--;
2677	break;
2678	}
2679	}
2680	}
2681
2682	static void
2683	md_build_mark_char_map(MD_CTX* ctx)
2684	{
2685	memset(s: ctx->mark_char_map, c: `0`, n: sizeof(ctx->mark_char_map));
2686
2687	ctx->mark_char_map[`'\\'`] = `1`;
2688	ctx->mark_char_map[`'*'`] = `1`;
2689	ctx->mark_char_map[`'_'`] = `1`;
2690	ctx->mark_char_map['`'] = `1`;
2691	ctx->mark_char_map[`'&'`] = `1`;
2692	ctx->mark_char_map[`';'`] = `1`;
2693	ctx->mark_char_map[`'<'`] = `1`;
2694	ctx->mark_char_map[`'>'`] = `1`;
2695	ctx->mark_char_map[`'['`] = `1`;
2696	ctx->mark_char_map[`'!'`] = `1`;
2697	ctx->mark_char_map[`']'`] = `1`;
2698	ctx->mark_char_map[`'\0'`] = `1`;
2699
2700	if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2701	ctx->mark_char_map[`'~'`] = `1`;
2702
2703	if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2704	ctx->mark_char_map[`'$'`] = `1`;
2705
2706	if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2707	ctx->mark_char_map[`'@'`] = `1`;
2708
2709	if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2710	ctx->mark_char_map[`':'`] = `1`;
2711
2712	if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2713	ctx->mark_char_map[`'.'`] = `1`;
2714
2715	if((ctx->parser.flags & MD_FLAG_TABLES) \|\| (ctx->parser.flags & MD_FLAG_WIKILINKS))
2716	ctx->mark_char_map[`'\|'`] = `1`;
2717
2718	if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2719	int i;
2720
2721	for(i = `0`; i < (int) sizeof(ctx->mark_char_map); i++) {
2722	if(ISWHITESPACE_(i))
2723	ctx->mark_char_map[i] = `1`;
2724	}
2725	}
2726	}
2727
2728	/ We limit code span marks to lower than 32 backticks. This solves the*
2729	* pathologic case of too many openers, each of different length: Their
2730	* resolving would be then O(n^2). */
2731	#define CODESPAN_MARK_MAXLEN 32
2732
2733	static int
2734	md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2735	OFF* p_opener_beg, OFF* p_opener_end,
2736	OFF* p_closer_beg, OFF* p_closer_end,
2737	OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2738	int* p_reached_paragraph_end)
2739	{
2740	OFF opener_beg = beg;
2741	OFF opener_end;
2742	OFF closer_beg;
2743	OFF closer_end;
2744	SZ mark_len;
2745	OFF line_end;
2746	int has_space_after_opener = FALSE;
2747	int has_eol_after_opener = FALSE;
2748	int has_space_before_closer = FALSE;
2749	int has_eol_before_closer = FALSE;
2750	int has_only_space = TRUE;
2751	int line_index = `0`;
2752
2753	line_end = lines[`0`].end;
2754	opener_end = opener_beg;
2755	while(opener_end < line_end && CH(opener_end) == _T('`'))
2756	opener_end++;
2757	has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(`' '`));
2758	has_eol_after_opener = (opener_end == line_end);
2759
2760	/ The caller needs to know end of the opening mark even if we fail. /
2761	*p_opener_end = opener_end;
2762
2763	mark_len = opener_end - opener_beg;
2764	if(mark_len > CODESPAN_MARK_MAXLEN)
2765	return FALSE;
2766
2767	/ Check whether we already know there is no closer of this length.*
2768	* If so, re-scan does no sense. This fixes issue #59. */
2769	if(last_potential_closers[mark_len-`1`] >= lines[n_lines-`1`].end \|\|
2770	(*p_reached_paragraph_end && last_potential_closers[mark_len-`1`] < opener_end))
2771	return FALSE;
2772
2773	closer_beg = opener_end;
2774	closer_end = opener_end;
2775
2776	/ Find closer mark. /
2777	while(TRUE) {
2778	while(closer_beg < line_end && CH(closer_beg) != _T('`')) {
2779	if(CH(closer_beg) != _T(`' '`))
2780	has_only_space = FALSE;
2781	closer_beg++;
2782	}
2783	closer_end = closer_beg;
2784	while(closer_end < line_end && CH(closer_end) == _T('`'))
2785	closer_end++;
2786
2787	if(closer_end - closer_beg == mark_len) {
2788	/ Success. /
2789	has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-`1`) == _T(`' '`));
2790	has_eol_before_closer = (closer_beg == lines[line_index].beg);
2791	break;
2792	}
2793
2794	if(closer_end - closer_beg > `0`) {
2795	/ We have found a back-tick which is not part of the closer. /
2796	has_only_space = FALSE;
2797
2798	/ But if we eventually fail, remember it as a potential closer*
2799	* of its own length for future attempts. This mitigates needs for
2800	* rescans. */
2801	if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2802	if(closer_beg > last_potential_closers[closer_end - closer_beg - `1`])
2803	last_potential_closers[closer_end - closer_beg - `1`] = closer_beg;
2804	}
2805	}
2806
2807	if(closer_end >= line_end) {
2808	line_index++;
2809	if(line_index >= n_lines) {
2810	/ Reached end of the paragraph and still nothing. /
2811	*p_reached_paragraph_end = TRUE;
2812	return FALSE;
2813	}
2814	/ Try on the next line. /
2815	line_end = lines[line_index].end;
2816	closer_beg = lines[line_index].beg;
2817	} else {
2818	closer_beg = closer_end;
2819	}
2820	}
2821
2822	/ If there is a space or a new line both after and before the opener*
2823	* (and if the code span is not made of spaces only), consume one initial
2824	* and one trailing space as part of the marks. */
2825	if(!has_only_space &&
2826	(has_space_after_opener \|\| has_eol_after_opener) &&
2827	(has_space_before_closer \|\| has_eol_before_closer))
2828	{
2829	if(has_space_after_opener)
2830	opener_end++;
2831	else
2832	opener_end = lines[`1`].beg;
2833
2834	if(has_space_before_closer)
2835	closer_beg--;
2836	else {
2837	closer_beg = lines[line_index-`1`].end;
2838	/ We need to eat the preceding "\r\n" but not any line trailing*
2839	* spaces. */
2840	while(closer_beg < ctx->size && ISBLANK(closer_beg))
2841	closer_beg++;
2842	}
2843	}
2844
2845	*p_opener_beg = opener_beg;
2846	*p_opener_end = opener_end;
2847	*p_closer_beg = closer_beg;
2848	*p_closer_end = closer_end;
2849	return TRUE;
2850	}
2851
2852	static int
2853	md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2854	{
2855	OFF off = beg+`1`;
2856
2857	MD_ASSERT(CH(beg) == _T(`'<'`));
2858
2859	/ Check for scheme. /
2860	if(off >= max_end \|\| !ISASCII(off))
2861	return FALSE;
2862	off++;
2863	while(`1`) {
2864	if(off >= max_end)
2865	return FALSE;
2866	if(off - beg > `32`)
2867	return FALSE;
2868	if(CH(off) == _T(`':'`) && off - beg >= `3`)
2869	break;
2870	if(!ISALNUM(off) && CH(off) != _T(`'+'`) && CH(off) != _T(`'-'`) && CH(off) != _T(`'.'`))
2871	return FALSE;
2872	off++;
2873	}
2874
2875	/ Check the path after the scheme. /
2876	while(off < max_end && CH(off) != _T(`'>'`)) {
2877	if(ISWHITESPACE(off) \|\| ISCNTRL(off) \|\| CH(off) == _T(`'<'`))
2878	return FALSE;
2879	off++;
2880	}
2881
2882	if(off >= max_end)
2883	return FALSE;
2884
2885	MD_ASSERT(CH(off) == _T(`'>'`));
2886	*p_end = off+`1`;
2887	return TRUE;
2888	}
2889
2890	static int
2891	md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2892	{
2893	OFF off = beg + `1`;
2894	int label_len;
2895
2896	MD_ASSERT(CH(beg) == _T(`'<'`));
2897
2898	/ The code should correspond to this regexp:*
2899	/^[a-zA-Z0-9.!#$%&'+\/=?^_`{\|}~-]+*
2900	@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2901	(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)$/*
2902	*/
2903
2904	/ Username (before '@'). /
2905	while(off < max_end && (ISALNUM(off) \|\| ISANYOF(off, _T(".!#$%&'*+/=?^_`{\|}~-"))))
2906	off++;
2907	if(off <= beg+`1`)
2908	return FALSE;
2909
2910	/ '@' /
2911	if(off >= max_end \|\| CH(off) != _T(`'@'`))
2912	return FALSE;
2913	off++;
2914
2915	/ Labels delimited with '.'; each label is sequence of 1 - 63 alnum*
2916	* characters or '-', but '-' is not allowed as first or last char. */
2917	label_len = `0`;
2918	while(off < max_end) {
2919	if(ISALNUM(off))
2920	label_len++;
2921	else if(CH(off) == _T(`'-'`) && label_len > `0`)
2922	label_len++;
2923	else if(CH(off) == _T(`'.'`) && label_len > `0` && CH(off-`1`) != _T(`'-'`))
2924	label_len = `0`;
2925	else
2926	break;
2927
2928	if(label_len > `63`)
2929	return FALSE;
2930
2931	off++;
2932	}
2933
2934	if(label_len <= `0` \|\| off >= max_end \|\| CH(off) != _T(`'>'`) \|\| CH(off-`1`) == _T(`'-'`))
2935	return FALSE;
2936
2937	*p_end = off+`1`;
2938	return TRUE;
2939	}
2940
2941	static int
2942	md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2943	{
2944	if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2945	*p_missing_mailto = FALSE;
2946	return TRUE;
2947	}
2948
2949	if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2950	*p_missing_mailto = TRUE;
2951	return TRUE;
2952	}
2953
2954	return FALSE;
2955	}
2956
2957	static int
2958	md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
2959	{
2960	int i;
2961	int ret = `0`;
2962	MD_MARK* mark;
2963	OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { `0` };
2964	int codespan_scanned_till_paragraph_end = FALSE;
2965
2966	for(i = `0`; i < n_lines; i++) {
2967	const MD_LINE* line = &lines[i];
2968	OFF off = line->beg;
2969	OFF line_end = line->end;
2970
2971	while(TRUE) {
2972	CHAR ch;
2973
2974	#ifdef MD4C_USE_UTF16
2975	/ For UTF-16, mark_char_map[] covers only ASCII. /
2976	#define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
2977	(ctx->mark_char_map[(unsigned char) CH(off)]))
2978	#else
2979	/ For 8-bit encodings, mark_char_map[] covers all 256 elements. /
2980	#define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
2981	#endif
2982
2983	/ Optimization: Use some loop unrolling. /
2984	while(off + `3` < line_end && !IS_MARK_CHAR(off+`0`) && !IS_MARK_CHAR(off+`1`)
2985	&& !IS_MARK_CHAR(off+`2`) && !IS_MARK_CHAR(off+`3`))
2986	off += `4`;
2987	while(off < line_end && !IS_MARK_CHAR(off+`0`))
2988	off++;
2989
2990	if(off >= line_end)
2991	break;
2992
2993	ch = CH(off);
2994
2995	/ A backslash escape.*
2996	* It can go beyond line->end as it may involve escaped new
2997	* line to form a hard break. */
2998	if(ch == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
2999	/ Hard-break cannot be on the last line of the block. /
3000	if(!ISNEWLINE(off+`1`) \|\| i+`1` < n_lines)
3001	PUSH_MARK(ch, off, off+`2`, MD_MARK_RESOLVED);
3002	off += `2`;
3003	continue;
3004	}
3005
3006	/ A potential (string) emphasis start/end. /
3007	if(ch == _T(`'*'`) \|\| ch == _T(`'_'`)) {
3008	OFF tmp = off+`1`;
3009	int left_level; / What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. /
3010	int right_level; / What follows: 0 = whitespace; 1 = punctuation; 2 = other char. /
3011
3012	while(tmp < line_end && CH(tmp) == ch)
3013	tmp++;
3014
3015	if(off == line->beg \|\| ISUNICODEWHITESPACEBEFORE(off))
3016	left_level = `0`;
3017	else if(ISUNICODEPUNCTBEFORE(off))
3018	left_level = `1`;
3019	else
3020	left_level = `2`;
3021
3022	if(tmp == line_end \|\| ISUNICODEWHITESPACE(tmp))
3023	right_level = `0`;
3024	else if(ISUNICODEPUNCT(tmp))
3025	right_level = `1`;
3026	else
3027	right_level = `2`;
3028
3029	/ Intra-word underscore doesn't have special meaning. /
3030	if(ch == _T(`'_'`) && left_level == `2` && right_level == `2`) {
3031	left_level = `0`;
3032	right_level = `0`;
3033	}
3034
3035	if(left_level != `0` \|\| right_level != `0`) {
3036	unsigned flags = `0`;
3037
3038	if(left_level > `0` && left_level >= right_level)
3039	flags \|= MD_MARK_POTENTIAL_CLOSER;
3040	if(right_level > `0` && right_level >= left_level)
3041	flags \|= MD_MARK_POTENTIAL_OPENER;
3042	if(left_level == `2` && right_level == `2`)
3043	flags \|= MD_MARK_EMPH_INTRAWORD;
3044
3045	/ For "the rule of three" we need to remember the original*
3046	* size of the mark (modulo three), before we potentially
3047	* split the mark when being later resolved partially by some
3048	* shorter closer. */
3049	switch((tmp - off) % `3`) {
3050	case `0`: flags \|= MD_MARK_EMPH_MOD3_0; break;
3051	case `1`: flags \|= MD_MARK_EMPH_MOD3_1; break;
3052	case `2`: flags \|= MD_MARK_EMPH_MOD3_2; break;
3053	}
3054
3055	PUSH_MARK(ch, off, tmp, flags);
3056
3057	/ During resolving, multiple asterisks may have to be*
3058	* split into independent span start/ends. Consider e.g.
3059	* "*foo bar*". Therefore we push also some empty dummy
3060	* marks to have enough space for that. */
3061	off++;
3062	while(off < tmp) {
3063	PUSH_MARK(`'D'`, off, off, `0`);
3064	off++;
3065	}
3066	continue;
3067	}
3068
3069	off = tmp;
3070	continue;
3071	}
3072
3073	/ A potential code span start/end. /
3074	if(ch == _T('`')) {
3075	OFF opener_beg, opener_end;
3076	OFF closer_beg, closer_end;
3077	int is_code_span;
3078
3079	is_code_span = md_is_code_span(ctx, lines: lines + i, n_lines: n_lines - i, beg: off,
3080	p_opener_beg: &opener_beg, p_opener_end: &opener_end, p_closer_beg: &closer_beg, p_closer_end: &closer_end,
3081	last_potential_closers: codespan_last_potential_closers,
3082	p_reached_paragraph_end: &codespan_scanned_till_paragraph_end);
3083	if(is_code_span) {
3084	PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER \| MD_MARK_RESOLVED);
3085	PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER \| MD_MARK_RESOLVED);
3086	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3087	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3088
3089	off = closer_end;
3090
3091	/ Advance the current line accordingly. /
3092	while(off > line_end) {
3093	i++;
3094	line++;
3095	line_end = line->end;
3096	}
3097	continue;
3098	}
3099
3100	off = opener_end;
3101	continue;
3102	}
3103
3104	/ A potential entity start. /
3105	if(ch == _T(`'&'`)) {
3106	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_OPENER);
3107	off++;
3108	continue;
3109	}
3110
3111	/ A potential entity end. /
3112	if(ch == _T(`';'`)) {
3113	/ We surely cannot be entity unless the previous mark is '&'. /
3114	if(ctx->n_marks > `0` && ctx->marks[ctx->n_marks-`1`].ch == _T(`'&'`))
3115	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_CLOSER);
3116
3117	off++;
3118	continue;
3119	}
3120
3121	/ A potential autolink or raw HTML start/end. /
3122	if(ch == _T(`'<'`)) {
3123	int is_autolink;
3124	OFF autolink_end;
3125	int missing_mailto;
3126
3127	if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3128	int is_html;
3129	OFF html_end;
3130
3131	/ Given the nature of the raw HTML, we have to recognize*
3132	* it here. Doing so later in md_analyze_lt_gt() could
3133	* open can of worms of quadratic complexity. */
3134	is_html = md_is_html_any(ctx, lines: lines + i, n_lines: n_lines - i, beg: off,
3135	max_end: lines[n_lines-`1`].end, p_end: &html_end);
3136	if(is_html) {
3137	PUSH_MARK(_T(`'<'`), off, off, MD_MARK_OPENER \| MD_MARK_RESOLVED);
3138	PUSH_MARK(_T(`'>'`), html_end, html_end, MD_MARK_CLOSER \| MD_MARK_RESOLVED);
3139	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3140	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3141	off = html_end;
3142
3143	/ Advance the current line accordingly. /
3144	while(off > line_end) {
3145	i++;
3146	line++;
3147	line_end = line->end;
3148	}
3149	continue;
3150	}
3151	}
3152
3153	is_autolink = md_is_autolink(ctx, beg: off, max_end: lines[n_lines-`1`].end,
3154	p_end: &autolink_end, p_missing_mailto: &missing_mailto);
3155	if(is_autolink) {
3156	PUSH_MARK((missing_mailto ? _T(`'@'`) : _T(`'<'`)), off, off+`1`,
3157	MD_MARK_OPENER \| MD_MARK_RESOLVED \| MD_MARK_AUTOLINK);
3158	PUSH_MARK(_T(`'>'`), autolink_end-`1`, autolink_end,
3159	MD_MARK_CLOSER \| MD_MARK_RESOLVED \| MD_MARK_AUTOLINK);
3160	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3161	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3162	off = autolink_end;
3163	continue;
3164	}
3165
3166	off++;
3167	continue;
3168	}
3169
3170	/ A potential link or its part. /
3171	if(ch == _T(`'['`) \|\| (ch == _T(`'!'`) && off+`1` < line_end && CH(off+`1`) == _T(`'['`))) {
3172	OFF tmp = (ch == _T(`'['`) ? off+`1` : off+`2`);
3173	PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
3174	off = tmp;
3175	/ Two dummies to make enough place for data we need if it is*
3176	* a link. */
3177	PUSH_MARK(`'D'`, off, off, `0`);
3178	PUSH_MARK(`'D'`, off, off, `0`);
3179	continue;
3180	}
3181	if(ch == _T(`']'`)) {
3182	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_CLOSER);
3183	off++;
3184	continue;
3185	}
3186
3187	/ A potential permissive e-mail autolink. /
3188	if(ch == _T(`'@'`)) {
3189	if(line->beg + `1` <= off && ISALNUM(off-`1`) &&
3190	off + `3` < line->end && ISALNUM(off+`1`))
3191	{
3192	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_OPENER);
3193	/ Push a dummy as a reserve for a closer. /
3194	PUSH_MARK(`'D'`, off, off, `0`);
3195	}
3196
3197	off++;
3198	continue;
3199	}
3200
3201	/ A potential permissive URL autolink. /
3202	if(ch == _T(`':'`)) {
3203	static struct {
3204	const CHAR* scheme;
3205	SZ scheme_size;
3206	const CHAR* suffix;
3207	SZ suffix_size;
3208	} scheme_map[] = {
3209	/ In the order from the most frequently used, arguably. /
3210	{ _T("http"), `4`, _T("//"), `2` },
3211	{ _T("https"), `5`, _T("//"), `2` },
3212	{ _T("ftp"), `3`, _T("//"), `2` }
3213	};
3214	int scheme_index;
3215
3216	for(scheme_index = `0`; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3217	const CHAR* scheme = scheme_map[scheme_index].scheme;
3218	const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3219	const CHAR* suffix = scheme_map[scheme_index].suffix;
3220	const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3221
3222	if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), s2: scheme, n: scheme_size) &&
3223	(line->beg + scheme_size == off \|\| ISWHITESPACE(off-scheme_size-`1`) \|\| ISANYOF(off-scheme_size-`1`, _T("*_~(["))) &&
3224	off + `1` + suffix_size < line->end && md_ascii_eq(STR(off+`1`), s2: suffix, n: suffix_size))
3225	{
3226	PUSH_MARK(ch, off-scheme_size, off+`1`+suffix_size, MD_MARK_POTENTIAL_OPENER);
3227	/ Push a dummy as a reserve for a closer. /
3228	PUSH_MARK(`'D'`, off, off, `0`);
3229	off += `1` + suffix_size;
3230	continue;
3231	}
3232	}
3233
3234	off++;
3235	continue;
3236	}
3237
3238	/ A potential permissive WWW autolink. /
3239	if(ch == _T(`'.'`)) {
3240	if(line->beg + `3` <= off && md_ascii_eq(STR(off-`3`), _T("www"), n: `3`) &&
3241	(line->beg + `3` == off \|\| ISWHITESPACE(off-`4`) \|\| ISANYOF(off-`4`, _T("*_~(["))) &&
3242	off + `1` < line_end)
3243	{
3244	PUSH_MARK(ch, off-`3`, off+`1`, MD_MARK_POTENTIAL_OPENER);
3245	/ Push a dummy as a reserve for a closer. /
3246	PUSH_MARK(`'D'`, off, off, `0`);
3247	off++;
3248	continue;
3249	}
3250
3251	off++;
3252	continue;
3253	}
3254
3255	/ A potential table cell boundary or wiki link label delimiter. /
3256	if((table_mode \|\| ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T(`'\|'`)) {
3257	PUSH_MARK(ch, off, off+`1`, `0`);
3258	off++;
3259	continue;
3260	}
3261
3262	/ A potential strikethrough start/end. /
3263	if(ch == _T(`'~'`)) {
3264	OFF tmp = off+`1`;
3265
3266	while(tmp < line_end && CH(tmp) == _T(`'~'`))
3267	tmp++;
3268
3269	if(tmp - off < `3`) {
3270	unsigned flags = `0`;
3271
3272	if(tmp < line_end && !ISUNICODEWHITESPACE(tmp))
3273	flags \|= MD_MARK_POTENTIAL_OPENER;
3274	if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
3275	flags \|= MD_MARK_POTENTIAL_CLOSER;
3276	if(flags != `0`)
3277	PUSH_MARK(ch, off, tmp, flags);
3278	}
3279
3280	off = tmp;
3281	continue;
3282	}
3283
3284	/ A potential equation start/end /
3285	if(ch == _T(`'$'`)) {
3286	/ We can have at most two consecutive $ signs,*
3287	* where two dollar signs signify a display equation. */
3288	OFF tmp = off+`1`;
3289
3290	while(tmp < line_end && CH(tmp) == _T(`'$'`))
3291	tmp++;
3292
3293	if (tmp - off <= `2`)
3294	PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER \| MD_MARK_POTENTIAL_CLOSER);
3295	off = tmp;
3296	continue;
3297	}
3298
3299	/ Turn non-trivial whitespace into single space. /
3300	if(ISWHITESPACE_(ch)) {
3301	OFF tmp = off+`1`;
3302
3303	while(tmp < line_end && ISWHITESPACE(tmp))
3304	tmp++;
3305
3306	if(tmp - off > `1` \|\| ch != _T(`' '`))
3307	PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3308
3309	off = tmp;
3310	continue;
3311	}
3312
3313	/ NULL character. /
3314	if(ch == _T(`'\0'`)) {
3315	PUSH_MARK(ch, off, off+`1`, MD_MARK_RESOLVED);
3316	off++;
3317	continue;
3318	}
3319
3320	off++;
3321	}
3322	}
3323
3324	/ Add a dummy mark at the end of the mark vector to simplify*
3325	* process_inlines(). */
3326	PUSH_MARK(`127`, ctx->size, ctx->size, MD_MARK_RESOLVED);
3327
3328	abort:
3329	return ret;
3330	}
3331
3332	static void
3333	md_analyze_bracket(MD_CTX* ctx, int mark_index)
3334	{
3335	/ We cannot really resolve links here as for that we would need*
3336	* more context. E.g. a following pair of brackets (reference link),
3337	* or enclosing pair of brackets (if the inner is the link, the outer
3338	* one cannot be.)
3339	*
3340	* Therefore we here only construct a list of resolved '[' ']' pairs
3341	* ordered by position of the closer. This allows ur to analyze what is
3342	* or is not link in the right order, from inside to outside in case
3343	* of nested brackets.
3344	*
3345	* The resolving itself is deferred into md_resolve_links().
3346	*/
3347
3348	MD_MARK* mark = &ctx->marks[mark_index];
3349
3350	if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
3351	md_mark_chain_append(ctx, chain: &BRACKET_OPENERS, mark_index);
3352	return;
3353	}
3354
3355	if(BRACKET_OPENERS.tail >= `0`) {
3356	/ Pop the opener from the chain. /
3357	int opener_index = BRACKET_OPENERS.tail;
3358	MD_MARK* opener = &ctx->marks[opener_index];
3359	if(opener->prev >= `0`)
3360	ctx->marks[opener->prev].next = -`1`;
3361	else
3362	BRACKET_OPENERS.head = -`1`;
3363	BRACKET_OPENERS.tail = opener->prev;
3364
3365	/ Interconnect the opener and closer. /
3366	opener->next = mark_index;
3367	mark->prev = opener_index;
3368
3369	/ Add the pair into chain of potential links for md_resolve_links().*
3370	* Note we misuse opener->prev for this as opener->next points to its
3371	* closer. */
3372	if(ctx->unresolved_link_tail >= `0`)
3373	ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
3374	else
3375	ctx->unresolved_link_head = opener_index;
3376	ctx->unresolved_link_tail = opener_index;
3377	opener->prev = -`1`;
3378	}
3379	}
3380
3381	/ Forward declaration. /
3382	static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3383	int mark_beg, int mark_end);
3384
3385	static int
3386	md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
3387	{
3388	int opener_index = ctx->unresolved_link_head;
3389	OFF last_link_beg = `0`;
3390	OFF last_link_end = `0`;
3391	OFF last_img_beg = `0`;
3392	OFF last_img_end = `0`;
3393
3394	while(opener_index >= `0`) {
3395	MD_MARK* opener = &ctx->marks[opener_index];
3396	int closer_index = opener->next;
3397	MD_MARK* closer = &ctx->marks[closer_index];
3398	int next_index = opener->prev;
3399	MD_MARK* next_opener;
3400	MD_MARK* next_closer;
3401	MD_LINK_ATTR attr;
3402	int is_link = FALSE;
3403
3404	if(next_index >= `0`) {
3405	next_opener = &ctx->marks[next_index];
3406	next_closer = &ctx->marks[next_opener->next];
3407	} else {
3408	next_opener = NULL;
3409	next_closer = NULL;
3410	}
3411
3412	/ If nested ("[ [ ] ]"), we need to make sure that:*
3413	* - The outer does not end inside of (...) belonging to the inner.
3414	* - The outer cannot be link if the inner is link (i.e. not image).
3415	*
3416	* (Note we here analyze from inner to outer as the marks are ordered
3417	* by closer->beg.)
3418	*/
3419	if((opener->beg < last_link_beg && closer->end < last_link_end) \|\|
3420	(opener->beg < last_img_beg && closer->end < last_img_end) \|\|
3421	(opener->beg < last_link_end && opener->ch == `'['`))
3422	{
3423	opener_index = next_index;
3424	continue;
3425	}
3426
3427	/ Recognize and resolve wiki links.*
3428	* Wiki-links maybe '[[destination]]' or '[[destination\|label]]'.
3429	*/
3430	if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3431	(opener->end - opener->beg == `1`) && / not image /
3432	next_opener != NULL && / double '[' opener /
3433	next_opener->ch == `'['` &&
3434	(next_opener->beg == opener->beg - `1`) &&
3435	(next_opener->end - next_opener->beg == `1`) &&
3436	next_closer != NULL && / double ']' closer /
3437	next_closer->ch == `']'` &&
3438	(next_closer->beg == closer->beg + `1`) &&
3439	(next_closer->end - next_closer->beg == `1`))
3440	{
3441	MD_MARK* delim = NULL;
3442	int delim_index;
3443	OFF dest_beg, dest_end;
3444
3445	is_link = TRUE;
3446
3447	/ We don't allow destination to be longer than 100 characters.*
3448	* Lets scan to see whether there is '\|'. (If not then the whole
3449	* wiki-link has to be below the 100 characters.) */
3450	delim_index = opener_index + `1`;
3451	while(delim_index < closer_index) {
3452	MD_MARK* m = &ctx->marks[delim_index];
3453	if(m->ch == `'\|'`) {
3454	delim = m;
3455	break;
3456	}
3457	if(m->ch != `'D'` && m->beg - opener->end > `100`)
3458	break;
3459	delim_index++;
3460	}
3461	dest_beg = opener->end;
3462	dest_end = (delim != NULL) ? delim->beg : closer->beg;
3463	if(dest_end - dest_beg == `0` \|\| dest_end - dest_beg > `100`)
3464	is_link = FALSE;
3465
3466	/ There may not be any new line in the destination. /
3467	if(is_link) {
3468	OFF off;
3469	for(off = dest_beg; off < dest_end; off++) {
3470	if(ISNEWLINE(off)) {
3471	is_link = FALSE;
3472	break;
3473	}
3474	}
3475	}
3476
3477	if(is_link) {
3478	if(delim != NULL) {
3479	if(delim->end < closer->beg) {
3480	opener->end = delim->beg;
3481	} else {
3482	/ The pipe is just before the closer: [[foo\|]] /
3483	closer->beg = delim->beg;
3484	delim = NULL;
3485	}
3486	}
3487
3488	opener->beg = next_opener->beg;
3489	opener->next = closer_index;
3490	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
3491
3492	closer->end = next_closer->end;
3493	closer->prev = opener_index;
3494	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
3495
3496	last_link_beg = opener->beg;
3497	last_link_end = closer->end;
3498
3499	if(delim != NULL) {
3500	delim->flags \|= MD_MARK_RESOLVED;
3501	md_rollback(ctx, opener_index, closer_index: delim_index, MD_ROLLBACK_ALL);
3502	md_analyze_link_contents(ctx, lines, n_lines, mark_beg: opener_index+`1`, mark_end: closer_index);
3503	} else {
3504	md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3505	}
3506
3507	opener_index = next_opener->prev;
3508	continue;
3509	}
3510	}
3511
3512	if(next_opener != NULL && next_opener->beg == closer->end) {
3513	if(next_closer->beg > closer->end + `1`) {
3514	/ Might be full reference link. /
3515	is_link = md_is_link_reference(ctx, lines, n_lines, beg: next_opener->beg, end: next_closer->end, attr: &attr);
3516	} else {
3517	/ Might be shortcut reference link. /
3518	is_link = md_is_link_reference(ctx, lines, n_lines, beg: opener->beg, end: closer->end, attr: &attr);
3519	}
3520
3521	if(is_link < `0`)
3522	return -`1`;
3523
3524	if(is_link) {
3525	/ Eat the 2nd "[...]". /
3526	closer->end = next_closer->end;
3527	}
3528	} else {
3529	if(closer->end < ctx->size && CH(closer->end) == _T(`'('`)) {
3530	/ Might be inline link. /
3531	OFF inline_link_end = UINT_MAX;
3532
3533	is_link = md_is_inline_link_spec(ctx, lines, n_lines, beg: closer->end, p_end: &inline_link_end, attr: &attr);
3534	if(is_link < `0`)
3535	return -`1`;
3536
3537	/ Check the closing ')' is not inside an already resolved range*
3538	* (i.e. a range with a higher priority), e.g. a code span. */
3539	if(is_link) {
3540	int i = closer_index + `1`;
3541
3542	while(i < ctx->n_marks) {
3543	MD_MARK* mark = &ctx->marks[i];
3544
3545	if(mark->beg >= inline_link_end)
3546	break;
3547	if((mark->flags & (MD_MARK_OPENER \| MD_MARK_RESOLVED)) == (MD_MARK_OPENER \| MD_MARK_RESOLVED)) {
3548	if(ctx->marks[mark->next].beg >= inline_link_end) {
3549	/ Cancel the link status. /
3550	if(attr.title_needs_free)
3551	free(ptr: attr.title);
3552	is_link = FALSE;
3553	break;
3554	}
3555
3556	i = mark->next + `1`;
3557	} else {
3558	i++;
3559	}
3560	}
3561	}
3562
3563	if(is_link) {
3564	/ Eat the "(...)" /
3565	closer->end = inline_link_end;
3566	}
3567	}
3568
3569	if(!is_link) {
3570	/ Might be collapsed reference link. /
3571	is_link = md_is_link_reference(ctx, lines, n_lines, beg: opener->beg, end: closer->end, attr: &attr);
3572	if(is_link < `0`)
3573	return -`1`;
3574	}
3575	}
3576
3577	if(is_link) {
3578	/ Resolve the brackets as a link. /
3579	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
3580	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
3581
3582	/ If it is a link, we store the destination and title in the two*
3583	* dummy marks after the opener. */
3584	MD_ASSERT(ctx->marks[opener_index+`1`].ch == `'D'`);
3585	ctx->marks[opener_index+`1`].beg = attr.dest_beg;
3586	ctx->marks[opener_index+`1`].end = attr.dest_end;
3587
3588	MD_ASSERT(ctx->marks[opener_index+`2`].ch == `'D'`);
3589	md_mark_store_ptr(ctx, mark_index: opener_index+`2`, ptr: attr.title);
3590	/ The title might or might not have been allocated for us. /
3591	if(attr.title_needs_free)
3592	md_mark_chain_append(ctx, chain: &PTR_CHAIN, mark_index: opener_index+`2`);
3593	ctx->marks[opener_index+`2`].prev = attr.title_size;
3594
3595	if(opener->ch == `'['`) {
3596	last_link_beg = opener->beg;
3597	last_link_end = closer->end;
3598	} else {
3599	last_img_beg = opener->beg;
3600	last_img_end = closer->end;
3601	}
3602
3603	md_analyze_link_contents(ctx, lines, n_lines, mark_beg: opener_index+`1`, mark_end: closer_index);
3604
3605	/ If the link text is formed by nothing but permissive autolink,*
3606	* suppress the autolink.
3607	* See https://github.com/mity/md4c/issues/152 for more info. */
3608	if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) {
3609	MD_MARK* first_nested;
3610	MD_MARK* last_nested;
3611
3612	first_nested = opener + `1`;
3613	while(first_nested->ch == _T(`'D'`) && first_nested < closer)
3614	first_nested++;
3615
3616	last_nested = closer - `1`;
3617	while(first_nested->ch == _T(`'D'`) && last_nested > opener)
3618	last_nested--;
3619
3620	if((first_nested->flags & MD_MARK_RESOLVED) &&
3621	first_nested->beg == opener->end &&
3622	ISANYOF_(first_nested->ch, _T("@:.")) &&
3623	first_nested->next == (last_nested - ctx->marks) &&
3624	last_nested->end == closer->beg)
3625	{
3626	first_nested->ch = _T(`'D'`);
3627	first_nested->flags &= ~MD_MARK_RESOLVED;
3628	last_nested->ch = _T(`'D'`);
3629	last_nested->flags &= ~MD_MARK_RESOLVED;
3630	}
3631	}
3632	}
3633
3634	opener_index = next_index;
3635	}
3636
3637	return `0`;
3638	}
3639
3640	/ Analyze whether the mark '&' starts a HTML entity.*
3641	* If so, update its flags as well as flags of corresponding closer ';'. */
3642	static void
3643	md_analyze_entity(MD_CTX* ctx, int mark_index)
3644	{
3645	MD_MARK* opener = &ctx->marks[mark_index];
3646	MD_MARK* closer;
3647	OFF off;
3648
3649	/ Cannot be entity if there is no closer as the next mark.*
3650	* (Any other mark between would mean strange character which cannot be
3651	* part of the entity.
3652	*
3653	* So we can do all the work on '&' and do not call this later for the
3654	* closing mark ';'.
3655	*/
3656	if(mark_index + `1` >= ctx->n_marks)
3657	return;
3658	closer = &ctx->marks[mark_index+`1`];
3659	if(closer->ch != `';'`)
3660	return;
3661
3662	if(md_is_entity(ctx, beg: opener->beg, max_end: closer->end, p_end: &off)) {
3663	MD_ASSERT(off == closer->end);
3664
3665	md_resolve_range(ctx, NULL, opener_index: mark_index, closer_index: mark_index+`1`);
3666	opener->end = closer->end;
3667	}
3668	}
3669
3670	static void
3671	md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3672	{
3673	MD_MARK* mark = &ctx->marks[mark_index];
3674	mark->flags \|= MD_MARK_RESOLVED;
3675
3676	md_mark_chain_append(ctx, chain: &TABLECELLBOUNDARIES, mark_index);
3677	ctx->n_table_cell_boundaries++;
3678	}
3679
3680	/ Split a longer mark into two. The new mark takes the given count of*
3681	* characters. May only be called if an adequate number of dummy 'D' marks
3682	* follows.
3683	*/
3684	static int
3685	md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3686	{
3687	MD_MARK* mark = &ctx->marks[mark_index];
3688	int new_mark_index = mark_index + (mark->end - mark->beg - n);
3689	MD_MARK* dummy = &ctx->marks[new_mark_index];
3690
3691	MD_ASSERT(mark->end - mark->beg > n);
3692	MD_ASSERT(dummy->ch == `'D'`);
3693
3694	memcpy(dest: dummy, src: mark, n: sizeof(MD_MARK));
3695	mark->end -= n;
3696	dummy->beg = mark->end;
3697
3698	return new_mark_index;
3699	}
3700
3701	static void
3702	md_analyze_emph(MD_CTX* ctx, int mark_index)
3703	{
3704	MD_MARK* mark = &ctx->marks[mark_index];
3705	MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3706
3707	/ If we can be a closer, try to resolve with the preceding opener. /
3708	if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3709	MD_MARK* opener = NULL;
3710	int opener_index;
3711
3712	if(mark->ch == _T(`'*'`)) {
3713	MD_MARKCHAIN* opener_chains[`6`];
3714	int i, n_opener_chains;
3715	unsigned flags = mark->flags;
3716
3717	/ Apply the "rule of three". /
3718	n_opener_chains = `0`;
3719	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
3720	if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3721	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
3722	if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3723	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
3724	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
3725	if(!(flags & MD_MARK_EMPH_INTRAWORD) \|\| (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3726	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
3727	if(!(flags & MD_MARK_EMPH_INTRAWORD) \|\| (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3728	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
3729
3730	/ Opener is the most recent mark from the allowed chains. /
3731	for(i = `0`; i < n_opener_chains; i++) {
3732	if(opener_chains[i]->tail >= `0`) {
3733	int tmp_index = opener_chains[i]->tail;
3734	MD_MARK* tmp_mark = &ctx->marks[tmp_index];
3735	if(opener == NULL \|\| tmp_mark->end > opener->end) {
3736	opener_index = tmp_index;
3737	opener = tmp_mark;
3738	}
3739	}
3740	}
3741	} else {
3742	/ Simple emph. mark /
3743	if(chain->tail >= `0`) {
3744	opener_index = chain->tail;
3745	opener = &ctx->marks[opener_index];
3746	}
3747	}
3748
3749	/ Resolve, if we have found matching opener. /
3750	if(opener != NULL) {
3751	SZ opener_size = opener->end - opener->beg;
3752	SZ closer_size = mark->end - mark->beg;
3753	MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, mark_index: opener_index);
3754
3755	if(opener_size > closer_size) {
3756	opener_index = md_split_emph_mark(ctx, mark_index: opener_index, n: closer_size);
3757	md_mark_chain_append(ctx, chain: opener_chain, mark_index: opener_index);
3758	} else if(opener_size < closer_size) {
3759	md_split_emph_mark(ctx, mark_index, n: closer_size - opener_size);
3760	}
3761
3762	md_rollback(ctx, opener_index, closer_index: mark_index, MD_ROLLBACK_CROSSING);
3763	md_resolve_range(ctx, chain: opener_chain, opener_index, closer_index: mark_index);
3764	return;
3765	}
3766	}
3767
3768	/ If we could not resolve as closer, we may be yet be an opener. /
3769	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3770	md_mark_chain_append(ctx, chain, mark_index);
3771	}
3772
3773	static void
3774	md_analyze_tilde(MD_CTX* ctx, int mark_index)
3775	{
3776	MD_MARK* mark = &ctx->marks[mark_index];
3777	MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3778
3779	/ We attempt to be Github Flavored Markdown compatible here. GFM accepts*
3780	* only tildes sequences of length 1 and 2, and the length of the opener
3781	* and closer has to match. */
3782
3783	if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= `0`) {
3784	int opener_index = chain->head;
3785
3786	md_rollback(ctx, opener_index, closer_index: mark_index, MD_ROLLBACK_CROSSING);
3787	md_resolve_range(ctx, chain, opener_index, closer_index: mark_index);
3788	return;
3789	}
3790
3791	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3792	md_mark_chain_append(ctx, chain, mark_index);
3793	}
3794
3795	static void
3796	md_analyze_dollar(MD_CTX* ctx, int mark_index)
3797	{
3798	/ This should mimic the way inline equations work in LaTeX, so there*
3799	* can only ever be one item in the chain (i.e. the dollars can't be
3800	* nested). This is basically the same as the md_analyze_tilde function,
3801	* except that we require matching openers and closers to be of the same
3802	* length.
3803	*
3804	* E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
3805	if(DOLLAR_OPENERS.head >= `0`) {
3806	/ If the potential closer has a non-matching number of $, discard /
3807	MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head];
3808	MD_MARK* close = &ctx->marks[mark_index];
3809
3810	int opener_index = DOLLAR_OPENERS.head;
3811	md_rollback(ctx, opener_index, closer_index: mark_index, MD_ROLLBACK_ALL);
3812	if (open->end - open->beg == close->end - close->beg) {
3813	/ We are the matching closer /
3814	md_resolve_range(ctx, chain: &DOLLAR_OPENERS, opener_index, closer_index: mark_index);
3815	} else {
3816	/ We don't match the opener, so discard old opener and insert as opener /
3817	md_mark_chain_append(ctx, chain: &DOLLAR_OPENERS, mark_index);
3818	}
3819	} else {
3820	/ No unmatched openers, so we are opener /
3821	md_mark_chain_append(ctx, chain: &DOLLAR_OPENERS, mark_index);
3822	}
3823	}
3824
3825	static void
3826	md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
3827	{
3828	MD_MARK* opener = &ctx->marks[mark_index];
3829	int closer_index = mark_index + `1`;
3830	MD_MARK* closer = &ctx->marks[closer_index];
3831	MD_MARK* next_resolved_mark;
3832	OFF off = opener->end;
3833	int n_dots = FALSE;
3834	int has_underscore_in_last_seg = FALSE;
3835	int has_underscore_in_next_to_last_seg = FALSE;
3836	int n_opened_parenthesis = `0`;
3837
3838	/ Check for domain. /
3839	while(off < ctx->size) {
3840	if(ISALNUM(off) \|\| CH(off) == _T(`'-'`)) {
3841	off++;
3842	} else if(CH(off) == _T(`'.'`)) {
3843	/ We must see at least one period. /
3844	n_dots++;
3845	has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
3846	has_underscore_in_last_seg = FALSE;
3847	off++;
3848	} else if(CH(off) == _T(`'_'`)) {
3849	/ No underscore may be present in the last two domain segments. /
3850	has_underscore_in_last_seg = TRUE;
3851	off++;
3852	} else {
3853	break;
3854	}
3855	}
3856	if(off > opener->end && CH(off-`1`) == _T(`'.'`)) {
3857	off--;
3858	n_dots--;
3859	}
3860	if(off <= opener->end \|\| n_dots == `0` \|\| has_underscore_in_next_to_last_seg \|\| has_underscore_in_last_seg)
3861	return;
3862
3863	/ Check for path. /
3864	next_resolved_mark = closer + `1`;
3865	while(next_resolved_mark->ch == `'D'` \|\| !(next_resolved_mark->flags & MD_MARK_RESOLVED))
3866	next_resolved_mark++;
3867	while(off < next_resolved_mark->beg && CH(off) != _T(`'<'`) && !ISWHITESPACE(off) && !ISNEWLINE(off)) {
3868	/ Parenthesis must be balanced. /
3869	if(CH(off) == _T(`'('`)) {
3870	n_opened_parenthesis++;
3871	} else if(CH(off) == _T(`')'`)) {
3872	if(n_opened_parenthesis > `0`)
3873	n_opened_parenthesis--;
3874	else
3875	break;
3876	}
3877
3878	off++;
3879	}
3880	/ These cannot be last char In such case they are more likely normal*
3881	* punctuation. */
3882	if(ISANYOF(off-`1`, _T("?!.,:*_~")))
3883	off--;
3884
3885	/ Ok. Lets call it auto-link. Adapt opener and create closer to zero*
3886	* length so all the contents becomes the link text. */
3887	MD_ASSERT(closer->ch == `'D'`);
3888	opener->end = opener->beg;
3889	closer->ch = opener->ch;
3890	closer->beg = off;
3891	closer->end = off;
3892	md_resolve_range(ctx, NULL, opener_index: mark_index, closer_index);
3893	}
3894
3895	/ The permissive autolinks do not have to be enclosed in '<' '>' but we*
3896	* instead impose stricter rules what is understood as an e-mail address
3897	* here. Actually any non-alphanumeric characters with exception of '.'
3898	* are prohibited both in username and after '@'. */
3899	static void
3900	md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
3901	{
3902	MD_MARK* opener = &ctx->marks[mark_index];
3903	int closer_index;
3904	MD_MARK* closer;
3905	OFF beg = opener->beg;
3906	OFF end = opener->end;
3907	int dot_count = `0`;
3908
3909	MD_ASSERT(CH(beg) == _T(`'@'`));
3910
3911	/ Scan for name before '@'. /
3912	while(beg > `0` && (ISALNUM(beg-`1`) \|\| ISANYOF(beg-`1`, _T(".-_+"))))
3913	beg--;
3914
3915	/ Scan for domain after '@'. /
3916	while(end < ctx->size && (ISALNUM(end) \|\| ISANYOF(end, _T(".-_")))) {
3917	if(CH(end) == _T(`'.'`))
3918	dot_count++;
3919	end++;
3920	}
3921	if(CH(end-`1`) == _T(`'.'`)) { / Final '.' not part of it. /
3922	dot_count--;
3923	end--;
3924	}
3925	else if(ISANYOF2(end-`1`, _T(`'-'`), _T(`'_'`))) / These are forbidden at the end. /
3926	return;
3927	if(CH(end-`1`) == _T(`'@'`) \|\| dot_count == `0`)
3928	return;
3929
3930	/ Ok. Lets call it auto-link. Adapt opener and create closer to zero*
3931	* length so all the contents becomes the link text. */
3932	closer_index = mark_index + `1`;
3933	closer = &ctx->marks[closer_index];
3934	MD_ASSERT(closer->ch == `'D'`);
3935
3936	opener->beg = beg;
3937	opener->end = beg;
3938	closer->ch = opener->ch;
3939	closer->beg = end;
3940	closer->end = end;
3941	md_resolve_range(ctx, NULL, opener_index: mark_index, closer_index);
3942	}
3943
3944	static inline void
3945	md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3946	int mark_beg, int mark_end, const CHAR* mark_chars)
3947	{
3948	int i = mark_beg;
3949
3950	while(i < mark_end) {
3951	MD_MARK* mark = &ctx->marks[i];
3952
3953	/ Skip resolved spans. /
3954	if(mark->flags & MD_MARK_RESOLVED) {
3955	if(mark->flags & MD_MARK_OPENER) {
3956	MD_ASSERT(i < mark->next);
3957	i = mark->next + `1`;
3958	} else {
3959	i++;
3960	}
3961	continue;
3962	}
3963
3964	/ Skip marks we do not want to deal with. /
3965	if(!ISANYOF_(mark->ch, mark_chars)) {
3966	i++;
3967	continue;
3968	}
3969
3970	/ Analyze the mark. /
3971	switch(mark->ch) {
3972	case `'['`: / Pass through. /
3973	case `'!'`: / Pass through. /
3974	case `']'`: md_analyze_bracket(ctx, mark_index: i); break;
3975	case `'&'`: md_analyze_entity(ctx, mark_index: i); break;
3976	case `'\|'`: md_analyze_table_cell_boundary(ctx, mark_index: i); break;
3977	case `'_'`: / Pass through. /
3978	case `''`: md_analyze_emph(ctx, mark_index: i); break*;
3979	case `'~'`: md_analyze_tilde(ctx, mark_index: i); break;
3980	case `'$'`: md_analyze_dollar(ctx, mark_index: i); break;
3981	case `'.'`: / Pass through. /
3982	case `':'`: md_analyze_permissive_url_autolink(ctx, mark_index: i); break;
3983	case `'@'`: md_analyze_permissive_email_autolink(ctx, mark_index: i); break;
3984	}
3985
3986	i++;
3987	}
3988	}
3989
3990	/ Analyze marks (build ctx->marks). /
3991	static int
3992	md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
3993	{
3994	int ret;
3995
3996	/ Reset the previously collected stack of marks. /
3997	ctx->n_marks = `0`;
3998
3999	/ Collect all marks. /
4000	MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
4001
4002	/ We analyze marks in few groups to handle their precedence. /
4003	/ (1) Entities; code spans; autolinks; raw HTML. /
4004	md_analyze_marks(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks, _T("&"));
4005
4006	/ (2) Links. /
4007	md_analyze_marks(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks, _T("[]!"));
4008	MD_CHECK(md_resolve_links(ctx, lines, n_lines));
4009	BRACKET_OPENERS.head = -`1`;
4010	BRACKET_OPENERS.tail = -`1`;
4011	ctx->unresolved_link_head = -`1`;
4012	ctx->unresolved_link_tail = -`1`;
4013
4014	if(table_mode) {
4015	/ (3) Analyze table cell boundaries.*
4016	* Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
4017	* not after, because caller may need it. */
4018	MD_ASSERT(n_lines == `1`);
4019	TABLECELLBOUNDARIES.head = -`1`;
4020	TABLECELLBOUNDARIES.tail = -`1`;
4021	ctx->n_table_cell_boundaries = `0`;
4022	md_analyze_marks(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks, _T("\|"));
4023	return ret;
4024	}
4025
4026	/ (4) Emphasis and strong emphasis; permissive autolinks. /
4027	md_analyze_link_contents(ctx, lines, n_lines, mark_beg: `0`, mark_end: ctx->n_marks);
4028
4029	abort:
4030	return ret;
4031	}
4032
4033	static void
4034	md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
4035	int mark_beg, int mark_end)
4036	{
4037	int i;
4038
4039	md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
4040
4041	for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {
4042	ctx->mark_chains[i].head = -`1`;
4043	ctx->mark_chains[i].tail = -`1`;
4044	}
4045	}
4046
4047	static int
4048	md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
4049	const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
4050	const CHAR* title, SZ title_size)
4051	{
4052	MD_ATTRIBUTE_BUILD href_build = { `0` };
4053	MD_ATTRIBUTE_BUILD title_build = { `0` };
4054	MD_SPAN_A_DETAIL det;
4055	int ret = `0`;
4056
4057	/ Note we here rely on fact that MD_SPAN_A_DETAIL and*
4058	* MD_SPAN_IMG_DETAIL are binary-compatible. */
4059	memset(s: &det, c: `0`, n: sizeof(MD_SPAN_A_DETAIL));
4060	MD_CHECK(md_build_attribute(ctx, dest, dest_size,
4061	(prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : `0`),
4062	&det.href, &href_build));
4063	MD_CHECK(md_build_attribute(ctx, title, title_size, `0`, &det.title, &title_build));
4064
4065	if(enter)
4066	MD_ENTER_SPAN(type, &det);
4067	else
4068	MD_LEAVE_SPAN(type, &det);
4069
4070	abort:
4071	md_free_attribute(ctx, build: &href_build);
4072	md_free_attribute(ctx, build: &title_build);
4073	return ret;
4074	}
4075
4076	static int
4077	md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
4078	{
4079	MD_ATTRIBUTE_BUILD target_build = { `0` };
4080	MD_SPAN_WIKILINK_DETAIL det;
4081	int ret = `0`;
4082
4083	memset(s: &det, c: `0`, n: sizeof(MD_SPAN_WIKILINK_DETAIL));
4084	MD_CHECK(md_build_attribute(ctx, target, target_size, `0`, &det.target, &target_build));
4085
4086	if (enter)
4087	MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
4088	else
4089	MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
4090
4091	abort:
4092	md_free_attribute(ctx, build: &target_build);
4093	return ret;
4094	}
4095
4096
4097	/ Render the output, accordingly to the analyzed ctx->marks. /
4098	static int
4099	md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4100	{
4101	MD_TEXTTYPE text_type;
4102	const MD_LINE* line = lines;
4103	MD_MARK* prev_mark = NULL;
4104	MD_MARK* mark;
4105	OFF off = lines[`0`].beg;
4106	OFF end = lines[n_lines-`1`].end;
4107	int enforce_hardbreak = `0`;
4108	int ret = `0`;
4109
4110	/ Find first resolved mark. Note there is always at least one resolved*
4111	* mark, the dummy last one after the end of the latest line we actually
4112	* never really reach. This saves us of a lot of special checks and cases
4113	* in this function. */
4114	mark = ctx->marks;
4115	while(!(mark->flags & MD_MARK_RESOLVED))
4116	mark++;
4117
4118	text_type = MD_TEXT_NORMAL;
4119
4120	while(`1`) {
4121	/ Process the text up to the next mark or end-of-line. /
4122	OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
4123	if(tmp > off) {
4124	MD_TEXT(text_type, STR(off), tmp - off);
4125	off = tmp;
4126	}
4127
4128	/ If reached the mark, process it and move to next one. /
4129	if(off >= mark->beg) {
4130	switch(mark->ch) {
4131	case `'\\'`: / Backslash escape. /
4132	if(ISNEWLINE(mark->beg+`1`))
4133	enforce_hardbreak = `1`;
4134	else
4135	MD_TEXT(text_type, STR(mark->beg+`1`), `1`);
4136	break;
4137
4138	case `' '`: / Non-trivial space. /
4139	MD_TEXT(text_type, _T(" "), `1`);
4140	break;
4141
4142	case '`': / Code span. /
4143	if(mark->flags & MD_MARK_OPENER) {
4144	MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
4145	text_type = MD_TEXT_CODE;
4146	} else {
4147	MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
4148	text_type = MD_TEXT_NORMAL;
4149	}
4150	break;
4151
4152	case `'_'`: / Underline (or emphasis if we fall through). /
4153	if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
4154	if(mark->flags & MD_MARK_OPENER) {
4155	while(off < mark->end) {
4156	MD_ENTER_SPAN(MD_SPAN_U, NULL);
4157	off++;
4158	}
4159	} else {
4160	while(off < mark->end) {
4161	MD_LEAVE_SPAN(MD_SPAN_U, NULL);
4162	off++;
4163	}
4164	}
4165	break;
4166	}
4167	/ Fall though. /
4168
4169	case `''`: /* Emphasis, strong emphasis. /
4170	if(mark->flags & MD_MARK_OPENER) {
4171	if((mark->end - off) % `2`) {
4172	MD_ENTER_SPAN(MD_SPAN_EM, NULL);
4173	off++;
4174	}
4175	while(off + `1` < mark->end) {
4176	MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
4177	off += `2`;
4178	}
4179	} else {
4180	while(off + `1` < mark->end) {
4181	MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
4182	off += `2`;
4183	}
4184	if((mark->end - off) % `2`) {
4185	MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
4186	off++;
4187	}
4188	}
4189	break;
4190
4191	case `'~'`:
4192	if(mark->flags & MD_MARK_OPENER)
4193	MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
4194	else
4195	MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
4196	break;
4197
4198	case `'$'`:
4199	if(mark->flags & MD_MARK_OPENER) {
4200	MD_ENTER_SPAN((mark->end - off) % `2` ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4201	text_type = MD_TEXT_LATEXMATH;
4202	} else {
4203	MD_LEAVE_SPAN((mark->end - off) % `2` ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4204	text_type = MD_TEXT_NORMAL;
4205	}
4206	break;
4207
4208	case `'['`: / Link, wiki link, image. /
4209	case `'!'`:
4210	case `']'`:
4211	{
4212	const MD_MARK* opener = (mark->ch != `']'` ? mark : &ctx->marks[mark->prev]);
4213	const MD_MARK* closer = &ctx->marks[opener->next];
4214	const MD_MARK* dest_mark;
4215	const MD_MARK* title_mark;
4216
4217	if ((opener->ch == `'['` && closer->ch == `']'`) &&
4218	opener->end - opener->beg >= `2` &&
4219	closer->end - closer->beg >= `2`)
4220	{
4221	int has_label = (opener->end - opener->beg > `2`);
4222	SZ target_sz;
4223
4224	if(has_label)
4225	target_sz = opener->end - (opener->beg+`2`);
4226	else
4227	target_sz = closer->beg - opener->end;
4228
4229	MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != `']'`),
4230	has_label ? STR(opener->beg+`2`) : STR(opener->end),
4231	target_sz));
4232
4233	break;
4234	}
4235
4236	dest_mark = opener+`1`;
4237	MD_ASSERT(dest_mark->ch == `'D'`);
4238	title_mark = opener+`2`;
4239	MD_ASSERT(title_mark->ch == `'D'`);
4240
4241	MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != `']'`),
4242	(opener->ch == `'!'` ? MD_SPAN_IMG : MD_SPAN_A),
4243	STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
4244	md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev));
4245
4246	/ link/image closer may span multiple lines. /
4247	if(mark->ch == `']'`) {
4248	while(mark->end > line->end)
4249	line++;
4250	}
4251
4252	break;
4253	}
4254
4255	case `'<'`:
4256	case `'>'`: / Autolink or raw HTML. /
4257	if(!(mark->flags & MD_MARK_AUTOLINK)) {
4258	/ Raw HTML. /
4259	if(mark->flags & MD_MARK_OPENER)
4260	text_type = MD_TEXT_HTML;
4261	else
4262	text_type = MD_TEXT_NORMAL;
4263	break;
4264	}
4265	/ Pass through, if auto-link. /
4266
4267	case `'@'`: / Permissive e-mail autolink. /
4268	case `':'`: / Permissive URL autolink. /
4269	case `'.'`: / Permissive WWW autolink. /
4270	{
4271	MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
4272	MD_MARK* closer = &ctx->marks[opener->next];
4273	const CHAR* dest = STR(opener->end);
4274	SZ dest_size = closer->beg - opener->end;
4275
4276	/ For permissive auto-links we do not know closer mark*
4277	* position at the time of md_collect_marks(), therefore
4278	* it can be out-of-order in ctx->marks[].
4279	*
4280	* With this flag, we make sure that we output the closer
4281	* only if we processed the opener. */
4282	if(mark->flags & MD_MARK_OPENER)
4283	closer->flags \|= MD_MARK_VALIDPERMISSIVEAUTOLINK;
4284
4285	if(opener->ch == `'@'` \|\| opener->ch == `'.'`) {
4286	dest_size += `7`;
4287	MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
4288	memcpy(dest: ctx->buffer,
4289	src: (opener->ch == `'@'` ? _T("mailto:") : _T("http://")),
4290	n: `7` * sizeof(CHAR));
4291	memcpy(dest: ctx->buffer + `7`, src: dest, n: (dest_size-`7`) * sizeof(CHAR));
4292	dest = ctx->buffer;
4293	}
4294
4295	if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
4296	MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
4297	MD_SPAN_A, dest, dest_size, TRUE, NULL, `0`));
4298	break;
4299	}
4300
4301	case `'&'`: / Entity. /
4302	MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
4303	break;
4304
4305	case `'\0'`:
4306	MD_TEXT(MD_TEXT_NULLCHAR, _T(""), `1`);
4307	break;
4308
4309	case `127`:
4310	goto abort;
4311	}
4312
4313	off = mark->end;
4314
4315	/ Move to next resolved mark. /
4316	prev_mark = mark;
4317	mark++;
4318	while(!(mark->flags & MD_MARK_RESOLVED) \|\| mark->beg < off)
4319	mark++;
4320	}
4321
4322	/ If reached end of line, move to next one. /
4323	if(off >= line->end) {
4324	/ If it is the last line, we are done. /
4325	if(off >= end)
4326	break;
4327
4328	if(text_type == MD_TEXT_CODE \|\| text_type == MD_TEXT_LATEXMATH) {
4329	OFF tmp;
4330
4331	MD_ASSERT(prev_mark != NULL);
4332	MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', `'$'`) && (prev_mark->flags & MD_MARK_OPENER));
4333	MD_ASSERT(ISANYOF2_(mark->ch, '`', `'$'`) && (mark->flags & MD_MARK_CLOSER));
4334
4335	/ Inside a code span, trailing line whitespace has to be*
4336	* outputted. */
4337	tmp = off;
4338	while(off < ctx->size && ISBLANK(off))
4339	off++;
4340	if(off > tmp)
4341	MD_TEXT(text_type, STR(tmp), off-tmp);
4342
4343	/ and new lines are transformed into single spaces. /
4344	if(prev_mark->end < off && off < mark->beg)
4345	MD_TEXT(text_type, _T(" "), `1`);
4346	} else if(text_type == MD_TEXT_HTML) {
4347	/ Inside raw HTML, we output the new line verbatim, including*
4348	* any trailing spaces. */
4349	OFF tmp = off;
4350
4351	while(tmp < end && ISBLANK(tmp))
4352	tmp++;
4353	if(tmp > off)
4354	MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
4355	MD_TEXT(MD_TEXT_HTML, _T("\n"), `1`);
4356	} else {
4357	/ Output soft or hard line break. /
4358	MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
4359
4360	if(text_type == MD_TEXT_NORMAL) {
4361	if(enforce_hardbreak)
4362	break_type = MD_TEXT_BR;
4363	else if((CH(line->end) == _T(`' '`) && CH(line->end+`1`) == _T(`' '`)))
4364	break_type = MD_TEXT_BR;
4365	}
4366
4367	MD_TEXT(break_type, _T("\n"), `1`);
4368	}
4369
4370	/ Move to the next line. /
4371	line++;
4372	off = line->beg;
4373
4374	enforce_hardbreak = `0`;
4375	}
4376	}
4377
4378	abort:
4379	return ret;
4380	}
4381
4382
4383	/***************************
4384	* Processing Tables *
4385	***************************/
4386
4387	static void
4388	md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
4389	{
4390	static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
4391	OFF off = beg;
4392
4393	while(n_align > `0`) {
4394	int index = `0`; / index into align_map[] /
4395
4396	while(CH(off) != _T(`'-'`))
4397	off++;
4398	if(off > beg && CH(off-`1`) == _T(`':'`))
4399	index \|= `1`;
4400	while(off < end && CH(off) == _T(`'-'`))
4401	off++;
4402	if(off < end && CH(off) == _T(`':'`))
4403	index \|= `2`;
4404
4405	*align = align_map[index];
4406	align++;
4407	n_align--;
4408	}
4409
4410	}
4411
4412	/ Forward declaration. /
4413	static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
4414
4415	static int
4416	md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4417	{
4418	MD_LINE line;
4419	MD_BLOCK_TD_DETAIL det;
4420	int ret = `0`;
4421
4422	while(beg < end && ISWHITESPACE(beg))
4423	beg++;
4424	while(end > beg && ISWHITESPACE(end-`1`))
4425	end--;
4426
4427	det.align = align;
4428	line.beg = beg;
4429	line.end = end;
4430
4431	MD_ENTER_BLOCK(cell_type, &det);
4432	MD_CHECK(md_process_normal_block_contents(ctx, &line, `1`));
4433	MD_LEAVE_BLOCK(cell_type, &det);
4434
4435	abort:
4436	return ret;
4437	}
4438
4439	static int
4440	md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4441	const MD_ALIGN* align, int col_count)
4442	{
4443	MD_LINE line;
4444	OFF* pipe_offs = NULL;
4445	int i, j, k, n;
4446	int ret = `0`;
4447
4448	line.beg = beg;
4449	line.end = end;
4450
4451	/ Break the line into table cells by identifying pipe characters who*
4452	* form the cell boundary. */
4453	MD_CHECK(md_analyze_inlines(ctx, &line, `1`, TRUE));
4454
4455	/ We have to remember the cell boundaries in local buffer because*
4456	* ctx->marks[] shall be reused during cell contents processing. */
4457	n = ctx->n_table_cell_boundaries + `2`;
4458	pipe_offs = (OFF) malloc(size: n sizeof(OFF));
4459	if(pipe_offs == NULL) {
4460	MD_LOG("malloc() failed.");
4461	ret = -`1`;
4462	goto abort;
4463	}
4464	j = `0`;
4465	pipe_offs[j++] = beg;
4466	for(i = TABLECELLBOUNDARIES.head; i >= `0`; i = ctx->marks[i].next) {
4467	MD_MARK* mark = &ctx->marks[i];
4468	pipe_offs[j++] = mark->end;
4469	}
4470	pipe_offs[j++] = end+`1`;
4471
4472	/ Process cells. /
4473	MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4474	k = `0`;
4475	for(i = `0`; i < j-`1` && k < col_count; i++) {
4476	if(pipe_offs[i] < pipe_offs[i+`1`]-`1`)
4477	MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+`1`]-`1`));
4478	}
4479	/ Make sure we call enough table cells even if the current table contains*
4480	* too few of them. */
4481	while(k < col_count)
4482	MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], `0`, `0`));
4483	MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4484
4485	abort:
4486	free(ptr: pipe_offs);
4487
4488	/ Free any temporary memory blocks stored within some dummy marks. /
4489	for(i = PTR_CHAIN.head; i >= `0`; i = ctx->marks[i].next)
4490	free(ptr: md_mark_get_ptr(ctx, mark_index: i));
4491	PTR_CHAIN.head = -`1`;
4492	PTR_CHAIN.tail = -`1`;
4493
4494	return ret;
4495	}
4496
4497	static int
4498	md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
4499	{
4500	MD_ALIGN* align;
4501	int i;
4502	int ret = `0`;
4503
4504	/ At least two lines have to be present: The column headers and the line*
4505	* with the underlines. */
4506	MD_ASSERT(n_lines >= `2`);
4507
4508	align = malloc(size: col_count * sizeof(MD_ALIGN));
4509	if(align == NULL) {
4510	MD_LOG("malloc() failed.");
4511	ret = -`1`;
4512	goto abort;
4513	}
4514
4515	md_analyze_table_alignment(ctx, beg: lines[`1`].beg, end: lines[`1`].end, align, n_align: col_count);
4516
4517	MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4518	MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4519	lines[`0`].beg, lines[`0`].end, align, col_count));
4520	MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4521
4522	MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4523	for(i = `2`; i < n_lines; i++) {
4524	MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4525	lines[i].beg, lines[i].end, align, col_count));
4526	}
4527	MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4528
4529	abort:
4530	free(ptr: align);
4531	return ret;
4532	}
4533
4534
4535	/**************************
4536	* Processing Block *
4537	**************************/
4538
/* Bits stored in the flags member of struct MD_BLOCK_tag. */
#define MD_BLOCK_CONTAINER_OPENER   0x01
#define MD_BLOCK_CONTAINER_CLOSER   0x02
/* Mask matching both the container opener and the container closer bit. */
#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
#define MD_BLOCK_LOOSE_LIST         0x04
#define MD_BLOCK_SETEXT_HEADER      0x08
4544
4545	struct MD_BLOCK_tag {
4546	MD_BLOCKTYPE type : `8`;
4547	unsigned flags : `8`;
4548
4549	/ MD_BLOCK_H: Header level (1 - 6)*
4550	* MD_BLOCK_CODE: Non-zero if fenced, zero if indented.
4551	* MD_BLOCK_LI: Task mark character (0 if not task list item, 'x', 'X' or ' ').
4552	* MD_BLOCK_TABLE: Column count (as determined by the table underline).
4553	*/
4554	unsigned data : `16`;
4555
4556	/ Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.*
4557	* MD_BLOCK_LI: Task mark offset in the input doc.
4558	* MD_BLOCK_OL: Start item number.
4559	*/
4560	unsigned n_lines;
4561	};
4562
4563	struct MD_CONTAINER_tag {
4564	CHAR ch;
4565	unsigned is_loose : `8`;
4566	unsigned is_task : `8`;
4567	unsigned start;
4568	unsigned mark_indent;
4569	unsigned contents_indent;
4570	OFF block_byte_off;
4571	OFF task_mark_off;
4572	};
4573
4574
4575	static int
4576	md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4577	{
4578	int i;
4579	int ret;
4580
4581	MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4582	MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4583
4584	abort:
4585	/ Free any temporary memory blocks stored within some dummy marks. /
4586	for(i = PTR_CHAIN.head; i >= `0`; i = ctx->marks[i].next)
4587	free(ptr: md_mark_get_ptr(ctx, mark_index: i));
4588	PTR_CHAIN.head = -`1`;
4589	PTR_CHAIN.tail = -`1`;
4590
4591	return ret;
4592	}
4593
4594	static int
4595	md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
4596	{
4597	static const CHAR indent_chunk_str[] = _T(" ");
4598	static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - `1`;
4599
4600	int i;
4601	int ret = `0`;
4602
4603	for(i = `0`; i < n_lines; i++) {
4604	const MD_VERBATIMLINE* line = &lines[i];
4605	int indent = line->indent;
4606
4607	MD_ASSERT(indent >= `0`);
4608
4609	/ Output code indentation. /
4610	while(indent > (int) indent_chunk_size) {
4611	MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4612	indent -= indent_chunk_size;
4613	}
4614	if(indent > `0`)
4615	MD_TEXT(text_type, indent_chunk_str, indent);
4616
4617	/ Output the code line itself. /
4618	MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4619
4620	/ Enforce end-of-line. /
4621	MD_TEXT(text_type, _T("\n"), `1`);
4622	}
4623
4624	abort:
4625	return ret;
4626	}
4627
4628	static int
4629	md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
4630	{
4631	if(is_fenced) {
4632	/ Skip the first line in case of fenced code: It is the fence.*
4633	* (Only the starting fence is present due to logic in md_analyze_line().) */
4634	lines++;
4635	n_lines--;
4636	} else {
4637	/ Ignore blank lines at start/end of indented code block. /
4638	while(n_lines > `0` && lines[`0`].beg == lines[`0`].end) {
4639	lines++;
4640	n_lines--;
4641	}
4642	while(n_lines > `0` && lines[n_lines-`1`].beg == lines[n_lines-`1`].end) {
4643	n_lines--;
4644	}
4645	}
4646
4647	if(n_lines == `0`)
4648	return `0`;
4649
4650	return md_process_verbatim_block_contents(ctx, text_type: MD_TEXT_CODE, lines, n_lines);
4651	}
4652
4653	static int
4654	md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4655	MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4656	{
4657	const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + `1`);
4658	OFF beg = fence_line->beg;
4659	OFF end = fence_line->end;
4660	OFF lang_end;
4661	CHAR fence_ch = CH(fence_line->beg);
4662	int ret = `0`;
4663
4664	/ Skip the fence itself. /
4665	while(beg < ctx->size && CH(beg) == fence_ch)
4666	beg++;
4667	/ Trim initial spaces. /
4668	while(beg < ctx->size && CH(beg) == _T(`' '`))
4669	beg++;
4670
4671	/ Trim trailing spaces. /
4672	while(end > beg && CH(end-`1`) == _T(`' '`))
4673	end--;
4674
4675	/ Build info string attribute. /
4676	MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, `0`, &det->info, info_build));
4677
4678	/ Build info string attribute. /
4679	lang_end = beg;
4680	while(lang_end < end && !ISWHITESPACE(lang_end))
4681	lang_end++;
4682	MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, `0`, &det->lang, lang_build));
4683
4684	det->fence_char = fence_ch;
4685
4686	abort:
4687	return ret;
4688	}
4689
4690	static int
4691	md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4692	{
4693	union {
4694	MD_BLOCK_H_DETAIL header;
4695	MD_BLOCK_CODE_DETAIL code;
4696	} det;
4697	MD_ATTRIBUTE_BUILD info_build;
4698	MD_ATTRIBUTE_BUILD lang_build;
4699	int is_in_tight_list;
4700	int clean_fence_code_detail = FALSE;
4701	int ret = `0`;
4702
4703	memset(s: &det, c: `0`, n: sizeof(det));
4704
4705	if(ctx->n_containers == `0`)
4706	is_in_tight_list = FALSE;
4707	else
4708	is_in_tight_list = !ctx->containers[ctx->n_containers-`1`].is_loose;
4709
4710	switch(block->type) {
4711	case MD_BLOCK_H:
4712	det.header.level = block->data;
4713	break;
4714
4715	case MD_BLOCK_CODE:
4716	/ For fenced code block, we may need to set the info string. /
4717	if(block->data != `0`) {
4718	memset(s: &det.code, c: `0`, n: sizeof(MD_BLOCK_CODE_DETAIL));
4719	clean_fence_code_detail = TRUE;
4720	MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4721	}
4722	break;
4723
4724	default:
4725	/ Noop. /
4726	break;
4727	}
4728
4729	if(!is_in_tight_list \|\| block->type != MD_BLOCK_P)
4730	MD_ENTER_BLOCK(block->type, (void*) &det);
4731
4732	/ Process the block contents accordingly to is type. /
4733	switch(block->type) {
4734	case MD_BLOCK_HR:
4735	/ noop /
4736	break;
4737
4738	case MD_BLOCK_CODE:
4739	MD_CHECK(md_process_code_block_contents(ctx, (block->data != `0`),
4740	(const MD_VERBATIMLINE*)(block + `1`), block->n_lines));
4741	break;
4742
4743	case MD_BLOCK_HTML:
4744	MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4745	(const MD_VERBATIMLINE*)(block + `1`), block->n_lines));
4746	break;
4747
4748	case MD_BLOCK_TABLE:
4749	MD_CHECK(md_process_table_block_contents(ctx, block->data,
4750	(const MD_LINE*)(block + `1`), block->n_lines));
4751	break;
4752
4753	default:
4754	MD_CHECK(md_process_normal_block_contents(ctx,
4755	(const MD_LINE*)(block + `1`), block->n_lines));
4756	break;
4757	}
4758
4759	if(!is_in_tight_list \|\| block->type != MD_BLOCK_P)
4760	MD_LEAVE_BLOCK(block->type, (void*) &det);
4761
4762	abort:
4763	if(clean_fence_code_detail) {
4764	md_free_attribute(ctx, build: &info_build);
4765	md_free_attribute(ctx, build: &lang_build);
4766	}
4767	return ret;
4768	}
4769
4770	static int
4771	md_process_all_blocks(MD_CTX* ctx)
4772	{
4773	int byte_off = `0`;
4774	int ret = `0`;
4775
4776	/ ctx->containers now is not needed for detection of lists and list items*
4777	* so we reuse it for tracking what lists are loose or tight. We rely
4778	* on the fact the vector is large enough to hold the deepest nesting
4779	* level of lists. */
4780	ctx->n_containers = `0`;
4781
4782	while(byte_off < ctx->n_block_bytes) {
4783	MD_BLOCK* block = (MD_BLOCK)((char**)ctx->block_bytes + byte_off);
4784	union {
4785	MD_BLOCK_UL_DETAIL ul;
4786	MD_BLOCK_OL_DETAIL ol;
4787	MD_BLOCK_LI_DETAIL li;
4788	} det;
4789
4790	switch(block->type) {
4791	case MD_BLOCK_UL:
4792	det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4793	det.ul.mark = (CHAR) block->data;
4794	break;
4795
4796	case MD_BLOCK_OL:
4797	det.ol.start = block->n_lines;
4798	det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4799	det.ol.mark_delimiter = (CHAR) block->data;
4800	break;
4801
4802	case MD_BLOCK_LI:
4803	det.li.is_task = (block->data != `0`);
4804	det.li.task_mark = (CHAR) block->data;
4805	det.li.task_mark_offset = (OFF) block->n_lines;
4806	break;
4807
4808	default:
4809	/ noop /
4810	break;
4811	}
4812
4813	if(block->flags & MD_BLOCK_CONTAINER) {
4814	if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4815	MD_LEAVE_BLOCK(block->type, &det);
4816
4817	if(block->type == MD_BLOCK_UL \|\| block->type == MD_BLOCK_OL \|\| block->type == MD_BLOCK_QUOTE)
4818	ctx->n_containers--;
4819	}
4820
4821	if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4822	MD_ENTER_BLOCK(block->type, &det);
4823
4824	if(block->type == MD_BLOCK_UL \|\| block->type == MD_BLOCK_OL) {
4825	ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4826	ctx->n_containers++;
4827	} else if(block->type == MD_BLOCK_QUOTE) {
4828	/ This causes that any text in a block quote, even if*
4829	* nested inside a tight list item, is wrapped with
4830	* <p>...</p>. */
4831	ctx->containers[ctx->n_containers].is_loose = TRUE;
4832	ctx->n_containers++;
4833	}
4834	}
4835	} else {
4836	MD_CHECK(md_process_leaf_block(ctx, block));
4837
4838	if(block->type == MD_BLOCK_CODE \|\| block->type == MD_BLOCK_HTML)
4839	byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4840	else
4841	byte_off += block->n_lines * sizeof(MD_LINE);
4842	}
4843
4844	byte_off += sizeof(MD_BLOCK);
4845	}
4846
4847	ctx->n_block_bytes = `0`;
4848
4849	abort:
4850	return ret;
4851	}
4852
4853
4854	/************************************
4855	* Grouping Lines into Blocks *
4856	************************************/
4857
4858	static void*
4859	md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4860	{
4861	void* ptr;
4862
4863	if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4864	void* new_block_bytes;
4865
4866	ctx->alloc_block_bytes = (ctx->alloc_block_bytes > `0`
4867	? ctx->alloc_block_bytes + ctx->alloc_block_bytes / `2`
4868	: `512`);
4869	new_block_bytes = realloc(ptr: ctx->block_bytes, size: ctx->alloc_block_bytes);
4870	if(new_block_bytes == NULL) {
4871	MD_LOG("realloc() failed.");
4872	return NULL;
4873	}
4874
4875	/ Fix the ->current_block after the reallocation. /
4876	if(ctx->current_block != NULL) {
4877	OFF off_current_block = (char) ctx->current_block - (char**) ctx->block_bytes;
4878	ctx->current_block = (MD_BLOCK) ((char**) new_block_bytes + off_current_block);
4879	}
4880
4881	ctx->block_bytes = new_block_bytes;
4882	}
4883
4884	ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4885	ctx->n_block_bytes += n_bytes;
4886	return ptr;
4887	}
4888
4889	static int
4890	md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4891	{
4892	MD_BLOCK* block;
4893
4894	MD_ASSERT(ctx->current_block == NULL);
4895
4896	block = (MD_BLOCK) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_BLOCK));
4897	if(block == NULL)
4898	return -`1`;
4899
4900	switch(line->type) {
4901	case MD_LINE_HR:
4902	block->type = MD_BLOCK_HR;
4903	break;
4904
4905	case MD_LINE_ATXHEADER:
4906	case MD_LINE_SETEXTHEADER:
4907	block->type = MD_BLOCK_H;
4908	break;
4909
4910	case MD_LINE_FENCEDCODE:
4911	case MD_LINE_INDENTEDCODE:
4912	block->type = MD_BLOCK_CODE;
4913	break;
4914
4915	case MD_LINE_TEXT:
4916	block->type = MD_BLOCK_P;
4917	break;
4918
4919	case MD_LINE_HTML:
4920	block->type = MD_BLOCK_HTML;
4921	break;
4922
4923	case MD_LINE_BLANK:
4924	case MD_LINE_SETEXTUNDERLINE:
4925	case MD_LINE_TABLEUNDERLINE:
4926	default:
4927	MD_UNREACHABLE();
4928	break;
4929	}
4930
4931	block->flags = `0`;
4932	block->data = line->data;
4933	block->n_lines = `0`;
4934
4935	ctx->current_block = block;
4936	return `0`;
4937	}
4938
4939	/ Eat from start of current (textual) block any reference definitions and*
4940	* remember them so we can resolve any links referring to them.
4941	*
4942	* (Reference definitions can only be at start of it as they cannot break
4943	* a paragraph.)
4944	*/
4945	static int
4946	md_consume_link_reference_definitions(MD_CTX* ctx)
4947	{
4948	MD_LINE* lines = (MD_LINE*) (ctx->current_block + `1`);
4949	int n_lines = ctx->current_block->n_lines;
4950	int n = `0`;
4951
4952	/ Compute how many lines at the start of the block form one or more*
4953	* reference definitions. */
4954	while(n < n_lines) {
4955	int n_link_ref_lines;
4956
4957	n_link_ref_lines = md_is_link_reference_definition(ctx,
4958	lines: lines + n, n_lines: n_lines - n);
4959	/ Not a reference definition? /
4960	if(n_link_ref_lines == `0`)
4961	break;
4962
4963	/ We fail if it is the ref. def. but it could not be stored due*
4964	* a memory allocation error. */
4965	if(n_link_ref_lines < `0`)
4966	return -`1`;
4967
4968	n += n_link_ref_lines;
4969	}
4970
4971	/ If there was at least one reference definition, we need to remove*
4972	* its lines from the block, or perhaps even the whole block. */
4973	if(n > `0`) {
4974	if(n == n_lines) {
4975	/ Remove complete block. /
4976	ctx->n_block_bytes -= n * sizeof(MD_LINE);
4977	ctx->n_block_bytes -= sizeof(MD_BLOCK);
4978	ctx->current_block = NULL;
4979	} else {
4980	/ Remove just some initial lines from the block. /
4981	memmove(dest: lines, src: lines + n, n: (n_lines - n) * sizeof(MD_LINE));
4982	ctx->current_block->n_lines -= n;
4983	ctx->n_block_bytes -= n * sizeof(MD_LINE);
4984	}
4985	}
4986
4987	return `0`;
4988	}
4989
4990	static int
4991	md_end_current_block(MD_CTX* ctx)
4992	{
4993	int ret = `0`;
4994
4995	if(ctx->current_block == NULL)
4996	return ret;
4997
4998	/ Check whether there is a reference definition. (We do this here instead*
4999	* of in md_analyze_line() because reference definition can take multiple
5000	* lines.) */
5001	if(ctx->current_block->type == MD_BLOCK_P \|\|
5002	(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
5003	{
5004	MD_LINE* lines = (MD_LINE*) (ctx->current_block + `1`);
5005	if(CH(lines[`0`].beg) == _T(`'['`)) {
5006	MD_CHECK(md_consume_link_reference_definitions(ctx));
5007	if(ctx->current_block == NULL)
5008	return ret;
5009	}
5010	}
5011
5012	if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
5013	int n_lines = ctx->current_block->n_lines;
5014
5015	if(n_lines > `1`) {
5016	/ Get rid of the underline. /
5017	ctx->current_block->n_lines--;
5018	ctx->n_block_bytes -= sizeof(MD_LINE);
5019	} else {
5020	/ Only the underline has left after eating the ref. defs.*
5021	* Keep the line as beginning of a new ordinary paragraph. */
5022	ctx->current_block->type = MD_BLOCK_P;
5023	return `0`;
5024	}
5025	}
5026
5027	/ Mark we are not building any block anymore. /
5028	ctx->current_block = NULL;
5029
5030	abort:
5031	return ret;
5032	}
5033
5034	static int
5035	md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
5036	{
5037	MD_ASSERT(ctx->current_block != NULL);
5038
5039	if(ctx->current_block->type == MD_BLOCK_CODE \|\| ctx->current_block->type == MD_BLOCK_HTML) {
5040	MD_VERBATIMLINE* line;
5041
5042	line = (MD_VERBATIMLINE) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_VERBATIMLINE));
5043	if(line == NULL)
5044	return -`1`;
5045
5046	line->indent = analysis->indent;
5047	line->beg = analysis->beg;
5048	line->end = analysis->end;
5049	} else {
5050	MD_LINE* line;
5051
5052	line = (MD_LINE) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_LINE));
5053	if(line == NULL)
5054	return -`1`;
5055
5056	line->beg = analysis->beg;
5057	line->end = analysis->end;
5058	}
5059	ctx->current_block->n_lines++;
5060
5061	return `0`;
5062	}
5063
5064	static int
5065	md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
5066	unsigned data, unsigned flags)
5067	{
5068	MD_BLOCK* block;
5069	int ret = `0`;
5070
5071	MD_CHECK(md_end_current_block(ctx));
5072
5073	block = (MD_BLOCK) md_push_block_bytes(ctx, n_bytes: sizeof*(MD_BLOCK));
5074	if(block == NULL)
5075	return -`1`;
5076
5077	block->type = type;
5078	block->flags = flags;
5079	block->data = data;
5080	block->n_lines = start;
5081
5082	abort:
5083	return ret;
5084	}
5085
5086
5087
5088	/***********************
5089	* Line Analysis *
5090	***********************/
5091
5092	static int
5093	md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
5094	{
5095	OFF off = beg + `1`;
5096	int n = `1`;
5097
5098	while(off < ctx->size && (CH(off) == CH(beg) \|\| CH(off) == _T(`' '`) \|\| CH(off) == _T(`'\t'`))) {
5099	if(CH(off) == CH(beg))
5100	n++;
5101	off++;
5102	}
5103
5104	if(n < `3`) {
5105	*p_killer = off;
5106	return FALSE;
5107	}
5108
5109	/ Nothing else can be present on the line. /
5110	if(off < ctx->size && !ISNEWLINE(off)) {
5111	*p_killer = off;
5112	return FALSE;
5113	}
5114
5115	*p_end = off;
5116	return TRUE;
5117	}
5118
5119	static int
5120	md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
5121	{
5122	int n;
5123	OFF off = beg + `1`;
5124
5125	while(off < ctx->size && CH(off) == _T(`'#'`) && off - beg < `7`)
5126	off++;
5127	n = off - beg;
5128
5129	if(n > `6`)
5130	return FALSE;
5131	*p_level = n;
5132
5133	if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
5134	CH(off) != _T(`' '`) && CH(off) != _T(`'\t'`) && !ISNEWLINE(off))
5135	return FALSE;
5136
5137	while(off < ctx->size && CH(off) == _T(`' '`))
5138	off++;
5139	*p_beg = off;
5140	*p_end = off;
5141	return TRUE;
5142	}
5143
5144	static int
5145	md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
5146	{
5147	OFF off = beg + `1`;
5148
5149	while(off < ctx->size && CH(off) == CH(beg))
5150	off++;
5151
5152	/ Optionally, space(s) can follow. /
5153	while(off < ctx->size && CH(off) == _T(`' '`))
5154	off++;
5155
5156	/ But nothing more is allowed on the line. /
5157	if(off < ctx->size && !ISNEWLINE(off))
5158	return FALSE;
5159
5160	*p_level = (CH(beg) == _T(`'='`) ? `1` : `2`);
5161	*p_end = off;
5162	return TRUE;
5163	}
5164
5165	static int
5166	md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
5167	{
5168	OFF off = beg;
5169	int found_pipe = FALSE;
5170	unsigned col_count = `0`;
5171
5172	if(off < ctx->size && CH(off) == _T(`'\|'`)) {
5173	found_pipe = TRUE;
5174	off++;
5175	while(off < ctx->size && ISWHITESPACE(off))
5176	off++;
5177	}
5178
5179	while(`1`) {
5180	OFF cell_beg;
5181	int delimited = FALSE;
5182
5183	/ Cell underline ("-----", ":----", "----:" or ":----:") /
5184	cell_beg = off;
5185	if(off < ctx->size && CH(off) == _T(`':'`))
5186	off++;
5187	while(off < ctx->size && CH(off) == _T(`'-'`))
5188	off++;
5189	if(off < ctx->size && CH(off) == _T(`':'`))
5190	off++;
5191	if(off - cell_beg < `3`)
5192	return FALSE;
5193
5194	col_count++;
5195
5196	/ Pipe delimiter (optional at the end of line). /
5197	while(off < ctx->size && ISWHITESPACE(off))
5198	off++;
5199	if(off < ctx->size && CH(off) == _T(`'\|'`)) {
5200	delimited = TRUE;
5201	found_pipe = TRUE;
5202	off++;
5203	while(off < ctx->size && ISWHITESPACE(off))
5204	off++;
5205	}
5206
5207	/ Success, if we reach end of line. /
5208	if(off >= ctx->size \|\| ISNEWLINE(off))
5209	break;
5210
5211	if(!delimited)
5212	return FALSE;
5213	}
5214
5215	if(!found_pipe)
5216	return FALSE;
5217
5218	*p_end = off;
5219	*p_col_count = col_count;
5220	return TRUE;
5221	}
5222
5223	static int
5224	md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
5225	{
5226	OFF off = beg;
5227
5228	while(off < ctx->size && CH(off) == CH(beg))
5229	off++;
5230
5231	/ Fence must have at least three characters. /
5232	if(off - beg < `3`)
5233	return FALSE;
5234
5235	ctx->code_fence_length = off - beg;
5236
5237	/ Optionally, space(s) can follow. /
5238	while(off < ctx->size && CH(off) == _T(`' '`))
5239	off++;
5240
5241	/ Optionally, an info string can follow. /
5242	while(off < ctx->size && !ISNEWLINE(off)) {
5243	/ Backtick-based fence must not contain '`' in the info string. /
5244	if(CH(beg) == _T('`') && CH(off) == _T('`'))
5245	return FALSE;
5246	off++;
5247	}
5248
5249	*p_end = off;
5250	return TRUE;
5251	}
5252
5253	static int
5254	md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
5255	{
5256	OFF off = beg;
5257	int ret = FALSE;
5258
5259	/ Closing fence must have at least the same length and use same char as*
5260	* opening one. */
5261	while(off < ctx->size && CH(off) == ch)
5262	off++;
5263	if(off - beg < ctx->code_fence_length)
5264	goto out;
5265
5266	/ Optionally, space(s) can follow /
5267	while(off < ctx->size && CH(off) == _T(`' '`))
5268	off++;
5269
5270	/ But nothing more is allowed on the line. /
5271	if(off < ctx->size && !ISNEWLINE(off))
5272	goto out;
5273
5274	ret = TRUE;
5275
5276	out:
5277	/ Note we set p_end even on failure: If we are not closing fence, caller
5278	* would eat the line anyway without any parsing. */
5279	*p_end = off;
5280	return ret;
5281	}
5282
5283	/ Returns type of the raw HTML block, or FALSE if it is not HTML block.*
5284	* (Refer to CommonMark specification for details about the types.)
5285	*/
5286	static int
5287	md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
5288	{
5289	typedef struct TAG_tag TAG;
5290	struct TAG_tag {
5291	const CHAR* name;
5292	unsigned len : `8`;
5293	};
5294
5295	/ Type 6 is started by a long list of allowed tags. We use two-level*
5296	* tree to speed-up the search. */
5297	#ifdef X
5298	#undef X
5299	#endif
5300	#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
5301	#define Xend { NULL, 0 }
5302	static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend };
5303
5304	static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
5305	static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
5306	static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
5307	static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
5308	X("div"), X("dl"), X("dt"), Xend };
5309	static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
5310	X("form"), X("frame"), X("frameset"), Xend };
5311	static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
5312	static const TAG i6[] = { X("iframe"), Xend };
5313	static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
5314	static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
5315	static const TAG n6[] = { X("nav"), X("noframes"), Xend };
5316	static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
5317	static const TAG p6[] = { X("p"), X("param"), Xend };
5318	static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
5319	static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
5320	X("thead"), X("title"), X("tr"), X("track"), Xend };
5321	static const TAG u6[] = { X("ul"), Xend };
5322	static const TAG xx[] = { Xend };
5323	#undef X
5324
5325	static const TAG* map6[`26`] = {
5326	a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
5327	n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
5328	};
5329	OFF off = beg + `1`;
5330	int i;
5331
5332	/ Check for type 1: <script, <pre, or <style /
5333	for(i = `0`; t1[i].name != NULL; i++) {
5334	if(off + t1[i].len <= ctx->size) {
5335	if(md_ascii_case_eq(STR(off), s2: t1[i].name, n: t1[i].len))
5336	return `1`;
5337	}
5338	}
5339
5340	/ Check for type 2: <!-- /
5341	if(off + `3` < ctx->size && CH(off) == _T(`'!'`) && CH(off+`1`) == _T(`'-'`) && CH(off+`2`) == _T(`'-'`))
5342	return `2`;
5343
5344	/ Check for type 3: <? /
5345	if(off < ctx->size && CH(off) == _T(`'?'`))
5346	return `3`;
5347
5348	/ Check for type 4 or 5: <! /
5349	if(off < ctx->size && CH(off) == _T(`'!'`)) {
5350	/ Check for type 4: <! followed by uppercase letter. /
5351	if(off + `1` < ctx->size && ISUPPER(off+`1`))
5352	return `4`;
5353
5354	/ Check for type 5: <![CDATA[ /
5355	if(off + `8` < ctx->size) {
5356	if(md_ascii_eq(STR(off), _T("![CDATA["), n: `8`))
5357	return `5`;
5358	}
5359	}
5360
5361	/ Check for type 6: Many possible starting tags listed above. /
5362	if(off + `1` < ctx->size && (ISALPHA(off) \|\| (CH(off) == _T(`'/'`) && ISALPHA(off+`1`)))) {
5363	int slot;
5364	const TAG* tags;
5365
5366	if(CH(off) == _T(`'/'`))
5367	off++;
5368
5369	slot = (ISUPPER(off) ? CH(off) - `'A'` : CH(off) - `'a'`);
5370	tags = map6[slot];
5371
5372	for(i = `0`; tags[i].name != NULL; i++) {
5373	if(off + tags[i].len <= ctx->size) {
5374	if(md_ascii_case_eq(STR(off), s2: tags[i].name, n: tags[i].len)) {
5375	OFF tmp = off + tags[i].len;
5376	if(tmp >= ctx->size)
5377	return `6`;
5378	if(ISBLANK(tmp) \|\| ISNEWLINE(tmp) \|\| CH(tmp) == _T(`'>'`))
5379	return `6`;
5380	if(tmp+`1` < ctx->size && CH(tmp) == _T(`'/'`) && CH(tmp+`1`) == _T(`'>'`))
5381	return `6`;
5382	break;
5383	}
5384	}
5385	}
5386	}
5387
5388	/ Check for type 7: any COMPLETE other opening or closing tag. /
5389	if(off + `1` < ctx->size) {
5390	OFF end;
5391
5392	if(md_is_html_tag(ctx, NULL, n_lines: `0`, beg, max_end: ctx->size, p_end: &end)) {
5393	/ Only optional whitespace and new line may follow. /
5394	while(end < ctx->size && ISWHITESPACE(end))
5395	end++;
5396	if(end >= ctx->size \|\| ISNEWLINE(end))
5397	return `7`;
5398	}
5399	}
5400
5401	return FALSE;
5402	}
5403
5404	/ Case sensitive check whether there is a substring 'what' between 'beg'*
5405	* and end of line. */
5406	static int
5407	md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
5408	{
5409	OFF i;
5410	for(i = beg; i + what_len < ctx->size; i++) {
5411	if(ISNEWLINE(i))
5412	break;
5413	if(memcmp(STR(i), s2: what, n: what_len * sizeof(CHAR)) == `0`) {
5414	*p_end = i + what_len;
5415	return TRUE;
5416	}
5417	}
5418
5419	*p_end = i;
5420	return FALSE;
5421	}
5422
5423	/ Returns type of HTML block end condition or FALSE if not an end condition.*
5424	*
5425	* Note it fills p_end even when it is not end condition as the caller
5426	* does not need to analyze contents of a raw HTML block.
5427	*/
5428	static int
5429	md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
5430	{
5431	switch(ctx->html_block_type) {
5432	case `1`:
5433	{
5434	OFF off = beg;
5435
5436	while(off < ctx->size && !ISNEWLINE(off)) {
5437	if(CH(off) == _T(`'<'`)) {
5438	if(md_ascii_case_eq(STR(off), _T("</script>"), n: `9`)) {
5439	*p_end = off + `9`;
5440	return TRUE;
5441	}
5442
5443	if(md_ascii_case_eq(STR(off), _T("</style>"), n: `8`)) {
5444	*p_end = off + `8`;
5445	return TRUE;
5446	}
5447
5448	if(md_ascii_case_eq(STR(off), _T("</pre>"), n: `6`)) {
5449	*p_end = off + `6`;
5450	return TRUE;
5451	}
5452	}
5453
5454	off++;
5455	}
5456	*p_end = off;
5457	return FALSE;
5458	}
5459
5460	case `2`:
5461	return (md_line_contains(ctx, beg, _T("-->"), what_len: `3`, p_end) ? `2` : FALSE);
5462
5463	case `3`:
5464	return (md_line_contains(ctx, beg, _T("?>"), what_len: `2`, p_end) ? `3` : FALSE);
5465
5466	case `4`:
5467	return (md_line_contains(ctx, beg, _T(">"), what_len: `1`, p_end) ? `4` : FALSE);
5468
5469	case `5`:
5470	return (md_line_contains(ctx, beg, _T("]]>"), what_len: `3`, p_end) ? `5` : FALSE);
5471
5472	case `6`: / Pass through /
5473	case `7`:
5474	*p_end = beg;
5475	return (ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
5476
5477	default:
5478	MD_UNREACHABLE();
5479	}
5480	return FALSE;
5481	}
5482
5483
5484	static int
5485	md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
5486	{
5487	/ Block quote has no "items" like lists. /
5488	if(container->ch == _T(`'>'`))
5489	return FALSE;
5490
5491	if(container->ch != pivot->ch)
5492	return FALSE;
5493	if(container->mark_indent > pivot->contents_indent)
5494	return FALSE;
5495
5496	return TRUE;
5497	}
5498
5499	static int
5500	md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5501	{
5502	if(ctx->n_containers >= ctx->alloc_containers) {
5503	MD_CONTAINER* new_containers;
5504
5505	ctx->alloc_containers = (ctx->alloc_containers > `0`
5506	? ctx->alloc_containers + ctx->alloc_containers / `2`
5507	: `16`);
5508	new_containers = realloc(ptr: ctx->containers, size: ctx->alloc_containers * sizeof(MD_CONTAINER));
5509	if(new_containers == NULL) {
5510	MD_LOG("realloc() failed.");
5511	return -`1`;
5512	}
5513
5514	ctx->containers = new_containers;
5515	}
5516
5517	memcpy(dest: &ctx->containers[ctx->n_containers++], src: container, n: sizeof(MD_CONTAINER));
5518	return `0`;
5519	}
5520
5521	static int
5522	md_enter_child_containers(MD_CTX* ctx, int n_children)
5523	{
5524	int i;
5525	int ret = `0`;
5526
5527	for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5528	MD_CONTAINER* c = &ctx->containers[i];
5529	int is_ordered_list = FALSE;
5530
5531	switch(c->ch) {
5532	case _T(`')'`):
5533	case _T(`'.'`):
5534	is_ordered_list = TRUE;
5535	/ Pass through /
5536
5537	case _T(`'-'`):
5538	case _T(`'+'`):
5539	case _T(`'*'`):
5540	/ Remember offset in ctx->block_bytes so we can revisit the*
5541	* block if we detect it is a loose list. */
5542	md_end_current_block(ctx);
5543	c->block_byte_off = ctx->n_block_bytes;
5544
5545	MD_CHECK(md_push_container_bytes(ctx,
5546	(is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5547	c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
5548	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5549	c->task_mark_off,
5550	(c->is_task ? CH(c->task_mark_off) : `0`),
5551	MD_BLOCK_CONTAINER_OPENER));
5552	break;
5553
5554	case _T(`'>'`):
5555	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, `0`, `0`, MD_BLOCK_CONTAINER_OPENER));
5556	break;
5557
5558	default:
5559	MD_UNREACHABLE();
5560	break;
5561	}
5562	}
5563
5564	abort:
5565	return ret;
5566	}
5567
5568	static int
5569	md_leave_child_containers(MD_CTX* ctx, int n_keep)
5570	{
5571	int ret = `0`;
5572
5573	while(ctx->n_containers > n_keep) {
5574	MD_CONTAINER* c = &ctx->containers[ctx->n_containers-`1`];
5575	int is_ordered_list = FALSE;
5576
5577	switch(c->ch) {
5578	case _T(`')'`):
5579	case _T(`'.'`):
5580	is_ordered_list = TRUE;
5581	/ Pass through /
5582
5583	case _T(`'-'`):
5584	case _T(`'+'`):
5585	case _T(`'*'`):
5586	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5587	c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : `0`),
5588	MD_BLOCK_CONTAINER_CLOSER));
5589	MD_CHECK(md_push_container_bytes(ctx,
5590	(is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), `0`,
5591	c->ch, MD_BLOCK_CONTAINER_CLOSER));
5592	break;
5593
5594	case _T(`'>'`):
5595	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, `0`,
5596	`0`, MD_BLOCK_CONTAINER_CLOSER));
5597	break;
5598
5599	default:
5600	MD_UNREACHABLE();
5601	break;
5602	}
5603
5604	ctx->n_containers--;
5605	}
5606
5607	abort:
5608	return ret;
5609	}
5610
5611	static int
5612	md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5613	{
5614	OFF off = beg;
5615	OFF max_end;
5616
5617	if(indent >= ctx->code_indent_offset)
5618	return FALSE;
5619
5620	/ Check for block quote mark. /
5621	if(off < ctx->size && CH(off) == _T(`'>'`)) {
5622	off++;
5623	p_container->ch = _T(`'>'`);
5624	p_container->is_loose = FALSE;
5625	p_container->is_task = FALSE;
5626	p_container->mark_indent = indent;
5627	p_container->contents_indent = indent + `1`;
5628	*p_end = off;
5629	return TRUE;
5630	}
5631
5632	/ Check for list item bullet mark. /
5633	if(off+`1` < ctx->size && ISANYOF(off, _T("-+*")) && (ISBLANK(off+`1`) \|\| ISNEWLINE(off+`1`))) {
5634	p_container->ch = CH(off);
5635	p_container->is_loose = FALSE;
5636	p_container->is_task = FALSE;
5637	p_container->mark_indent = indent;
5638	p_container->contents_indent = indent + `1`;
5639	*p_end = off + `1`;
5640	return TRUE;
5641	}
5642
5643	/ Check for ordered list item marks. /
5644	max_end = off + `9`;
5645	if(max_end > ctx->size)
5646	max_end = ctx->size;
5647	p_container->start = `0`;
5648	while(off < max_end && ISDIGIT(off)) {
5649	p_container->start = p_container->start * `10` + CH(off) - _T(`'0'`);
5650	off++;
5651	}
5652	if(off > beg && off+`1` < ctx->size &&
5653	(CH(off) == _T(`'.'`) \|\| CH(off) == _T(`')'`)) &&
5654	(ISBLANK(off+`1`) \|\| ISNEWLINE(off+`1`)))
5655	{
5656	p_container->ch = CH(off);
5657	p_container->is_loose = FALSE;
5658	p_container->is_task = FALSE;
5659	p_container->mark_indent = indent;
5660	p_container->contents_indent = indent + off - beg + `1`;
5661	*p_end = off + `1`;
5662	return TRUE;
5663	}
5664
5665	return FALSE;
5666	}
5667
5668	static unsigned
5669	md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5670	{
5671	OFF off = beg;
5672	unsigned indent = total_indent;
5673
5674	while(off < ctx->size && ISBLANK(off)) {
5675	if(CH(off) == _T(`'\t'`))
5676	indent = (indent + `4`) & ~`3`;
5677	else
5678	indent++;
5679	off++;
5680	}
5681
5682	*p_end = off;
5683	return indent - total_indent;
5684	}
5685
/* A line analysis of type MD_LINE_BLANK. md_analyze_line() switches its
 * pivot line to this constant after it recognizes a container mark. */
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };
5687
5688	/ Analyze type of the line and find some its properties. This serves as a*
5689	* main input for determining type and boundaries of a block. */
5690	static int
5691	md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5692	const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5693	{
5694	unsigned total_indent = `0`;
5695	int n_parents = `0`;
5696	int n_brothers = `0`;
5697	int n_children = `0`;
5698	MD_CONTAINER container = { `0` };
5699	int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5700	OFF off = beg;
5701	OFF hr_killer = `0`;
5702	int ret = `0`;
5703
5704	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
5705	total_indent += line->indent;
5706	line->beg = off;
5707
5708	/ Given the indentation and block quote marks '>', determine how many of*
5709	* the current containers are our parents. */
5710	while(n_parents < ctx->n_containers) {
5711	MD_CONTAINER* c = &ctx->containers[n_parents];
5712
5713	if(c->ch == _T(`'>'`) && line->indent < ctx->code_indent_offset &&
5714	off < ctx->size && CH(off) == _T(`'>'`))
5715	{
5716	/ Block quote mark. /
5717	off++;
5718	total_indent++;
5719	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
5720	total_indent += line->indent;
5721
5722	/ The optional 1st space after '>' is part of the block quote mark. /
5723	if(line->indent > `0`)
5724	line->indent--;
5725
5726	line->beg = off;
5727
5728	} else if(c->ch != _T(`'>'`) && line->indent >= c->contents_indent) {
5729	/ List. /
5730	line->indent -= c->contents_indent;
5731	} else {
5732	break;
5733	}
5734
5735	n_parents++;
5736	}
5737
5738	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5739	/ Blank line does not need any real indentation to be nested inside*
5740	* a list. */
5741	if(n_brothers + n_children == `0`) {
5742	while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T(`'>'`))
5743	n_parents++;
5744	}
5745	}
5746
5747	while(TRUE) {
5748	/ Check whether we are fenced code continuation. /
5749	if(pivot_line->type == MD_LINE_FENCEDCODE) {
5750	line->beg = off;
5751
5752	/ We are another MD_LINE_FENCEDCODE unless we are closing fence*
5753	* which we transform into MD_LINE_BLANK. */
5754	if(line->indent < ctx->code_indent_offset) {
5755	if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), beg: off, p_end: &off)) {
5756	line->type = MD_LINE_BLANK;
5757	ctx->last_line_has_list_loosening_effect = FALSE;
5758	break;
5759	}
5760	}
5761
5762	/ Change indentation accordingly to the initial code fence. /
5763	if(n_parents == ctx->n_containers) {
5764	if(line->indent > pivot_line->indent)
5765	line->indent -= pivot_line->indent;
5766	else
5767	line->indent = `0`;
5768
5769	line->type = MD_LINE_FENCEDCODE;
5770	break;
5771	}
5772	}
5773
5774	/ Check whether we are HTML block continuation. /
5775	if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > `0`) {
5776	if(n_parents < ctx->n_containers) {
5777	/ HTML block is implicitly ended if the enclosing container*
5778	* block ends. */
5779	ctx->html_block_type = `0`;
5780	} else {
5781	int html_block_type;
5782
5783	html_block_type = md_is_html_block_end_condition(ctx, beg: off, p_end: &off);
5784	if(html_block_type > `0`) {
5785	MD_ASSERT(html_block_type == ctx->html_block_type);
5786
5787	/ Make sure this is the last line of the block. /
5788	ctx->html_block_type = `0`;
5789
5790	/ Some end conditions serve as blank lines at the same time. /
5791	if(html_block_type == `6` \|\| html_block_type == `7`) {
5792	line->type = MD_LINE_BLANK;
5793	line->indent = `0`;
5794	break;
5795	}
5796	}
5797
5798	line->type = MD_LINE_HTML;
5799	n_parents = ctx->n_containers;
5800	break;
5801	}
5802	}
5803
5804	/ Check for blank line. /
5805	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5806	if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) {
5807	line->type = MD_LINE_INDENTEDCODE;
5808	if(line->indent > ctx->code_indent_offset)
5809	line->indent -= ctx->code_indent_offset;
5810	else
5811	line->indent = `0`;
5812	ctx->last_line_has_list_loosening_effect = FALSE;
5813	} else {
5814	line->type = MD_LINE_BLANK;
5815	ctx->last_line_has_list_loosening_effect = (n_parents > `0` &&
5816	n_brothers + n_children == `0` &&
5817	ctx->containers[n_parents-`1`].ch != _T(`'>'`));
5818
5819	#if 1
5820	/ See https://github.com/mity/md4c/issues/6*
5821	*
5822	* This ugly checking tests we are in (yet empty) list item but not
5823	* its very first line (with the list item mark).
5824	*
5825	* If we are such blank line, then any following non-blank line
5826	* which would be part of this list item actually ends the list
5827	* because "a list item can begin with at most one blank line."
5828	*/
5829	if(n_parents > `0` && ctx->containers[n_parents-`1`].ch != _T(`'>'`) &&
5830	n_brothers + n_children == `0` && ctx->current_block == NULL &&
5831	ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5832	{
5833	MD_BLOCK* top_block = (MD_BLOCK) ((char)ctx->block_bytes + ctx->n_block_bytes - sizeof**(MD_BLOCK));
5834	if(top_block->type == MD_BLOCK_LI)
5835	ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5836	}
5837	#endif
5838	}
5839	break;
5840	} else {
5841	#if 1
5842	/ This is 2nd half of the hack. If the flag is set (that is there*
5843	* were 2nd blank line at the start of the list item) and we would also
5844	* belonging to such list item, than interrupt the list. */
5845	ctx->last_line_has_list_loosening_effect = FALSE;
5846	if(ctx->last_list_item_starts_with_two_blank_lines) {
5847	if(n_parents > `0` && ctx->containers[n_parents-`1`].ch != _T(`'>'`) &&
5848	n_brothers + n_children == `0` && ctx->current_block == NULL &&
5849	ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5850	{
5851	MD_BLOCK* top_block = (MD_BLOCK) ((char)ctx->block_bytes + ctx->n_block_bytes - sizeof**(MD_BLOCK));
5852	if(top_block->type == MD_BLOCK_LI)
5853	n_parents--;
5854	}
5855
5856	ctx->last_list_item_starts_with_two_blank_lines = FALSE;
5857	}
5858	#endif
5859	}
5860
5861	/ Check whether we are Setext underline. /
5862	if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT
5863	&& off < ctx->size && ISANYOF2(off, _T(`'='`), _T(`'-'`))
5864	&& (n_parents == ctx->n_containers))
5865	{
5866	unsigned level;
5867
5868	if(md_is_setext_underline(ctx, beg: off, p_end: &off, p_level: &level)) {
5869	line->type = MD_LINE_SETEXTUNDERLINE;
5870	line->data = level;
5871	break;
5872	}
5873	}
5874
5875	/ Check for thematic break line. /
5876	if(line->indent < ctx->code_indent_offset
5877	&& off < ctx->size && off >= hr_killer
5878	&& ISANYOF(off, _T("-_*")))
5879	{
5880	if(md_is_hr_line(ctx, beg: off, p_end: &off, p_killer: &hr_killer)) {
5881	line->type = MD_LINE_HR;
5882	break;
5883	}
5884	}
5885
5886	/ Check for "brother" container. I.e. whether we are another list item*
5887	* in already started list. */
5888	if(n_parents < ctx->n_containers && n_brothers + n_children == `0`) {
5889	OFF tmp;
5890
5891	if(md_is_container_mark(ctx, indent: line->indent, beg: off, p_end: &tmp, p_container: &container) &&
5892	md_is_container_compatible(pivot: &ctx->containers[n_parents], container: &container))
5893	{
5894	pivot_line = &md_dummy_blank_line;
5895
5896	off = tmp;
5897
5898	total_indent += container.contents_indent - container.mark_indent;
5899	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
5900	total_indent += line->indent;
5901	line->beg = off;
5902
5903	/ Some of the following whitespace actually still belongs to the mark. /
5904	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5905	container.contents_indent++;
5906	} else if(line->indent <= ctx->code_indent_offset) {
5907	container.contents_indent += line->indent;
5908	line->indent = `0`;
5909	} else {
5910	container.contents_indent += `1`;
5911	line->indent--;
5912	}
5913
5914	ctx->containers[n_parents].mark_indent = container.mark_indent;
5915	ctx->containers[n_parents].contents_indent = container.contents_indent;
5916
5917	n_brothers++;
5918	continue;
5919	}
5920	}
5921
5922	/ Check for indented code.*
5923	* Note indented code block cannot interrupt a paragraph. */
5924	if(line->indent >= ctx->code_indent_offset &&
5925	(pivot_line->type == MD_LINE_BLANK \|\| pivot_line->type == MD_LINE_INDENTEDCODE))
5926	{
5927	line->type = MD_LINE_INDENTEDCODE;
5928	MD_ASSERT(line->indent >= ctx->code_indent_offset);
5929	line->indent -= ctx->code_indent_offset;
5930	line->data = `0`;
5931	break;
5932	}
5933
5934	/ Check for start of a new container block. /
5935	if(line->indent < ctx->code_indent_offset &&
5936	md_is_container_mark(ctx, indent: line->indent, beg: off, p_end: &off, p_container: &container))
5937	{
5938	if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5939	(off >= ctx->size \|\| ISNEWLINE(off)) && container.ch != _T(`'>'`))
5940	{
5941	/ Noop. List mark followed by a blank line cannot interrupt a paragraph. /
5942	} else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5943	ISANYOF2_(container.ch, _T(`'.'`), _T(`')'`)) && container.start != `1`)
5944	{
5945	/ Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. /
5946	} else {
5947	total_indent += container.contents_indent - container.mark_indent;
5948	line->indent = md_line_indentation(ctx, total_indent, beg: off, p_end: &off);
5949	total_indent += line->indent;
5950
5951	line->beg = off;
5952	line->data = container.ch;
5953
5954	/ Some of the following whitespace actually still belongs to the mark. /
5955	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5956	container.contents_indent++;
5957	} else if(line->indent <= ctx->code_indent_offset) {
5958	container.contents_indent += line->indent;
5959	line->indent = `0`;
5960	} else {
5961	container.contents_indent += `1`;
5962	line->indent--;
5963	}
5964
5965	if(n_brothers + n_children == `0`)
5966	pivot_line = &md_dummy_blank_line;
5967
5968	if(n_children == `0`)
5969	MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5970
5971	n_children++;
5972	MD_CHECK(md_push_container(ctx, &container));
5973	continue;
5974	}
5975	}
5976
5977	/ Check whether we are table continuation. /
5978	if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
5979	line->type = MD_LINE_TABLE;
5980	break;
5981	}
5982
5983	/ Check for ATX header. /
5984	if(line->indent < ctx->code_indent_offset &&
5985	off < ctx->size && CH(off) == _T(`'#'`))
5986	{
5987	unsigned level;
5988
5989	if(md_is_atxheader_line(ctx, beg: off, p_beg: &line->beg, p_end: &off, p_level: &level)) {
5990	line->type = MD_LINE_ATXHEADER;
5991	line->data = level;
5992	break;
5993	}
5994	}
5995
5996	/ Check whether we are starting code fence. /
5997	if(off < ctx->size && ISANYOF2(off, _T('`'), _T(`'~'`))) {
5998	if(md_is_opening_code_fence(ctx, beg: off, p_end: &off)) {
5999	line->type = MD_LINE_FENCEDCODE;
6000	line->data = `1`;
6001	break;
6002	}
6003	}
6004
6005	/ Check for start of raw HTML block. /
6006	if(off < ctx->size && CH(off) == _T(`'<'`)
6007	&& !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
6008	{
6009	ctx->html_block_type = md_is_html_block_start_condition(ctx, beg: off);
6010
6011	/ HTML block type 7 cannot interrupt paragraph. /
6012	if(ctx->html_block_type == `7` && pivot_line->type == MD_LINE_TEXT)
6013	ctx->html_block_type = `0`;
6014
6015	if(ctx->html_block_type > `0`) {
6016	/ The line itself also may immediately close the block. /
6017	if(md_is_html_block_end_condition(ctx, beg: off, p_end: &off) == ctx->html_block_type) {
6018	/ Make sure this is the last line of the block. /
6019	ctx->html_block_type = `0`;
6020	}
6021
6022	line->type = MD_LINE_HTML;
6023	break;
6024	}
6025	}
6026
6027	/ Check for table underline. /
6028	if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT
6029	&& off < ctx->size && ISANYOF3(off, _T(`'\|'`), _T(`'-'`), _T(`':'`))
6030	&& n_parents == ctx->n_containers)
6031	{
6032	unsigned col_count;
6033
6034	if(ctx->current_block != NULL && ctx->current_block->n_lines == `1` &&
6035	md_is_table_underline(ctx, beg: off, p_end: &off, p_col_count: &col_count))
6036	{
6037	line->data = col_count;
6038	line->type = MD_LINE_TABLEUNDERLINE;
6039	break;
6040	}
6041	}
6042
6043	/ By default, we are normal text line. /
6044	line->type = MD_LINE_TEXT;
6045	if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == `0`) {
6046	/ Lazy continuation. /
6047	n_parents = ctx->n_containers;
6048	}
6049
6050	/ Check for task mark. /
6051	if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > `0` &&
6052	ISANYOF_(ctx->containers[ctx->n_containers-`1`].ch, _T("-+*.)")))
6053	{
6054	OFF tmp = off;
6055
6056	while(tmp < ctx->size && tmp < off + `3` && ISBLANK(tmp))
6057	tmp++;
6058	if(tmp + `2` < ctx->size && CH(tmp) == _T(`'['`) &&
6059	ISANYOF(tmp+`1`, _T("xX ")) && CH(tmp+`2`) == _T(`']'`) &&
6060	(tmp + `3` == ctx->size \|\| ISBLANK(tmp+`3`) \|\| ISNEWLINE(tmp+`3`)))
6061	{
6062	MD_CONTAINER* task_container = (n_children > `0` ? &ctx->containers[ctx->n_containers-`1`] : &container);
6063	task_container->is_task = TRUE;
6064	task_container->task_mark_off = tmp + `1`;
6065	off = tmp + `3`;
6066	while(ISWHITESPACE(off))
6067	off++;
6068	line->beg = off;
6069	}
6070	}
6071
6072	break;
6073	}
6074
6075	/ Scan for end of the line.*
6076	*
6077	* Note this is quite a bottleneck of the parsing as we here iterate almost
     * over complete document.
6079	*/
6080	#if defined __linux__ && !defined MD4C_USE_UTF16
6081	/ Recent glibc versions have superbly optimized strcspn(), even using*
6082	* vectorization if available. */
6083	if(ctx->doc_ends_with_newline && off < ctx->size) {
6084	while(TRUE) {
6085	off += (OFF) strcspn(STR(off), reject: "\r\n");
6086
6087	/ strcspn() can stop on zero terminator; but that can appear*
             * anywhere in the Markdown input... */
6089	if(CH(off) == _T(`'\0'`))
6090	off++;
6091	else
6092	break;
6093	}
6094	} else
6095	#endif
6096	{
6097	/ Optimization: Use some loop unrolling. /
6098	while(off + `3` < ctx->size && !ISNEWLINE(off+`0`) && !ISNEWLINE(off+`1`)
6099	&& !ISNEWLINE(off+`2`) && !ISNEWLINE(off+`3`))
6100	off += `4`;
6101	while(off < ctx->size && !ISNEWLINE(off))
6102	off++;
6103	}
6104
6105	/ Set end of the line. /
6106	line->end = off;
6107
6108	/ But for ATX header, we should exclude the optional trailing mark. /
6109	if(line->type == MD_LINE_ATXHEADER) {
6110	OFF tmp = line->end;
6111	while(tmp > line->beg && CH(tmp-`1`) == _T(`' '`))
6112	tmp--;
6113	while(tmp > line->beg && CH(tmp-`1`) == _T(`'#'`))
6114	tmp--;
6115	if(tmp == line->beg \|\| CH(tmp-`1`) == _T(`' '`) \|\| (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
6116	line->end = tmp;
6117	}
6118
6119	/ Trim trailing spaces. /
6120	if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE) {
6121	while(line->end > line->beg && CH(line->end-`1`) == _T(`' '`))
6122	line->end--;
6123	}
6124
6125	/ Eat also the new line. /
6126	if(off < ctx->size && CH(off) == _T(`'\r'`))
6127	off++;
6128	if(off < ctx->size && CH(off) == _T(`'\n'`))
6129	off++;
6130
6131	*p_end = off;
6132
6133	/ If we belong to a list after seeing a blank line, the list is loose. /
6134	if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > `0`) {
6135	MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - `1`];
6136	if(c->ch != _T(`'>'`)) {
6137	MD_BLOCK* block = (MD_BLOCK) (((char**)ctx->block_bytes) + c->block_byte_off);
6138	block->flags \|= MD_BLOCK_LOOSE_LIST;
6139	}
6140	}
6141
6142	/ Leave any containers we are not part of anymore. /
6143	if(n_children == `0` && n_parents + n_brothers < ctx->n_containers)
6144	MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6145
6146	/ Enter any container we found a mark for. /
6147	if(n_brothers > `0`) {
6148	MD_ASSERT(n_brothers == `1`);
6149	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6150	ctx->containers[n_parents].task_mark_off,
6151	(ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : `0`),
6152	MD_BLOCK_CONTAINER_CLOSER));
6153	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6154	container.task_mark_off,
6155	(container.is_task ? CH(container.task_mark_off) : `0`),
6156	MD_BLOCK_CONTAINER_OPENER));
6157	ctx->containers[n_parents].is_task = container.is_task;
6158	ctx->containers[n_parents].task_mark_off = container.task_mark_off;
6159	}
6160
6161	if(n_children > `0`)
6162	MD_CHECK(md_enter_child_containers(ctx, n_children));
6163
6164	abort:
6165	return ret;
6166	}
6167
6168	static int
6169	md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
6170	{
6171	const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
6172	int ret = `0`;
6173
6174	/ Blank line ends current leaf block. /
6175	if(line->type == MD_LINE_BLANK) {
6176	MD_CHECK(md_end_current_block(ctx));
6177	*p_pivot_line = &md_dummy_blank_line;
6178	return `0`;
6179	}
6180
6181	/ Some line types form block on their own. /
6182	if(line->type == MD_LINE_HR \|\| line->type == MD_LINE_ATXHEADER) {
6183	MD_CHECK(md_end_current_block(ctx));
6184
6185	/ Add our single-line block. /
6186	MD_CHECK(md_start_new_block(ctx, line));
6187	MD_CHECK(md_add_line_into_current_block(ctx, line));
6188	MD_CHECK(md_end_current_block(ctx));
6189	*p_pivot_line = &md_dummy_blank_line;
6190	return `0`;
6191	}
6192
6193	/ MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. /
6194	if(line->type == MD_LINE_SETEXTUNDERLINE) {
6195	MD_ASSERT(ctx->current_block != NULL);
6196	ctx->current_block->type = MD_BLOCK_H;
6197	ctx->current_block->data = line->data;
6198	ctx->current_block->flags \|= MD_BLOCK_SETEXT_HEADER;
6199	MD_CHECK(md_add_line_into_current_block(ctx, line));
6200	MD_CHECK(md_end_current_block(ctx));
6201	if(ctx->current_block == NULL) {
6202	*p_pivot_line = &md_dummy_blank_line;
6203	} else {
6204	/ This happens if we have consumed all the body as link ref. defs.*
6205	* and downgraded the underline into start of a new paragraph block. */
6206	line->type = MD_LINE_TEXT;
6207	*p_pivot_line = line;
6208	}
6209	return `0`;
6210	}
6211
6212	/ MD_LINE_TABLEUNDERLINE changes meaning of the current block. /
6213	if(line->type == MD_LINE_TABLEUNDERLINE) {
6214	MD_ASSERT(ctx->current_block != NULL);
6215	MD_ASSERT(ctx->current_block->n_lines == `1`);
6216	ctx->current_block->type = MD_BLOCK_TABLE;
6217	ctx->current_block->data = line->data;
6218	MD_ASSERT(pivot_line != &md_dummy_blank_line);
6219	((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
6220	MD_CHECK(md_add_line_into_current_block(ctx, line));
6221	return `0`;
6222	}
6223
6224	/ The current block also ends if the line has different type. /
6225	if(line->type != pivot_line->type)
6226	MD_CHECK(md_end_current_block(ctx));
6227
6228	/ The current line may start a new block. /
6229	if(ctx->current_block == NULL) {
6230	MD_CHECK(md_start_new_block(ctx, line));
6231	*p_pivot_line = line;
6232	}
6233
6234	/ In all other cases the line is just a continuation of the current block. /
6235	MD_CHECK(md_add_line_into_current_block(ctx, line));
6236
6237	abort:
6238	return ret;
6239	}
6240
6241	static int
6242	md_process_doc(MD_CTX *ctx)
6243	{
6244	const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
6245	MD_LINE_ANALYSIS line_buf[`2`];
6246	MD_LINE_ANALYSIS* line = &line_buf[`0`];
6247	OFF off = `0`;
6248	int ret = `0`;
6249
6250	MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
6251
6252	while(off < ctx->size) {
6253	if(line == pivot_line)
6254	line = (line == &line_buf[`0`] ? &line_buf[`1`] : &line_buf[`0`]);
6255
6256	MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
6257	MD_CHECK(md_process_line(ctx, &pivot_line, line));
6258	}
6259
6260	md_end_current_block(ctx);
6261
6262	MD_CHECK(md_build_ref_def_hashtable(ctx));
6263
6264	/ Process all blocks. /
6265	MD_CHECK(md_leave_child_containers(ctx, `0`));
6266	MD_CHECK(md_process_all_blocks(ctx));
6267
6268	MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
6269
6270	abort:
6271
6272	#if 0
6273	/ Output some memory consumption statistics. /
6274	{
6275	char buffer[`256`];
6276	sprintf(buffer, "Alloced %u bytes for block buffer.",
6277	(unsigned)(ctx->alloc_block_bytes));
6278	MD_LOG(buffer);
6279
6280	sprintf(buffer, "Alloced %u bytes for containers buffer.",
6281	(unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
6282	MD_LOG(buffer);
6283
6284	sprintf(buffer, "Alloced %u bytes for marks buffer.",
6285	(unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
6286	MD_LOG(buffer);
6287
6288	sprintf(buffer, "Alloced %u bytes for aux. buffer.",
6289	(unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
6290	MD_LOG(buffer);
6291	}
6292	#endif
6293
6294	return ret;
6295	}
6296
6297
6298	/********************
6299	* Public API *
6300	********************/
6301
6302	int
6303	md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
6304	{
6305	MD_CTX ctx;
6306	int i;
6307	int ret;
6308
6309	if(parser->abi_version != `0`) {
6310	if(parser->debug_log != NULL)
6311	parser->debug_log("Unsupported abi_version.", userdata);
6312	return -`1`;
6313	}
6314
6315	/ Setup context structure. /
6316	memset(s: &ctx, c: `0`, n: sizeof(MD_CTX));
6317	ctx.text = text;
6318	ctx.size = size;
6319	memcpy(dest: &ctx.parser, src: parser, n: sizeof(MD_PARSER));
6320	ctx.userdata = userdata;
6321	ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-`1`) : `4`;
6322	md_build_mark_char_map(ctx: &ctx);
6323	ctx.doc_ends_with_newline = (size > `0` && ISNEWLINE_(text[size-`1`]));
6324
6325	/ Reset all unresolved opener mark chains. /
6326	for(i = `0`; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
6327	ctx.mark_chains[i].head = -`1`;
6328	ctx.mark_chains[i].tail = -`1`;
6329	}
6330	ctx.unresolved_link_head = -`1`;
6331	ctx.unresolved_link_tail = -`1`;
6332
6333	/ All the work. /
6334	ret = md_process_doc(ctx: &ctx);
6335
6336	/ Clean-up. /
6337	md_free_ref_defs(ctx: &ctx);
6338	md_free_ref_def_hashtable(ctx: &ctx);
6339	free(ptr: ctx.buffer);
6340	free(ptr: ctx.marks);
6341	free(ptr: ctx.block_bytes);
6342	free(ptr: ctx.containers);
6343
6344	return ret;
6345	}
6346

source code of qtbase/src/3rdparty/md4c/md4c.c