1cmark(3) Library Functions Manual cmark(3)
2
3
4
6 cmark - CommonMark parsing, manipulating, and rendering
7
8
10 Simple Interface
11 char * cmark_markdown_to_html(const char *text, size_t len, int op‐
12 tions)
13
14
15 Convert text (assumed to be a UTF-8 encoded string with length len)
16 from CommonMark Markdown to HTML, returning a null-terminated,
17 UTF-8-encoded string. It is the caller's responsibility to free the re‐
18 turned buffer.
19
20
21 Node Structure
22 typedef enum {
23 /* Error status */
24 CMARK_NODE_NONE,
25
26 /* Block */
27 CMARK_NODE_DOCUMENT,
28 CMARK_NODE_BLOCK_QUOTE,
29 CMARK_NODE_LIST,
30 CMARK_NODE_ITEM,
31 CMARK_NODE_CODE_BLOCK,
32 CMARK_NODE_HTML_BLOCK,
33 CMARK_NODE_CUSTOM_BLOCK,
34 CMARK_NODE_PARAGRAPH,
35 CMARK_NODE_HEADING,
36 CMARK_NODE_THEMATIC_BREAK,
37
38 CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
39 CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,
40
41 /* Inline */
42 CMARK_NODE_TEXT,
43 CMARK_NODE_SOFTBREAK,
44 CMARK_NODE_LINEBREAK,
45 CMARK_NODE_CODE,
46 CMARK_NODE_HTML_INLINE,
47 CMARK_NODE_CUSTOM_INLINE,
48 CMARK_NODE_EMPH,
49 CMARK_NODE_STRONG,
50 CMARK_NODE_LINK,
51 CMARK_NODE_IMAGE,
52
53 CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
54 CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE
55 } cmark_node_type;
56
57
58
59
60 typedef enum {
61 CMARK_NO_LIST,
62 CMARK_BULLET_LIST,
63 CMARK_ORDERED_LIST
64 } cmark_list_type;
65
66
67
68
69 typedef enum {
70 CMARK_NO_DELIM,
71 CMARK_PERIOD_DELIM,
72 CMARK_PAREN_DELIM
73 } cmark_delim_type;
74
75
76
77
78 Custom memory allocator support
79 typedef struct cmark_mem {
80 void *(*calloc)(size_t, size_t);
81 void *(*realloc)(void *, size_t);
82 void (*free)(void *);
83 } cmark_mem;
84
85
86 Defines the memory allocation functions to be used by CMark when pars‐
87 ing and allocating a document tree.
88
89
90 cmark_mem * cmark_get_default_mem_allocator()
91
92
93 Returns a pointer to the default memory allocator.
94
95
96 Creating and Destroying Nodes
97 cmark_node * cmark_node_new(cmark_node_type type)
98
99
100 Creates a new node of type type. Note that the node may have other re‐
101 quired properties, which it is the caller's responsibility to assign.
102
103
104 cmark_node * cmark_node_new_with_mem(cmark_node_type type, cmark_mem
105 *mem)
106
107
108 Same as cmark_node_new, but explicitly listing the memory allocator
109 used to allocate the node. Note: be sure to use the same allocator for
110 every node in a tree, or bad things can happen.
111
112
113 void cmark_node_free(cmark_node *node)
114
115
116 Frees the memory allocated for a node and any children.
117
118
119 Tree Traversal
120 cmark_node * cmark_node_next(cmark_node *node)
121
122
123 Returns the next node in the sequence after node, or NULL if there is
124 none.
125
126
127 cmark_node * cmark_node_previous(cmark_node *node)
128
129
130 Returns the previous node in the sequence before node, or NULL if there
131 is none.
132
133
134 cmark_node * cmark_node_parent(cmark_node *node)
135
136
137 Returns the parent of node, or NULL if there is none.
138
139
140 cmark_node * cmark_node_first_child(cmark_node *node)
141
142
143 Returns the first child of node, or NULL if node has no children.
144
145
146 cmark_node * cmark_node_last_child(cmark_node *node)
147
148
149 Returns the last child of node, or NULL if node has no children.
150
151
152 Iterator
153 An iterator will walk through a tree of nodes, starting from a root
154 node, returning one node at a time, together with information about
155 whether the node is being entered or exited. The iterator will first
156 descend to a child node, if there is one. When there is no child, the
157 iterator will go to the next sibling. When there is no next sibling,
158 the iterator will return to the parent (but with a cmark_event_type of
159 CMARK_EVENT_EXIT). The iterator will return CMARK_EVENT_DONE when it
160 reaches the root node again. One natural application is an HTML ren‐
161 derer, where an ENTER event outputs an open tag and an EXIT event out‐
162 puts a close tag. An iterator might also be used to transform an AST in
163 some systematic way, for example, turning all level-3 headings into
164 regular paragraphs.
165
166 void
167 usage_example(cmark_node *root) {
168 cmark_event_type ev_type;
169 cmark_iter *iter = cmark_iter_new(root);
170
171 while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
172 cmark_node *cur = cmark_iter_get_node(iter);
173 // Do something with `cur` and `ev_type`
174 }
175
176 cmark_iter_free(iter);
177 }
178
179 Iterators will never return EXIT events for leaf nodes, which are nodes
180 of type:
181
182 • CMARK_NODE_HTML_BLOCK
183
184 • CMARK_NODE_THEMATIC_BREAK
185
186 • CMARK_NODE_CODE_BLOCK
187
188 • CMARK_NODE_TEXT
189
190 • CMARK_NODE_SOFTBREAK
191
192 • CMARK_NODE_LINEBREAK
193
194 • CMARK_NODE_CODE
195
196 • CMARK_NODE_HTML_INLINE
197
198 Nodes must only be modified after an EXIT event, or an ENTER event for
199 leaf nodes.
200
201
202 typedef enum {
203 CMARK_EVENT_NONE,
204 CMARK_EVENT_DONE,
205 CMARK_EVENT_ENTER,
206 CMARK_EVENT_EXIT
207 } cmark_event_type;
208
209
210
211
212 cmark_iter * cmark_iter_new(cmark_node *root)
213
214
215 Creates a new iterator starting at root. The current node and event
216 type are undefined until cmark_iter_next is called for the first time.
217 The memory allocated for the iterator should be released using
218 cmark_iter_free when it is no longer needed.
219
220
221 void cmark_iter_free(cmark_iter *iter)
222
223
224 Frees the memory allocated for an iterator.
225
226
227 cmark_event_type cmark_iter_next(cmark_iter *iter)
228
229
230 Advances to the next node and returns the event type (CMARK_EVENT_EN‐
231 TER, CMARK_EVENT_EXIT or CMARK_EVENT_DONE).
232
233
234 cmark_node * cmark_iter_get_node(cmark_iter *iter)
235
236
237 Returns the current node.
238
239
240 cmark_event_type cmark_iter_get_event_type(cmark_iter *iter)
241
242
243 Returns the current event type.
244
245
246 cmark_node * cmark_iter_get_root(cmark_iter *iter)
247
248
249 Returns the root node.
250
251
252 void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
253 cmark_event_type event_type)
254
255
256 Resets the iterator so that the current node is current and the event
257 type is event_type. The new current node must be a descendant of the
258 root node or the root node itself.
259
260
261 Accessors
262 void * cmark_node_get_user_data(cmark_node *node)
263
264
265 Returns the user data of node.
266
267
268 int cmark_node_set_user_data(cmark_node *node, void *user_data)
269
270
271 Sets arbitrary user data for node. Returns 1 on success, 0 on failure.
272
273
274 cmark_node_type cmark_node_get_type(cmark_node *node)
275
276
277 Returns the type of node, or CMARK_NODE_NONE on error.
278
279
280 const char * cmark_node_get_type_string(cmark_node *node)
281
282
283 Like cmark_node_get_type, but returns a string representation of the
284 type, or "<unknown>".
285
286
287 const char * cmark_node_get_literal(cmark_node *node)
288
289
290 Returns the string contents of node, or an empty string if none is set.
291 Returns NULL if called on a node that does not have string content.
292
293
294 int cmark_node_set_literal(cmark_node *node, const char *content)
295
296
297 Sets the string contents of node. Returns 1 on success, 0 on failure.
298
299
300 int cmark_node_get_heading_level(cmark_node *node)
301
302
303 Returns the heading level of node, or 0 if node is not a heading.
304
305
306 int cmark_node_set_heading_level(cmark_node *node, int level)
307
308
309 Sets the heading level of node, returning 1 on success and 0 on error.
310
311
312 cmark_list_type cmark_node_get_list_type(cmark_node *node)
313
314
315 Returns the list type of node, or CMARK_NO_LIST if node is not a list.
316
317
318 int cmark_node_set_list_type(cmark_node *node, cmark_list_type type)
319
320
321 Sets the list type of node, returning 1 on success and 0 on error.
322
323
324 cmark_delim_type cmark_node_get_list_delim(cmark_node *node)
325
326
327 Returns the list delimiter type of node, or CMARK_NO_DELIM if node is
328 not a list.
329
330
331 int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim)
332
333
334 Sets the list delimiter type of node, returning 1 on success and 0 on
335 error.
336
337
338 int cmark_node_get_list_start(cmark_node *node)
339
340
341 Returns the starting number of node, if it is an ordered list, otherwise 0.
342
343
344 int cmark_node_set_list_start(cmark_node *node, int start)
345
346
347 Sets the starting number of node, if it is an ordered list. Returns 1 on
348 success, 0 on failure.
349
350
351 int cmark_node_get_list_tight(cmark_node *node)
352
353
354 Returns 1 if node is a tight list, 0 otherwise.
355
356
357 int cmark_node_set_list_tight(cmark_node *node, int tight)
358
359
360 Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
361
362
363 const char * cmark_node_get_fence_info(cmark_node *node)
364
365
366 Returns the info string from a fenced code block.
367
368
369 int cmark_node_set_fence_info(cmark_node *node, const char *info)
370
371
372 Sets the info string in a fenced code block, returning 1 on success and
373 0 on failure.
374
375
376 const char * cmark_node_get_url(cmark_node *node)
377
378
379 Returns the URL of a link or image node, or an empty string if no URL
380 is set. Returns NULL if called on a node that is not a link or image.
381
382
383 int cmark_node_set_url(cmark_node *node, const char *url)
384
385
386 Sets the URL of a link or image node. Returns 1 on success, 0 on fail‐
387 ure.
388
389
390 const char * cmark_node_get_title(cmark_node *node)
391
392
393 Returns the title of a link or image node, or an empty string if no ti‐
394 tle is set. Returns NULL if called on a node that is not a link or im‐
395 age.
396
397
398 int cmark_node_set_title(cmark_node *node, const char *title)
399
400
401 Sets the title of a link or image node. Returns 1 on success, 0 on
402 failure.
403
404
405 const char * cmark_node_get_on_enter(cmark_node *node)
406
407
408 Returns the literal "on enter" text for a custom node, or an empty
409 string if no on_enter is set. Returns NULL if called on a non-custom
410 node.
411
412
413 int cmark_node_set_on_enter(cmark_node *node, const char *on_enter)
414
415
416 Sets the literal text to render "on enter" for a custom node. Any
417 children of the node will be rendered after this text. Returns 1 on
418 success, 0 on failure.
419
420
421 const char * cmark_node_get_on_exit(cmark_node *node)
422
423
424 Returns the literal "on exit" text for a custom node, or an empty
425 string if no on_exit is set. Returns NULL if called on a non-custom
426 node.
427
428
429 int cmark_node_set_on_exit(cmark_node *node, const char *on_exit)
430
431
432 Sets the literal text to render "on exit" for a custom node. Any chil‐
433 dren of the node will be rendered before this text. Returns 1 on suc‐
434 cess, 0 on failure.
435
436
437 int cmark_node_get_start_line(cmark_node *node)
438
439
440 Returns the line on which node begins.
441
442
443 int cmark_node_get_start_column(cmark_node *node)
444
445
446 Returns the column at which node begins.
447
448
449 int cmark_node_get_end_line(cmark_node *node)
450
451
452 Returns the line on which node ends.
453
454
455 int cmark_node_get_end_column(cmark_node *node)
456
457
458 Returns the column at which node ends.
459
460
461 Tree Manipulation
462 void cmark_node_unlink(cmark_node *node)
463
464
465 Unlinks a node, removing it from the tree, but not freeing its memory.
466 (Use cmark_node_free for that.)
467
468
469 int cmark_node_insert_before(cmark_node *node, cmark_node *sibling)
470
471
472 Inserts sibling before node. Returns 1 on success, 0 on failure.
473
474
475 int cmark_node_insert_after(cmark_node *node, cmark_node *sibling)
476
477
478 Inserts sibling after node. Returns 1 on success, 0 on failure.
479
480
481 int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode)
482
483
484 Replaces oldnode with newnode and unlinks oldnode (but does not free
485 its memory). Returns 1 on success, 0 on failure.
486
487
488 int cmark_node_prepend_child(cmark_node *node, cmark_node *child)
489
490
491 Adds child to the beginning of the children of node. Returns 1 on suc‐
492 cess, 0 on failure.
493
494
495 int cmark_node_append_child(cmark_node *node, cmark_node *child)
496
497
498 Adds child to the end of the children of node. Returns 1 on success, 0
499 on failure.
500
501
502 void cmark_consolidate_text_nodes(cmark_node *root)
503
504
505 Consolidates adjacent text nodes.
506
507
508 Parsing
509 Simple interface:
510
511 cmark_node *document = cmark_parse_document("Hello *world*", 13,
512 CMARK_OPT_DEFAULT);
513
514 Streaming interface:
515
516 cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
517 FILE *fp = fopen("myfile.md", "rb");
518 while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
519 cmark_parser_feed(parser, buffer, bytes);
520 if (bytes < sizeof(buffer)) {
521 break;
522 }
523 }
524 document = cmark_parser_finish(parser);
525 cmark_parser_free(parser);
526
527
528 cmark_parser * cmark_parser_new(int options)
529
530
531 Creates a new parser object.
532
533
534 cmark_parser * cmark_parser_new_with_mem(int options, cmark_mem *mem)
535
536
537 Creates a new parser object with the given memory allocator.
538
539
540 void cmark_parser_free(cmark_parser *parser)
541
542
543 Frees memory allocated for a parser object.
544
545
546 void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t
547 len)
548
549
550 Feeds a string of length len to parser.
551
552
553 cmark_node * cmark_parser_finish(cmark_parser *parser)
554
555
556 Finish parsing and return a pointer to a tree of nodes.
557
558
559 cmark_node * cmark_parse_document(const char *buffer, size_t len, int
560 options)
561
562
563 Parse a CommonMark document in buffer of length len. Returns a pointer
564 to a tree of nodes. The memory allocated for the node tree should be
565 released using cmark_node_free when it is no longer needed.
566
567
568 cmark_node * cmark_parse_file(FILE *f, int options)
569
570
571 Parse a CommonMark document in file f, returning a pointer to a tree of
572 nodes. The memory allocated for the node tree should be released using
573 cmark_node_free when it is no longer needed.
574
575
576 Rendering
577 char * cmark_render_xml(cmark_node *root, int options)
578
579
580 Render a node tree as XML. It is the caller's responsibility to free
581 the returned buffer.
582
583
584 char * cmark_render_html(cmark_node *root, int options)
585
586
587 Render a node tree as an HTML fragment. It is up to the user to add an
588 appropriate header and footer. It is the caller's responsibility to
589 free the returned buffer.
590
591
592 char * cmark_render_man(cmark_node *root, int options, int width)
593
594
595 Render a node tree as a groff man page, without the header. It is the
596 caller's responsibility to free the returned buffer.
597
598
599 char * cmark_render_commonmark(cmark_node *root, int options, int
600 width)
601
602
603 Render a node tree as a CommonMark document. It is the caller's respon‐
604 sibility to free the returned buffer.
605
606
607 char * cmark_render_latex(cmark_node *root, int options, int width)
608
609
610 Render a node tree as a LaTeX document. It is the caller's responsibil‐
611 ity to free the returned buffer.
612
613
614 Options
615 #define CMARK_OPT_DEFAULT 0
616
617
618 Default options.
619
620
621 Options affecting rendering
622 #define CMARK_OPT_SOURCEPOS (1 << 1)
623
624
625 Include a data-sourcepos attribute on all block elements.
626
627
628 #define CMARK_OPT_HARDBREAKS (1 << 2)
629
630
631 Render softbreak elements as hard line breaks.
632
633
634 #define CMARK_OPT_SAFE (1 << 3)
635
636
637 CMARK_OPT_SAFE is defined here for API compatibility, but it no longer
638 has any effect. "Safe" mode is now the default: set CMARK_OPT_UNSAFE to
639 disable it.
640
641
642 #define CMARK_OPT_UNSAFE (1 << 17)
643
644
645 Render raw HTML and unsafe links (javascript:, vbscript:, file:, and
646 data:, except for image/png, image/gif, image/jpeg, or image/webp mime
647 types). By default, raw HTML is replaced by a placeholder HTML comment.
648 Unsafe links are replaced by empty strings.
649
650
651 #define CMARK_OPT_NOBREAKS (1 << 4)
652
653
654 Render softbreak elements as spaces.
655
656
657 Options affecting parsing
658 #define CMARK_OPT_NORMALIZE (1 << 8)
659
660
661 Legacy option (no effect).
662
663
664 #define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
665
666
667 Validate UTF-8 in the input before parsing, replacing illegal sequences
668 with the replacement character U+FFFD.
669
670
671 #define CMARK_OPT_SMART (1 << 10)
672
673
674 Convert straight quotes to curly, --- to em dashes, -- to en dashes.
675
676
677 Version information
678 int cmark_version(void)
679
680
681 The library version as an integer for runtime checks. Also available as
682 the macro CMARK_VERSION for compile-time checks.
683
684 • Bits 16-23 contain the major version.
685
686 • Bits 8-15 contain the minor version.
687
688 • Bits 0-7 contain the patchlevel.
689
690 In hexadecimal format, the number 0x010203 represents version 1.2.3.
691
692
693 const char * cmark_version_string(void)
694
695
696 The library version string for runtime checks. Also available as the macro
697 CMARK_VERSION_STRING for compile-time checks.
698
699
701 John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
702
703
704
705
706cmark 0.30.2 September 24, 2021 cmark(3)