1cmark(3) Library Functions Manual cmark(3)
2
3
4
6 cmark - CommonMark parsing, manipulating, and rendering
7
8
10 Simple Interface
11 char * cmark_markdown_to_html(const char *text, size_t len, int
12 options)
13
14
15 Convert text (assumed to be a UTF-8 encoded string with length len)
16 from CommonMark Markdown to HTML, returning a null-terminated,
17 UTF-8-encoded string. It is the caller's responsibility to free the
18 returned buffer.
19
20
21 Node Structure
22 typedef enum {
23 /* Error status */
24 CMARK_NODE_NONE,
25
26 /* Block */
27 CMARK_NODE_DOCUMENT,
28 CMARK_NODE_BLOCK_QUOTE,
29 CMARK_NODE_LIST,
30 CMARK_NODE_ITEM,
31 CMARK_NODE_CODE_BLOCK,
32 CMARK_NODE_HTML_BLOCK,
33 CMARK_NODE_CUSTOM_BLOCK,
34 CMARK_NODE_PARAGRAPH,
35 CMARK_NODE_HEADING,
36 CMARK_NODE_THEMATIC_BREAK,
37
38 CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
39 CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,
40
41 /* Inline */
42 CMARK_NODE_TEXT,
43 CMARK_NODE_SOFTBREAK,
44 CMARK_NODE_LINEBREAK,
45 CMARK_NODE_CODE,
46 CMARK_NODE_HTML_INLINE,
47 CMARK_NODE_CUSTOM_INLINE,
48 CMARK_NODE_EMPH,
49 CMARK_NODE_STRONG,
50 CMARK_NODE_LINK,
51 CMARK_NODE_IMAGE,
52
53 CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
54 CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
55 } cmark_node_type;
56
57
58
59
60 typedef enum {
61 CMARK_NO_LIST,
62 CMARK_BULLET_LIST,
63 CMARK_ORDERED_LIST
64 } cmark_list_type;
65
66
67
68
69 typedef enum {
70 CMARK_NO_DELIM,
71 CMARK_PERIOD_DELIM,
72 CMARK_PAREN_DELIM
73 } cmark_delim_type;
74
75
76
77
78 Custom memory allocator support
79 typedef struct cmark_mem {
80 void *(*calloc)(size_t, size_t);
81 void *(*realloc)(void *, size_t);
82 void (*free)(void *);
83 } cmark_mem;
84
85
86 Defines the memory allocation functions to be used by CMark when pars‐
87 ing and allocating a document tree.
88
89
90 Creating and Destroying Nodes
91 cmark_node * cmark_node_new(cmark_node_type type)
92
93
94 Creates a new node of type type. Note that the node may have other
95 required properties, which it is the caller's responsibility to assign.
96
97
98 cmark_node * cmark_node_new_with_mem(cmark_node_type type, cmark_mem
99 *mem)
100
101
102 Same as cmark_node_new, but explicitly listing the memory allocator
103 used to allocate the node. Note: be sure to use the same allocator for
104 every node in a tree, or bad things can happen.
105
106
107 void cmark_node_free(cmark_node *node)
108
109
110 Frees the memory allocated for a node and any children.
111
112
113 Tree Traversal
114 cmark_node * cmark_node_next(cmark_node *node)
115
116
117 Returns the next node in the sequence after node, or NULL if there is
118 none.
119
120
121 cmark_node * cmark_node_previous(cmark_node *node)
122
123
124 Returns the previous node in the sequence before node, or NULL if there
125 is none.
126
127
128 cmark_node * cmark_node_parent(cmark_node *node)
129
130
131 Returns the parent of node, or NULL if there is none.
132
133
134 cmark_node * cmark_node_first_child(cmark_node *node)
135
136
137 Returns the first child of node, or NULL if node has no children.
138
139
140 cmark_node * cmark_node_last_child(cmark_node *node)
141
142
143 Returns the last child of node, or NULL if node has no children.
144
145
146 Iterator
147 An iterator will walk through a tree of nodes, starting from a root
148 node, returning one node at a time, together with information about
149 whether the node is being entered or exited. The iterator will first
150 descend to a child node, if there is one. When there is no child, the
151 iterator will go to the next sibling. When there is no next sibling,
152 the iterator will return to the parent (but with a cmark_event_type of
153 CMARK_EVENT_EXIT). The iterator will return CMARK_EVENT_DONE when it
154 reaches the root node again. One natural application is an HTML ren‐
155 derer, where an ENTER event outputs an open tag and an EXIT event out‐
156 puts a close tag. An iterator might also be used to transform an AST in
157 some systematic way, for example, turning all level-3 headings into
158 regular paragraphs.
159
160 void
161 usage_example(cmark_node *root) {
162 cmark_event_type ev_type;
163 cmark_iter *iter = cmark_iter_new(root);
164
165 while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
166 cmark_node *cur = cmark_iter_get_node(iter);
167 // Do something with `cur` and `ev_type`
168 }
169
170 cmark_iter_free(iter);
171 }
172
173 Iterators will never return EXIT events for leaf nodes, which are nodes
174 of type:
175
176 · CMARK_NODE_HTML_BLOCK
177
178 · CMARK_NODE_THEMATIC_BREAK
179
180 · CMARK_NODE_CODE_BLOCK
181
182 · CMARK_NODE_TEXT
183
184 · CMARK_NODE_SOFTBREAK
185
186 · CMARK_NODE_LINEBREAK
187
188 · CMARK_NODE_CODE
189
190 · CMARK_NODE_HTML_INLINE
191
192 Nodes must only be modified after an EXIT event, or an ENTER event for
193 leaf nodes.
194
195
196 typedef enum {
197 CMARK_EVENT_NONE,
198 CMARK_EVENT_DONE,
199 CMARK_EVENT_ENTER,
200 CMARK_EVENT_EXIT
201 } cmark_event_type;
202
203
204
205
206 cmark_iter * cmark_iter_new(cmark_node *root)
207
208
209 Creates a new iterator starting at root. The current node and event
210 type are undefined until cmark_iter_next is called for the first time.
211 The memory allocated for the iterator should be released using
212 cmark_iter_free when it is no longer needed.
213
214
215 void cmark_iter_free(cmark_iter *iter)
216
217
218 Frees the memory allocated for an iterator.
219
220
221 cmark_event_type cmark_iter_next(cmark_iter *iter)
222
223
224 Advances to the next node and returns the event type
225 (CMARK_EVENT_ENTER, CMARK_EVENT_EXIT or CMARK_EVENT_DONE).
226
227
228 cmark_node * cmark_iter_get_node(cmark_iter *iter)
229
230
231 Returns the current node.
232
233
234 cmark_event_type cmark_iter_get_event_type(cmark_iter *iter)
235
236
237 Returns the current event type.
238
239
240 cmark_node * cmark_iter_get_root(cmark_iter *iter)
241
242
243 Returns the root node.
244
245
246 void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
247 cmark_event_type event_type)
248
249
250 Resets the iterator so that the current node is current and the event
251 type is event_type. The new current node must be a descendant of the
252 root node or the root node itself.
253
254
255 Accessors
256 void * cmark_node_get_user_data(cmark_node *node)
257
258
259 Returns the user data of node.
260
261
262 int cmark_node_set_user_data(cmark_node *node, void *user_data)
263
264
265 Sets arbitrary user data for node. Returns 1 on success, 0 on failure.
266
267
268 cmark_node_type cmark_node_get_type(cmark_node *node)
269
270
271 Returns the type of node, or CMARK_NODE_NONE on error.
272
273
274 const char * cmark_node_get_type_string(cmark_node *node)
275
276
277 Like cmark_node_get_type, but returns a string representation of the
278 type, or "<unknown>".
279
280
281 const char * cmark_node_get_literal(cmark_node *node)
282
283
284 Returns the string contents of node, or an empty string if none is set.
285 Returns NULL if called on a node that does not have string content.
286
287
288 int cmark_node_set_literal(cmark_node *node, const char *content)
289
290
291 Sets the string contents of node. Returns 1 on success, 0 on failure.
292
293
294 int cmark_node_get_heading_level(cmark_node *node)
295
296
297 Returns the heading level of node, or 0 if node is not a heading.
298
299
300 int cmark_node_set_heading_level(cmark_node *node, int level)
301
302
303 Sets the heading level of node, returning 1 on success and 0 on error.
304
305
306 cmark_list_type cmark_node_get_list_type(cmark_node *node)
307
308
309 Returns the list type of node, or CMARK_NO_LIST if node is not a list.
310
311
312 int cmark_node_set_list_type(cmark_node *node, cmark_list_type type)
313
314
315 Sets the list type of node, returning 1 on success and 0 on error.
316
317
318 cmark_delim_type cmark_node_get_list_delim(cmark_node *node)
319
320
321 Returns the list delimiter type of node, or CMARK_NO_DELIM if node is
322 not a list.
323
324
325 int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim)
326
327
328 Sets the list delimiter type of node, returning 1 on success and 0 on
329 error.
330
331
332 int cmark_node_get_list_start(cmark_node *node)
333
334
335 Returns starting number of node, if it is an ordered list, otherwise 0.
336
337
338 int cmark_node_set_list_start(cmark_node *node, int start)
339
340
341 Sets starting number of node, if it is an ordered list. Returns 1 on
342 success, 0 on failure.
343
344
345 int cmark_node_get_list_tight(cmark_node *node)
346
347
348 Returns 1 if node is a tight list, 0 otherwise.
349
350
351 int cmark_node_set_list_tight(cmark_node *node, int tight)
352
353
354 Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
355
356
357 const char * cmark_node_get_fence_info(cmark_node *node)
358
359
360 Returns the info string from a fenced code block.
361
362
363 int cmark_node_set_fence_info(cmark_node *node, const char *info)
364
365
366 Sets the info string in a fenced code block, returning 1 on success and
367 0 on failure.
368
369
370 const char * cmark_node_get_url(cmark_node *node)
371
372
373 Returns the URL of a link or image node, or an empty string if no URL
374 is set. Returns NULL if called on a node that is not a link or image.
375
376
377 int cmark_node_set_url(cmark_node *node, const char *url)
378
379
380 Sets the URL of a link or image node. Returns 1 on success, 0 on fail‐
381 ure.
382
383
384 const char * cmark_node_get_title(cmark_node *node)
385
386
387 Returns the title of a link or image node, or an empty string if no
388 title is set. Returns NULL if called on a node that is not a link or
389 image.
390
391
392 int cmark_node_set_title(cmark_node *node, const char *title)
393
394
395 Sets the title of a link or image node. Returns 1 on success, 0 on
396 failure.
397
398
399 const char * cmark_node_get_on_enter(cmark_node *node)
400
401
402 Returns the literal "on enter" text for a custom node, or an empty
403 string if no on_enter is set. Returns NULL if called on a non-custom
404 node.
405
406
407 int cmark_node_set_on_enter(cmark_node *node, const char *on_enter)
408
409
410 Sets the literal text to render "on enter" for a custom node. Any
411 children of the node will be rendered after this text. Returns 1 on
412 success, 0 on failure.
413
414
415 const char * cmark_node_get_on_exit(cmark_node *node)
416
417
418 Returns the literal "on exit" text for a custom node, or an empty
419 string if no on_exit is set. Returns NULL if called on a non-custom
420 node.
421
422
423 int cmark_node_set_on_exit(cmark_node *node, const char *on_exit)
424
425
426 Sets the literal text to render "on exit" for a custom node. Any chil‐
427 dren of the node will be rendered before this text. Returns 1 on suc‐
428 cess, 0 on failure.
429
430
431 int cmark_node_get_start_line(cmark_node *node)
432
433
434 Returns the line on which node begins.
435
436
437 int cmark_node_get_start_column(cmark_node *node)
438
439
440 Returns the column at which node begins.
441
442
443 int cmark_node_get_end_line(cmark_node *node)
444
445
446 Returns the line on which node ends.
447
448
449 int cmark_node_get_end_column(cmark_node *node)
450
451
452 Returns the column at which node ends.
453
454
455 Tree Manipulation
456 void cmark_node_unlink(cmark_node *node)
457
458
459 Unlinks a node, removing it from the tree, but not freeing its memory.
460 (Use cmark_node_free for that.)
461
462
463 int cmark_node_insert_before(cmark_node *node, cmark_node *sibling)
464
465
466 Inserts sibling before node. Returns 1 on success, 0 on failure.
467
468
469 int cmark_node_insert_after(cmark_node *node, cmark_node *sibling)
470
471
472 Inserts sibling after node. Returns 1 on success, 0 on failure.
473
474
475 int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode)
476
477
478 Replaces oldnode with newnode and unlinks oldnode (but does not free
479 its memory). Returns 1 on success, 0 on failure.
480
481
482 int cmark_node_prepend_child(cmark_node *node, cmark_node *child)
483
484
485 Adds child to the beginning of the children of node. Returns 1 on suc‐
486 cess, 0 on failure.
487
488
489 int cmark_node_append_child(cmark_node *node, cmark_node *child)
490
491
492 Adds child to the end of the children of node. Returns 1 on success, 0
493 on failure.
494
495
496 void cmark_consolidate_text_nodes(cmark_node *root)
497
498
499 Consolidates adjacent text nodes.
500
501
502 Parsing
503 Simple interface:
504
505 cmark_node *document = cmark_parse_document("Hello *world*", 13,
506 CMARK_OPT_DEFAULT);
507
508 Streaming interface:
509
510 cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
511 FILE *fp = fopen("myfile.md", "rb");
512 while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
513 cmark_parser_feed(parser, buffer, bytes);
514 if (bytes < sizeof(buffer)) {
515 break;
516 }
517 }
518 document = cmark_parser_finish(parser);
519 cmark_parser_free(parser);
520
521
522 cmark_parser * cmark_parser_new(int options)
523
524
525 Creates a new parser object.
526
527
528 cmark_parser * cmark_parser_new_with_mem(int options, cmark_mem *mem)
529
530
531 Creates a new parser object with the given memory allocator.
532
533
534 void cmark_parser_free(cmark_parser *parser)
535
536
537 Frees memory allocated for a parser object.
538
539
540 void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t
541 len)
542
543
544 Feeds a string of length len to parser.
545
546
547 cmark_node * cmark_parser_finish(cmark_parser *parser)
548
549
550 Finish parsing and return a pointer to a tree of nodes.
551
552
553 cmark_node * cmark_parse_document(const char *buffer, size_t len, int
554 options)
555
556
557 Parse a CommonMark document in buffer of length len. Returns a pointer
558 to a tree of nodes. The memory allocated for the node tree should be
559 released using cmark_node_free when it is no longer needed.
560
561
562 cmark_node * cmark_parse_file(FILE *f, int options)
563
564
565 Parse a CommonMark document in file f, returning a pointer to a tree of
566 nodes. The memory allocated for the node tree should be released using
567 cmark_node_free when it is no longer needed.
568
569
570 Rendering
571 char * cmark_render_xml(cmark_node *root, int options)
572
573
574 Render a node tree as XML. It is the caller's responsibility to free
575 the returned buffer.
576
577
578 char * cmark_render_html(cmark_node *root, int options)
579
580
581 Render a node tree as an HTML fragment. It is up to the user to add an
582 appropriate header and footer. It is the caller's responsibility to
583 free the returned buffer.
584
585
586 char * cmark_render_man(cmark_node *root, int options, int width)
587
588
589 Render a node tree as a groff man page, without the header. It is the
590 caller's responsibility to free the returned buffer.
591
592
593 char * cmark_render_commonmark(cmark_node *root, int options, int
594 width)
595
596
597 Render a node tree as a CommonMark document. It is the caller's respon‐
598 sibility to free the returned buffer.
599
600
601 char * cmark_render_latex(cmark_node *root, int options, int width)
602
603
604 Render a node tree as a LaTeX document. It is the caller's responsibil‐
605 ity to free the returned buffer.
606
607
608 Options
609 #define CMARK_OPT_DEFAULT 0
610
611
612 Default options.
613
614
615 Options affecting rendering
616 #define CMARK_OPT_SOURCEPOS (1 << 1)
617
618
619 Include a data-sourcepos attribute on all block elements.
620
621
622 #define CMARK_OPT_HARDBREAKS (1 << 2)
623
624
625 Render softbreak elements as hard line breaks.
626
627
628 #define CMARK_OPT_SAFE (1 << 3)
629
630
631 CMARK_OPT_SAFE is defined here for API compatibility, but it no longer
632 has any effect. "Safe" mode is now the default: set CMARK_OPT_UNSAFE to
633 disable it.
634
635
636 #define CMARK_OPT_UNSAFE (1 << 17)
637
638
639 Render raw HTML and unsafe links (javascript:, vbscript:, file:, and
640 data:, except for image/png, image/gif, image/jpeg, or image/webp mime
641 types). By default, raw HTML is replaced by a placeholder HTML comment.
642 Unsafe links are replaced by empty strings.
643
644
645 #define CMARK_OPT_NOBREAKS (1 << 4)
646
647
648 Render softbreak elements as spaces.
649
650
651 Options affecting parsing
652 #define CMARK_OPT_NORMALIZE (1 << 8)
653
654
655 Legacy option (no effect).
656
657
658 #define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
659
660
661 Validate UTF-8 in the input before parsing, replacing illegal sequences
662 with the replacement character U+FFFD.
663
664
665 #define CMARK_OPT_SMART (1 << 10)
666
667
668 Convert straight quotes to curly, --- to em dashes, -- to en dashes.
669
670
671 Version information
672 int cmark_version(void)
673
674
675 The library version as an integer for runtime checks. Also available as
676 macro CMARK_VERSION for compile-time checks.
677
678 · Bits 16-23 contain the major version.
679
680 · Bits 8-15 contain the minor version.
681
682 · Bits 0-7 contain the patchlevel.
683
684 In hexadecimal format, the number 0x010203 represents version 1.2.3.
685
686
687 const char * cmark_version_string(void)
688
689
690 The library version string for runtime checks. Also available as macro
691 CMARK_VERSION_STRING for compile-time checks.
692
693
695 John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
696
697
698
699
700LOCAL March 19, 2019 cmark(3)