Skip to content

Commit 34997d4

Browse files
committed
Fix list tightness
- Set the end position precisely - Check list tightness by comparing line numbers - Remove the `LAST_LINE_BLANK` flag. See also commonmark/commonmark.js#269. Classification of end positions: - The end of the current line: - Thematic breaks - ATX headings - Setext headings - Fenced code blocks closed explicitly - HTML blocks (`pre`, comments, and others) - The end of the previous line: - Fenced code blocks closed by the end of the parent or EOF - HTML blocks (`div` and others) - HTML blocks closed by the end of the parent or EOF - Paragraphs - Block quotes - Empty list items - The end position of the last child: - Non-empty list items - Lists - The end position of the last non-blank line: - Indented code blocks The first two cases are handled by `finalize` and its `closed_explicitly` flag. Non-empty list items and lists are handled in `switch` statements in `finalize`. Indented code blocks are handled by setting the end position every time a non-blank line is added to the block.
1 parent fb9375b commit 34997d4

File tree

2 files changed

+65
-75
lines changed

2 files changed

+65
-75
lines changed

src/blocks.c

Lines changed: 65 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,10 @@
3232

3333
#define peek_at(i, n) (i)->data[n]
3434

35-
static bool S_last_line_blank(const cmark_node *node) {
36-
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
37-
}
38-
3935
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
4036
return (cmark_node_type)node->type;
4137
}
4238

43-
static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
44-
if (is_blank)
45-
node->flags |= CMARK_NODE__LAST_LINE_BLANK;
46-
else
47-
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
48-
}
49-
5039
static CMARK_INLINE bool S_is_line_end_char(char c) {
5140
return (c == '\n' || c == '\r');
5241
}
@@ -124,8 +113,6 @@ void cmark_parser_free(cmark_parser *parser) {
124113
mem->free(parser);
125114
}
126115

127-
static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
128-
129116
// Returns true if line has only space characters, else false.
130117
static bool is_blank_raw(const unsigned char *ptr, const bufsize_t size,
131118
bufsize_t offset) {
@@ -209,26 +196,25 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
209196
return;
210197
}
211198

199+
// Scan forward until line end to keep trailing spaces of the last line.
212200
for (; i < ln->size; ++i) {
213201
c = ln->ptr[i];
214202

215203
if (!S_is_line_end_char(c))
216204
continue;
217205

218-
cmark_strbuf_truncate(ln, i);
206+
if (c == '\r' && i + 1 < ln->size && ln->ptr[i + 1] == '\n') {
207+
i++;
208+
}
209+
210+
cmark_strbuf_truncate(ln, i + 1);
219211
break;
220212
}
221213
}
222214

223-
// Check to see if a node ends with a blank line, descending
224-
// if needed into lists and sublists.
225-
static bool S_ends_with_blank_line(cmark_node *node) {
226-
if ((S_type(node) == CMARK_NODE_LIST ||
227-
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
228-
return(S_ends_with_blank_line(node->last_child));
229-
} else {
230-
return (S_last_line_blank(node));
231-
}
215+
// Check to see if a node ends with a blank line.
216+
static CMARK_INLINE bool S_ends_with_blank_line(cmark_node *node) {
217+
return node->next && node->end_line != node->next->start_line - 1;
232218
}
233219

234220
// returns true if content remains after link defs are resolved.
@@ -331,7 +317,15 @@ static void resolve_all_reference_link_definitions(cmark_parser *parser) {
331317
}
332318
}
333319

334-
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
320+
// `closed_explicitly` states that the node is closed by explicit markers, or
321+
// the node cannot span more than one line:
322+
//
323+
// - Close tag of HTML blocks
324+
// - Closing code fence
325+
// - ATX headings
326+
// - Thematic breaks
327+
static cmark_node *finalize(cmark_parser *parser, cmark_node *b,
328+
bool closed_explicitly) {
335329
bufsize_t pos;
336330
cmark_node *item;
337331
cmark_node *subitem;
@@ -342,22 +336,22 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
342336
CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
343337
b->flags &= ~CMARK_NODE__OPEN;
344338

345-
if (parser->curline.size == 0) {
346-
// end of input - line number has not been incremented
347-
b->end_line = parser->line_number;
348-
b->end_column = parser->last_line_length;
349-
} else if (S_type(b) == CMARK_NODE_DOCUMENT ||
350-
(S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
351-
(S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
352-
b->end_line = parser->line_number;
353-
b->end_column = parser->curline.size;
354-
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
355-
b->end_column -= 1;
356-
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
357-
b->end_column -= 1;
358-
} else {
359-
b->end_line = parser->line_number - 1;
360-
b->end_column = parser->last_line_length;
339+
if (S_type(b) != CMARK_NODE_CODE_BLOCK || b->as.code.fenced) {
340+
if (parser->curline.size == 0) {
341+
// end of input - line number has not been incremented
342+
b->end_line = parser->line_number;
343+
b->end_column = parser->last_line_length;
344+
} else if (closed_explicitly) {
345+
b->end_line = parser->line_number;
346+
b->end_column = parser->curline.size;
347+
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
348+
b->end_column -= 1;
349+
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
350+
b->end_column -= 1;
351+
} else {
352+
b->end_line = parser->line_number - 1;
353+
b->end_column = parser->last_line_length;
354+
}
361355
}
362356

363357
cmark_strbuf *node_content = &parser->content;
@@ -371,7 +365,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
371365
case CMARK_NODE_CODE_BLOCK:
372366
if (!b->as.code.fenced) { // indented code
373367
remove_trailing_blank_lines(node_content);
374-
cmark_strbuf_putc(node_content, '\n');
375368
} else {
376369
// first line of contents becomes info
377370
for (pos = 0; pos < node_content->size; ++pos) {
@@ -412,16 +405,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
412405

413406
while (item) {
414407
// check for non-final non-empty list item ending with blank line:
415-
if (S_last_line_blank(item) && item->next) {
408+
if (item->next && S_ends_with_blank_line(item)) {
416409
b->as.list.tight = false;
417410
break;
418411
}
419412
// recurse into children of list item, to see if there are
420413
// spaces between them:
421414
subitem = item->first_child;
422415
while (subitem) {
423-
if ((item->next || subitem->next) &&
424-
S_ends_with_blank_line(subitem)) {
416+
if (subitem->next && S_ends_with_blank_line(subitem)) {
425417
b->as.list.tight = false;
426418
break;
427419
}
@@ -432,9 +424,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
432424
}
433425
item = item->next;
434426
}
427+
b->end_line = b->last_child->end_line;
428+
b->end_column = b->last_child->end_column;
435429

436430
break;
437431

432+
case CMARK_NODE_ITEM:
433+
if (b->last_child) {
434+
b->end_line = b->last_child->end_line;
435+
b->end_column = b->last_child->end_column;
436+
}
437+
// If the item is empty, it is closed when the next line is processed and
438+
// the end position is set by the normal path. Note that if the first line
439+
// and second line of an item are blank, it is closed.
440+
break;
441+
438442
case CMARK_NODE_DOCUMENT:
439443
resolve_all_reference_link_definitions(parser);
440444
break;
@@ -454,7 +458,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
454458
// if 'parent' isn't the kind of node that can accept this child,
455459
// then back up til we hit a node that can.
456460
while (!can_contain(S_type(parent), block_type)) {
457-
parent = finalize(parser, parent);
461+
parent = finalize(parser, parent, false);
458462
}
459463

460464
cmark_node *child =
@@ -594,10 +598,10 @@ static int lists_match(cmark_list *list_data, cmark_list *item_data) {
594598

595599
static cmark_node *finalize_document(cmark_parser *parser) {
596600
while (parser->current != parser->root) {
597-
parser->current = finalize(parser, parser->current);
601+
parser->current = finalize(parser, parser->current, false);
598602
}
599603

600-
finalize(parser, parser->root);
604+
finalize(parser, parser->root, false);
601605

602606
// Limit total size of extra content created from reference links to
603607
// document size to avoid superlinear growth. Always allow 100KB.
@@ -917,7 +921,7 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
917921
// the end of a line, we can stop processing it:
918922
*should_continue = false;
919923
S_advance_offset(parser, input, matched, false);
920-
parser->current = finalize(parser, container);
924+
parser->current = finalize(parser, container, true);
921925
} else {
922926
// skip opt. spaces of fence parser->offset
923927
int i = container->as.code.fence_offset;
@@ -1121,6 +1125,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11211125
// it's only now that we know the line is not part of a setext heading:
11221126
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
11231127
parser->first_nonspace + 1);
1128+
*container = finalize(parser, *container, true);
11241129
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
11251130
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
11261131
parser->indent < 4 &&
@@ -1207,35 +1212,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12071212
static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12081213
cmark_node *last_matched_container,
12091214
cmark_chunk *input) {
1210-
cmark_node *tmp;
12111215
// what remains at parser->offset is a text line. add the text to the
12121216
// appropriate container.
12131217

12141218
S_find_first_nonspace(parser, input);
12151219

1216-
if (parser->blank && container->last_child)
1217-
S_set_last_line_blank(container->last_child, true);
1218-
1219-
// block quote lines are never blank as they start with >
1220-
// and we don't count blanks in fenced code for purposes of tight/loose
1221-
// lists or breaking out of lists. we also don't set last_line_blank
1222-
// on an empty list item.
1223-
const cmark_node_type ctype = S_type(container);
1224-
const bool last_line_blank =
1225-
(parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1226-
ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1227-
!(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
1228-
!(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
1229-
container->start_line == parser->line_number));
1230-
1231-
S_set_last_line_blank(container, last_line_blank);
1232-
1233-
tmp = container;
1234-
while (tmp->parent) {
1235-
S_set_last_line_blank(tmp->parent, false);
1236-
tmp = tmp->parent;
1237-
}
1238-
12391220
// If the last line processed belonged to a paragraph node,
12401221
// and we didn't match all of the line prefixes for the open containers,
12411222
// and we didn't start any new containers,
@@ -1249,7 +1230,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12491230
} else { // not a lazy continuation
12501231
// Finalize any blocks that were not matched and set cur to container:
12511232
while (parser->current != last_matched_container) {
1252-
parser->current = finalize(parser, parser->current);
1233+
parser->current = finalize(parser, parser->current, false);
12531234
assert(parser->current != NULL);
12541235
}
12551236

@@ -1291,7 +1272,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12911272
}
12921273

12931274
if (matches_end_condition) {
1294-
container = finalize(parser, container);
1275+
container = finalize(parser, container, true);
12951276
assert(parser->current != NULL);
12961277
}
12971278
} else if (parser->blank) {
@@ -1324,6 +1305,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13241305
bool all_matched = true;
13251306
cmark_node *container;
13261307
cmark_chunk input;
1308+
bool need_set_end_position = false;
13271309

13281310
if (parser->options & CMARK_OPT_VALIDATE_UTF8)
13291311
cmark_utf8proc_check(&parser->curline, buffer, bytes);
@@ -1361,6 +1343,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13611343

13621344
add_text_to_container(parser, container, last_matched_container, &input);
13631345

1346+
need_set_end_position = S_type(container) == CMARK_NODE_CODE_BLOCK &&
1347+
!container->as.code.fenced &&
1348+
!parser->blank;
1349+
13641350
finished:
13651351
parser->last_line_length = input.len;
13661352
if (parser->last_line_length &&
@@ -1370,6 +1356,11 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13701356
input.data[parser->last_line_length - 1] == '\r')
13711357
parser->last_line_length -= 1;
13721358

1359+
if (need_set_end_position) {
1360+
container->end_line = parser->line_number;
1361+
container->end_column = parser->last_line_length;
1362+
}
1363+
13731364
cmark_strbuf_clear(&parser->curline);
13741365
}
13751366

src/node.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ typedef struct {
4848

4949
enum cmark_node__internal_flags {
5050
CMARK_NODE__OPEN = (1 << 0),
51-
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
5251
};
5352

5453
struct cmark_node {

0 commit comments

Comments
 (0)