32
32
33
33
// Indexes the `data` member reached through pointer i at position n.
// NOTE(review): as rendered here there is whitespace between `peek_at` and
// `(`, which in C would define an object-like macro rather than a
// function-like one; presumably a rendering artifact -- confirm against the
// real file.
#define peek_at (i , n ) (i)->data[n]
34
34
35
- static bool S_last_line_blank (const cmark_node * node ) {
36
- return (node -> flags & CMARK_NODE__LAST_LINE_BLANK ) != 0 ;
37
- }
38
-
39
35
// Returns the node's `type` field cast to cmark_node_type.
static CMARK_INLINE cmark_node_type S_type (const cmark_node * node ) {
40
36
return (cmark_node_type )node -> type ;
41
37
}
42
38
43
- static void S_set_last_line_blank (cmark_node * node , bool is_blank ) {
44
- if (is_blank )
45
- node -> flags |= CMARK_NODE__LAST_LINE_BLANK ;
46
- else
47
- node -> flags &= ~CMARK_NODE__LAST_LINE_BLANK ;
48
- }
49
-
50
39
// Returns true when c is a line feed ('\n') or a carriage return ('\r').
static CMARK_INLINE bool S_is_line_end_char (char c ) {
51
40
return (c == '\n' || c == '\r' );
52
41
}
@@ -124,8 +113,6 @@ void cmark_parser_free(cmark_parser *parser) {
124
113
mem -> free (parser );
125
114
}
126
115
127
- static cmark_node * finalize (cmark_parser * parser , cmark_node * b );
128
-
129
116
// Returns true if line has only space characters, else false.
130
117
static bool is_blank_raw (const unsigned char * ptr , const bufsize_t size ,
131
118
bufsize_t offset ) {
@@ -209,26 +196,25 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
209
196
return ;
210
197
}
211
198
199
+ // Scan forward until line end to keep trailing spaces of the last line.
212
200
for (; i < ln -> size ; ++ i ) {
213
201
c = ln -> ptr [i ];
214
202
215
203
if (!S_is_line_end_char (c ))
216
204
continue ;
217
205
218
- cmark_strbuf_truncate (ln , i );
206
+ if (c == '\r' && i + 1 < ln -> size && ln -> ptr [i + 1 ] == '\n' ) {
207
+ i ++ ;
208
+ }
209
+
210
+ cmark_strbuf_truncate (ln , i + 1 );
219
211
break ;
220
212
}
221
213
}
222
214
223
- // Check to see if a node ends with a blank line, descending
224
- // if needed into lists and sublists.
225
- static bool S_ends_with_blank_line (cmark_node * node ) {
226
- if ((S_type (node ) == CMARK_NODE_LIST ||
227
- S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
228
- return (S_ends_with_blank_line (node -> last_child ));
229
- } else {
230
- return (S_last_line_blank (node ));
231
- }
215
+ // Check to see if a node ends with a blank line: true only when the node has a next sibling whose start_line is not exactly end_line + 1.
216
+ static CMARK_INLINE bool S_ends_with_blank_line (cmark_node * node ) {
217
+ return node -> next && node -> end_line != node -> next -> start_line - 1 ;
232
218
}
233
219
234
220
// returns true if content remains after link defs are resolved.
@@ -331,7 +317,15 @@ static void resolve_all_reference_link_definitions(cmark_parser *parser) {
331
317
}
332
318
}
333
319
334
- static cmark_node * finalize (cmark_parser * parser , cmark_node * b ) {
320
+ // `closed_explicitly` states that the node is closed by explicit markers, or
321
+ // the node cannot span more than one line:
322
+ //
323
+ // - Close tag of HTML blocks
324
+ // - Closing code fence
325
+ // - ATX headings
326
+ // - Thematic breaks
327
+ static cmark_node * finalize (cmark_parser * parser , cmark_node * b ,
328
+ bool closed_explicitly ) {
335
329
bufsize_t pos ;
336
330
cmark_node * item ;
337
331
cmark_node * subitem ;
@@ -342,22 +336,22 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
342
336
CMARK_NODE__OPEN ); // shouldn't call finalize on closed blocks
343
337
b -> flags &= ~CMARK_NODE__OPEN ;
344
338
345
- if (parser -> curline . size == 0 ) {
346
- // end of input - line number has not been incremented
347
- b -> end_line = parser -> line_number ;
348
- b -> end_column = parser -> last_line_length ;
349
- } else if ( S_type ( b ) == CMARK_NODE_DOCUMENT ||
350
- ( S_type ( b ) == CMARK_NODE_CODE_BLOCK && b -> as . code . fenced ) ||
351
- ( S_type ( b ) == CMARK_NODE_HEADING && b -> as . heading . setext )) {
352
- b -> end_line = parser -> line_number ;
353
- b -> end_column = parser -> curline .size ;
354
- if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\n' )
355
- b -> end_column -= 1 ;
356
- if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\r' )
357
- b -> end_column -= 1 ;
358
- } else {
359
- b -> end_line = parser -> line_number - 1 ;
360
- b -> end_column = parser -> last_line_length ;
339
+ if (S_type ( b ) != CMARK_NODE_CODE_BLOCK || b -> as . code . fenced ) {
340
+ if ( parser -> curline . size == 0 ) {
341
+ // end of input - line number has not been incremented
342
+ b -> end_line = parser -> line_number ;
343
+ b -> end_column = parser -> last_line_length ;
344
+ } else if ( closed_explicitly ) {
345
+ b -> end_line = parser -> line_number ;
346
+ b -> end_column = parser -> curline . size ;
347
+ if ( b -> end_column && parser -> curline .ptr [ b -> end_column - 1 ] == '\n' )
348
+ b -> end_column -= 1 ;
349
+ if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\r' )
350
+ b -> end_column -= 1 ;
351
+ } else {
352
+ b -> end_line = parser -> line_number - 1 ;
353
+ b -> end_column = parser -> last_line_length ;
354
+ }
361
355
}
362
356
363
357
cmark_strbuf * node_content = & parser -> content ;
@@ -371,7 +365,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
371
365
case CMARK_NODE_CODE_BLOCK :
372
366
if (!b -> as .code .fenced ) { // indented code
373
367
remove_trailing_blank_lines (node_content );
374
- cmark_strbuf_putc (node_content , '\n' );
375
368
} else {
376
369
// first line of contents becomes info
377
370
for (pos = 0 ; pos < node_content -> size ; ++ pos ) {
@@ -412,16 +405,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
412
405
413
406
while (item ) {
414
407
// check for non-final non-empty list item ending with blank line:
415
- if (S_last_line_blank ( item ) && item -> next ) {
408
+ if (item -> next && S_ends_with_blank_line ( item ) ) {
416
409
b -> as .list .tight = false;
417
410
break ;
418
411
}
419
412
// recurse into children of list item, to see if there are
420
413
// spaces between them:
421
414
subitem = item -> first_child ;
422
415
while (subitem ) {
423
- if ((item -> next || subitem -> next ) &&
424
- S_ends_with_blank_line (subitem )) {
416
+ if (subitem -> next && S_ends_with_blank_line (subitem )) {
425
417
b -> as .list .tight = false;
426
418
break ;
427
419
}
@@ -432,9 +424,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
432
424
}
433
425
item = item -> next ;
434
426
}
427
+ b -> end_line = b -> last_child -> end_line ;
428
+ b -> end_column = b -> last_child -> end_column ;
435
429
436
430
break ;
437
431
432
+ case CMARK_NODE_ITEM :
433
+ if (b -> last_child ) {
434
+ b -> end_line = b -> last_child -> end_line ;
435
+ b -> end_column = b -> last_child -> end_column ;
436
+ }
437
+ // If the item is empty, it is closed when the next line is processed and
438
+ // the end position is set by the normal path. Note that if the first line
439
+ // and second line of an item are blank, it is closed.
440
+ break ;
441
+
438
442
case CMARK_NODE_DOCUMENT :
439
443
resolve_all_reference_link_definitions (parser );
440
444
break ;
@@ -454,7 +458,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
454
458
// if 'parent' isn't the kind of node that can accept this child,
455
459
// then back up until we hit a node that can.
456
460
while (!can_contain (S_type (parent ), block_type )) {
457
- parent = finalize (parser , parent );
461
+ parent = finalize (parser , parent , false );
458
462
}
459
463
460
464
cmark_node * child =
@@ -594,10 +598,10 @@ static int lists_match(cmark_list *list_data, cmark_list *item_data) {
594
598
595
599
static cmark_node * finalize_document (cmark_parser * parser ) {
596
600
while (parser -> current != parser -> root ) {
597
- parser -> current = finalize (parser , parser -> current );
601
+ parser -> current = finalize (parser , parser -> current , false );
598
602
}
599
603
600
- finalize (parser , parser -> root );
604
+ finalize (parser , parser -> root , false );
601
605
602
606
// Limit total size of extra content created from reference links to
603
607
// document size to avoid superlinear growth. Always allow 100KB.
@@ -917,7 +921,7 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
917
921
// the end of a line, we can stop processing it:
918
922
* should_continue = false;
919
923
S_advance_offset (parser , input , matched , false);
920
- parser -> current = finalize (parser , container );
924
+ parser -> current = finalize (parser , container , true );
921
925
} else {
922
926
// skip opt. spaces of fence parser->offset
923
927
int i = container -> as .code .fence_offset ;
@@ -1121,6 +1125,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1121
1125
// it's only now that we know the line is not part of a setext heading:
1122
1126
* container = add_child (parser , * container , CMARK_NODE_THEMATIC_BREAK ,
1123
1127
parser -> first_nonspace + 1 );
1128
+ * container = finalize (parser , * container , true);
1124
1129
S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
1125
1130
} else if ((!indented || cont_type == CMARK_NODE_LIST ) &&
1126
1131
parser -> indent < 4 &&
@@ -1207,35 +1212,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1207
1212
static void add_text_to_container (cmark_parser * parser , cmark_node * container ,
1208
1213
cmark_node * last_matched_container ,
1209
1214
cmark_chunk * input ) {
1210
- cmark_node * tmp ;
1211
1215
// what remains at parser->offset is a text line. add the text to the
1212
1216
// appropriate container.
1213
1217
1214
1218
S_find_first_nonspace (parser , input );
1215
1219
1216
- if (parser -> blank && container -> last_child )
1217
- S_set_last_line_blank (container -> last_child , true);
1218
-
1219
- // block quote lines are never blank as they start with >
1220
- // and we don't count blanks in fenced code for purposes of tight/loose
1221
- // lists or breaking out of lists. we also don't set last_line_blank
1222
- // on an empty list item.
1223
- const cmark_node_type ctype = S_type (container );
1224
- const bool last_line_blank =
1225
- (parser -> blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1226
- ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1227
- !(ctype == CMARK_NODE_CODE_BLOCK && container -> as .code .fenced ) &&
1228
- !(ctype == CMARK_NODE_ITEM && container -> first_child == NULL &&
1229
- container -> start_line == parser -> line_number ));
1230
-
1231
- S_set_last_line_blank (container , last_line_blank );
1232
-
1233
- tmp = container ;
1234
- while (tmp -> parent ) {
1235
- S_set_last_line_blank (tmp -> parent , false);
1236
- tmp = tmp -> parent ;
1237
- }
1238
-
1239
1220
// If the last line processed belonged to a paragraph node,
1240
1221
// and we didn't match all of the line prefixes for the open containers,
1241
1222
// and we didn't start any new containers,
@@ -1249,7 +1230,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1249
1230
} else { // not a lazy continuation
1250
1231
// Finalize any blocks that were not matched and set cur to container:
1251
1232
while (parser -> current != last_matched_container ) {
1252
- parser -> current = finalize (parser , parser -> current );
1233
+ parser -> current = finalize (parser , parser -> current , false );
1253
1234
assert (parser -> current != NULL );
1254
1235
}
1255
1236
@@ -1291,7 +1272,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1291
1272
}
1292
1273
1293
1274
if (matches_end_condition ) {
1294
- container = finalize (parser , container );
1275
+ container = finalize (parser , container , true );
1295
1276
assert (parser -> current != NULL );
1296
1277
}
1297
1278
} else if (parser -> blank ) {
@@ -1324,6 +1305,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1324
1305
bool all_matched = true;
1325
1306
cmark_node * container ;
1326
1307
cmark_chunk input ;
1308
+ bool need_set_end_position = false;
1327
1309
1328
1310
if (parser -> options & CMARK_OPT_VALIDATE_UTF8 )
1329
1311
cmark_utf8proc_check (& parser -> curline , buffer , bytes );
@@ -1361,6 +1343,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1361
1343
1362
1344
add_text_to_container (parser , container , last_matched_container , & input );
1363
1345
1346
+ need_set_end_position = S_type (container ) == CMARK_NODE_CODE_BLOCK &&
1347
+ !container -> as .code .fenced &&
1348
+ !parser -> blank ;
1349
+
1364
1350
finished :
1365
1351
parser -> last_line_length = input .len ;
1366
1352
if (parser -> last_line_length &&
@@ -1370,6 +1356,11 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1370
1356
input .data [parser -> last_line_length - 1 ] == '\r' )
1371
1357
parser -> last_line_length -= 1 ;
1372
1358
1359
+ if (need_set_end_position ) {
1360
+ container -> end_line = parser -> line_number ;
1361
+ container -> end_column = parser -> last_line_length ;
1362
+ }
1363
+
1373
1364
cmark_strbuf_clear (& parser -> curline );
1374
1365
}
1375
1366
0 commit comments