Skip to content

Commit

Permalink
Handle any arbitrary line endings
Browse files Browse the repository at this point in the history
Handle any arbitrary line endings, as long as they are some combination of carriage return and newline. Fixes #223.
  • Loading branch information
vincentlaucsb committed May 26, 2024
1 parent 25a4f7a commit 0abcfad
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
4 changes: 2 additions & 2 deletions include/internal/basic_csv_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ namespace csv {
case ParseFlags::NEWLINE:
this->data_pos++;

// Catches CRLF (or LFLF)
if (this->data_pos < in.size() && parse_flag(in[this->data_pos]) == ParseFlags::NEWLINE)
// Catches CRLF (or LFLF, CRCRLF, or any other nonsensical combination of newlines)
while (this->data_pos < in.size() && parse_flag(in[this->data_pos]) == ParseFlags::NEWLINE)
this->data_pos++;

// End of record -> Write record
Expand Down
56 changes: 55 additions & 1 deletion tests/test_read_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ TEST_CASE( "Test Parse Flags", "[test_parse_flags]" ) {
}

// Test Main Functions
TEST_CASE( "Test Reading CSV From Direct Input", "[read_csv_direct]" ) {
TEST_CASE("Test Reading CSV From Direct Input", "[read_csv_direct]" ) {
SECTION("Expected Results") {
auto rows = "A,B,C\r\n" // Header row
"123,234,345\r\n"
Expand Down Expand Up @@ -181,6 +181,60 @@ TEST_CASE( "Test leading and trailing escaped quote", "[read_csv_quote]" ) {
}
//! [Parse Example]

// Verify the CSV parser can handle arbitrary line endings composed of any
// combination of carriage returns and newlines.
//
// Every generated input contains the same header row followed by the same
// three data rows; only the line terminator differs, so each variant must
// produce identical parsed rows.
TEST_CASE("Cursed Newlines", "[read_csv_cursed_newline]") {
    // NOTE: each input must be its own GENERATE argument. Wrapping all of the
    // literals in a single parenthesized group turns the separating commas
    // into the comma operator, which silently discards every input except
    // the last one, leaving the other line-ending styles untested.
    auto row_str = GENERATE(as<std::string> {},
        // Windows style
        "A,B,C\r\n" // Header row
        "123,234,345\r\n"
        "1,2,3\r\n"
        "4,5,6",

        // Unix style
        "A,B,C\n" // Header row
        "123,234,345\n"
        "1,2,3\n"
        "4,5,6",

        // Eww brother what is that...
        "A,B,C\r\r\n" // Header row
        "123,234,345\r\r\n"
        "1,2,3\r\r\n"
        "4,5,6",

        // Doubled-up Windows style (ridiculous, but I'm sure it exists somewhere)
        "A,B,C\r\n\r\n" // Header row
        "123,234,345\r\n\r\n"
        "1,2,3\r\n\r\n"
        "4,5,6"
    );

    // Set CSVFormat to KEEP all rows, even empty ones (because there shouldn't be any)
    CSVFormat format;
    format.header_row(0).variable_columns(VariableColumnPolicy::KEEP);
    auto rows = parse(row_str, format);

    // First data row: check both positional contents and lookup by header name
    CSVRow row;
    rows.read_row(row);
    vector<string> first_row = { "123", "234", "345" };
    REQUIRE(vector<string>(row) == first_row);
    REQUIRE(row["A"] == "123");
    REQUIRE(row["B"] == "234");
    REQUIRE(row["C"] == "345");

    // Second data row
    rows.read_row(row);
    vector<string> second_row = { "1", "2", "3" };
    REQUIRE(vector<string>(row) == second_row);

    // Third data row (the input has no trailing line terminator)
    rows.read_row(row);
    vector<string> third_row = { "4", "5", "6" };
    REQUIRE(vector<string>(row) == third_row);

    // Three data rows in total
    REQUIRE(rows.n_rows() == 3);
}

TEST_CASE("Test Whitespace Trimming", "[read_csv_trim]") {
auto row_str = GENERATE(as<std::string> {},
"A,B,C\r\n" // Header row
Expand Down

0 comments on commit 0abcfad

Please sign in to comment.