Skip to content

Commit

Permalink
Simplify the split on newline regex
Browse files Browse the repository at this point in the history
  • Loading branch information
David Mesquita-Morris committed Nov 22, 2021
1 parent 208980a commit 9dc6022
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 4 deletions.
2 changes: 1 addition & 1 deletion lib/node/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ exports.parse = void 0;
function parse(text) {
// convert the csv formatted text into a table of tokens
const tokens = text.replace(/^\uFEFF|\r\n$|\n$|\r$/g, '') // trim byte order mark from beginning and trailing EOL if needed
.split(/\r\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL
.split(/\r?\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL
row.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/) // split row into tokens based on comma delimiter (unless in quotes); see answer here: https://stackoverflow.com/questions/23582276/split-string-by-comma-but-ignore-commas-inside-quotes/23582323#23582323
.map(token => token.replace(/(^"|"$)/g, '') // dequote tokens if needed
.replace(/\"\"/g, '"'))); // replace double double quotes with double quotes
Expand Down
1 change: 1 addition & 0 deletions lib/node/test/index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export {};
24 changes: 24 additions & 0 deletions lib/node/test/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"use strict";
// Module-interop helper: exposes property `k` of `m` on object `o` under the name `k2`.
// NOTE(review): appears to be compiler-generated output — confirm before hand-editing.
// An already-defined `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
// the alias defaults to the source property name
if (k2 === undefined) k2 = k;
// enumerable getter: every read of o[k2] fetches the current value of m[k]
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
// fallback used when Object.create is not available: copy the value once by plain assignment
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: stores `v` as the "default" property of object `o`.
// An already-defined `this.__setModuleDefault` is reused when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
// enumerable data property holding the value
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
// fallback used when Object.create is not available: plain assignment
o["default"] = v;
});
// Module-interop helper: returns a namespace-style object for the module `mod`.
// An already-defined `this.__importStar` is reused when present.
var __importStar = (this && this.__importStar) || function (mod) {
// modules flagged with __esModule are returned untouched
if (mod && mod.__esModule) return mod;
var result = {};
// bind every own property of mod except "default" onto the result object
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
// the module value itself becomes the "default" member
__setModuleDefault(result, mod);
return result;
};
// flag this file's exports object with __esModule
Object.defineProperty(exports, "__esModule", { value: true });
// load the module in the parent directory as a namespace object
const csv = __importStar(require(".."));
// Sample input: leading byte order mark; rows separated by a mix of \r, \n and \r\n;
// quoted tokens; doubled quotes inside a quoted token; a quoted token containing \r\n;
// a quoted token containing a comma; and a final row with no trailing end-of-line.
const data = '\uFEFFa,b,c\r"1",2,3\n4,"5",6\r\n7,"The number eight: ""8""",9\r"a\r\na",b,"c, d"\n12';
// print the parsed result for manual inspection (nothing is asserted)
console.log(csv.parse(data));
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@steelbreeze/csv",
"version": "1.0.0-alpha.1",
"version": "1.0.0-alpha.2",
"description": "Tools for reading and writing files formatted as CSV",
"main": "lib/node/index.js",
"module": "lib/node/index.js",
Expand Down
3 changes: 1 addition & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

/**
* Parses a string encoded as comma-separated values
* @param text The source csv text.
Expand All @@ -7,7 +6,7 @@
export function parse(text: string): Array<any> {
// convert the csv formatted text into a table of tokens
const tokens = text.replace(/^\uFEFF|\r\n$|\n$|\r$/g, '') // trim byte order mark from beginning and trailing EOL if needed
.split(/\r\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL
.split(/\r?\n(?=(?:(?:[^"]*"){2})*[^"]*$)|\r(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(row => // split text into rows at EOL
row.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/) // split row into tokens based on comma delimiter (unless in quotes); see answer here: https://stackoverflow.com/questions/23582276/split-string-by-comma-but-ignore-commas-inside-quotes/23582323#23582323
.map(token => token.replace(/(^"|"$)/g, '') // dequote tokens if needed
.replace(/\"\"/g, '"'))); // replace double double quotes with double quotes
Expand Down
5 changes: 5 additions & 0 deletions src/test/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import * as csv from '..';

// Sample input: leading byte order mark; rows separated by a mix of \r, \n and \r\n;
// quoted tokens; doubled quotes inside a quoted token; a quoted token containing \r\n;
// a quoted token containing a comma; and a final row with no trailing end-of-line.
const data = '\uFEFFa,b,c\r"1",2,3\n4,"5",6\r\n7,"The number eight: ""8""",9\r"a\r\na",b,"c, d"\n12';

// print the parsed result for manual inspection (nothing is asserted)
console.log(csv.parse(data));

0 comments on commit 9dc6022

Please sign in to comment.