Skip to content

Commit d9c9d81

Browse files
committed
added more features
1 parent 794dd6f commit d9c9d81

File tree

1 file changed

+157
-32
lines changed

1 file changed

+157
-32
lines changed

process-csv.js

Lines changed: 157 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,26 @@
66
*/
77

88
// Matches a whole string that is written as a float:
//   - an optionally signed decimal: digits with an optional fraction ("12", "12.", "12.5")
//     or a bare fraction (".5"),
//   - followed by an optional exponent: "e" or "E", an optional sign, then digits,
//   - or an optionally signed "Infinity" / "NaN".
// The exponent sign is optional and both alternatives share the same sign/exponent rules,
// so "1e5", "1e-5", "-.5" and ".5E3" are accepted while malformed input such as ".5ee3" is not.
const FLOAT_REGEXP = /^[-+]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?$|^[-+]?Infinity$|^[-+]?NaN$/;
9+
// Default upper bound for the size of a single row, in bytes: 10 petabytes (10 * 10^15 bytes).
// The previous value, 8e16, was the same limit expressed in bits.
const MAX_ROW_BYTES = 1e16;
910

1011
/**
1112
* @param {Object?} options
1213
* @param {string} [options.delimiter=','] - Specifies the CSV delimiter.
14+
* @param {boolean} [options.allowSpecialQuotes=false] - Should quotes be treated as a special character that wraps cells. Does not include the header row.
15+
* @param {string} [options.quote='"'] - If allowSpecialQuotes is true, this will specify the quote character.
16+
* @param {boolean | string} [options.skipComments=false] - If true, lines which begin with # will be skipped. To use
17+
* a custom character, pass it as a string.
18+
* @param {Number} [options.skipLines=0] - Specifies the number of lines at the beginning of the file to skip over.
1319
* @param {boolean} [options.skipEmptyLines=false] - Should empty lines be automatically skipped?
1420
* @param {boolean} [options.parseNumbers=false] - Automatically parse numbers (with a . as the decimal separator).
1521
* @param {boolean} [options.parseBooleans=false] - Automatically parse booleans (Auto conversion to lowercase `true` and `false`).
16-
* @param {boolean} [options.ltrim=false] - Automatically trim first column.
17-
* @param {boolean} [options.rtrim=false] - Automatically trim last column.
18-
* @param {boolean} [options.trim=false] - If true, then both 'ltrim' and 'rtrim' are set to true.
19-
* @param {boolean} [options.skipHeader=false] - If true, then skip the first header row.
22+
* @param {boolean} [options.ltrim=false] - Automatically left trim columns.
23+
* @param {boolean} [options.rtrim=false] - Automatically right trim columns.
24+
* @param {boolean} [options.trim=false] - If true, trim all columns.
25+
* @param {Number} [options.maxRowBytes=MAX_ROW_BYTES] - Specifies the maximum number of bytes per row; the default value is 10 petabytes.
2026
* @param {boolean} [options.rowAsObject=false] - If true, each row will be converted automatically to an object based
21-
* on the header. This implied `skipHeader=true`.
27+
* on the header. This implies `skipLines=1 & strict=true`.
28+
* @param {boolean} [options.strict=false] - If true, the number of columns in each row must match the number of headers.
2229
* @param {boolean} [options.errorLog=false] - If true, errors will be logged to the console whether the `error` event is used or not.
2330
* @returns {ProcessCSV}
2431
* @constructor
@@ -29,23 +36,32 @@ const ProcessCSV = function (options) {
2936
let pause = false;
3037
let opts = {
3138
delimiter: options.delimiter || ',',
39+
allowSpecialQuotes: options.allowSpecialQuotes || false,
40+
quote: options.quote || '"',
41+
skipComments: options.skipComments || false,
42+
skipLines: options.skipLines || 0,
3243
skipEmptyLines: options.skipEmptyLines || false,
3344
parseNumbers: options.parseNumbers || false,
3445
parseBooleans: options.parseBooleans || false,
3546
ltrim: options.ltrim || false,
3647
rtrim: options.rtrim || false,
3748
trim: options.trim || false,
38-
skipHeader: options.skipHeader || false,
49+
maxRowBytes: options.maxRowBytes || MAX_ROW_BYTES,
3950
rowAsObject: options.rowAsObject || false,
51+
strict: options.strict || false,
4052
errorLog: options.errorLog || false,
4153
};
4254

43-
if (opts.trim) opts.ltrim = opts.rtrim = true;
55+
if (opts.rowAsObject) {
56+
opts.strict = true;
4457

45-
if (opts.rowAsObject) opts.skipHeader = true;
58+
if (!opts.skipLines)
59+
opts.skipLines = 1;
60+
}
4661

4762
let allDataLines = [];
4863
let dataHeader = [];
64+
let ended = false;
4965

5066
/**
5167
* @desc Reads the file as a text
@@ -72,24 +88,50 @@ const ProcessCSV = function (options) {
7288
};
7389

7490
/**
75-
* @desc Processes the data from the csv file and filters it
76-
* @param {String} csv
91+
* Strips the wrapping quotes from header columns that start and end with quotes
92+
* @param {Array} header
93+
* @returns {Array}
94+
*/
95+
const handleHeader = function (header) {
    // A column counts as quoted when its trimmed text begins and ends with the same
    // quote mark (double or single). Quoted columns are replaced, in place, by the
    // trimmed text without its first and last character; every other column is left
    // exactly as it was (including any surrounding whitespace).
    const isWrappedIn = (text, mark) => text[0] === mark && text[text.length - 1] === mark;

    for (const [position, rawColumn] of header.entries()) {
        const trimmed = rawColumn.trim();

        if (isWrappedIn(trimmed, '"') || isWrappedIn(trimmed, '\'')) {
            header[position] = trimmed.substring(1, trimmed.length - 1);
        }
    }

    // The same array instance that was passed in is handed back.
    return header;
};
104+
105+
/**
106+
* @desc Processes the data
107+
* @param {string} csv
77108
*/
78109
const processData = async function (csv) {
    // Splits the raw CSV text into lines, consumes the header / skipped lines, then
    // hands the remaining lines to processRows and dispatches 'finish'.
    // Resolves to `_this` when there is nothing to process, otherwise to undefined.
    allDataLines = csv.split(/\r\n|\n/);

    // An empty first line is treated as "no data": report it once and stop.
    if (!allDataLines[0]) {
        const message = 'No data to process, file is empty!';
        logError('w', message);
        dispatch('error', throwError(message));

        return _this;
    }

    // The first line is always consumed as the header. When skipLines is set, the
    // first skipped line doubles as the header and the remaining skipped lines are
    // discarded; when it is not set, exactly one line (the header) is consumed.
    const linesToConsume = opts.skipLines || 1;
    for (let i = 0; i < linesToConsume; i++) {
        const line = allDataLines.shift();
        if (i === 0) dataHeader = line.split(opts.delimiter);
    }

    if (dataHeader.length) {
        dataHeader = handleHeader(dataHeader);
        // The 'header' event is only emitted when rows are not converted to objects
        // and no lines were requested to be skipped.
        if (!opts.rowAsObject && !opts.skipLines) dispatch('header', dataHeader);
    }

    await processRows();
    dispatch('finish', null);
};
@@ -98,18 +140,27 @@ const ProcessCSV = function (options) {
98140
* @desc Format data rows according to the given options
99141
* @returns {Array}
100142
*/
101-
const postProcessColumns = async function () {
143+
const processRows = async function () {
102144
let hold = !opts.skipEmptyLines ? allDataLines : allDataLines.filter(k => k != null && k !== '');
103145

104146
for (let j = 0; j < hold.length; j++) {
105-
// if paused stop the loop untill the user resumes
106147
if (j > 0 && pause)
107148
await pauseLoop();
108149

109150
let row = hold[j].split(opts.delimiter);
110151

111-
if (opts.ltrim) row[0] = row[0].trim()
112-
if (opts.rtrim) row[row.length - 1] = row[row.length - 1].trim();
152+
if (opts.skipComments) {
153+
const char = typeof opts.skipComments === 'string' ? opts.skipComments : '#';
154+
if (row[0][0] === char) continue;
155+
}
156+
157+
if (opts.allowSpecialQuotes) row = hanldeSpecialQuotes(row);
158+
159+
if (handleRowErrors(row, j + 1)) return;
160+
161+
if (opts.ltrim) row[0] = row[0].replace(/^\s+/, "");
162+
if (opts.rtrim) row[row.length - 1] = row[row.length - 1].replace(/$\s+/, "");
163+
if (opts.trim) row = row.map(col => col.trim());
113164

114165
if (opts.parseNumbers && row[0] !== '') row = parseRowNumbers(row);
115166
if (opts.parseBooleans && row[0] !== '') row = parseRowBooleans(row);
@@ -124,6 +175,62 @@ const ProcessCSV = function (options) {
124175
return _this;
125176
}
126177

178+
/**
179+
* @desc Handles special quoted columns in a row
180+
* @param {Array} row
181+
* @returns {Array}
182+
*/
183+
const hanldeSpecialQuotes = function (row) {
    // `row` was produced by splitting a line on opts.delimiter, so a quoted cell that
    // itself contains the delimiter arrives here as several pieces, e.g. `"a, b"` with
    // delimiter `,` arrives as [`"a`, ` b"`]. This walks the pieces, glues the ones that
    // belong to the same quoted cell back together (re-inserting the delimiter that the
    // split removed) and drops the wrapping quote characters.
    // Returns a new array; `row` itself is not modified.
    const quote = opts.quote;
    const result = [];
    let insideQuotes = false;

    for (const cell of row) {
        if (insideQuotes) {
            // Continuation of a quoted cell: append it to the cell being rebuilt.
            // A trailing quote closes the cell.
            const closesHere = cell.endsWith(quote);
            const piece = closesHere ? cell.slice(0, -quote.length) : cell;
            result[result.length - 1] += opts.delimiter + piece;
            insideQuotes = !closesHere;
        } else if (cell.startsWith(quote)) {
            // Start of a quoted cell. It is also closed here only when the cell is long
            // enough to hold both an opening and a closing quote (a lone quote opens).
            const closesHere = cell.length >= 2 * quote.length && cell.endsWith(quote);
            result.push(closesHere ? cell.slice(quote.length, -quote.length) : cell.slice(quote.length));
            insideQuotes = !closesHere;
        } else {
            // Plain, unquoted cell.
            result.push(cell);
        }
    }

    return result;
};
205+
206+
/**
207+
* @desc Handles any errors in a row
208+
* @param {Array} row
209+
* @returns {boolean}
210+
*/
211+
const handleRowErrors = function (row, index) {
    // Validates one row. On a violation the error is logged (via logError), dispatched
    // as an 'error' event, and true is returned; false means the row is acceptable.
    //   row   - the cells of the row
    //   index - 1-based row number used in the error messages

    // NOTE(review): when opts.skipLines is set, the column-count check below only runs
    // for rows whose zero-based position is smaller than skipLines; later rows are not
    // checked. That gating is preserved here unchanged, but it looks at odds with the
    // documented meaning of `strict` ("each row must match") - confirm the intent.
    const shouldCheckLength = opts.skipLines ? opts.skipLines > (index - 1) : true;

    if (shouldCheckLength && opts.strict && row.length !== dataHeader.length) {
        const message = `Row length does not match headers(in file row number ${index}).`;
        logError('e', message);
        dispatch('error', throwError(message));
        return true;
    }

    // Size check, measured in UTF-8 bytes of the row content (delimiters excluded).
    // A UTF-16 code unit never encodes to more than 3 UTF-8 bytes, so rows whose
    // length * 3 is within the limit cannot exceed it and are not encoded at all.
    const content = row.join('');
    if (content.length * 3 > opts.maxRowBytes &&
        new TextEncoder().encode(content).length > opts.maxRowBytes) {
        const message = `Maximum row size has been exceeded(in file row number ${index}).`;
        logError('e', message);
        dispatch('error', throwError(message));
        return true;
    }

    return false;
};
233+
127234
/**
128235
* @desc Parse any 'true' or 'false' string in a row to boolean
129236
* @param {Array} row
@@ -142,7 +249,7 @@ const ProcessCSV = function (options) {
142249
}
143250

144251
/**
145-
* @desc Parse any numbers in a row to numbers(float)
252+
* @desc Parse any string numbers in a row to numbers(float)
146253
* @param {Array} row
147254
* @returns {Array}
148255
*/
@@ -176,36 +283,45 @@ const ProcessCSV = function (options) {
176283
const errorHandler = function (event) {
    // Error callback for the file reader: `event.target.error` is the error it raised.
    // Only a "NotReadableError" is reported (logged once and dispatched once as an
    // 'error' event); in that case `_this` is returned. Any other error name falls
    // through and the function returns undefined.
    if (event.target.error.name === "NotReadableError") {
        const message = 'Cannot read file!';
        logError('e', message);
        dispatch('error', throwError(message));

        return _this;
    }
};
184291

185292
/**
186293
* @desc logs an error to the console
187-
* @param {String} type - e = error | w = warning
188-
* @param {String} message
294+
* @param {string} type - e = error | w = warning
295+
* @param {string} message
189296
*/
190297
const logError = function (type, message) {
    // Console logging is opt-in: nothing is printed unless opts.errorLog is truthy.
    if (!opts.errorLog) return;

    // "e" goes to console.error; any other type is printed as a warning.
    if (type === "e") {
        console.error(message);
    } else {
        console.warn(message);
    }
};
195302

196303
/**
197-
* @desc Handles a specific file => this is the starting point of the ProcessCSV
304+
* @desc Returns a new error instance
305+
* @param {string} message
306+
* @returns {Object}
307+
*/
308+
const throwError = (message) => {
    // Despite the name, nothing is thrown here: an Error carrying `message` is
    // built and handed back to the caller.
    const error = new Error(message);
    return error;
};
311+
312+
/**
313+
* @desc Handles a specific file
198314
* @param {Object} file
199315
*/
200-
this.processFile = function (file) {
316+
this.process = function (file) {
201317
if (file)
202318
if (window.FileReader) {
203319
readAsText(file);
204320

205321
return _this;
206322
} else {
207323
logError('e', 'FileReader are not supported in this browser, please switch to a different browser.');
208-
dispatch('error', new Error('FileReader are not supported in this browser, please switch to a different browser.'));
324+
dispatch('error', throwError('FileReader are not supported in this browser, please switch to a different browser.'));
209325

210326
return _this;
211327
}
@@ -237,23 +353,32 @@ const ProcessCSV = function (options) {
237353
* @desc Resume when ready to receive and process more rows.
238354
*/
239355
this.resume = function () {
    // The 'resume-row' event is dispatched exactly once, from a zero-delay timer
    // callback, i.e. after the caller's current synchronous code has finished rather
    // than in the middle of it.
    setTimeout(() => dispatch('resume-row', 'Receiving more rows...'), 0);
};
242358

359+
/**
360+
* @desc Ends processing
361+
*/
362+
this.end = () => {
    // Raises the `ended` flag; `dispatch` checks it and stops invoking callbacks
    // once it is set.
    ended = true;
};
365+
243366
/**
244367
* @desc Dispatch a custom event
245-
* @param {String} name - event name
368+
* @param {string} name - event name
246369
* @param {*} event - contains the data passed to the event
247370
*/
248371
const dispatch = function (name, event) {
    // Once processing has been ended, every event is dropped.
    if (ended) return;

    // Listeners are looked up on the instance under the event name; when present,
    // each one is invoked in order with `event` as its only argument.
    const callbacks = _this[name];
    if (callbacks) {
        callbacks.forEach((callback) => callback(event));
    }
};
252377

253378
/**
254379
* @desc Listen to a specific event
255-
* @param {String} name - event name
256-
* @param {Function} callback - callback function
380+
* @param {string} name - event name
381+
* @param {function} callback - callback function
257382
*/
258383
this.on = function (name, callback) {
259384
let callbacks = this[name];

0 commit comments

Comments
 (0)