66 */
77
/**
 * Matches the textual forms that are accepted as floating point numbers:
 *  - an optionally signed integer part with an optional fraction and exponent (`12`, `-1.5`, `1e-5`),
 *  - an optionally signed number that starts at the decimal point (`.5`, `-.5e+3`),
 *  - the literals `Infinity` and `NaN`, optionally signed.
 * The exponent accepts `e` or `E` followed by an optional sign and at least one digit.
 */
const FLOAT_REGEXP = /^[-+]?[0-9]+(?:\.[0-9]*)?(?:[eE][-+]?[0-9]+)?$|^[-+]?(?:[0-9]+)?\.[0-9]+(?:[eE][-+]?[0-9]+)?$|^[-+]?Infinity$|^[-+]?NaN$/;

// Default value of the `maxRowBytes` option.
// NOTE(review): the original comment called this "10 peta byte"; 8e16 is 10 petabytes when counted
// in bits, but 80 petabytes when counted in bytes - confirm the intended unit.
const MAX_ROW_BYTES = 8e+16;
910
1011/**
1112 * @param {Object? } options
1213 * @param {string } [options.delimiter=','] - Specifies the CSV delimiter.
14+ * @param {boolean } [options.allowSpecialQuotes=false] - Should quotes be treated as a special character that wraps cells. Does not include the header row.
15+ * @param {string } [options.quote='"'] - If allowSpecialQuotes is true, this will specify the quote character.
16+ * @param {boolean | string } [options.skipComments=false] - If true, lines which begin with # will be skipped. To use
17+ * a custom character, pass it as a string.
18+ * @param {Number } [options.skipLines=0] - Specifies the number of lines at the beginning of the file to skip over.
1319 * @param {boolean } [options.skipEmptyLines=false] - Should empty lines be automatically skipped?
1420 * @param {boolean } [options.parseNumbers=false] - Automatically parse numbers (with a . as the decimal separator).
1521 * @param {boolean } [options.parseBooleans=false] - Automatically parse booleans (Auto conversion to lowercase `true` and `false`).
16- * @param {boolean } [options.ltrim=false] - Automatically trim first column .
17- * @param {boolean } [options.rtrim=false] - Automatically trim last column .
18- * @param {boolean } [options.trim=false] - If true, then both 'ltrim' and 'rtrim' are set to true .
19- * @param {boolean } [options.skipHeader=false ] - If true, then skip the first header row.
22+ * @param {boolean } [options.ltrim=false] - Automatically left-trim columns.
23+ * @param {boolean } [options.rtrim=false] - Automatically right trim columns .
24+ * @param {boolean } [options.trim=false] - If true, trim all columns .
25+ * @param {Number } [options.maxRowBytes=MAX_ROW_BYTES ] - Specifies the maximum number of bytes per row; the default value is MAX_ROW_BYTES (8e16), noted in the source as 10 petabytes.
2026 * @param {boolean } [options.rowAsObject=false] - If true, each row will be converted automatically to an object based
21- * on the header. This implied `skipHeader=true`.
27+ * on the header. This implies `skipLines=1 & strict=true`.
28+ * @param {boolean } [options.strict=false] - If true, the number of columns in each row must match the number of headers.
2229 * @param {boolean } [options.errorLog=false] - If true, errors will be logged to the console whether the `error` event is used or not.
2330 * @returns {ProcessCSV }
2431 * @constructor
@@ -29,23 +36,32 @@ const ProcessCSV = function (options) {
// Read by the row loop: while true, processing waits before handling the next row.
let pause = false;

// Effective settings. Every option falls back to its default when the caller's value is falsy.
let opts = {
    delimiter: options.delimiter || ',',
    allowSpecialQuotes: options.allowSpecialQuotes || false,
    quote: options.quote || '"',
    skipComments: options.skipComments || false,
    skipLines: options.skipLines || 0,
    skipEmptyLines: options.skipEmptyLines || false,
    parseNumbers: options.parseNumbers || false,
    parseBooleans: options.parseBooleans || false,
    ltrim: options.ltrim || false,
    rtrim: options.rtrim || false,
    trim: options.trim || false,
    maxRowBytes: options.maxRowBytes || MAX_ROW_BYTES,
    rowAsObject: options.rowAsObject || false,
    strict: options.strict || false,
    errorLog: options.errorLog || false,
};

// `rowAsObject` forces strict mode and, when no lines were asked to be skipped,
// skips exactly one line (the header).
if (opts.rowAsObject) {
    opts.strict = true;

    if (!opts.skipLines) {
        opts.skipLines = 1;
    }
}

// Lines of the file that have not been turned into rows yet.
let allDataLines = [];
// Columns of the header line.
let dataHeader = [];
// Once true, `dispatch` stops notifying listeners.
let ended = false;
4965
5066 /**
5167 * @desc Reads the file as a text
@@ -72,24 +88,50 @@ const ProcessCSV = function (options) {
7288 } ;
7389
/**
 * @desc Removes one pair of wrapping quotes from every header column.
 * A column counts as wrapped when, after trimming surrounding whitespace, it starts and ends
 * with the same quote character (`"` or `'`) and is at least two characters long.
 * Columns that are not wrapped are left exactly as they were (they are not trimmed).
 * The array is updated in place and also returned.
 * @param {Array} header - header columns (strings)
 * @returns {Array} the same array instance
 */
const handleHeader = function (header) {
    const QUOTE_CHARS = ['"', '\''];

    for (let i = 0; i < header.length; i++) {
        const col = header[i].trim();
        const first = col[0];
        // The length guard keeps a lone quote character from matching itself as both opener and closer.
        const isWrapped = col.length >= 2 && QUOTE_CHARS.includes(first) && col[col.length - 1] === first;

        if (isWrapped) header[i] = col.slice(1, -1);
    }

    return header;
};
104+
/**
 * @desc Processes the data: splits the text into lines, reads the header, skips the requested
 * number of leading lines, then hands the remaining lines to `processRows`.
 *
 * The first line is always consumed as the header. `opts.skipLines` counts that line, so a value
 * of `n` greater than 1 additionally drops the `n - 1` lines that follow it.
 * The `header` event is dispatched only when neither `rowAsObject` nor `skipLines` is set.
 * When the text is empty, or its first line is empty, an `error` event is dispatched instead.
 * @param {string} csv - full text of the file
 * @returns {Promise} resolves to the ProcessCSV instance when there was nothing to process,
 * otherwise to undefined once the `finish` event has been dispatched
 */
const processData = async function (csv) {
    allDataLines = csv.split(/\r\n|\n/);

    if (!allDataLines[0]) {
        const message = 'No data to process, file is empty!';
        logError('w', message);
        dispatch('error', throwError(message));

        return _this;
    }

    dataHeader = allDataLines.shift().split(opts.delimiter);

    // The header line already accounts for the first skipped line.
    for (let skipped = 1; skipped < opts.skipLines; skipped++) {
        allDataLines.shift();
    }

    if (dataHeader.length) {
        dataHeader = handleHeader(dataHeader);
        if (!opts.rowAsObject && !opts.skipLines) dispatch('header', dataHeader);
    }

    await processRows();
    dispatch('finish', null);
};
@@ -98,18 +140,27 @@ const ProcessCSV = function (options) {
98140 * @desc Format data rows according to the given options
99141 * @returns {Array }
100142 */
101- const postProcessColumns = async function ( ) {
143+ const processRows = async function ( ) {
102144 let hold = ! opts . skipEmptyLines ? allDataLines : allDataLines . filter ( k => k != null && k !== '' ) ;
103145
104146 for ( let j = 0 ; j < hold . length ; j ++ ) {
105- // if paused stop the loop untill the user resumes
106147 if ( j > 0 && pause )
107148 await pauseLoop ( ) ;
108149
109150 let row = hold [ j ] . split ( opts . delimiter ) ;
110151
111- if ( opts . ltrim ) row [ 0 ] = row [ 0 ] . trim ( )
112- if ( opts . rtrim ) row [ row . length - 1 ] = row [ row . length - 1 ] . trim ( ) ;
152+ if ( opts . skipComments ) {
153+ const char = typeof opts . skipComments === 'string' ? opts . skipComments : '#' ;
154+ if ( row [ 0 ] [ 0 ] === char ) continue ;
155+ }
156+
157+ if ( opts . allowSpecialQuotes ) row = hanldeSpecialQuotes ( row ) ;
158+
159+ if ( handleRowErrors ( row , j + 1 ) ) return ;
160+
161+ if ( opts . ltrim ) row [ 0 ] = row [ 0 ] . replace ( / ^ \s + / , "" ) ;
162+ if ( opts . rtrim ) row [ row . length - 1 ] = row [ row . length - 1 ] . replace ( / $ \s + / , "" ) ;
163+ if ( opts . trim ) row = row . map ( col => col . trim ( ) ) ;
113164
114165 if ( opts . parseNumbers && row [ 0 ] !== '' ) row = parseRowNumbers ( row ) ;
115166 if ( opts . parseBooleans && row [ 0 ] !== '' ) row = parseRowBooleans ( row ) ;
@@ -124,6 +175,62 @@ const ProcessCSV = function (options) {
124175 return _this ;
125176 }
126177
/**
 * @desc Handles quoted cells in a row. A quoted cell that contained the delimiter was cut into
 * several pieces when the line was split; this merges those pieces back into one cell.
 *
 *  - A piece that starts with the quote character but does not end with it opens a quoted cell.
 *  - While a quoted cell is open, a piece that ends with the quote character (and does not start
 *    with it) closes the cell; any other piece is a middle part of it.
 *  - Merged pieces are joined with `opts.delimiter`, the text the split removed.
 *  - The opening and closing quote characters are dropped from the merged cell.
 *  - Every other piece, including one that both starts and ends with the quote character,
 *    is passed through unchanged.
 * @param {Array} row - cells produced by splitting a line on the delimiter
 * @returns {Array} a new array with the quoted pieces merged
 */
const hanldeSpecialQuotes = function (row) {
    const quote = opts.quote;
    const result = [];
    let isOpen = false; // true while inside a quoted cell that has not been closed yet

    for (let i = 0; i < row.length; i++) {
        const cell = row[i];
        const startsWithQuote = cell[0] === quote;
        const endsWithQuote = cell[cell.length - 1] === quote;

        if (startsWithQuote && !endsWithQuote) {
            result.push(cell.slice(1));
            isOpen = true;
        } else if (isOpen && !startsWithQuote && endsWithQuote) {
            result[result.length - 1] += opts.delimiter + cell.slice(0, -1);
            isOpen = false;
        } else if (isOpen) {
            result[result.length - 1] += opts.delimiter + cell;
        } else {
            result.push(cell);
        }
    }

    return result;
};
205+
/**
 * @desc Handles any errors in a row. Two checks are made:
 *  1. In strict mode, the number of cells must equal the number of header columns.
 *  2. The UTF-8 size of the row's cells (delimiters excluded) must not exceed `opts.maxRowBytes`.
 * The first failing check is logged through `logError` and dispatched as an `error` event.
 * The size check only runs when the column check did not report an error.
 * @param {Array} row - cells of the row (strings)
 * @param {number} index - 1-based row number used in the error messages
 * @returns {boolean} true when an error was reported, false otherwise
 */
const handleRowErrors = function (row, index) {
    // NOTE(review): preserved from the original - when `skipLines` is set, the column count is
    // only verified for the first `skipLines` rows; without it every row is verified.
    // Confirm whether strict mode is meant to cover every row.
    const checkColumns = opts.skipLines ? opts.skipLines > (index - 1) : true;

    if (checkColumns && opts.strict && row.length !== dataHeader.length) {
        const message = `Row length does not match headers(in file row number ${index}).`;
        logError('e', message);
        dispatch('error', throwError(message));
        return true;
    }

    // Measure real bytes rather than UTF-16 code units, so multi-byte characters count fully.
    const rowBytes = new TextEncoder().encode(row.join('')).length;
    if (opts.maxRowBytes < rowBytes) {
        const message = `Maximum row size has been exceeded(in file row number ${index}).`;
        logError('e', message);
        dispatch('error', throwError(message));
        return true;
    }

    return false;
};
233+
127234 /**
128235 * @desc Parse any 'true' or 'false' string in a row to boolean
129236 * @param {Array } row
@@ -142,7 +249,7 @@ const ProcessCSV = function (options) {
142249 }
143250
144251 /**
145- * @desc Parse any numbers in a row to numbers(float)
252+ * @desc Parse any string numbers in a row to numbers(float)
146253 * @param {Array } row
147254 * @returns {Array }
148255 */
@@ -176,36 +283,45 @@ const ProcessCSV = function (options) {
// Error callback for the file read: reports a `NotReadableError` through the log and the
// `error` event. Any other error name is ignored and the function returns undefined.
const errorHandler = function (event) {
    if (event.target.error.name === "NotReadableError") {
        const message = 'Cannot read file!';
        logError('e', message);
        dispatch('error', throwError(message));

        return _this;
    }
};
184291
/**
 * @desc Logs a message to the console, but only when the `errorLog` option is enabled.
 * @param {string} type - "e" logs with console.error; any other value logs with console.warn
 * @param {string} message
 */
const logError = function (type, message) {
    if (!opts.errorLog) return;

    if (type === "e") {
        console.error(message);
    } else {
        console.warn(message);
    }
};
195302
/**
 * @desc Returns a new error instance. Despite the name, nothing is thrown:
 * the error is created so the caller can pass it on.
 * @param {string} message
 * @returns {Error}
 */
const throwError = function (message) {
    return new Error(message);
};
311+
312+ /**
313+ * @desc Handles a specific file
198314 * @param {Object } file
199315 */
200- this . processFile = function ( file ) {
316+ this . process = function ( file ) {
201317 if ( file )
202318 if ( window . FileReader ) {
203319 readAsText ( file ) ;
204320
205321 return _this ;
206322 } else {
207323 logError ( 'e' , 'FileReader are not supported in this browser, please switch to a different browser.' ) ;
208- dispatch ( 'error' , new Error ( 'FileReader are not supported in this browser, please switch to a different browser.' ) ) ;
324+ dispatch ( 'error' , throwError ( 'FileReader are not supported in this browser, please switch to a different browser.' ) ) ;
209325
210326 return _this ;
211327 }
@@ -237,23 +353,32 @@ const ProcessCSV = function (options) {
237353 * @desc Resume when ready to receive and process more rows.
238354 */
this.resume = function () {
    // Zero-delay timer: the 'resume-row' event fires after the current call stack has finished.
    setTimeout(() => dispatch('resume-row', 'Receiving more rows...'), 0);
};
242358
/**
 * @desc Ends processing by setting the `ended` flag, which `dispatch` checks before
 * notifying listeners.
 */
this.end = function () {
    ended = true;
};
365+
/**
 * @desc Dispatch a custom event: calls, in order, every callback stored on the instance under
 * the event name. Does nothing once processing has been ended, or when no callbacks are stored.
 * @param {string} name - event name
 * @param {*} event - contains the data passed to the event
 */
const dispatch = function (name, event) {
    if (ended) return;

    const callbacks = _this[name];
    if (callbacks) callbacks.forEach(callback => callback(event));
};
252377
253378 /**
254379 * @desc Listen to a specific event
255- * @param {String } name - event name
256- * @param {Function } callback - callback function
380+ * @param {string } name - event name
381+ * @param {function } callback - callback function
257382 */
258383 this . on = function ( name , callback ) {
259384 let callbacks = this [ name ] ;
0 commit comments