mafintosh · czaefferer · Feb 18, 2019 · Feb 18, 2019 · shellscape · Jul 5, 2019
diff --git a/index.js b/index.js
@@ -153,7 +153,7 @@ class CsvParser extends Transform {
     }
 
     for (let i = start; i < end; i++) {
-      const isStartingQuote = !isQuoted && buf[i] === this.quote
+      const isStartingQuote = !isQuoted && buf[i] === this.quote && (i === start || buf[i - 1] === comma)
       const isEndingQuote = isQuoted && buf[i] === this.quote && i + 1 <= end && buf[i + 1] === comma
       const isEscape = isQuoted && buf[i] === this.escape && i + 1 < end && buf[i + 1] === this.quote
 
@@ -222,8 +222,10 @@ class CsvParser extends Transform {
     const bufLen = buf.length
 
     for (let i = start; i < bufLen; i++) {
+      const prevChr = i > 0 ? buf[i - 1] : null
       const chr = buf[i]
       const nextChr = i + 1 < bufLen ? buf[i + 1] : null
+      const nextNextChr = i + 2 < bufLen ? buf[i + 2] : null
 
       this._currentRowBytes++
       if (this._currentRowBytes > this.maxRowBytes) {
@@ -237,10 +239,19 @@ class CsvParser extends Transform {
         if (this._escaped) {
           this._escaped = false
           // non-escaped quote (quoting the cell)
+          continue
         } else {
-          this._quoted = !this._quoted
+          // not in escape- or quote-mode, currently at start or previous char was separator or linebreak -> enter quote mode
+          if (!this._quoted && (prevChr === null || prevChr === this.separator || prevChr === nl || prevChr === this.newline)) {
+            this._quoted = true
+            continue
+          }
+          // in quote-mode but not escape-mode, next char is separator or linebreak -> leave quote mode
+          if (this._quoted && (nextChr === this.separator || (this.customNewline ? nextChr === this.newline : nextChr === nl || (nextChr === cr && nextNextChr === nl)))) {
+            this._quoted = false
+            continue
+          }
         }
-        continue
       }
 
       if (!this._quoted) {

diff --git a/test/data/unescaped_quotes.csv b/test/data/unescaped_quotes.csv
@@ -0,0 +1,11 @@
+a,b,c
+jo"e,sam,jan
+"jo"e",sam,jan
+joe,sa"m,jan
+joe,"sa"m",jan
+joe,sam,ja"n
+joe,sam,"ja"n"
+joe,"sa
+"m",jan
+joe,crlf,"jan"
+joe,sam,"ja"n"
diff --git a/test/maxRowBytes.test.js b/test/maxRowBytes.test.js
@@ -5,9 +5,9 @@ const { collect } = require('./helpers/helper')
 test.cb('optional row size limit', (t) => {
   const verify = (err, lines) => {
     t.is(err.message, 'Row exceeds the maximum size', 'strict row size')
-    t.is(lines.length, 4576, '4576 rows before error')
+    t.is(lines.length, 13, '13 rows before error')
     t.end()
   }
 
-  collect('max_row_size.csv', { maxRowBytes: 200 }, verify)
+  collect('max_row_size.csv', { maxRowBytes: 170 }, verify)
 })
diff --git a/test/snapshots/test.js.md b/test/snapshots/test.js.md
@@ -504,4 +504,79 @@ Generated by [AVA](https://ava.li).
 
 > Snapshot 1
 
-    []
+    [
+
+## cell with unescaped quotes
+
+> first row
+
+    Row {
+      a: 'jo"e',
+      b: 'sam',
+      c: 'jan',
+    }
+
+> second row
+
+    Row {
+      a: 'jo"e',
+      b: 'sam',
+      c: 'jan',
+    }
+
+> third row
+
+    Row {
+      a: 'joe',
+      b: 'sa"m',
+      c: 'jan',
+    }
+
+> fourth row
+
+    Row {
+      a: 'joe',
+      b: 'sa"m',
+      c: 'jan',
+    }
+
+> fifth row
+
+    Row {
+      a: 'joe',
+      b: 'sam',
+      c: 'ja"n',
+    }
+
+> sixth row
+
+    Row {
+      a: 'joe',
+      b: 'sam',
+      c: 'ja"n',
+    }
+
+> seventh row
+
+    Row {
+      a: 'joe',
+      b: `sa␊
+      "m`,
+      c: 'jan',
+    }
+
+> eighth row
+
+    Row {
+      a: 'joe',
+      b: 'crlf',
+      c: 'jan',
+    }
+
+> ninth row
+
+    Row {
+      a: 'joe',
+      b: 'sam',
+      c: 'ja"n',
+    }
diff --git a/test/snapshots/test.js.snap b/test/snapshots/test.js.snap
diff --git a/test/test.js b/test/test.js
@@ -145,6 +145,26 @@ test.cb('cell with newline', (t) => {
   })
 })
 
+test.cb('cell with unescaped quotes', (t) => {
+  const verify = (err, lines) => {
+    // Fixture rows place a stray quote inside the first, second and third cell, both unquoted and quoted.
+    t.false(err, 'no err')
+    t.snapshot(lines[0], 'first row')
+    t.snapshot(lines[1], 'second row')
+    t.snapshot(lines[2], 'third row')
+    t.snapshot(lines[3], 'fourth row')
+    t.snapshot(lines[4], 'fifth row')
+    t.snapshot(lines[5], 'sixth row')
+    t.snapshot(lines[6], 'seventh row') // quoted cell that spans two physical lines of the fixture
+    t.snapshot(lines[7], 'eighth row')
+    t.snapshot(lines[8], 'ninth row')
+    t.is(lines.length, 9, '9 rows') // ten data lines in the fixture, two of which form one multi-line row
+    t.end()
+  }
+
+  collect('unescaped_quotes.csv', verify)
+})
+
 test.cb('cell with escaped quote in quotes', (t) => {
   const headers = bops.from('a\n')
   const cell = bops.from('"ha ""ha"" ha"\n')