You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
3.0 KiB
136 lines
3.0 KiB
import { Stream } from "./stream.js"; |
|
import { readFile } from "fs/promises"; |
|
|
|
/**
 * Settings accepted by the `CSVParser` constructor.
 *
 * @typeParam T - Type of the `headers` flag; `CSVParser` uses it to pick the
 *   shape of the rows it returns.
 */
type Options<T> = {
  /** Characters that may open a quoted value. */
  quotes: string[];

  /**
   * Character that separates fields on a line.
   * The spelling is part of the option name callers already use.
   */
  delimeter: string;

  /** Whether the first parsed line is treated as a header row. */
  headers: T;
};
|
|
|
/**
 * Shape of the rows produced by `CSVParser`: one object per line when headers
 * are enabled, otherwise one array of field strings per line.
 */
type ParsedRows<T extends boolean> = T extends true
  ? Array<{ [key: string]: string }>
  : string[][];

/**
 * Small character-stream based CSV parser.
 *
 * @typeParam T - `true` when the parser was configured with `headers: true`;
 *   selects the row shape returned by `parse` / `parseFile`.
 */
export class CSVParser<T extends true | false = false> {
  /** Characters that may open a quoted value; the same character closes it. */
  quotes: string[] = ['"', "'"];

  /**
   * Character that separates fields on a line.
   * The spelling is kept because it is the public option/property name.
   */
  delimeter: string = ",";

  /** When true, the first line supplies the keys for every following line. */
  headers: boolean = false;

  /**
   * @param options - Overrides for the defaults above. Entries whose value is
   *   `undefined` are ignored so they cannot erase a default.
   */
  constructor(options: Partial<Options<T>> = {}) {
    const provided = Object.fromEntries(
      Object.entries(options).filter(([, value]) => value !== undefined)
    );
    Object.assign(this, provided);
  }

  /**
   * Reads a file and parses its contents as CSV.
   *
   * @param filename - Path handed to `readFile`.
   * @param encoding - Text encoding used to decode the file.
   * @returns The parsed rows.
   */
  async parseFile(
    filename: string,
    encoding: BufferEncoding = "utf-8"
  ): Promise<ParsedRows<T>> {
    const data = await readFile(filename, { encoding });
    return this.parse(data);
  }

  /**
   * Parses CSV text.
   *
   * @param str - The CSV document.
   * @returns One entry per line: an object keyed by header when `headers` is
   *   enabled, otherwise an array of field strings.
   */
  parse(str: string): ParsedRows<T> {
    return this.parseStream(new Stream(str));
  }

  /** Parses every line of the stream, consuming the line terminators between them. */
  private parseStream(stream: Stream): ParsedRows<T> {
    const rows: Array<string[] | { [key: string]: string }> = [];
    let headerRow: string[] | undefined;

    while (!stream.eof) {
      const fields = this.parseLine(stream);

      if (!this.headers) {
        rows.push(fields);
      } else if (headerRow === undefined) {
        // The first line only provides the keys; it is not emitted as a row.
        headerRow = fields;
      } else {
        rows.push(this.toRecord(headerRow, fields));
      }

      if (stream.eof) break;

      // parseLine stops in front of the line terminator: accept \r, \r\n or \n.
      const next = stream.peek();
      if (next === "\r") {
        stream.read();
        if (!stream.eof && stream.peek() === "\n") {
          stream.read();
        }
      } else if (next === "\n") {
        stream.read();
      }
    }

    // The row shape depends on the runtime `headers` flag, which the compiler
    // cannot connect to T, hence the assertion.
    return rows as ParsedRows<T>;
  }

  /**
   * Pairs each field with its header. Fields beyond the last header are keyed
   * `column_<n>` (1-based position).
   */
  private toRecord(
    headers: string[],
    fields: string[]
  ): { [key: string]: string } {
    // Object.fromEntries defines own properties, so a header such as
    // "__proto__" is stored like any other key.
    return Object.fromEntries(
      fields.map((value, index): [string, string] => [
        headers[index] ?? `column_${index + 1}`,
        value,
      ])
    );
  }

  /** Parses delimiter-separated values up to (not including) the line terminator. */
  private parseLine(stream: Stream): string[] {
    const values: string[] = [];

    while (!stream.eof) {
      values.push(this.parseValue(stream));

      if (stream.eof) break;

      const next = stream.peek();

      if (next === this.delimeter) {
        stream.read();
        if (stream.eof) {
          // A delimiter right before end of input is followed by an empty
          // field, exactly as when it is followed by a line break.
          values.push("");
        }
      } else if (next === "\r" || next === "\n") {
        break;
      } else {
        stream.panic(`Unexpected character: "${next}"`);
      }
    }

    return values;
  }

  /** Parses a single field, quoted or unquoted. */
  private parseValue(stream: Stream): string {
    const spaces = stream.readSpaces();

    if (this.quotes.includes(stream.peek())) {
      const quoted = this.parseQuotedString(stream);
      // Read and discard trailing spaces
      stream.readSpaces();
      return quoted;
    }

    // If a string is not quoted, spaces are assumed to be part of the value
    return spaces + stream.readUntil("\r", "\n", this.delimeter);
  }

  /**
   * Parses a quoted field. The character at the current position is taken as
   * the quote; a doubled quote inside the field yields one literal quote.
   */
  private parseQuotedString(stream: Stream): string {
    const quote = stream.read();
    let text = "";

    while (!stream.eof) {
      text += stream.readUntil(quote);
      // Step over the quote that stopped readUntil.
      stream.read();

      if (stream.peek() !== quote) break;

      // Doubled quote: keep one and continue reading the field.
      stream.read();
      text += quote;
    }

    return text;
  }
}
|
|
|