You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
137 lines
3.0 KiB
137 lines
3.0 KiB
2 years ago
|
import { Stream } from "./stream.js";
|
||
|
import { readFile } from "fs/promises";
|
||
|
|
||
|
/**
 * Configuration accepted by the `CSVParser` constructor.
 *
 * `T` is the literal type of the `headers` flag, which lets the parser class
 * select its result type from the value passed in.
 */
type Options<T> = {
  /** Characters that may open (and must then close) a quoted value. */
  quotes: string[];
  /**
   * Field separator. The spelling "delimeter" is kept as-is because it is
   * the property name callers already pass.
   */
  delimeter: string;
  /** Whether the first parsed line supplies the keys for the following rows. */
  headers: T;
};
|
||
|
|
||
|
/**
 * Result shape of a parse: one keyed record per data row when headers are
 * enabled, otherwise one `string[]` of field values per row.
 */
type CSVRows<T extends boolean> = T extends true
  ? Array<{ [key: string]: string }>
  : string[][];

/**
 * Small CSV parser built on top of the project's `Stream` reader.
 *
 * The type parameter `T` mirrors the `headers` option: `true` yields an array
 * of records keyed by the first line, `false` (the default) yields an array
 * of string arrays.
 *
 * NOTE(review): the parser relies on `Stream` exposing `eof`, `peek()`,
 * `read()`, `readSpaces()`, `readUntil(...)` and `panic(...)`. `panic` is
 * assumed to throw, and `peek()` is assumed to return a single character —
 * confirm against `./stream.js`.
 */
export class CSVParser<T extends true | false = false> {
  /** Characters recognised as opening/closing quotes around a value. */
  quotes: string[] = ['"', "'"];
  /** Field separator (property name kept misspelled for compatibility). */
  delimeter: string = ",";
  /** When true, the first line is used as keys for every following row. */
  headers: boolean = false;

  /**
   * @param options Any subset of the parser options. Options that are
   *   omitted or explicitly `undefined` keep their default value.
   */
  constructor(options: Partial<Options<T>> = {}) {
    // Assign field by field instead of Object.assign so that an explicit
    // `undefined` does not overwrite a default.
    if (options.quotes !== undefined) this.quotes = options.quotes;
    if (options.delimeter !== undefined) this.delimeter = options.delimeter;
    if (options.headers !== undefined) this.headers = options.headers;
  }

  /**
   * Reads a file and parses its contents.
   *
   * @param filename Path handed to `readFile`.
   * @param encoding Text encoding used to decode the file.
   * @returns The parsed rows, in the same shape as `parse`.
   */
  async parseFile(
    filename: string,
    encoding: BufferEncoding = "utf-8"
  ): Promise<CSVRows<T>> {
    const data = await readFile(filename, { encoding });
    return this.parse(data);
  }

  /**
   * Parses CSV text.
   *
   * @param str The CSV source text.
   * @returns Records keyed by the header line when `headers` is enabled,
   *   otherwise one array of field values per line.
   */
  parse(str: string): CSVRows<T> {
    return this.parseStream(new Stream(str));
  }

  /** Parses every line of the stream and shapes rows per the `headers` flag. */
  private parseStream(stream: Stream): CSVRows<T> {
    const rows: Array<string[] | { [key: string]: string }> = [];
    let headerLine: string[] | undefined;

    while (!stream.eof) {
      const line = this.parseLine(stream);

      if (!this.headers) {
        rows.push(line);
      } else if (headerLine === undefined) {
        // The first line only supplies the keys; it is not emitted as a row.
        headerLine = line;
      } else {
        rows.push(this.toRecord(headerLine, line));
      }

      if (stream.eof) break;

      // Consume exactly one line terminator: "\r\n", a lone "\r", or "\n".
      const next = stream.peek();
      if (next === "\r") {
        stream.read();
        if (stream.peek() === "\n") {
          stream.read();
        }
      } else if (next === "\n") {
        stream.read();
      }
    }

    return rows as CSVRows<T>;
  }

  /**
   * Builds a record from one data row. Values beyond the last header are
   * stored under `column_<n>`, where `n` is the 1-based column position.
   */
  private toRecord(
    headers: string[],
    line: string[]
  ): { [key: string]: string } {
    const record: { [key: string]: string } = {};
    line.forEach((value, index) => {
      const key = headers[index] ?? `column_${index + 1}`;
      record[key] = value;
    });
    return record;
  }

  /**
   * Parses the values of a single line, stopping in front of the line
   * terminator (which is left in the stream) or at the end of input.
   */
  private parseLine(stream: Stream): string[] {
    const values: string[] = [];

    while (!stream.eof) {
      values.push(this.parseValue(stream));

      if (stream.eof) break;

      const next = stream.peek();

      if (next === this.delimeter) {
        stream.read();
        // A delimiter that is the very last character of the input still
        // introduces one more (empty) field.
        if (stream.eof) {
          values.push("");
        }
      } else if (next === "\r" || next === "\n") {
        break;
      } else {
        stream.panic(`Unexpected character: "${next}"`);
      }
    }

    return values;
  }

  /**
   * Parses one field. A field whose first non-space character is a quote is
   * read as a quoted string and surrounding spaces are discarded; otherwise
   * everything up to the next delimiter or line break is taken verbatim.
   */
  private parseValue(stream: Stream): string {
    const leadingSpaces = stream.readSpaces();

    if (this.quotes.includes(stream.peek())) {
      const quoted = this.parseQuotedString(stream);
      // Read and discard trailing spaces
      stream.readSpaces();
      return quoted;
    }

    // If a string is not quoted, spaces are assumed to be part of the value
    return leadingSpaces + stream.readUntil("\r", "\n", this.delimeter);
  }

  /**
   * Reads a quoted string whose opening quote is the next character in the
   * stream. A doubled quote character inside the string yields one literal
   * quote character.
   */
  private parseQuotedString(stream: Stream): string {
    // Whatever quote character opened the string must also close it.
    const quote = stream.read();
    let text = "";

    while (!stream.eof) {
      text += stream.readUntil(quote);
      // Step over the quote character that stopped readUntil.
      stream.read();

      if (stream.peek() !== quote) {
        break;
      }

      // Two quotes in a row: keep one and continue reading the string.
      stream.read();
      text += quote;
    }

    return text;
  }
}
|