feat: support for multiple encodings in CSV files (#5756)

This commit is contained in:
Ariel Leyva
2026-02-14 01:37:28 -05:00
committed by GitHub
parent 88b97def9e
commit f67bccf8c5
11 changed files with 276 additions and 321 deletions

View File

@@ -1,64 +0,0 @@
/**
 * Result of parsing a CSV document: the header row and the remaining rows.
 */
export interface CsvData {
  /** Field values of the first non-blank record. */
  headers: Array<string>;
  /** Field values of every subsequent record, in document order. */
  rows: Array<Array<string>>;
}
/**
 * Parse CSV content into headers and rows.
 *
 * The input is scanned in a single pass so that quoting is honoured across
 * line boundaries:
 * - a double quote toggles "quoted" mode; `""` inside quotes is an escaped quote;
 * - separators and line breaks inside quotes are kept as part of the field;
 * - `\n` or `\r\n` outside quotes ends the current record;
 * - records that contain only whitespace are skipped.
 *
 * Note: an unterminated quote extends the field to the end of the input.
 *
 * @param content - Raw CSV text.
 * @param columnSeparator - Single characters that each act as a field separator.
 * @returns The first record as `headers` and the rest as `rows`; both are
 *          empty when the input has no non-blank record.
 */
export function parseCSV(
  content: string,
  columnSeparator: Array<string>
): CsvData {
  if (!content || content.trim().length === 0) {
    return { headers: [], rows: [] };
  }

  const records: string[][] = [];
  let row: string[] = [];
  let currentField = "";
  let inQuotes = false;
  // True once the current record has seen any non-whitespace character;
  // used to drop blank lines.
  let hasContent = false;

  const endRecord = (): void => {
    if (hasContent) {
      row.push(currentField);
      records.push(row);
    }
    row = [];
    currentField = "";
    hasContent = false;
  };

  for (let i = 0; i < content.length; i++) {
    const char = content.charAt(i);
    const nextChar = content.charAt(i + 1);

    // A line break only terminates the record when it is outside quotes.
    if (!inQuotes && (char === "\n" || (char === "\r" && nextChar === "\n"))) {
      if (char === "\r") {
        i++; // Consume the "\n" of a "\r\n" pair
      }
      endRecord();
      continue;
    }

    if (!hasContent && char.trim().length > 0) {
      hasContent = true;
    }

    if (char === '"') {
      if (inQuotes && nextChar === '"') {
        // Escaped quote
        currentField += '"';
        i++; // Skip next quote
      } else {
        // Toggle quote state
        inQuotes = !inQuotes;
      }
    } else if (!inQuotes && columnSeparator.includes(char)) {
      // Field separator
      row.push(currentField);
      currentField = "";
    } else {
      currentField += char;
    }
  }
  // Flush the last record when the input does not end with a line break.
  endRecord();

  // First record is headers
  const [headers, ...rows] = records;
  if (headers === undefined) {
    return { headers: [], rows: [] };
  }
  return { headers, rows };
}

View File

@@ -0,0 +1,95 @@
/**
 * Character-encoding labels offered for decoding file content.
 *
 * NOTE(review): these look like the canonical encoding names from the WHATWG
 * Encoding Standard (which is why "iso-8859-1" is absent: it is a label of
 * "windows-1252" there) — confirm before adding or removing entries.
 */
export const availableEncodings = [
  // Unicode, 8-bit
  "utf-8",

  // Legacy single-byte: DOS and ISO-8859 family
  "ibm866",
  "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5",
  "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i",
  "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15",
  "iso-8859-16",

  // Legacy single-byte: KOI8 and classic Mac
  "koi8-r", "koi8-u",
  "macintosh",

  // Legacy single-byte: Windows code pages
  "windows-874",
  "windows-1250", "windows-1251", "windows-1252",
  "windows-1253", "windows-1254", "windows-1255",
  "windows-1256", "windows-1257", "windows-1258",
  "x-mac-cyrillic",

  // Legacy multi-byte: Chinese, Japanese, Korean
  "gbk", "gb18030", "big5",
  "euc-jp", "iso-2022-jp", "shift_jis",
  "euc-kr",

  // Unicode, 16-bit
  "utf-16be", "utf-16le",
];
/**
 * Decode raw bytes into a string using the given character encoding.
 *
 * @param content - The bytes to decode.
 * @param encoding - Encoding label handed to the `TextDecoder` constructor.
 * @returns The decoded text.
 * @throws RangeError when `TextDecoder` does not recognise `encoding`.
 */
export function decode(content: ArrayBuffer, encoding: string): string {
  return new TextDecoder(encoding).decode(content);
}
/**
 * Tell whether the resource at `url` should be handled as re-decodable text.
 *
 * Returns `true` only when `TextDecoder` exists in the current runtime and
 * `url` ends with one of the supported extensions (currently just ".csv").
 * The extension comparison is case-insensitive, so "REPORT.CSV" qualifies.
 *
 * @param url - Path or URL of the resource.
 * @returns Whether the response body can be decoded with a chosen encoding.
 */
export function isEncodableResponse(url: string): boolean {
  // Without TextDecoder there is no way to decode the raw bytes.
  if (typeof TextDecoder === "undefined") {
    return false;
  }

  const extensions = [".csv"];
  // Extensions are listed in lower case; normalise the URL to match them.
  const normalizedUrl = url.toLowerCase();
  return extensions.some((extension) => normalizedUrl.endsWith(extension));
}
/**
 * Build a `Resource` for a file from a raw HTTP response body.
 *
 * The body is read as bytes and kept in `rawContent`; `content` holds those
 * bytes decoded as UTF-8. Directory-related fields are filled with empty
 * values, `modified` is set to the current time, and `url` is `path`
 * prefixed with "/files".
 *
 * @param res - Response whose body is the file content.
 * @param url - Path of the file; also used to derive `name` and `extension`.
 * @returns A text-type, non-directory `Resource` describing the file.
 */
export async function makeRawResource(
  res: Response,
  url: string
): Promise<Resource> {
  const rawContent = await res.arrayBuffer();

  // Everything after the last "/" (or the whole string when there is none).
  const name = url.substring(url.lastIndexOf("/") + 1);
  const extension = getExtension(url);
  const modified = new Date().toISOString();
  const content = decode(rawContent, "utf-8");

  const resource: Resource = {
    items: [],
    numDirs: 0,
    numFiles: 0,
    sorting: {} as Sorting,
    index: 0,
    extension,
    isDir: false,
    isSymlink: false,
    path: url,
    size: rawContent.byteLength,
    modified,
    name,
    type: "text",
    mode: 0,
    url: `/files${url}`,
    rawContent,
    content,
  };
  return resource;
}
/**
 * Return the extension of the file named by `url`, including the leading dot.
 *
 * Only the last path segment (the text after the final "/") is inspected, so
 * a dot in a directory name is never mistaken for an extension.
 *
 * @param url - Path or URL of the file.
 * @returns The text from the last "." of the file name to its end, or "" when
 *          the file name contains no dot.
 */
function getExtension(url: string): string {
  // lastIndexOf returns -1 when there is no "/", which makes this the whole string.
  const fileName = url.substring(url.lastIndexOf("/") + 1);
  const lastDotIndex = fileName.lastIndexOf(".");
  if (lastDotIndex === -1) {
    return "";
  }
  return fileName.substring(lastDotIndex);
}