Master merge by steveoni · Pull Request #478 · javascriptdata/danfojs · GitHub
[go: up one dir, main page]

Skip to content

Master merge #478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4161345
Update frame.ts
kgeis Jul 18, 2022
507195e
chore(deps): bump terser from 5.10.0 to 5.14.2 in /src/danfojs-base
dependabot[bot] Jul 20, 2022
b71ff69
chore(deps): bump terser from 5.11.0 to 5.14.2 in /src/danfojs-browser
dependabot[bot] Jul 20, 2022
817179b
Merge pull request #469 from kgeis/patch-1
steveoni Jul 29, 2022
9f2d088
chore(deps): bump shell-quote in /src/danfojs-browser
dependabot[bot] Jul 29, 2022
b8892c9
Allow Series.append() to use zero
BowTiedAztec Aug 17, 2022
485ba90
Merge pull request #477 from javascriptdata/dependabot/npm_and_yarn/s…
risenW Aug 19, 2022
dc1e29c
Merge pull request #474 from javascriptdata/dependabot/npm_and_yarn/s…
risenW Aug 19, 2022
756c200
Merge pull request #473 from javascriptdata/dependabot/npm_and_yarn/s…
risenW Aug 19, 2022
aaf6685
Merge pull request #487 from BowTiedAztec/dev
risenW Aug 19, 2022
237f5af
Add error handler for io functions
risenW Sep 23, 2022
13bb9cd
fix typing error
risenW Sep 23, 2022
d44c396
disabling test for online csvs
risenW Sep 23, 2022
edfe669
update stream start index
risenW Sep 25, 2022
7c7afbf
Merge pull request #503 from javascriptdata/chore/add-error-handler-f…
risenW Sep 25, 2022
7bafc9a
chore(deps): bump json-schema and jsprim in /src/danfojs-node
dependabot[bot] Sep 25, 2022
201ddf7
Merge pull request #504 from javascriptdata/dependabot/npm_and_yarn/s…
risenW Sep 25, 2022
e3f3579
add support for excel parsing options arg
risenW Sep 25, 2022
ca671ca
update types
risenW Sep 25, 2022
6d06a5c
Merge pull request #505 from javascriptdata/463-cant-parse-dates-from…
risenW Sep 25, 2022
039e518
Merge branch 'master' of https://github.com/javascriptdata/danfojs
risenW Sep 25, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/danfojs-base/core/frame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2523,7 +2523,7 @@ export default class DataFrame extends NDframe implements DataFrameInterface {
* Objects passed to the function are Series values whose
* index is either the DataFrame’s index (axis=0) or the DataFrame’s columns (axis=1)
* @param callable Function to apply to each column or row.
* @param options.axis 0 or 1. If 0, compute the power column-wise, if 1, row-wise
* @param options.axis 0 or 1. If 0, apply "callable" column-wise, else apply row-wise
*
* @example
* ```
Expand Down
4 changes: 2 additions & 2 deletions src/danfojs-base/core/series.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1855,8 +1855,8 @@ export default class Series extends NDframe implements SeriesInterface {
): Series | void {
const { inplace } = { inplace: false, ...options }

if (!newValue && typeof newValue !== "boolean") {
throw Error("Param Error: newValues cannot be null or undefined");
if (!newValue && typeof newValue !== "boolean" && typeof newValue !== "number") {
throw Error("Param Error: newValue cannot be null or undefined");
}

if (!index) {
Expand Down
4 changes: 2 additions & 2 deletions src/danfojs-base/io/browser/io.csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ const $streamCSV = async (file: string, callback: (df: DataFrame) => void, optio
const frameConfig = options?.frameConfig || {}

return new Promise(resolve => {
let count = -1
let count = 0
Papa.parse(file, {
...options,
dynamicTyping: true,
Expand Down Expand Up @@ -174,4 +174,4 @@ export {
$readCSV,
$streamCSV,
$toCSV,
}
}
22 changes: 16 additions & 6 deletions src/danfojs-base/io/browser/io.excel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ import {
* ```
*/
const $readExcel = async (file: any, options?: ExcelInputOptionsBrowser) => {
const { sheet, method, headers, frameConfig } = { sheet: 0, method: "GET", headers: {}, frameConfig: {}, ...options }
const {
sheet,
method,
headers,
frameConfig,
parsingOptions
} = { sheet: 0, method: "GET", headers: {}, frameConfig: {}, parsingOptions: {}, ...options }

if (typeof file === "string" && file.startsWith("http")) {

Expand All @@ -60,7 +66,7 @@ const $readExcel = async (file: any, options?: ExcelInputOptionsBrowser) => {
}
response.arrayBuffer().then(arrBuf => {
const arrBufInt8 = new Uint8Array(arrBuf);
const workbook = read(arrBufInt8, { type: "array" })
const workbook = read(arrBufInt8, { type: "array", ...parsingOptions });
const worksheet = workbook.Sheets[workbook.SheetNames[sheet]];
const data = utils.sheet_to_json(worksheet);
const df = new DataFrame(data, frameConfig);
Expand All @@ -74,7 +80,7 @@ const $readExcel = async (file: any, options?: ExcelInputOptionsBrowser) => {
} else if (file instanceof File) {
const arrBuf = await file.arrayBuffer()
const arrBufInt8 = new Uint8Array(arrBuf);
const workbook = read(arrBufInt8, { type: "array" })
const workbook = read(arrBufInt8, { type: "array", ...parsingOptions });
const worksheet = workbook.Sheets[workbook.SheetNames[sheet]];
const data = utils.sheet_to_json(worksheet);
const df = new DataFrame(data, frameConfig);
Expand All @@ -101,7 +107,11 @@ const $readExcel = async (file: any, options?: ExcelInputOptionsBrowser) => {
* ```
*/
const $toExcel = (df: NDframe | DataFrame | Series, options?: ExcelOutputOptionsBrowser) => {
let { fileName, sheetName } = { fileName: "./output.xlsx", sheetName: "Sheet1", ...options }
let {
fileName,
sheetName,
writingOptions
} = { fileName: "./output.xlsx", sheetName: "Sheet1", ...options }

if (!(fileName.endsWith(".xlsx"))) {
fileName = fileName + ".xlsx"
Expand All @@ -121,10 +131,10 @@ const $toExcel = (df: NDframe | DataFrame | Series, options?: ExcelOutputOptions
const worksheet = utils.aoa_to_sheet(data);
const wb = utils.book_new();
utils.book_append_sheet(wb, worksheet, sheetName);
writeFile(wb, `${fileName}`)
writeFile(wb, `${fileName}`, writingOptions)
};

export {
$readExcel,
$toExcel
}
}
204 changes: 127 additions & 77 deletions src/danfojs-base/io/node/io.csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis
const frameConfig = options?.frameConfig || {}

if (filePath.startsWith("http") || filePath.startsWith("https")) {
return new Promise(resolve => {
return new Promise((resolve, reject) => {
const optionsWithDefaults = {
header: true,
dynamicTyping: true,
Expand All @@ -60,6 +60,13 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis
}

const dataStream = request.get(filePath);
// reject any non-2xx status codes
dataStream.on('response', (response: any) => {
if (response.statusCode < 200 || response.statusCode >= 300) {
reject(new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`));
}
});

const parseStream: any = Papa.parse(Papa.NODE_STREAM_INPUT, optionsWithDefaults as any);
dataStream.pipe(parseStream);

Expand All @@ -74,17 +81,24 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis
});

} else {
return new Promise(resolve => {
const fileStream = fs.createReadStream(filePath)
Papa.parse(fileStream, {
header: true,
dynamicTyping: true,
...options,
complete: results => {
const df = new DataFrame(results.data, frameConfig);
resolve(df);
return new Promise((resolve, reject) => {
fs.access(filePath, fs.constants.F_OK, (err) => {
if (err) {
reject("ENOENT: no such file or directory");
}
});

const fileStream = fs.createReadStream(filePath)

Papa.parse(fileStream, {
header: true,
dynamicTyping: true,
...options,
complete: results => {
const df = new DataFrame(results.data, frameConfig);
resolve(df);
}
});
})
});
}
};
Expand Down Expand Up @@ -113,9 +127,17 @@ const $streamCSV = async (filePath: string, callback: (df: DataFrame) => void, o
dynamicTyping: true,
...options,
}
return new Promise(resolve => {
let count = -1
return new Promise((resolve, reject) => {
let count = 0
const dataStream = request.get(filePath);

// reject any non-2xx status codes
dataStream.on('response', (response: any) => {
if (response.statusCode < 200 || response.statusCode >= 300) {
reject(new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`));
}
});

const parseStream: any = Papa.parse(Papa.NODE_STREAM_INPUT, optionsWithDefaults);
dataStream.pipe(parseStream);

Expand All @@ -130,19 +152,26 @@ const $streamCSV = async (filePath: string, callback: (df: DataFrame) => void, o

});
} else {
const fileStream = fs.createReadStream(filePath)

return new Promise(resolve => {
let count = -1
Papa.parse(fileStream, {
header: true,
dynamicTyping: true,
...options,
step: results => {
const df = new DataFrame([results.data], { ...frameConfig, index: [count++] });
callback(df);
},
complete: () => resolve(null)
return new Promise((resolve, reject) => {
fs.access(filePath, fs.constants.F_OK, (err) => {
if (err) {
reject("ENOENT: no such file or directory");
}

const fileStream = fs.createReadStream(filePath)

let count = 0
Papa.parse(fileStream, {
header: true,
dynamicTyping: true,
...options,
step: results => {
const df = new DataFrame([results.data], { ...frameConfig, index: [count++] });
callback(df);
},
complete: () => resolve(null)
});
});
});
}
Expand Down Expand Up @@ -228,9 +257,17 @@ const $openCsvInputStream = (filePath: string, options: CsvInputOptionsNode) =>

if (filePath.startsWith("http") || filePath.startsWith("https")) {
const dataStream = request.get(filePath);

// reject any non-2xx status codes
dataStream.on('response', (response: any) => {
if (response.statusCode < 200 || response.statusCode >= 300) {
throw new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`);
}
});

const parseStream: any = Papa.parse(Papa.NODE_STREAM_INPUT, { header, dynamicTyping: true, ...options });
dataStream.pipe(parseStream);
let count = -1
let count = 0

parseStream.on("data", (chunk: any) => {
if (isFirstChunk) {
10000 Expand Down Expand Up @@ -258,37 +295,44 @@ const $openCsvInputStream = (filePath: string, options: CsvInputOptionsNode) =>
return csvInputStream;
} else {
const fileStream = fs.createReadStream(filePath)
let count = -1
Papa.parse(fileStream, {
...{ header, dynamicTyping: true, ...options },
step: results => {
if (isFirstChunk) {
if (header === true) {
ndFrameColumnNames = results.meta.fields || []
} else {
ndFrameColumnNames = results.data

fs.access(filePath, fs.constants.F_OK, (err) => {
if (err) {
throw new Error("ENOENT: no such file or directory");
}

let count = 0
Papa.parse(fileStream, {
...{ header, dynamicTyping: true, ...options },
step: results => {
if (isFirstChunk) {
if (header === true) {
ndFrameColumnNames = results.meta.fields || []
} else {
ndFrameColumnNames = results.data
}
isFirstChunk = false
return
}
isFirstChunk = false
return

const df = new DataFrame([results.data], {
columns: ndFrameColumnNames,
index: [count++]
})

csvInputStream.push(df);
},
complete: (result: any) => {
csvInputStream.push(null);
return null
},
error: (err) => {
csvInputStream.emit("error", err);
}
});

const df = new DataFrame([results.data], {
columns: ndFrameColumnNames,
index: [count++]
})

csvInputStream.push(df);
},
complete: (result: any) => {
csvInputStream.push(null);
return null
},
error: (err) => {
csvInputStream.emit("error", err);
}
return csvInputStream;
});

return csvInputStream;
}
};

Expand All @@ -314,39 +358,45 @@ const $openCsvInputStream = (filePath: string, options: CsvInputOptionsNode) =>
* ```
*/
const $writeCsvOutputStream = (filePath: string, options: CsvInputOptionsNode) => {
let isFirstRow = true
const fileOutputStream = fs.createWriteStream(filePath)
const csvOutputStream = new stream.Writable({ objectMode: true })
fs.access(filePath, fs.constants.F_OK, (err) => {
if (err) {
throw new Error("ENOENT: no such file or directory");
}

let isFirstRow = true
const fileOutputStream = fs.createWriteStream(filePath)
const csvOutputStream = new stream.Writable({ objectMode: true })

csvOutputStream._write = (chunk: DataFrame | Series, encoding, callback) => {
csvOutputStream._write = (chunk: DataFrame | Series, encoding, callback) => {

if (chunk instanceof DataFrame) {
if (chunk instanceof DataFrame) {

if (isFirstRow) {
isFirstRow = false
fileOutputStream.write($toCSV(chunk, { header: true, ...options }));
if (isFirstRow) {
isFirstRow = false
fileOutputStream.write($toCSV(chunk, { header: true, ...options }));
callback();
} else {
fileOutputStream.write($toCSV(chunk, { header: false, ...options }));
callback();
}

} else if (chunk instanceof Series) {

fileOutputStream.write($toCSV(chunk));
callback();

} else {
fileOutputStream.write($toCSV(chunk, { header: false, ...options }));
callback();
csvOutputStream.emit("error", new Error("ValueError: Intermediate chunk must be either a Series or DataFrame"))
}

} else if (chunk instanceof Series) {

fileOutputStream.write($toCSV(chunk));
callback();

} else {
csvOutputStream.emit("error", new Error("ValueError: Intermediate chunk must be either a Series or DataFrame"))
}

}

csvOutputStream.on("finish", () => {
fileOutputStream.end()
})
csvOutputStream.on("finish", () => {
fileOutputStream.end()
})

return csvOutputStream
return csvOutputStream
});
}


Expand All @@ -358,4 +408,4 @@ export {
$toCSV,
$writeCsvOutputStream,
$openCsvInputStream,
}
}
Loading
0