https://github.com/ffalt/xlsx-extract
nodejs lib for extracting data from XLSX files
https://github.com/ffalt/xlsx-extract
extracting-data node-module xlsx
Last synced: 7 months ago
JSON representation
nodejs lib for extracting data from XLSX files
- Host: GitHub
- URL: https://github.com/ffalt/xlsx-extract
- Owner: ffalt
- License: mit
- Created: 2013-11-29T09:55:43.000Z (over 12 years ago)
- Default Branch: main
- Last Pushed: 2023-03-17T12:44:15.000Z (about 3 years ago)
- Last Synced: 2024-11-08T07:34:58.208Z (over 1 year ago)
- Topics: extracting-data, node-module, xlsx
- Language: TypeScript
- Homepage:
- Size: 2.41 MB
- Stars: 44
- Watchers: 5
- Forks: 17
- Open Issues: 2
-
Metadata Files:
- Readme: README.md
- Changelog: CHANGELOG.md
- License: LICENSE
Awesome Lists containing this project
README
# xlsx-extract
extracts data from XLSX files with low memory footprint
xlsx-files can be pretty big, so nodejs & full featured xlsx-modules can reach memory limits or just use more than is needed for that task. (--max-old-space-size & --stack_size can't help you all the time either)
hence these magnificent features:
- files are parsed with sax parser `sax` or `node-expat`
- get rows/cells each by callback or write them to a .tsv or .json file
[](https://nodei.co/npm/xlsx-extract/)

[](http://opensource.org/licenses/MIT)
[](https://snyk.io/test/github/ffalt/xlsx-extract)
[](https://www.codacy.com/app/ffalt/xlsx-extract)
[](https://badgen.net/npm/dt/xlsx-extract)
## Install
```
npm install xlsx-extract
```
The XML files of the format are parsed with [sax-js](https://github.com/isaacs/sax-js) by default.
If you want to use the faster [node-expat](https://github.com/astro/node-expat) parser please install it manually and use the {parser:"expat"} option. (Needs native compiling on the destination system)
```
npm install node-expat
```
## Options
```
interface IXLSXExtractOptions {
// sheet selection (provide one of the following)
sheet_name?: string; // select by sheet name
sheet_nr?: string; // default "1" - select by number of the sheet starting on 1
sheet_id?: string; // select by sheet id, e.g. "1"
sheet_rid?: string; // select by internal sheet rid, e.g. "rId1'
sheet_all?: boolean; // default false - select all sheets
// sax parser selection
parser?: string; // default "sax" - 'sax'|'expat'
// row selection
ignore_header?: number; // default 0 - the number of header lines to ignore
include_empty_rows?: boolean; // default false - include empty rows in the middle/at start
// how to output sheet, rows and cells
format?: string; // default array - convert to 'array'||'json'||'tsv'||'obj'
// tsv output options
tsv_float_comma?: boolean; // default false - use "," als decimal point for floats
tsv_delimiter?: string; // default '\t' - use specified character to field delimiter
tsv_endofline?: string; // default depending on your operating system (node os.EOL) e.g. '\n'
// cell value formats
raw_values?: boolean; // default false - do not apply cell formats (get values as string as in xlsx)
round_floats?: boolean; // default true - round float values as the cell format defines (values will be reported as parsed floats otherwise)
date1904?: boolean; // default false - use date 1904 conversion
ignore_timezone?: boolean; // default false - ignore timezone in date parsing
convert_values?: { // apply cell number formats or not (values will be reported as strings otherwise)
ints?: boolean; // rounds to int if number format is for int
floats?: boolean; // rounds floats according to float number format
dates?: boolean; // converts xlsx date to js date
bools?: boolean; // converts xlsx bool to js boolean
};
// xlsx structure options
workfolder?: string; // default 'xl' - the workbook subfolder in zip structure
}
```
## Convenience API
```javascript
var XLSX = require('xlsx-extract').XLSX;
//dump arrays
new XLSX().extract('path/to/file.xlsx', {sheet_id:1}) // or sheet_name or sheet_nr
.on('sheet', function (sheet) {
console.log('sheet',sheet); //sheet is array [sheetname, sheetid, sheetnr]
})
.on('row', function (row) {
console.log('row', row); //row is a array of values or []
})
.on('cell', function (cell) {
console.log('cell', cell); //cell is a value or null
})
.on('error', function (err) {
console.error('error', err);
})
.on('end', function (err) {
console.log('eof');
});
//dump by row in tsv-format
new XLSX().extract('path/to/file.xlsx', {sheet_id:1, format:'tsv'}) // or sheet_name or sheet_nr
.on('sheet', function (sheet) {
console.log('sheet', sheet); //sheet is tsv sheetname sheetnr
})
.on('row', function (row) {
console.log(row); //row is a tsv line
})
.on('cell', function (cell) {
console.log(cell); //cell is a tsv value
})
.on('error', function (err) {
console.error(err);
})
.on('end', function (err) {
console.log('eof');
});
//convert to tsv-file (sheet info is not written to file)
new XLSX().convert('path/to/file.xlsx', 'path/to/destfile.tsv')
.on('error', function (err) {
console.error(err);
})
.on('end', function () {
console.log('written');
})
//convert to json-file (sheet info is not written to file)
new XLSX().convert('path/to/file.xlsx', 'path/to/destfile.json')
.on('error', function (err) {
console.error(err);
})
.on('end', function () {
console.log('written');
})
```