Why
You might want to read archived files in Node without decompressing them to disk first. Composing two streams makes this quite handy.
How
Let's say you have a directory with some gzip archives that contain text. You can stream-read the files, decompress the stream, and read the lines, all in memory.
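As a minimal sketch of the idea, a file read stream can be piped into a gunzip stream, giving you the decompressed bytes without ever writing them back to disk (the file name here is just a placeholder):

const fs = require("fs");
const zlib = require("zlib");

// compose two streams: compressed bytes in, decompressed bytes out
fs.createReadStream("./example.txt.gz") // placeholder file name
  .pipe(zlib.createGunzip())
  .on("data", (chunk) => process.stdout.write(chunk));

To build a full example, first create a few gzipped text files: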
echo -e "line 1\nline 2" >> file1.txt
echo -e "line 3\nline 4" >> file2.txt
echo -e "line 5\nline 6" >> file3.txt
gzip *txt
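Then put the following in app.js (the file name is taken from the run command below):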
const fs = require("fs");
const readline = require("readline");
const zlib = require("zlib");

function readAllLines(cb) {
  let totalLines = 0;
  let total = 0;
  let completed = 0;
  fs.readdir("./", function readDir(err, files) {
    if (err) throw err;
    // only look at the gzipped files
    files = files.filter((x) => x.endsWith("gz"));
    total = files.length;
    files.forEach(function forEachFile(file) {
      // pipe the raw file stream through a gunzip stream
      let input = fs.createReadStream(`./${file}`);
      if (file.endsWith("gz")) input = input.pipe(zlib.createGunzip());
      // readline emits a "line" event for each line of the decompressed stream
      const lineReader = readline.createInterface({
        input: input,
      });
      lineReader.on("line", function onLine(line) {
        totalLines += 1;
      });
      lineReader.on("close", function onEnd() {
        completed += 1;
        // important: make sure every file has emitted its close event
        if (completed === total) {
          cb(totalLines);
        }
      });
    });
  });
}

readAllLines((totalLines) => console.log("Total Lines:", totalLines));
> node app.js
Total Lines: 6
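Six lines in total, matching the two lines in each of the three gzipped files.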
Conclusion
Streams are quite handy for processing data without having to read it all into memory. You can have a service that processes compressed logs in the background, consuming almost no memory, without needing any intermediary files.