通过REST API端点,我得到了相当大的CSV文件,其结构如下( CSV文件中的JSON):
A,B,C,D
1,2,3,{"E":1,"F":2,"G":3}
1,2,3,{"E":1,"H":2}对于不同的工具,我需要一个具有扁平结构(没有嵌套JSON)的CSV。所以,最后,我想要一个看起来像这样的CSV。
A,B,C,E,F,G,H
1,2,3,1,2,3,
1,2,3,1,,,2(尽管列标题看起来很有条理,但这对我的用例并不重要)
由于CSV文件相当大,我正在寻找一种性能相对较高的方法。我将用JavaScript (Node.JS)编写这段代码(因为这是用于脚本所有其他部分的语言)。然而,现在我只是在寻找一种理论上的方法/伪代码来在性能问题上做到这一点。
据我所知,我可能需要遍历CSV文件两次。第一次我只需要得到所有的JSON密钥。第二次,我可以创建一个新的CSV文件并设置所有值。但是,我会正确地找出我必须在哪一列中写入值吗?
或者,在一个循环中将CSV文件“转换”为对象数组,然后使用CSV解析器(http://csv.adaltas.com/)将其转换回CSV会更有效?
发布于 2017-08-31 12:23:16
这是一个使用jq的解决方案
如果文件filter.jq包含
[
split("\n") # split string into lines
| (.[0] | split(",")) as $headers # split header
| (.[1:][] | split(",")) # split data rows
| select(length>0) # get rid of empty lines
| $headers[:-1] as $h1 # fixed headers
| .[:($h1|length)] as $p1 # fixed part
| .[($h1|length):] as $p2 # variable part
| (
[ [ $h1, $p1 ] # \
| transpose[] # \ assemble fixed object
| {key:.[0], value:.[1]|tonumber} # / from fixed keys and values
] | from_entries # /
) + (
$p2 | join(",") | fromjson # assemble variable object
)
]
| (map(keys) | add | unique) as $all # compute final headers
| [$all] + ( # add headers to
map(. as $b | reduce $all[] as $a ([];. + [$b[$a]])) # objects with all keys
| map(map(if . == null then "" else tostring end)) # convert values to strings
)
| .[] # scan final array
| @csv # convert to csv然后你的数据在一个名为data的文件中
jq -M -R -s -r -f filter.jq data将生成
"A","B","C","E","F","G","H"
"1","2","3","1","2","3",""
"1","2","3","1","","","2"发布于 2017-08-05 23:49:16
var express = require('express');
var app = express();
var bodyParser = require('body-parser');
var mysql=require('mysql');
var fs= require('fs');
var csv = require('fast-csv');
var formidable = require('formidable');
var urlencodedParser = bodyParser.urlencoded({ extended: false })
var con=mysql.createConnection({
host:'localhost',
user:'dheeraj',
password:'123',
database:'dheeraj'
});
app.use('/assets',express.static('assets'));
app.get('/d', function (req, res) {
res.sendFile( __dirname + "/" + "/d.html" );
})
app.post('/file_upload', urlencodedParser, function (req, res) {
//{
var form = new formidable.IncomingForm();
form.parse(req, function (err, fields, files) {
res.write('File uploaded');
//console.log(files.filetoupload);
fs.createReadStream(files.filetoupload.name)
.pipe(csv())
.on('data',function(data){
var d1=data[0];
var d2=data[1];
var d3=data[2];
var d4=data[3];
var d5=data[4];
con.query('insert into demo values(\''+d1+'\',\''+d2+'\',\''+d3+'\',\''+d4+'\',\''+d5+'\')',function(err,result)
{
console.log('inserted');
})
console.log(data);
})
.on('end',function(data){
console.log('read finished');
});
res.end();
})
})
var server = app.listen(8081, function () {
var host = server.address().address
var port = server.address().port
console.log("Example app listening at http://%s:%s", host, port)
})https://stackoverflow.com/questions/45523668
复制相似问题