文章/答案/技术大牛

发布

社区首页 >问答首页 >通过CSV文件进行解析以转换为JSON格式文件

问通过CSV文件进行解析以转换为JSON格式文件
EN

Code Review用户

提问于 2013-07-23 21:44:32

回答 1查看 6.4K关注 0票数 2

我得到以下CSV文件，我从excel电子表格中提取该文件。为了给出一些有帮助的背景信息，讨论了AGI数(将其视为蛋白质标识符)、这些蛋白质标识符的未修饰肽序列，以及对未修饰序列进行修改的修饰肽序列、这些修饰的指数/指数，以及重复肽的联合光谱计数。文本文件名为MASP.GlycoModReader.txt，信息格式如下：

AGI,UnMd Peptide (M) = x,Mod Peptide (oM) = Ox,Index/Indeces of Modification,counts,Combined 
Spectral count for repeated Peptides

AT1G56070.1,NMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR,NoMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR,2,17
AT1G56070.1,LYMEARPMEEGLAEAIDDGR,LYoMEARPoMEEGLAEAIDDGR,"3, 9",1
AT1G56070.1,EAMTPLSEFEDKL,EAoMTPLSEFEDKL,3,7
AT1G56070.1,LYMEARPMEEGLAEAIDDGR,LYoMEARPoMEEGLAEAIDDGR,"3, 9",2
AT1G56070.1,EGPLAEENMR,EGPLAEENoMR,9,2
AT1G56070.1,DLQDDFMGGAEIIK,DLQDDFoMGGAEIIK,7,1

提取上述内容后需要生成的输出文件如下所示：

AT1G56070.1,{"peptides": [{"sequence": "NMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR", "mod_sequence":    
"NoMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR" , "mod_indeces": 2, "spectral_count": 17}, {"sequence": 
"LYMEARPMEEGLAEAIDDGR" , "mod_sequence": "LYoMEARPoMEEGLAEAIDDGR", "mod_indeces": [3, 9], 
"spectral_count": 3}, {"sequence": "EAMTPLSEFEDKL" , "mod_sequence": "EAoMTPLSEFEDKL", 
"mod_indeces": [3,9], "spectral_count": 7}, {"sequence": "EGPLAEENMR", "mod_sequence": 
"EGPLAEENoMR", "mod_indeces": 9, "spectral_count": 2}, {"sequence": "DLQDDFMGGAEIIK", 
"mod_sequence": "DLQDDFoMGGAEIIK", "mod_indeces": [7], "spectral_count": 1}]}

我在下面给出了我的解决方案:如果有人用另一种语言有更好的解决方案，或者可以分析我的解决方案，并告诉我是否有更有效的方法来实现这一点，那么请在下面评论。谢谢。

#!/usr/bin/env node

var fs = require('fs');
var csv = require('csv');
var data ="proteins.csv";

/* Uses csv nodejs module to parse the proteins.csv file.
* Parses the csv file row by row and updates the peptide_arr.
* For new entries creates a peptide object, for similar entries it updates the
* counts in the peptide object with the same AGI#.
* Uses a peptide object to store protein ID AGI#, and the associated data.
* Writes all formatted peptide objects to a txt file - output.txt.
*/

// Tracks current row
var x = 0;
// An array of peptide objects stores the information from the csv file
var peptide_arr = [];

// csv module reads row by row from data 
csv()
.from(data)
.to('debug.csv')
.transform(function(row, index) {
    // For the first entry push a new peptide object with the AGI# (row[0]) 
    if(x == 0) {
    // cur is the current peptide read into row by csv module
    Peptide cur = new Peptide( row[0] );

    // Add the assoicated data from row (1-5) to cur
    cur.data.peptides.push({
        "sequence" : row[1];
        "mod_sequence" : row[2];
        if(row[5]){
        "mod_indeces" : "[" + row[3] + ", " + row[4] + "]";
        "spectral_count" : row[5];  
        } else {
        "mod_indeces" : row[3];
        "spectral_count" : row[4];  
        }
    });

    // Add the current peptide to the array
    peptide_arr.push(cur);
    }

    // Move to the next row
    x++;
});

// Loop through peptide_arr and append output with each peptide's AGI# and its data
String output = "";
for(var peptide in peptide_arr) 
{
    output = output + peptide.toString()
}
// Write the output to output.txt
fs.writeFile("output.txt", output);

/* Peptide Object :
 *  - id:AGI#
 *  - data: JSON Array associated
 */
function Peptide(id) // this is the actual function that does the ID retrieving and data 
                    // storage
{
    this.id = id;
    this.data = {
        peptides: []
    };
}

/* Peptide methods :
 *  - toJson : Returns the properly formatted string
 */
Peptide.prototype = {
    toString: function(){
        return this.id + "," + JSON.stringify(this.data, null, " ") + "/n"
    }
};

javascript

回答 1

Code Review用户

发布于 2013-11-18 06:09:40

总的来说，我认为你的想法是正确的。

但是我还是坚持住了：

cur.data.peptides.push({
    "sequence" : row[1];
    "mod_sequence" : row[2];
    if(row[5]){
    "mod_indeces" : "[" + row[3] + ", " + row[4] + "]";
    "spectral_count" : row[5];  
    } else {
    "mod_indeces" : row[3];
    "spectral_count" : row[4];  
    }
})

因为在我看来这不像是有效的Javascript。

我会这样构造它：

// An array of peptide objects stores the information from the csv file
var alldata = {
    peptides : []
};
var agi = null;
function Peptide(row) 
{
   this.sequence = row[1];
   this.mod_sequence = row[2];
    if(row[5]){
        this.mod_indeces = [ row[3], row[4] ];
        this.spectral_count = row[5];  
    } else {
        this.mod_indeces= row[3];
        this.spectral_count= row[4];  
    };
    return this;
}


// csv module reads row by row from data 
csv()
.from(data)
.to('debug.csv')
.transform(function(row, index) {    
    if(agi === null)
    {
        agi = row[0];
    }

    alldata.peptides.push(new Peptide(row));
});

// Write the output to output.txt
fs.writeFile("output.txt", agi + "," + JSON.stringify(alldata, null, " "));

我们将所有数据保存在一个对象中。似乎输出是由AGI分隔的，它是一个列表。不需要处理字符串，因为它都是作为一个对象来处理的，它的对象是Peptide函数中的数据。

它不是在x中保持计数，而是将一个名为agi的变量初始化为null。

票数 2

页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://codereview.stackexchange.com/questions/28899

复制

相似问题

问通过CSV文件进行解析以转换为JSON格式文件
EN

回答 1

Code Review用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问通过CSV文件进行解析以转换为JSON格式文件EN

回答 1

Code Review用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问通过CSV文件进行解析以转换为JSON格式文件
EN