我正在使用以下语句检查列catalytic.site.AXK.start.postion中的值是否位于另一列AxK.postion2中。如果是这样,则创建另一列catalytic.site.matched.sequence.position,指示TRUE的值。它似乎适用于某些价值观,但并不适用于所有的价值观。有谁知道为什么会发生这种情况,以及如何解决?谢谢!
df4$catalytic.site.matched.sequence.position[which(df4$catalytic.site.AXK.start.postion %in% df4$AxK.postion2)] <- TRUE

structure(list(Entry = c("Q86TB3", "Q96GD4", "Q00537", "Q8IZL9",
"Q13131", "Q96L96"), Entry.name = c("ALPK2_HUMAN", "AURKB_HUMAN",
"CDK17_HUMAN", "CDK20_HUMAN", "AAPK1_HUMAN", "ALPK3_HUMAN"),
Gene.names...primary.. = c("ALPK2", "AURKB", "CDK17", "CDK20",
"PRKAA1", "ALPK3"), Protein.names = c("Alpha-protein kinase 2 (EC 2.7.11.1) (Heart alpha-protein kinase)",
"Aurora kinase B (EC 2.7.11.1) (Aurora 1) (Aurora- and IPL1-like midbody-associated protein 1) (AIM-1) (Aurora/IPL1-related kinase 2) (ARK-2) (Aurora-related kinase 2) (STK-1) (Serine/threonine-protein kinase 12) (Serine/threonine-protein kinase 5) (Serine/threonine-protein kinase aurora-B)",
"Cyclin-dependent kinase 17 (EC 2.7.11.22) (Cell division protein kinase 17) (PCTAIRE-motif protein kinase 2) (Serine/threonine-protein kinase PCTAIRE-2)",
"Cyclin-dependent kinase 20 (EC 2.7.11.22) (CDK-activating kinase p42) (CAK-kinase p42) (Cell cycle-related kinase) (Cell division protein kinase 20) (Cyclin-dependent protein kinase H) (Cyclin-kinase-activating kinase p42)",
"5'-AMP-activated protein kinase catalytic subunit alpha-1 (AMPK subunit alpha-1) (EC 2.7.11.1) (Acetyl-CoA carboxylase kinase) (ACACA kinase) (EC 2.7.11.27) (Hydroxymethylglutaryl-CoA reductase kinase) (HMGCR kinase) (EC 2.7.11.31) (Tau-protein kinase PRKAA1) (EC 2.7.11.26)",
"Alpha-protein kinase 3 (EC 2.7.11.1) (Muscle alpha-protein kinase)"
), Binding.site = c(NA, "BINDING 106; /note=\"ATP\"; /evidence=\"ECO:0000255|PROSITE-ProRule:PRU00159\"",
"BINDING 221; /note=\"ATP\"; /evidence=\"ECO:0000255|PROSITE-ProRule:PRU00159\"",
"BINDING 33; /note=\"ATP\"; /evidence=\"ECO:0000255|PROSITE-ProRule:PRU00159\"",
"BINDING 56; /note=\"ATP\"; /evidence=\"ECO:0000255|PROSITE-ProRule:PRU00159\"",
NA), Site = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Sequence = c("MKDSEGPQRPPLCFLSTLLSQKVPEKSDAVLRCIISGQPKPEVTWYKNGQAIDGSGIISNYEFFENQYIHVLHLSCCTKNDAAVYQISAKNSFGMICCSASVEVECSSENPQLSPNLEDDRDRGWKHETGTHEEERANQIDEKEHPYKEEESISPGTPRSADSSPSKSNHSLSLQSLGNLDISVSSSENPLGVKGTRHTGEAYDPSNTEEIANGLLFLNSSHIYEKQDRCCHKTVHSMASKFTDGDLNNDGPHDEGLRSSQQNPKVQKYISFSLPLSEATAHIYPGDSAVANKQPSPQLSSEDSDSDYELCPEITLTYTEEFSDDDLEYLECSDVMTDYSNAVWQRNLLGTEHVFLLESDDEEMEFGEHCLGGCEHFLSGMGCGSRVSGDAGPMVATAGFCGHHSQPQEVGVRSSRVSKHGPSSPQTGMTLILGPHQDGTSSVTEQGRYKLPTAPEAAENDYPGIQGETRDSHQAREEFASDNLLNMDESVRETEMKLLSGESENSGMSQCWETAADKRVGGKDLWSKRGSRKSARVRQPGMKGNPKKPNANLRESTTEGTLHLCSAKESAEPPLTQSDKRETSHTTAAATGRSSHADARECAISTQAEQEAKTLQTSTDSVSKEGNTNCKGEGMQVNTLFETSQVPDWSDPPQVQVQETVRETISCSQMPAFSEPAGEESPFTGTTTISFSNLGGVHKENASLAQHSEVKPCTCGPQHEEKQDRDGNIPDNFREDLKYEQSISEANDETMSPGVFSRHLPKDARADFREPVAVSVASPEPTDTALTLENVCDEPRDREAVCAMECFEAGDQGTCFDTIDSLVGRPVDKYSPQEICSVDTELAEGQNKVSDLCSSNDKTLEVFFQTQVSETSVSTCKSSKDGNSVMSPLFTSTFTLNISHTASEGATGENLAKVENSTYPLASTVHAGQEQPSPSNSGGLDETQLLSSENNPLVQFKEGGDKSPSPSAADTTATPASYSSIVSFPWEKPTTLTANNECFQATRETEDTSTVTIATEVHPAKYLAVSIPEDKHAGGTEERFPRASHEKVSQFPSQVQLDHILSGATIKSTKELLCRAPSVPGVPHHVLQLPEGEGFCSNSPLQVDNLSGDKSQTVDRADFRSYEENFQERGSETKQGVQQQSLSQQGSLSAPDFQQSLPTTSAAQEERNLVPTAHSPASSREGAGQRSGWGTRVSVVAETAGEEDSQALSNVPSLSDILLEESKEYRPGNWEAGNKLKIITLEASASEIWPPRQLTNSESKASDGGLIIPDKVWAVPDSLKADAVVPELAPSEIAALAHSPEDAESALADSRESHKGEEPTISVHWRSLSSRGFSQPRLLESSVDPVDEKELSVTDSLSAASETGGKENVNNVSQDQEEKQLKMDHTAFFKKFLTCPKILESSVDPIDEISVIEYTRAGKPEPSETTPQGAREGGQSNDGNMGHEAEIQPAILQVPCLQGTILSENRISRSQEGSMKQEAEQIQPEEAKTAIWQVLQPSEGGERIPSGCSIGQIQESSDGSLGEAEQSKKDKAELISPTSPLSSCLPIMTHASLGVDTHNSTGQIHDVPENDIVEPRKRQYVFPVSQKRGTIENERGKPLPSSPDLTRFPCTSSPEGNVTDFLISHKMEEPKIEVLQIGETKPPSSSSSSAKTLAFISGERELEKAPKLLQDPCQKGTLGCAKKSREREKSLEARAGKSPGTLTAVTGSEEVKRKPEAPGSGHLAEGVKKKILSRVAALRLKLEEKENIRKNSAFLKKMPKLETSLSHTEEKQDPKKPSCKREGRAPVLLKKIQAEMFPEHSGNVKLSCQFAEIHEDSTICWTKDSKSIAQVQRSAGDNSTVSFAIVQASPKDQGLYYCCIKNSYGKVTAEFNLTAEVLKQLSSRQDTKGCEEIEFSQLIFKEDFLHDSYFGGRLRGQIATEELHFGEGVHRKAFRSTVMHGLMPVFKPGHACVLKVHNAIAYGTRNNDELIQRNYKLAAQECYVQNTARYYAKIYAAEAQPLEGFGEVPEIIPIFLIHRPENNIPYATVEEELIGEFVKYSIRDGKEINFLRRESEAGQKCCTFQHWVYQKTSGCLLVTDMQGVGMKLTDVGIATLAKGYKGFKGNCSMTFIDQFKALHQCNKYCKMLGLKSLQNNNQKQKQPSIGKSKVQTNSMTIKKAGPETPGEKKT",
"MAQKENSYPWPYGRQTAPSGLSTLPQRVLRKEPVTPSALVLMSRSNVQPTAAPGQKVMENSSGTPDILTRHFTIDDFEIGRPLGKGKFGNVYLAREKKSHFIVALKVLFKSQIEKEGVEHQLRREIEIQAHLHHPNILRLYNYFYDRRRIYLILEYAPRGELYKELQKSCTFDEQRTATIMEELADALMYCHGKKVIHRDIKPENLLLGLKGELKIADFGWSVHAPSLRRKTMCGTLDYLPPEMIEGRMHNEKVDLWCIGVLCYELLVGNPPFESASHNETYRRIVKVDLKFPASVPMGAQDLISKLLRHNPSERLPLAQVSAHPWVRANSRRVLPPSALQSVA",
"MKKFKRRLSLTLRGSQTIDESLSELAEQMTIEENSSKDNEPIVKNGRPPTSHSMHSFLHQYTGSFKKPPLRRPHSVIGGSLGSFMAMPRNGSRLDIVHENLKMGSDGESDQASGTSSDEVQSPTGVCLRNRIHRRISMEDLNKRLSLPADIRIPDGYLEKLQINSPPFDQPMSRRSRRASLSEIGFGKMETYIKLEKLGEGTYATVYKGRSKLTENLVALKEIRLEHEEGAPCTAIREVSLLKDLKHANIVTLHDIVHTDKSLTLVFEYLDKDLKQYMDDCGNIMSMHNVKLFLYQILRGLAYCHRRKVLHRDLKPQNLLINEKGELKLADFGLARAKSVPTKTYSNEVVTLWYRPPDVLLGSSEYSTQIDMWGVGCIFFEMASGRPLFPGSTVEDELHLIFRLLGTPSQETWPGISSNEEFKNYNFPKYKPQPLINHAPRLDSEGIELITKFLQYESKKRVSAEEAMKHVYFRSLGPRIHALPESVSIFSLKEIQLQKDPGFRNSSYPETGHGKNRRQSMLF",
"MDQYCILGRIGEGAHGIVFKAKHVETGEIVALKKVALRRLEDGFPNQALREIKALQEMEDNQYVVQLKAVFPHGGGFVLAFEFMLSDLAEVVRHAQRPLAQAQVKSYLQMLLKGVAFCHANNIVHRDLKPANLLISASGQLKIADFGLARVFSPDGSRLYTHQVATRWYRAPELLYGARQYDQGVDLWSVGCIMGELLNGSPLFPGKNDIEQLCYVLRILGTPNPQVWPELTELPDYNKISFKEQVPMPLEEVLPDVSPQALDLLGQFLLYPPHQRIAASKALLHQYFFTAPLPAHPSELPIPQRLGGPAPKAHPGPPHIHDFHVDRPLEESLLNPELIRPFILEG",
"MRRLSSWRKMATAEKQKHDGRVKIGHYILGDTLGVGTFGKVKVGKHELTGHKVAVKILNRQKIRSLDVVGKIRREIQNLKLFRHPHIIKLYQVISTPSDIFMVMEYVSGGELFDYICKNGRLDEKESRRLFQQILSGVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYAAPEVISGRLYAGPEVDIWSSGVILYALLCGTLPFDDDHVPTLFKKICDGIFYTPQYLNPSVISLLKHMLQVDPMKRATIKDIREHEWFKQDLPKYLFPEDPSYSSTMIDDEALKEVCEKFECSEEEVLSCLYNRNHQDPLAVAYHLIIDNRRIMNEAKDFYLATSPPDSFLDDHHLTRPHPERVPFLVAETPRARHTLDELNPQKSKHQGVRKAKWHLGIRSQSRPNDIMAEVCRAIKQLDYEWKVVNPYYLRVRRKNPVTSTYSKMSLQLYQVDSRTYLLDFRSIDDEITEAKSGTATPQRSGSVSNYRSCQRSDSDAEAQGKSSEVSLTSSVTSLDSSPVDLTPRPGSHTIEFFEMCANLIKILAQ",
"MEVAWLVYVLGQQPLARQGEGQSRLVPGRGLVLWLPGLPRSSPSWPAVDLAPLAPARPRGPLICHTGHEQAGREPGPGSSTKGPVLHDQDTRCAFLPRPPGPLQTRRYCRHQGRQGSGLGAGPGAGTWAPAPPGVSKPRCPGRARPGEGQQQVTTARPPAINRGARQPRAGAAAAGRGPGAGAWRTGEAAASAGPAVGEGGAMGSRRAPSRGWGAGGRSGAGGDGEDDGPVWIPSPASRSYLLSVRPETSLSSNRLSHPSSGRSTFCSIIAQLTEETQPLFETTLKSRSVSEDSDVRFTCIVTGYPEPEVTWYKDDTELDRYCGLPKYEITHQGNRHTLQLYRCREEDAAIYQASAQNSKGIVSCSGVLEVGTMTEYKIHQRWFAKLKRKAAAKLREIEQSWKHEKAVPGEVDTLRKLSPDRFQRKRRLSGAQAPGPSVPTREPEGGTLAAWQEGETETAQHSGLGLINSFASGEVTTNGEAAPENGEDGEHGLLTYICDAMELGPQRALKEESGAKKKKKDEESKQGLRKPELEKAAQSRRSSENCIPSSDEPDSCGTQGPVGVEQVQTQPRGRAARGPGSSGTDSTRKPASAVGTPDKAQKAPGPGPGQEVYFSLKDMYLENTQAVRPLGEEGPQTLSVRAPGESPKGKAPLRARSEGVPGAPGQPTHSLTPQPTRPFNRKRFAPPKPKGEATTDSKPISSLSQAPECGAQSLGKAPPQASVQVPTPPARRRHGTRDSTLQGQAGHRTPGEVLECQTTTAPTMSASSSSDVASIGVSTSGSQGIIEPMDMETQEDGRTSANQRTGSKKNVQADGKIQVDGRTRGDGTQTAQRTRADRKTQVDAGTQESKRPQSDRSAQKGMMTQGRAETQLETTQAGEKIQEDRKAQADKGTQEDRRMQGEKGMQGEKGTQSEGSAPTAMEGQSEQEVATSLGPPSRTPKLPPTAGPRAPLNIECFVQTPEGSCFPKKPGCLPRSEEAVVTASRNHEQTVLGPLSGNLMLPAQPPHEGSVEQVGGERCRGPQSSGPVEAKQEDSPFQCPKEERPGGVPCMDQGGCPLAGLSQEVPTMPSLPGTGLTASPKAGPCSTPTSQHGSTATFLPSEDQVLMSSAPTLHLGLGTPTQSHPPETMATSSEGACAQVPDVEGRTPGPRSCDPGLIDSLKNYLLLLLKLSSTETSGAGGESQVGAATGGLVPSATLTPTVEVAGLSPRTSRRILERVENNHLVQSAQTLLLSPCTSRRLTGLLDREVQAGRQALAAARGSWGPGPSSLTVPAIVVDEEDPGLASEGASEGEGEVSPEGPGLLGASQESSMAGRLGEAGGQAAPGQGPSAESIAQEPSQEEKFPGEALTGLPAATPEELALGARRKRFLPKVRAAGDGEATTPEERESPTVSPRGPRKSLVPGSPGTPGRERRSPTQGRKASMLEVPRAEEELAAGDLGPSPKAGGLDTEVALDEGKQETLAKPRKAKDLLKAPQVIRKIRVEQFPDASGSLKLWCQFFNILSDSVLTWAKDQRPVGEVGRSAGDEGPAALAIVQASPVDCGVYRCTIHNEHGSASTDFCLSPEVLSGFISREEGEVGEEIEMTPMVFAKGLADSGCWGDKLFGRLVSEELRGGGYGCGLRKASQAKVIYGLEPIFESGRTCIIKVSSLLVFGPSSETSLVGRNYDVTIQGCKIQNMSREYCKIFAAEARAAPGFGEVPEIIPLYLIYRPANNIPYATLEEDLGKPLESYCSREWGCAEAPTASGSSEAMQKCQTFQHWLYQWTNGSFLVTDLAGVDWKMTDVQIATKLRGYQGLKESCFPALLDRFASSHQCNAYCELLGLTPLKGPEAAHPQAKAKGSKSPSAGRKGSQLSPQPQKKGLPSPQGTRKSAPSSKATPQASEPVTTQLLGQPPTQEEGSKAQGMR"
), catalytic.site = c(NA, 106, 221, 33, 56, NA), catalytic.sequence = c("AGK",
"ALK", "AMK", "APK", "AIK", "ATK"), AxK.postion2 = c("239, 291, 516, 1417, 1665, 1681, 1695",
"2, 104", "219, 467", "31, 279, 310", "13, 54, 303, 427",
"392, 509, 516, 601, 859, 890, 1788"), catalytic.site.AXK.start.postion = c(NA,
"104", "219", "31", "54", NA), catalytic.site.matched.sequence.position = c(NA,
NA, NA, TRUE, TRUE, NA)), row.names = c(84L, 91L, 116L, 118L,
128L, 133L), class = "data.frame")发布于 2020-11-23 01:47:11
专注于你感兴趣的两篇专栏:
AxK.postion2 catalytic.site.AXK.start.postion
84 239, 291, 516, 1417, 1665, 1681, 1695 <NA>
91 2, 104 104
116 219, 467 219
118 31, 279, 310 31
128 13, 53, 303, 427 54
133 392, 509, 516, 601, 859, 890, 1788 <NA>看起来两者都是字符串。第一列AxK.position2包含逗号分隔的数字字符串。
如果使用%in%,本质上是在查看元素是否位于向量内。在这种情况下,您需要将字符串分离为单个元素(创建它们的向量)。
例如,
R> "5" %in% c("3, 4, 5")
[1] FALSE
R> "5" %in% c("3", "4", "5")
[1] TRUE若要拆分每行中的字符串,请使用strsplit
R> strsplit(df4$AxK.postion2, ', ')
[[1]]
[1] "239" "291" "516" "1417" "1665" "1681" "1695"
[[2]]
[1] "2" "104"
[[3]]
[1] "219" "467"
[[4]]
[1] "31" "279" "310"
[[5]]
[1] "13" "53" "303" "427"
[[6]]
[1] "392" "509" "516" "601" "859" "890" "1788"注意逗号后面的空格,因为在逗号后面的字符串中有空格。否则,您应该删除空白。
由于结果是一个列表,所以可以使用unlist,从而为每一行生成一个向量。
最终,这将为您提供正确的新列:
df4$catalytic.site.matched.sequence.position <-
df4$catalytic.site.AXK.start.postion %in% unlist(strsplit(df4$AxK.postion2, ', '))https://stackoverflow.com/questions/64958256
复制相似问题