2016-08-25 2 views
0

Кто-то может помочь мне решить эту проблему? У меня есть две таблицы, как я покажу вам ниже (data1 и data2). Я хотел бы выводить (data3), комбинируя оба. Идея состоит в том, чтобы выбирать только отрицательные значения из каждого «mir» (mir.1, mir2, ..., mir25) из data1, объединяя информацию соответствующего «mir» из data2 для генерации данных3.Фильтрация информации из одной таблицы для объединения с информацией другой таблицы с использованием R

Exemple:

data1 <-"P_G,mir-1,mir-2,mir-3,mir-4,mir-5,mir-6,mir-7,mir-8,mir-9,mir-10,mir-11,mir-12,mir-13,mir-14,mir-15,mir-16,mir-17,mir-18,mir-19,mir-20,mir-21,mir-22,mir-23,mir-24,mir-25 
P111179_ENSBTAG00000005989,NA,0.27,NA,NA,NA,NA,NA,-0.19,0.3580,NA,0.5234,-0.58,-0.24,-0.50,NA,NA,-0.11,-0.37,NA,NA,0.41,0.20,NA,-0.32,NA 
P137378_ENSBTAG00000038920,-0.09231,-0.30,NA,-0.02,0.2158,-0.16,-0.08,-0.41,-0.21,-0.21,-0.49,0.6939,-0.08,0.6030,0.6030,-0.15,0.5506,0.0810,-0.38,NA,0.36,-0.44,0.26,0.05,-0.10 
P136511_8_ENSBTAG00000043999,0.40317,0.38,NA,0.6185,-0.15,0.6674,-0.47,0.0065,0.2911,0.2911,0.3999,-0.62,-0.24,-0.53,-0.53,-0.23,-0.23,-0.25,-0.07,NA,0.11,0.29,-0.54,-0.33,-0.21 
P136181_1_ENSBTAG00000022991,-0.27507,-0.12,NA,-0.42,-0.01,-0.15,0.5465,-0.54,0.1782,0.1782,-0.42,0.6711,-0.23,0.4807,0.4807,-0.17,0.2977,0.3164,-0.41,0.09,0.29,0.16,-0.07,0.04,0.02 
P134814_ENSBTAG00000006541,-0.4182,NA,NA,NA,-0.61,0.1221,0.2858,NA,NA,NA,NA,0.0309,NA,0.3838,0.3838,NA,NA,-0.02,NA,NA,0.14,NA,-0.13,0.19,-0.04 
P133768_ENSBTAG00000021120,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
P138444_ENSBTAG00000000894,NA,NA,NA,0.0455,-0.25,NA,NA,-0.31,NA,0.5985,0.4818,NA,NA,NA,NA,-0.23,NA,-0.14,NA,NA,0.25,NA,NA,-0.30,NA 
P138481_ENSBTAG00000005534,NA,NA,NA,0.1475,NA,NA,NA,0.1111,NA,0.2673,NA,NA,NA,NA,NA,NA,NA,-0.50,NA,NA,NA,NA,NA,0.10,NA 
P111219_ENSBTAG00000004910,NA,NA,NA,NA,NA,0.5612,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.09,-0.19,NA,NA,NA,NA,NA,NA,NA" 
data1 <-read.table(text=data1,sep=",",header=T) 

data2 <-"mirna,expression 
mir-1,1506.554 
mir-2,64.385 
mir-3,790.762 
mir-4,13.574 
mir-5,59.737 
mir-6,29832.972 
mir-7,10085.201 
mir-8,5.81 
mir-9,53.09 
mir-10,53.09 
mir-11,13096.997 
mir-12,561.459 
mir-13,0.63 
mir-14,57.918 
mir-15,57.918 
mir-16,0.695 
mir-17,75.411 
mir-18,258.793 
mir-19,1.16 
mir-20,3.95 
mir-21,24.966 
mir-22,31.509 
mir-23,4.391111111 
mir-24,16.659 
mir-25,141.242" 
data2 <-read.table(text=data2,sep=",",header=T) 

Ожидаемый результат:

data3 <- "P_G,mirna,expression 
P111179_ENSBTAG00000005989,mir-8,5.81 
P111179_ENSBTAG00000005989,mir-12,561.46 
P111179_ENSBTAG00000005989,mir-13,0.63 
P111179_ENSBTAG00000005989,mir-14,57.92 
P111179_ENSBTAG00000005989,mir-17,75.41 
P111179_ENSBTAG00000005989,mir-18,258.79 
P111179_ENSBTAG00000005989,mir-24,16.66 
P137378_ENSBTAG00000038920,mir-1,1506.55 
P137378_ENSBTAG00000038920,mir-2,64.39 
P137378_ENSBTAG00000038920,mir-4,13.57 
P137378_ENSBTAG00000038920,mir-6,29832.97 
P137378_ENSBTAG00000038920,mir-7,10085.20 
P137378_ENSBTAG00000038920,mir-8,5.81 
P137378_ENSBTAG00000038920,mir-9,53.09 
P137378_ENSBTAG00000038920,mir-10,53.09 
P137378_ENSBTAG00000038920,mir-11,13097.00 
P137378_ENSBTAG00000038920,mir-13,0.63 
P137378_ENSBTAG00000038920,mir-16,0.70 
P137378_ENSBTAG00000038920,mir-19,1.16 
P137378_ENSBTAG00000038920,mir-22,31.51 
P137378_ENSBTAG00000038920,mir-25,141.24" 
data3 <-read.table(text=data3,sep=",",header=T) 

ответ

3

Мы можем melt первый набор данных и присоединиться к on со вторым набором данных

library(data.table) 
unique(setDT(data2)[melt(setDT(data1), id.var="P_G", na.rm = TRUE, variable.name = "mirna", 
    value.name = "expression")[, 
    mirna:= sub("[.]", "-", mirna) 
    ][expression>0], on = "mirna"][], by = c("mirna", "expression")) 
Смежные вопросы