Skip to content

matching doesn't work

8 messages · Ana Marija, Rasmus Liland

#
Hi,

I have this code:

library(SNPRelate)

# get PLINK output
plink.genome <- read.table("plink.genome", header=TRUE)
FID1  IID1    FID2  IID2 RT EZ     Z0     Z1     Z2 PI_HAT PHE      DST
1 fam1054 G1054 fam1054  G700 OT  0 0.0045 0.9938 0.0017 0.4986  -1 0.839150
2 fam1054 G1054 fam1054  G701 OT  0 0.0000 1.0000 0.0000 0.5000  -1 0.838381
3 fam1079 G1079 fam2484 G2484 UN NA 0.0000 0.0007 0.9993 0.9997  -1 0.999889
4 fam1245 G1237 fam1245 G1245 OT  0 0.0036 0.9964 0.0000 0.4982  -1 0.838770
5 fam1245 G1241 fam1245 G1245 OT  0 0.0042 0.9854 0.0104 0.5031  -1 0.840569
6 fam0176  G174 fam0176  G176 OT  0 0.0000 1.0000 0.0000 0.5000  -1 0.837799
[1] G1054 G1054 G1079 G1237 G1241 G174
33 Levels: G1054 G1079 G1237 G1241 G174 G175 G177 G178 G1818 G2007 ... G578

snpgdsBED2GDS("output4.bed", "output4.fam","output4.bim", "HapMap.gds")
genofile <- snpgdsOpen("HapMap.gds")

# get SNPRelate output
ibd <- snpgdsIBDMoM(genofile, remove.monosnp=FALSE, kinship=TRUE)
head(ibdlist <- snpgdsIBDSelection(ibd))
   ID1   ID2        k0         k1     kinship
1 G1000 G1001 1.0000000 0.00000000 0.000000000
2 G1000 G1003 0.9938901 0.00000000 0.003054932
3 G1000 G1005 1.0000000 0.00000000 0.000000000
4 G1000 G1009 1.0000000 0.00000000 0.000000000

# adjust for the orders of sample pair
pair.samp <- paste(ibdlist$ID1, ibdlist$ID2, sep=" ")
 head(pair.samp)
[1] "G1000 G1001" "G1000 G1003" "G1000 G1005" "G1000 G1009" "G1000 G1052"
[6] "G1000 G1054"

plink.genome <- plink.genome[match(
    paste(plink.genome$IID1, plink.genome$IID2, sep=" "), pair.samp), ]
FID1 IID1 FID2 IID2   RT EZ Z0 Z1 Z2 PI_HAT PHE DST PPC RATIO IBS0 IBS1
NA   <NA> <NA> <NA> <NA> <NA> NA NA NA NA     NA  NA  NA  NA    NA   NA   NA
NA.1 <NA> <NA> <NA> <NA> <NA> NA NA NA NA     NA  NA  NA  NA    NA   NA   NA
NA.2 <NA> <NA> <NA> <NA> <NA> NA NA NA NA     NA  NA  NA  NA    NA   NA   NA
NA.3 <NA> <NA> <NA> <NA> <NA> NA NA NA NA     NA  NA  NA  NA    NA   NA   NA
NA.4 <NA> <NA> <NA> <NA> <NA> NA NA NA NA     NA  NA  NA  NA    NA   NA   NA
NA.5 <NA> <NA> <NA> <NA> <NA> NA NA NA NA     NA  NA  NA  NA    NA   NA   NA

So nothing is matching here. Can you please advise,

Thanks
Ana
#
On 2020-04-10 15:38 -0500, Ana Marija wrote:
| Hi,
| 
| I have this code:

Dear Ana,

none of the ID tuples in the head 
outputs you provided matches, so I added 
two lines in ibdlist that matches up;  
perhaps if you provided more lines that 
would have matched in a pastebin 
somewhere ...


options(stringsAsFactors=FALSE)
plink.genome <-
"FID1	IID1	FID2	IID2	RT	EZ	Z0	Z1	Z2	PI_HAT	PHE	DST
fam1054	G1054	fam1054	G700	OT	0	0.0045	0.9938	0.0017	0.4986	-1	0.839150
fam1054	G1054	fam1054	G701	OT	0	0.0000	1.0000	0.0000	0.5000	-1	0.838381
fam1079	G1079	fam2484	G2484	UN		0.0000	0.0007	0.9993	0.9997	-1	0.999889
fam1245	G1237	fam1245	G1245	OT	0	0.0036	0.9964	0.0000	0.4982	-1	0.838770
fam1245	G1241	fam1245	G1245	OT	0	0.0042	0.9854	0.0104	0.5031	-1	0.840569
fam0176	G174	fam0176	G176	OT	0	0.0000	1.0000	0.0000	0.5000	-1	0.837799"

plink.genome <-
read.delim(text=plink.genome, header=TRUE)

ibdlist <-
"ID1	ID2	k0	k1	kinship
G1000	G1001	1.0000000	0.00000000	0.000000000
G1000	G1003	0.9938901	0.00000000	0.003054932
G1241	G1245	1.0000000	0.00000000	0.000000000
G1000	G1005	1.0000000	0.00000000	0.000000000
G1079	G2484	0.9938901	0.00000000	0.003054932
G1000	G1009	1.0000000	0.00000000	0.000000000"
ibdlist <-
read.delim(text=ibdlist)

idx <-
match(paste(plink.genome$IID1, plink.genome$IID2),
  paste(ibdlist$ID1, ibdlist$ID2))
plink.genome[idx,]


See if this fits the rest of your 
data.frames ...  Did I nail it here, or?  

Best,
Rasmus
#
it didn't work unfortunately with your example:
character(0)

here is my whole:
plink.genome <- read.table("plink.genome", header=TRUE)

    FID1   IID1     FID2   IID2 RT    EZ      Z0      Z1      Z2
PI_HAT PHE       DST     PPC   RATIO    IBS0    IBS1    IBS2  HOMHOM
HETHET
  fam1054  G1054  fam1054   G700 OT     0  0.0045  0.9938  0.0017
0.4986  -1  0.839150  1.0000 209.7368     103  118630  250667 19.0000
3985.0000
  fam1054  G1054  fam1054   G701 OT     0  0.0000  1.0000  0.0000
0.5000  -1  0.838381  1.0000 266.0667      81  119124  249829 15.0000
3991.0000
  fam1079  G1079  fam2484  G2484 UN    NA  0.0000  0.0007  0.9993
0.9997  -1  0.999889  1.0000      NA       0      82  370250  0.0000
4591.0000
  fam1245  G1237  fam1245  G1245 OT     0  0.0036  0.9964  0.0000
0.4982  -1  0.838770  1.0000 188.4762      83  118647  249728 21.0000
3958.0000
  fam1245  G1241  fam1245  G1245 OT     0  0.0042  0.9854  0.0104
0.5031  -1  0.840569  1.0000 265.8667      97  117116  250690 15.0000
3988.0000
  fam0176   G174  fam0176   G176 OT     0  0.0000  1.0000  0.0000
0.5000  -1  0.837799  1.0000 190.0476     107  118306  246937 21.0000
3991.0000
  fam0176   G175  fam0176   G176 OT     0  0.0045  0.9909  0.0046
0.5001  -1  0.839611  1.0000 198.6500     102  117026  248327 20.0000
3973.0000
  fam0179   G177  fam0179   G179 OT     0  0.0091  0.9903  0.0006
0.4958  -1  0.838517  1.0000 116.5000     207  117568  247532 34.0000
3961.0000
  fam0179   G178  fam0179   G179 OT     0  0.0098  0.9902  0.0000
0.4951  -1  0.838160  1.0000 90.4318     224  117440  246548 44.0000
3979.0000
  fam1818  G1818  fam1818  G1819 OT     0  0.0050  0.9884  0.0066
0.5008  -1  0.839882  1.0000 208.8947     115  117679  250401 19.0000
3969.0000
  fam1818  G1818  fam1818  G1820 OT     0  0.0050  0.9950  0.0000
0.4975  -1  0.838709  1.0000 208.6842     115  119083  250671 19.0000
3965.0000
  fam2450  G2007  fam2450  G2450 OT     0  0.0000  1.0000  0.0000
0.5000  -1  0.831786  1.0000 79.0196     278  120248  238553 51.0000
4030.0000
  fam2450  G2024  fam2450  G2450 OT     0  0.0000  1.0000  0.0000
0.5000  -1  0.832052  1.0000 104.6750     242  118317  235124 40.0000
4187.0000
  fam2181  G2181  fam5745  G5745 UN    NA  0.0002  0.0034  0.9964
0.9981  -1  0.999402  1.0000      NA       4     432  367475  0.0000
4613.0000
  fam2183  G2183  fam2183  G2184 OT     0  0.0058  0.9836  0.0106
0.5024  -1  0.840452  1.0000 164.9583     133  117858  252192 24.0000
3959.0000
  fam2183  G2183  fam2183  G2185 OT     0  0.0045  0.9929  0.0026
0.4990  -1  0.839280  1.0000 136.2414     105  118760  251251 29.0000
3951.0000
  fam2280  G2280  fam2280  G2281 OT     0  0.0042  0.9893  0.0065
0.5011  -1  0.839937  1.0000 248.7500      97  117600  250265 16.0000
3980.0000
  fam2280  G2280  fam2280  G2282 OT     0  0.0033  0.9885  0.0082
0.5024  -1  0.840294  1.0000 264.8000      76  117095  249900 15.0000
3972.0000
  fam2286  G2284  fam2286  G2286 OT     0  0.0048  0.9952  0.0000
0.4976  -1  0.838745  1.0000 179.5909     112  118986  250534 22.0000
3951.0000
  fam2286  G2285  fam2286  G2286 OT     0  0.0046  0.9846  0.0108
0.5031  -1  0.840586  1.0000 331.4167     105  116441  249328 12.0000
3977.0000
  fam2813  G2813  fam4274  G4274 UN    NA  0.0058  0.0495  0.9447
0.9695  -1  0.990555  1.0000 239.2105     131    6616  357351 19.0000
4545.0000
  fam3238  G3237  fam3238  G3238 OT     0  0.0055  0.9913  0.0033
0.4989  -1  0.839299  1.0000 160.6800     125  117652  249060 25.0000
4017.0000
  fam3238  G3238  fam3238  G3264 OT     0  0.0035  0.9855  0.0110
0.5037  -1  0.840733  1.0000 173.3043      81  116986  250706 23.0000
3986.0000
  fam4257  G4257  fam0891   G891 UN    NA  0.2323  0.4978  0.2699
0.5188  -1  0.859862  1.0000 10.1332    5314   92050  268982 383.0000
3881.0000
  fam4275  G4275  fam4275  G4304 OT     0  0.0053  0.9947  0.0000
0.4973  -1  0.837959  1.0000 136.0345     122  118563  247910 29.0000
3945.0000
  fam4275  G4275  fam4275  G4352 OT     0  0.0048  0.9798  0.0154
0.5053  -1  0.841317  1.0000 209.3684     110  116482  251129 19.0000
3978.0000
  fam4454  G4452  fam4454  G4454 OT     0  0.0055  0.9840  0.0104
0.5024  -1  0.840440  1.0000 136.8966     127  116709  249681 29.0000
3970.0000
  fam4454  G4453  fam4454  G4454 OT     0  0.0057  0.9842  0.0101
0.5022  -1  0.840379  1.0000 160.0000     130  116255  248588 25.0000
4000.0000
  fam4666  G4666  fam0686   G686 UN    NA  0.0000  0.0026  0.9974
0.9987  -1  0.999576  1.0000      NA       0     312  367735  0.0000
4580.0000
  fam4691  G4691  fam4722  G4722 UN    NA  0.3986  0.4841  0.1173
0.3593  -1  0.818992  1.0000  5.9269    9073  113787  241580 602.0000
3568.0000
  fam5092  G5089  fam5092  G5092 OT     0  0.0051  0.9928  0.0022
0.4985  -1  0.839156  1.0000 234.4706     117  118225  249900 17.0000
3986.0000
  fam5092  G5092  fam5092  G5097 OT     0  0.0000  1.0000  0.0000
0.5000  -1  0.837560  1.0000 218.9444     100  119554  248956 18.0000
3941.0000
  fam5365  G5106  fam5365  G5365 OT     0  0.0057  0.9916  0.0027
0.4985  -1  0.839191  1.0000 208.0000     131  118747  251154 19.0000
3952.0000
  fam5365  G5118  fam5365  G5365 OT     0  0.0052  0.9886  0.0061
0.5004  -1  0.839781  1.0000 164.8750     121  118035  250954 24.0000
3957.0000
  fam0523   G523  fam0523   G551 OT     0  0.0037  0.9829  0.0135
0.5049  -1  0.841107  1.0000 180.6818      85  116769  251126 22.0000
3975.0000
  fam0523   G523  fam0523   G552 OT     0  0.0000  1.0000  0.0000
0.5000  -1  0.838173  1.0000 225.2353      81  119148  249406 17.0000
3829.0000
  fam5282  G5282  fam5282  G5328 OT     0  0.0033  0.9933  0.0035
0.5001  -1  0.839545  1.0000 232.1765      75  117898  249882 17.0000
3947.0000
  fam5681  G5681  fam0587   G587 UN    NA  0.0035  0.9871  0.0094
0.5030  -1  0.840482  1.0000 188.9524      79  116708  249522 21.0000
3968.0000
  fam0578   G578  fam6039  G6039 UN    NA  0.0000  0.0095  0.9905
0.9952  -1  0.998462  1.0000 4553.0000       1    1123  364629  1.0000
4553.0000
On Fri, Apr 10, 2020 at 4:24 PM Rasmus Liland <jensrasmus at gmail.com> wrote:
#
On 2020-04-10 17:05 -0500, Ana Marija wrote:
Hi!  Perhaps csv formatting are better 
suited for these emails ... the two 
ibdlist lines I added still matches in 
this example, added lookup for kinship 
... see if this works for you:


plink.genome <- "FID1,IID1,FID2,IID2,RT,EZ,Z0,Z1,Z2,PI_HAT,PHE,DST,PPC,RATIO,IBS0,IBS1,IBS2,HOMHOM,HETHET
fam1054,G1054,fam1054,G700,OT,0,0.0045,0.9938,0.0017,0.4986,-1,0.839150,1.0000,209.7368,103,118630,250667,19.0000,3985.0000
fam1054,G1054,fam1054,G701,OT,0,0.0000,1.0000,0.0000,0.5000,-1,0.838381,1.0000,266.0667,81,119124,249829,15.0000,3991.0000
fam1079,G1079,fam2484,G2484,UN,NA,0.0000,0.0007,0.9993,0.9997,-1,0.999889,1.0000,NA,0,82,370250,0.0000,4591.0000
fam1245,G1237,fam1245,G1245,OT,0,0.0036,0.9964,0.0000,0.4982,-1,0.838770,1.0000,188.4762,83,118647,249728,21.0000,3958.0000
fam1245,G1241,fam1245,G1245,OT,0,0.0042,0.9854,0.0104,0.5031,-1,0.840569,1.0000,265.8667,97,117116,250690,15.0000,3988.0000
fam0176,G174,fam0176,G176,OT,0,0.0000,1.0000,0.0000,0.5000,-1,0.837799,1.0000,190.0476,107,118306,246937,21.0000,3991.0000
fam0176,G175,fam0176,G176,OT,0,0.0045,0.9909,0.0046,0.5001,-1,0.839611,1.0000,198.6500,102,117026,248327,20.0000,3973.0000
fam0179,G177,fam0179,G179,OT,0,0.0091,0.9903,0.0006,0.4958,-1,0.838517,1.0000,116.5000,207,117568,247532,34.0000,3961.0000
fam0179,G178,fam0179,G179,OT,0,0.0098,0.9902,0.0000,0.4951,-1,0.838160,1.0000,90.4318,224,117440,246548,44.0000,3979.0000
fam1818,G1818,fam1818,G1819,OT,0,0.0050,0.9884,0.0066,0.5008,-1,0.839882,1.0000,208.8947,115,117679,250401,19.0000,3969.0000
fam1818,G1818,fam1818,G1820,OT,0,0.0050,0.9950,0.0000,0.4975,-1,0.838709,1.0000,208.6842,115,119083,250671,19.0000,3965.0000
fam2450,G2007,fam2450,G2450,OT,0,0.0000,1.0000,0.0000,0.5000,-1,0.831786,1.0000,79.0196,278,120248,238553,51.0000,4030.0000
fam2450,G2024,fam2450,G2450,OT,0,0.0000,1.0000,0.0000,0.5000,-1,0.832052,1.0000,104.6750,242,118317,235124,40.0000,4187.0000
fam2181,G2181,fam5745,G5745,UN,NA,0.0002,0.0034,0.9964,0.9981,-1,0.999402,1.0000,NA,4,432,367475,0.0000,4613.0000
fam2183,G2183,fam2183,G2184,OT,0,0.0058,0.9836,0.0106,0.5024,-1,0.840452,1.0000,164.9583,133,117858,252192,24.0000,3959.0000
fam2183,G2183,fam2183,G2185,OT,0,0.0045,0.9929,0.0026,0.4990,-1,0.839280,1.0000,136.2414,105,118760,251251,29.0000,3951.0000
fam2280,G2280,fam2280,G2281,OT,0,0.0042,0.9893,0.0065,0.5011,-1,0.839937,1.0000,248.7500,97,117600,250265,16.0000,3980.0000
fam2280,G2280,fam2280,G2282,OT,0,0.0033,0.9885,0.0082,0.5024,-1,0.840294,1.0000,264.8000,76,117095,249900,15.0000,3972.0000
fam2286,G2284,fam2286,G2286,OT,0,0.0048,0.9952,0.0000,0.4976,-1,0.838745,1.0000,179.5909,112,118986,250534,22.0000,3951.0000
fam2286,G2285,fam2286,G2286,OT,0,0.0046,0.9846,0.0108,0.5031,-1,0.840586,1.0000,331.4167,105,116441,249328,12.0000,3977.0000
fam2813,G2813,fam4274,G4274,UN,NA,0.0058,0.0495,0.9447,0.9695,-1,0.990555,1.0000,239.2105,131,6616,357351,19.0000,4545.0000
fam3238,G3237,fam3238,G3238,OT,0,0.0055,0.9913,0.0033,0.4989,-1,0.839299,1.0000,160.6800,125,117652,249060,25.0000,4017.0000
fam3238,G3238,fam3238,G3264,OT,0,0.0035,0.9855,0.0110,0.5037,-1,0.840733,1.0000,173.3043,81,116986,250706,23.0000,3986.0000
fam4257,G4257,fam0891,G891,UN,NA,0.2323,0.4978,0.2699,0.5188,-1,0.859862,1.0000,10.1332,5314,92050,268982,383.0000,3881.0000
fam4275,G4275,fam4275,G4304,OT,0,0.0053,0.9947,0.0000,0.4973,-1,0.837959,1.0000,136.0345,122,118563,247910,29.0000,3945.0000
fam4275,G4275,fam4275,G4352,OT,0,0.0048,0.9798,0.0154,0.5053,-1,0.841317,1.0000,209.3684,110,116482,251129,19.0000,3978.0000
fam4454,G4452,fam4454,G4454,OT,0,0.0055,0.9840,0.0104,0.5024,-1,0.840440,1.0000,136.8966,127,116709,249681,29.0000,3970.0000
fam4454,G4453,fam4454,G4454,OT,0,0.0057,0.9842,0.0101,0.5022,-1,0.840379,1.0000,160.0000,130,116255,248588,25.0000,4000.0000
fam4666,G4666,fam0686,G686,UN,NA,0.0000,0.0026,0.9974,0.9987,-1,0.999576,1.0000,NA,0,312,367735,0.0000,4580.0000
fam4691,G4691,fam4722,G4722,UN,NA,0.3986,0.4841,0.1173,0.3593,-1,0.818992,1.0000,5.9269,9073,113787,241580,602.0000,3568.0000
fam5092,G5089,fam5092,G5092,OT,0,0.0051,0.9928,0.0022,0.4985,-1,0.839156,1.0000,234.4706,117,118225,249900,17.0000,3986.0000
fam5092,G5092,fam5092,G5097,OT,0,0.0000,1.0000,0.0000,0.5000,-1,0.837560,1.0000,218.9444,100,119554,248956,18.0000,3941.0000
fam5365,G5106,fam5365,G5365,OT,0,0.0057,0.9916,0.0027,0.4985,-1,0.839191,1.0000,208.0000,131,118747,251154,19.0000,3952.0000
fam5365,G5118,fam5365,G5365,OT,0,0.0052,0.9886,0.0061,0.5004,-1,0.839781,1.0000,164.8750,121,118035,250954,24.0000,3957.0000
fam0523,G523,fam0523,G551,OT,0,0.0037,0.9829,0.0135,0.5049,-1,0.841107,1.0000,180.6818,85,116769,251126,22.0000,3975.0000
fam0523,G523,fam0523,G552,OT,0,0.0000,1.0000,0.0000,0.5000,-1,0.838173,1.0000,225.2353,81,119148,249406,17.0000,3829.0000
fam5282,G5282,fam5282,G5328,OT,0,0.0033,0.9933,0.0035,0.5001,-1,0.839545,1.0000,232.1765,75,117898,249882,17.0000,3947.0000
fam5681,G5681,fam0587,G587,UN,NA,0.0035,0.9871,0.0094,0.5030,-1,0.840482,1.0000,188.9524,79,116708,249522,21.0000,3968.0000
fam0578,G578,fam6039,G6039,UN,NA,0.0000,0.0095,0.9905,0.9952,-1,0.998462,1.0000,4553.0000,1,1123,364629,1.0000,4553.0000"
plink.genome <- read.csv(text=plink.genome)

ibdlist <- "ID1,ID2,k0,k1,kinship
G1000,G1001,1.0000000,0.00000000,0.000000000
G1000,G1003,0.9938901,0.00000000,0.003054932
G1241,G1245,1.0000000,0.00000000,0.000000000
G1000,G1005,1.0000000,0.00000000,0.000000000
G1079,G2484,0.9938901,0.00000000,0.003054932
G1000,G1009,1.0000000,0.00000000,0.000000000"
ibdlist <- read.csv(text=ibdlist)

x <- paste(plink.genome$IID1, plink.genome$IID2)
table <- paste(ibdlist$ID1, ibdlist$ID2)
idx <- match(x=x, table=table)
ibdlist[idx,"kinship"]


Best,
Rasmus
#
so if I understand correctly you would just remove sep=" " from my codes?

Thank you so much for working on this.
Is there is any chance you can change my original code (pasted bellow)
with changes you think should work?

library(SNPRelate)

# get PLINK output
plink.genome <- read.table("plink.genome", header=TRUE)

snpgdsBED2GDS("output4.bed", "output4.fam","output4.bim", "HapMap.gds")
genofile <- snpgdsOpen("HapMap.gds")

# get SNPRelate output
ibd <- snpgdsIBDMoM(genofile, remove.monosnp=FALSE, kinship=TRUE)

# adjust for the orders of sample pair
pair.samp <- paste(ibdlist$ID1, ibdlist$ID2, sep=" ")
plink.genome <- plink.genome[match(
    paste(plink.genome$IID1, plink.genome$IID2, sep=" "), pair.samp), ]
On Fri, Apr 10, 2020 at 6:03 PM Rasmus Liland <jensrasmus at gmail.com> wrote:
#
On 2020-04-10 18:46 -0500, Ana Marija wrote:
Yes, it seems like my tab separated values got converted to 
spaces ... so comma separated values seemed like a sane choice 
...

But ... I do not have all those specific files called here ... 
this seems very similar to what you had before ... what is it 
that does not work?  Do you get any errors here at all, or?
#
I am not sure what I am suppose to run from your codes.
Can you just send me lines of codes which I should run? (without part
where you are loading your data frames)
(assuming my files are as I showed them)

Or the whole idea was to remove sep=" " from everywhere?
On Fri, Apr 10, 2020 at 7:01 PM Rasmus Liland <jensrasmus at gmail.com> wrote:
#
On 2020-04-10 19:05 -0500, Ana Marija wrote:
Dear Ana,

try these lines:


plink.genome <-
  read.table("plink.genome", header=TRUE)
SNPRelate::snpgdsBED2GDS(
  "output4.bed",
  "output4.fam",
  "output4.bim",
  "HapMap.gds")
genofile <-
  SNPRelate::snpgdsOpen("HapMap.gds")
ibd <- SNPRelate::snpgdsIBDMoM(
  genofile,
  remove.monosnp=FALSE,
  kinship=TRUE)
x <- paste(plink.genome$IID1,
           plink.genome$IID2)
table <- paste(ibd$ID1, ibd$ID2)
idx <- match(x=x, table=table)
plink.genome[idx,]


Best,
Rasmus