Conduct pairwise column comparisons without comparing a column to itself - R-help

Fri, Oct 19, 2007 4:20 PM #

A slightly different solution, but it gives you the matches and the
columns in a more compact form.  You can always take the resulting data
and use it to fill in your array.

     [,1] [,2] [,3]
[1,]    1    1    2
[2,]    2    3    3

+     match <- which(same[, cbn[1, .col]] & same[, cbn[2, .col]])
+     if (length(match) == 0) return(NULL)  # no matches
+     # now return the values
+     cbind(LA=X[match, 2 * cbn[1, .col]],
+           LB=X[match, 2 * cbn[2, .col]],
+           col1=cbn[1, .col],
+           col2=cbn[2, .col])
+ })

      c1 c2 c3 c4 c5 c6
 [1,]  1  2  1  2  1  3
 [2,]  4  4  3  3  2  2
 [3,]  3  3  2  2  1  4
 [4,]  2  4  4  3  1  3
 [5,]  4  4  4  1  2  1
 [6,]  1  3  3  3  2  1
 [7,]  3  4  4  2  2  2
 [8,]  2  1  4  4  3  3
 [9,]  4  3  2  4  2  3
[10,]  3  2  2  3  1  4

     LA LB col1 col2
[1,]  4  3    1    2
[2,]  3  2    1    2
[3,]  4  2    1    3
[4,]  3  2    2    3
[5,]  4  3    2    3

On 10/19/07, Luke Neraas <lukasneraas.r at gmail.com> wrote:

#Hi Jim,
# here is a simpler version of my puzzle
# I have added a bit of explanation near the bottom of this puzzle
# I apologize for the confusion and sloppiness earlier.


# I have a question regarding pairwise calculations on a matrix using a
# "for-loop."
# Below I have a matrix "X" with 6 columns. These are genotypic data, so
# Column1 & Column2 are a unit, Column3 & Column4 are a unit, and
# Column5 & Column6 are a unit.
# I have a loop designed to calculate the number of times an individual has
# the same value in Column "i" & Column "j" and the same individual also has
# two values that are the same in Column "k" & Column "l".
# I have another series of code that adds a 2 to a specific location in a
# results data frame called "result.df".
# I have written a loop that accomplishes this "pair of columns" pairwise
# comparison, but it also compares some of the "pairs of columns" to
# themselves. Is there a way to get around this?


# Build the data matrix: ten rows (individuals) by six columns, where the
# columns are read as adjacent pairs (c1/c2, c3/c4, c5/c6).
c1 <- c(1, 4, 3, 2, 4, 1, 3, 2, 4, 3)
c2 <- c(2, 4, 3, 4, 4, 3, 4, 1, 3, 2)
c3 <- c(1, 3, 2, 4, 4, 3, 4, 4, 2, 2)
c4 <- c(2, 3, 2, 3, 1, 3, 2, 4, 4, 3)
c5 <- c(1, 2, 1, 1, 2, 2, 2, 3, 2, 1)
c6 <- c(3, 2, 4, 3, 1, 1, 2, 3, 3, 4)

X <- cbind(c1, c2, c3, c4, c5, c6)

X

# Build the result data frame: one row for every ordered combination of the
# values 1-4, with the first column varying slowest.
result <- matrix(0, nrow = 16, ncol = 2)
result.df <- data.frame(result)
result.df[, 1] <- rep(c(1, 2, 3, 4), each = 4)
result.df[, 2] <- rep(c(1, 2, 3, 4), times = 4)
names(result.df)[1:2] <- c("L(A)a(ij)", "L(B)a(kl)")

result.df



### Loop that finds double homozygotes: rows of X where columns i and j hold
### the same value AND columns k and l hold the same value.
### Each (i, k) iteration merges one more count column into result.df.


# Outer loop: i is the first column of the first pair, j = i + 1 the second.
# With the 6-column X this visits i = 1 and i = 3.
for (i in seq(1,(ncol(X)-3), by=2)){
    j <- i+1
# Inner loop: k is the first column of the second pair, l = k + 1 the second.
# NOTE(review): k always starts at 3 regardless of i, so the iteration
# i = 3, k = 3 compares the pair (3, 4) with itself -- this is the
# self-comparison the post asks how to avoid.
for (k in seq(3,(ncol(X)-1), by=2)){
    l <- k+1

    # Logical vector, one entry per row of X: TRUE where both pairs match.
    match.rows <- ((X [,i] == X [, j] ) &   ( X [,k] == X [, l]))

    # Shared value of each pair, taken from the matching rows only.
    double_homo_i <- X [match.rows, i]
    double_homo_k <- X [match.rows, k]

    # One record per matching row, each carrying a count of 2.
    # NOTE(review): if no row matches, double_homo has zero rows and the
    # data.frame()/aggregate() calls below may fail -- confirm.
    double_homo<- cbind( double_homo_i, double_homo_k)
    double_homo.df<-data.frame(double_homo,Counts=2)
       names(double_homo.df)[1]<-"L(A)a(ij)"
       names(double_homo.df)[2]<- "L(B)a(kl)"


# Below takes the result of each iteration and puts it in the result.df data frame.

count<-double_homo.df

# Sum the counts for each distinct combination of the two pair values.
almost.df<-aggregate(count$Counts, list(count[,1],count[,2]),
FUN=sum)

# Sort by the first pair's value and restore the key column names so the
# summary can be merged on them.
temp<-order(almost.df$Group.1)
final.df<-almost.df[temp,]
names(final.df)[1]<-"L(A)a(ij)"
names(final.df )[2]<-"L(B)a(kl)"

# all.x keeps every row of result.df; combinations with no match get NA.
result.df<-merge(result.df,final.df,by=c("L(A)a(ij)","L(B)a(kl)"), all.x=T)

             }
             }



# Below are the results I get with the code above.

result.df



#    L(A)a(ij) L(B)a(kl) C1C2~C3C4 C1C2~C5C6 C3C4~C3C4 C3C4~C5C6
# 1          1         1        NA        NA        NA        NA
# 2          1         2        NA        NA        NA        NA
# 3          1         3        NA        NA        NA        NA
# 4          1         4        NA        NA        NA        NA
# 5          2         1        NA        NA        NA        NA
# 6          2         2        NA        NA         2        NA
# 7          2         3        NA        NA        NA        NA
# 8          2         4        NA        NA        NA        NA
# 9          3         1        NA        NA        NA        NA
# 10         3         2         2        NA        NA         2
# 11         3         3        NA        NA         4        NA
# 12         3         4        NA        NA        NA        NA
# 13         4         1        NA        NA        NA        NA
# 14         4         2        NA         2        NA        NA
# 15         4         3         2        NA        NA         2
# 16         4         4        NA        NA         2        NA

# The first column in result.df is the value of the number (1-4) in the
# first "column pair" comparison from "X" that has the same value in a row.
# The second column in result.df is the value of the number (1-4) in a
# "column pair" comparison from "X" that has the same value in a row for that
# column pair.
# The third column in result.df has the value 2 added to the data frame if
# the condition is met.
# For example, in "X", Col1 & Col2 have "3 3" in row 3 and Col3 & Col4 have
# "2 2" in row 3. Therefore result.df$C1C2~C3C4 has a 2 added to
# the row where result.df$L(A)a(ij)=3 and result.df$L(B)a(kl)=2.
# My major problem stems from having "column pairs" compared to themselves,
# such as result.df$C3C4~C3C4, which holds the results from
# X[,3:4] compared to itself.
# Is there a way to write the loop so these "column pairs" are not compared to
# themselves?
# Perhaps a change in the code for my loop:
#                        for (i in seq(1,(ncol(X)-3), by=2)){
#                        j <- i+1
#                        for (k in seq(3,(ncol(X)-1), by=2)){
#                        l <- k+1



# Here is the Result I am looking for.

#    L(A)a(ij) L(B)a(kl) C1C2~C3C4 C1C2~C5C6 C3C4~C5C6
# 1          1         1        NA        NA        NA
# 2          1         2        NA        NA        NA
# 3          1         3        NA        NA        NA
# 4          1         4        NA        NA        NA
# 5          2         1        NA        NA        NA
# 6          2         2        NA        NA        NA
# 7          2         3        NA        NA        NA
# 8          2         4        NA        NA        NA
# 9          3         1        NA        NA        NA
# 10         3         2         2        NA         2
# 11         3         3        NA        NA        NA
# 12         3         4        NA        NA        NA
# 13         4         1        NA        NA        NA
# 14         4         2        NA         2        NA
# 15         4         3         2        NA         2
# 16         4         4        NA        NA        NA


# Any help or ideas would be greatly appreciated

# Thanks in advance

# Luke Neraas

# lukasneraas.r at gmail.com

# University of Alaska Fairbanks
# School of Fisheries and Ocean Sciences
# 11120 Glacier Highway
# UAF Fisheries Division
# Juneau, AK 99801

Jim Holtman
Cincinnati, OH
+1 513 646 9390

What is the problem you are trying to solve?