Skip to content

t-test across columns

4 messages · Nico Met, David L Carlson, arun

#
Hi,
Not sure about the format for the 2nd part.
df1<- ##data

library(plyr)
df2<-ddply(df1,.(name,cat),summarize, cbind(t.test(val,df1$val)$statistic,t.test(val,df1$val)$p.value))
df3<-cbind(df2[,1:2],data.frame(df2[,3]))
colnames(df3)[3:4]<- c("t-val","p.val")
library(reshape2)
df3m<- melt(df3,id.var=c("name","cat"))
xtabs(value~name+cat+variable,data=df3m)
, , variable = t-val

      cat
name      p178266580    p178269196    p178316310    p191287337    p195158904
  12.2 -1.1697701975 -5.2812696387 -1.2740973341  2.1926665883  0.1529759080
  15.9 -2.5063901671  0.0000000000 -0.2169806106  1.5455008954 -1.6574358795
      cat
name      p196921846    p197427158    p238921966
  12.2  0.2260409495 -0.3320635130  3.3659689025
  15.9  6.6278680348  0.0000000000  0.0000000000

, , variable = p.val

      cat
name      p178266580    p178269196    p178316310    p191287337    p195158904
  12.2  0.3092408498  0.0003382099  0.3762474897  0.0419925673  0.8812900356
  15.9  0.0147796276  0.0000000000  0.8365830321  0.1822041450  0.1096087365
      cat
name      p196921846    p197427158    p238921966
  12.2  0.8226135494  0.7435688987  0.0071990164
  15.9  0.0005489640  0.0000000000  0.0000000000

#or
res<-dcast(df3m,name~cat+variable,value.var="value")
row.names(res)<- res[,1]
res1<- res[,-1]
res1
     p178266580_t-val p178266580_p.val p178269196_t-val p178269196_p.val
12.2         -1.16977       0.30924085         -5.28127     0.0003382099
15.9         -2.50639       0.01477963               NA               NA
     p178316310_t-val p178316310_p.val p191287337_t-val p191287337_p.val
12.2       -1.2740973        0.3762475         2.192667       0.04199257
15.9       -0.2169806        0.8365830         1.545501       0.18220414
     p195158904_t-val p195158904_p.val p196921846_t-val p196921846_p.val
12.2        0.1529759        0.8812900        0.2260409      0.822613549
15.9       -1.6574359        0.1096087        6.6278680      0.000548964
     p197427158_t-val p197427158_p.val p238921966_t-val p238921966_p.val
12.2       -0.3320635        0.7435689         3.365969      0.007199016
15.9               NA               NA               NA               NA

A.K.


----- Original Message -----
From: Nico Met <nicomet80 at gmail.com>
To: R help <r-help at r-project.org>
Cc: 
Sent: Monday, July 15, 2013 11:50 AM
Subject: [R] t-test across columns

Dear all,

I would like to do t-test across two columns "name" with different "cat"
with overall mean ("val").

(Removing if there is a single observation)

And finally, make a matrix with t-value and p-value associated with a name
(in rows) and cat (in columns)

dput(x)
structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("12.2", "15.9"
), class = "factor"), cat = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("p178266580",
"p178269196", "p178316310", "p191287337", "p195158904", "p196921846",
"p197427158", "p238921966"), class = "factor"), val = c(148.90772,
184.253375, 183.974866666667, 191.868125, 173.30515, 187.876975,
177.453775, 184.799525, 212.39065, 205.504525, 186.152025, 194.337075,
193.2703, 204.71665, 211.4452, 202.609175, 203.72918, 193.7261,
196.1186, 202.79556, 203.48818, 191.13744, 205.23315, 198.66842,
196.81032, 200.90512, 206.13564, 205.372225, 196.22835, 211.04686,
219.977133333333, 224.7602, 231.6596, 211.105816666667, 215.44474,
210.83514, 228.173125, 224.09034, 212.96026, 239.0085, 213.5407,
227.12115, 209.24888, 232.8964, 232.22146, 228.1643, 236.43082,
232.20792, 238.49192, 224.64014, 233.75898, 207.06138, 215.3649,
211.14802, 201.86854, 200.52278, 199.05752, 194.90904, 214.44334,
249.357266666667, 239.98525, 234.508483333333, 243.865083333333,
233.595816666667, 248.1219, 225.289416666667, 248.220883333333,
193.69566, 198.43578, 205.06055, 208.525975, 198.28692, 206.88496,
201.60162, 205.7943, 210.5117, 196.69886, 193.58288, 198.86094,
201.81676, 225.8266, 205.879725, 218.370475, 214.006125, 198.74038,
206.00314, 198.37446, 225.5357, 216.721025, 226.543925, 158.1011,
158.15674, 166.07518, 179.942225, 158.16046, 165.0685, 159.56146
)), .Names = c("name", "cat", "val"), class = "data.frame", row.names = c(
NA,
97L))

Thanks

Nico

    [[alternative HTML version deleted]]

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
#
This may be close to what you want:
+      t.test(val~name, y)})
+       p.value=y$p.value)))
                    t      p.value
p178266580 -0.1156475 0.9144054453
p178316310 -1.0874356 0.4143944591
p191287337 -0.6776053 0.5315717871
p195158904  1.1522850 0.2769290728
p196921846 -4.2342996 0.0003925339

But I'm not sure what you mean about columns for each cat unless you
want the frequencies:
            name
cat          12.2 15.9
  p178266580    4   11
  p178316310    2    3
  p191287337    3    5
  p195158904    8    7
  p196921846   26    5
           12.2 15.9          t      p.value
p178266580    4   11 -0.1156475 0.9144054453
p178316310    2    3 -1.0874356 0.4143944591
p191287337    3    5 -0.6776053 0.5315717871
p195158904    8    7  1.1522850 0.2769290728
p196921846   26    5 -4.2342996 0.0003925339

-------------------------------------
David L Carlson
Associate Professor of Anthropology
Texas A&M University
College Station, TX 77840-4352



-----Original Message-----
From: r-help-bounces at r-project.org
[mailto:r-help-bounces at r-project.org] On Behalf Of Nico Met
Sent: Monday, July 15, 2013 10:50 AM
To: R help
Subject: [R] t-test across columns

Dear all,

I would like to do t-test across two columns "name" with different
"cat"
with overall mean ("val").

(Removing if there is a single observation)

And finally, make a matrix with t-value and p-value associated with
a name
(in rows) and cat (in columns)

dput(x)
structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("12.2", "15.9"
), class = "factor"), cat = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label =
c("p178266580",
"p178269196", "p178316310", "p191287337", "p195158904",
"p196921846",
"p197427158", "p238921966"), class = "factor"), val = c(148.90772,
184.253375, 183.974866666667, 191.868125, 173.30515, 187.876975,
177.453775, 184.799525, 212.39065, 205.504525, 186.152025,
194.337075,
193.2703, 204.71665, 211.4452, 202.609175, 203.72918, 193.7261,
196.1186, 202.79556, 203.48818, 191.13744, 205.23315, 198.66842,
196.81032, 200.90512, 206.13564, 205.372225, 196.22835, 211.04686,
219.977133333333, 224.7602, 231.6596, 211.105816666667, 215.44474,
210.83514, 228.173125, 224.09034, 212.96026, 239.0085, 213.5407,
227.12115, 209.24888, 232.8964, 232.22146, 228.1643, 236.43082,
232.20792, 238.49192, 224.64014, 233.75898, 207.06138, 215.3649,
211.14802, 201.86854, 200.52278, 199.05752, 194.90904, 214.44334,
249.357266666667, 239.98525, 234.508483333333, 243.865083333333,
233.595816666667, 248.1219, 225.289416666667, 248.220883333333,
193.69566, 198.43578, 205.06055, 208.525975, 198.28692, 206.88496,
201.60162, 205.7943, 210.5117, 196.69886, 193.58288, 198.86094,
201.81676, 225.8266, 205.879725, 218.370475, 214.006125, 198.74038,
206.00314, 198.37446, 225.5357, 216.721025, 226.543925, 158.1011,
158.15674, 166.07518, 179.942225, 158.16046, 165.0685, 159.56146
)), .Names = c("name", "cat", "val"), class = "data.frame",
row.names = c(
NA,
97L))

Thanks

Nico


______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
#
Hi,
Maybe I misunderstood your question.
The output David got could be also obtained by:
#df1 dataset

library(plyr)

df2<-ddply(df1,.(cat),function(x) if(min(table(x$name))>1){x1<- t.test(val~name,x);cbind(t=x1$statistic,p.value=x1$p.value)})
df2
#         cat          t      p.value
#1 p178266580 -0.1156475 0.9144054453
#2 p178316310 -1.0874356 0.4143944591
#3 p191287337 -0.6776053 0.5315717871
#4 p195158904  1.1522850 0.2769290728
#5 p196921846 -4.2342996 0.0003925339

But, the second part is still unclear.

A.K.


----- Original Message -----
From: David Carlson <dcarlson at tamu.edu>
To: 'Nico Met' <nicomet80 at gmail.com>; 'R help' <r-help at r-project.org>
Cc: 
Sent: Monday, July 15, 2013 1:33 PM
Subject: Re: [R] t-test across columns

This may be close to what you want:
+      t.test(val~name, y)})
+       p.value=y$p.value)))
                    t      p.value
p178266580 -0.1156475 0.9144054453
p178316310 -1.0874356 0.4143944591
p191287337 -0.6776053 0.5315717871
p195158904  1.1522850 0.2769290728
p196921846 -4.2342996 0.0003925339

But I'm not sure what you mean about columns for each cat unless you
want the frequencies:
            name
cat          12.2 15.9
  p178266580    4   11
  p178316310    2    3
  p191287337    3    5
  p195158904    8    7
  p196921846   26    5
           12.2 15.9          t      p.value
p178266580    4   11 -0.1156475 0.9144054453
p178316310    2    3 -1.0874356 0.4143944591
p191287337    3    5 -0.6776053 0.5315717871
p195158904    8    7  1.1522850 0.2769290728
p196921846   26    5 -4.2342996 0.0003925339

-------------------------------------
David L Carlson
Associate Professor of Anthropology
Texas A&M University
College Station, TX 77840-4352



-----Original Message-----
From: r-help-bounces at r-project.org
[mailto:r-help-bounces at r-project.org] On Behalf Of Nico Met
Sent: Monday, July 15, 2013 10:50 AM
To: R help
Subject: [R] t-test across columns

Dear all,

I would like to do t-test across two columns "name" with different
"cat"
with overall mean ("val").

(Removing if there is a single observation)

And finally, make a matrix with t-value and p-value associated with
a name
(in rows) and cat (in columns)

dput(x)
structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("12.2", "15.9"
), class = "factor"), cat = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label =
c("p178266580",
"p178269196", "p178316310", "p191287337", "p195158904",
"p196921846",
"p197427158", "p238921966"), class = "factor"), val = c(148.90772,
184.253375, 183.974866666667, 191.868125, 173.30515, 187.876975,
177.453775, 184.799525, 212.39065, 205.504525, 186.152025,
194.337075,
193.2703, 204.71665, 211.4452, 202.609175, 203.72918, 193.7261,
196.1186, 202.79556, 203.48818, 191.13744, 205.23315, 198.66842,
196.81032, 200.90512, 206.13564, 205.372225, 196.22835, 211.04686,
219.977133333333, 224.7602, 231.6596, 211.105816666667, 215.44474,
210.83514, 228.173125, 224.09034, 212.96026, 239.0085, 213.5407,
227.12115, 209.24888, 232.8964, 232.22146, 228.1643, 236.43082,
232.20792, 238.49192, 224.64014, 233.75898, 207.06138, 215.3649,
211.14802, 201.86854, 200.52278, 199.05752, 194.90904, 214.44334,
249.357266666667, 239.98525, 234.508483333333, 243.865083333333,
233.595816666667, 248.1219, 225.289416666667, 248.220883333333,
193.69566, 198.43578, 205.06055, 208.525975, 198.28692, 206.88496,
201.60162, 205.7943, 210.5117, 196.69886, 193.58288, 198.86094,
201.81676, 225.8266, 205.879725, 218.370475, 214.006125, 198.74038,
206.00314, 198.37446, 225.5357, 216.721025, 226.543925, 158.1011,
158.15674, 166.07518, 179.942225, 158.16046, 165.0685, 159.56146
)), .Names = c("name", "cat", "val"), class = "data.frame",
row.names = c(
NA,
97L))

Thanks

Nico

    [[alternative HTML version deleted]]

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.