# dput() snapshot of a 100-row data frame: three integer columns
# (class.1, class.2, class.3, holding the values 0, 1 and 2) and a factor
# column `terms` stored as integer codes into the .Label vector below.
#
# NOTE(review): five labels had been split across lines inside their string
# literals (presumably by mail line-wrapping), which put a newline into the
# factor level. They are rejoined here with a single space:
#   "data protection,security", "id management,privacy",
#   "location privacy,privacy", "personal identification number,pin",
#   "privacy,privacy preferences"
# "location privacy,privacy" is level 87, which the `terms` codes reference.
structure(list(class.1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 0L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 0L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L), class.2 = c(2L,
2L, 2L, 2L, 2L, 0L, 0L, 2L, 0L, 0L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
2L, 0L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), class.3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 0L, 2L, 1L, 2L, 0L, 2L, 2L, 0L, 2L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 1L, 0L, 0L, 2L, 2L, 2L, 2L, 2L,
0L, 2L, 2L, 1L, 0L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L), terms = structure(c(9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 69L, 69L, 69L, 69L, 69L, 40L, 40L, 40L, 40L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 98L, 98L, 98L, 98L, 98L,
98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 23L, 87L, 87L, 87L,
87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L,
87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L,
87L, 87L), .Label = c("#accountability",
"#accountability,#anonymity,anonymity",
"#accountability,recovery", "#anonymity,anonymity",
"#anonymous,anonymous",
"#attacker,security", "#authentication,access control", "#confidential",
"#dac", "#encryption,#privacy,#security", "#identifier",
"#identifier,identifier",
"#intrusion,#security,security", "#mac", "#mac,#security",
"#mac,password",
"#mac,security", "#password,privacy", "#password,security",
"#prevention,prevention",
"#privacy,#security,password", "#privacy,identifiable",
"#privacy,information privacy,privacy",
"#privacy,intrusion", "#privacy,location privacy,privacy",
"#privacy,password,security",
"#privacy,personal data", "#privacy,personal information,privacy",
"#privacy,security", "#pseudonym", "#pseudonymity",
"#security,authentication,identity management",
"#security,identity management,security", "#security,mac,security",
"#security,malicious,security", "#security,personal information",
"#security,retention", "#token", "#token,token",
"accountability,anonymous",
"accountability,audit trail", "accountability,confidential",
"accountability,security", "accountability,token", "adversary,pin",
"anonymity,authentication", "anonymity,security",
"anonymous,disclosure",
"anonymous,password", "authentication,password,security",
"authorization,mac",
"authorization,permission", "confidential,disclosure",
"confidential,disclosure,security",
"confidential,mac", "confidential,personal information",
"confidential,pin",
"confidential,privilege", "confidentiality,security", "consent",
"dac", "dac,pcm", "data aggregation,privacy", "data controller",
"data protection,encryption", "data protection,recovery",
"data protection,security",
"data quality,security", "data security,encryption,security",
"data security,mac,security", "data security,personal data,security",
"data security,prevention,security", "detection", "detection,mac",
"detection,password", "deterrence,prevention", "digital signature",
"disclosure,password", "disclosure,private information",
"disclosure,security",
"encryption,password,recovery", "encryption,private data",
"id management,privacy",
"id management,security", "identifier", "identifier,token",
"location privacy,privacy",
"mac,password,security", "mac,permission", "mac,prevention",
"mac,privacy", "mac,pseudonym", "malicious,prevention",
"non-repudiation",
"password,prevention,security", "password,private information",
"password,recovery", "password,user id", "permission,personal data",
"permission,privacy,privacy policy", "personal data",
"personal identification number,pin",
"personal information", "personal information,security", "prevention",
"prevention,privilege", "privacy,privacy policy",
"privacy,privacy preferences",
"private information,security", "recovery,retention", "recovery,token",
"retention,token", "sensitive data", "token"), class = "factor")),
.Names = c("class.1", "class.2", "class.3", "terms"),
row.names = c(NA, 100L), class = "data.frame")
On Mon, May 25, 2015 at 2:04 PM, John Kane <jrkrideau at inbox.com> wrote:
Hi Mohammad,
If you are just starting with R a sense of total confusion is often the
first feeling. Welcome :).
If you are a SAS or SPSS user this may help
https://science.nature.nps.gov/im/datamgmt/statistics/r/documents/r_for_sas_spss_users.pdf
If anything, I am even more lost than before.
Did Jim Lemon's approach help? Confuse ?
Perhaps one of the problems is that the data did not come through
cleanly. You posted in HTML and the R-help list strips out all HTML so
the
result often is mangled beyond any real use.
I may have imagined that your data are more complicated than they
really
are if all you really want is some kind of frequency count possibly by
some
conditioning variable. Is this it?
It seems too simple but that is what I read that Excel is doing (as
incompetently as usual---I had not realised it was possible to be even
less
impressed with Excel than I already was.)
Can you send us some more data in dput() format? See the links I
provided earlier or have a look at ?dput for more information.
If you have a lot of data, a representative sample is fine. It is often
enough to do something like:
dput(head(mydata, 100))
which supplies 100 rows of data.
Just output the dput() data, copy and paste it into your email, et voilà,
we have the exact same data.
The reason for dput() is that it provides a snapshot of exactly how the
data exists on your machine. Given all sorts of differences between
operating systems, personal settings, human languages and so on, what I
or another R-help reader sees or reads in may not correspond to what you
have. Using dput() avoids all of this.
Here is a simple example of what I mean. If you look at dat1 and dat2
they 'look' the same but they are not: in dat1 the column aa is a
factor, while in dat2 it is an integer. I could read in data either way
depending on all sorts of variables and have no idea which, if either,
is how you see the data.
Data are supplied in dput() format, just copy and paste into R.
=====
# dat1: column `aa` is a factor whose levels are the strings "1" to "10";
# column `bb` counts down from 10 to 1.
dat1 <- structure(
  list(
    aa = structure(1:10, .Label = as.character(1:10), class = "factor"),
    bb = 10:1
  ),
  .Names = c("aa", "bb"),
  row.names = c(NA, -10L),
  class = "data.frame"
)

# dat2: same values, but `aa` is a plain integer vector.
dat2 <- structure(
  list(aa = 1:10, bb = 10:1),
  .Names = c("aa", "bb"),
  row.names = c(NA, -10L),
  class = "data.frame"
)

# Printed, the two data frames are indistinguishable.
dat1
dat2 # looks a lot like dat1

# Arithmetic exposes the difference: `*` is not meaningful for a factor, so
# the first call warns and yields NA, while the second multiplies integers.
with(dat1, aa * bb)
with(dat2, aa * bb)

# str() shows the column types directly.
str(dat1)
str(dat2)
=======
John Kane
Kingston ON Canada
-----Original Message-----
From: mxalimohamma at ualr.edu
Sent: Mon, 25 May 2015 12:14:46 -0500
To: jrkrideau at inbox.com
Subject: Re: [R] Problem with comparing multiple data sets
Hi John.
Thank you for your response.
Here is a small portion of my actual data set. What I am supposed to do
is to use a function similar to mode function in excel to find the most
frequent value (class) for each term.
V1 V2 V3 V4
1 class 1 class 2 class 3 terms
2 0 2 0 #dac
3 0 2 0 #dac
4 0 2 0 #dac
5 0 2 0 #dac
6 1 0 1 #dac
7 0 0 0 #dac
....
Since I have just started using R, I don't know where I am going with
this. I appreciate any help.
On Sat, May 23, 2015 at 8:23 AM, John Kane <jrkrideau at inbox.com> wrote:
Hi Mohammad
Welcome to the R-help list.
There probably is a fairly easy way to do what you want but I think we
probably need a bit more background information on what you are trying
to achieve. I know I'm not exactly clear on your decision rule(s).
It would also be very useful to see some actual sample data in usable
R format. Have a look at these links
http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example
and http://adv-r.had.co.nz/Reproducibility.html for some hints on what
you might want to include in your question.
In particular, read up about dput() in those links and/or see ?dput.
This is the generally preferred way to supply sample or illustrative
data
to the R-help list. It basically creates a perfect copy of the data as
it
exists on 'your' machine so that R-help readers see exactly what you do.
John Kane
Kingston ON Canada