Hello.
Now i tried to do what you told me.
I used the str(fuction), and data$date1 and data3$date1 where both listed
"character". I changed "name" to character but it did not work either.
I also changed all variables to character, with no positive result.
str(data)
'data.frame': 14446 obs. of 15 variables:
$ id : chr "1" "1" "1" "1" ...
$ compid : chr "2514" "2514" "2514" "2514" ...
$ secid : chr "15856" "15856" "15856" "15856" ...
$ name : chr "A-pressen" "A-pressen" "A-pressen" "A-pressen" ...
$ period : chr "1" "2" "3" "4" ...
$ date : chr "17.05.1980" "17.05.1981" "17.05.1982" "17.05.1983" ...
$ enddate: chr "17.05.1981" "17.05.1982" "17.05.1983" "17.05.1984" ...
$ div : chr NA NA NA NA ...
$ ndivs : chr NA NA NA NA ...
$ posdiv : chr NA NA NA NA ...
$ ddiv2 : chr NA NA NA NA ...
$ ddiv3 : chr NA NA NA NA ...
$ ddiv4 : chr NA NA NA NA ...
$ ddiv5 : chr NA NA NA NA ...
$ ddiv6 : chr NA NA NA NA ...
str(data3)
'data.frame': 812354 obs. of 9 variables:
$ date : chr "02.01.1996" "03.01.1996" "04.01.1996"
"05.01.1996" ...
$ Securityid : chr "6001" "6001" "6001" "6001" ...
$ Symbol : chr "AAV" "AAV" "AAV" "AAV" ...
$ name : chr "Adresseavisen" "Adresseavisen"
"Adresseavisen" "Adresseavisen" ...
$ Securitytype : chr "Ordinary Shares" "Ordinary Shares"
"Ordinary Shares" "Ordinary Shares" ...
$ Unadjusted : chr "200" "200" "200" "200" ...
$ Event.adjusted : chr "200" "200" "200" "200" ...
$ Div.and.Event.adjusted: chr "109,7595375" "109,7595375" "109,7595375"
"109,7595375" ...
$ Sharesissued : chr "1901646" "1901646" "1901646" "1901646" ...
Here is some suitable data for "data"
structure(list(id = c("1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"),
compid = c("2514", "2514", "2514", "2514", "2514", "2514",
"2514", "2514", "2514", "2514", "2514", "2514", "2514", "2514",
"2514", "2514", "2514", "2514", "2514", "2514"), secid = c("15856",
"15856", "15856", "15856", "15856", "15856", "15856", "15856",
"15856", "15856", "15856", "15856", "15856", "15856", "15856",
"15856", "15856", "15856", "15856", "15856"), name = c("A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen"), period = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20"), date = c("17.05.1980",
"17.05.1981", "17.05.1982", "17.05.1983", "17.05.1984", "17.05.1985",
"17.05.1986", "17.05.1987", "17.05.1988", "17.05.1989", "17.05.1990",
"17.05.1991", "17.05.1992", "17.05.1993", "17.05.1994", "17.05.1995",
"17.05.1996", "17.05.1997", "17.05.1998", "17.05.1999"),
enddate = c("17.05.1981", "17.05.1982", "17.05.1983", "17.05.1984",
"17.05.1985", "17.05.1986", "17.05.1987", "17.05.1988", "17.05.1989",
"17.05.1990", "17.05.1991", "17.05.1992", "17.05.1993", "17.05.1994",
"17.05.1995", "17.05.1996", "17.05.1997", "17.05.1998", "17.05.1999",
"17.05.2000"), div = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
"0", "0", "0", "0", "0", "5", "0", "1.1", "1.2", "1", "0"
), ndivs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, "0", "0",
"0", "0", "0", "1", "0", "1", "1", "1", "0"), posdiv = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "1",
NA, "1", "1", "1", NA), ddiv2 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, "0", "0", "0", "0", "0", NA, "0", "1", NA,
NA), ddiv3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"0", "0", "0", "0", "0", "0", "0", "0", "-1"), ddiv4 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0", "0", "0",
"0", "0", "0", "0", "0"), ddiv5 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "0", "0", "0", "0", "0", "0",
"0"), ddiv6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0", "0", "0", "0", "0", "0")), .Names = c("id",
"compid", "secid", "name", "period", "date", "enddate", "div",
"ndivs", "posdiv", "ddiv2", "ddiv3", "ddiv4", "ddiv5", "ddiv6"
), row.names = c(NA, 20L), class = "data.frame")
Here is some suitable data for "data3":
structure(list(date = c("02.01.1996", "03.01.1996", "04.01.1996",
"05.01.1996", "08.01.1996", "09.01.1996", "10.01.1996", "11.01.1996",
"12.01.1996", "15.01.1996", "16.01.1996", "17.01.1996", "18.01.1996",
"19.01.1996", "22.01.1996", "23.01.1996", "24.01.1996", "25.01.1996",
"26.01.1996", "29.01.1996"), Securityid = c("6001", "6001", "6001",
"6001", "6001", "6001", "6001", "6001", "6001", "6001", "6001",
"6001", "6001", "6001", "6001", "6001", "6001", "6001", "6001",
"6001"), Symbol = c("AAV", "AAV", "AAV", "AAV", "AAV", "AAV",
"AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV",
"AAV", "AAV", "AAV", "AAV", "AAV"), name = c("Adresseavisen",
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen",
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen",
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen",
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen",
"Adresseavisen", "Adresseavisen", "Adresseavisen"), Securitytype =
c("Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares"), Unadjusted =
c("200",
"200", "200", "200", "200", "200", "200", "200", "200", "200",
"200", "200", "200", "200", "200", "200", "200", "200", "200",
"200"), Event.adjusted = c("200", "200", "200", "200", "200",
"200", "200", "200", "200", "200", "200", "200", "200", "200",
"200", "200", "200", "200", "200", "200"), Div.and.Event.adjusted =
c("109,7595375",
"109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375",
"109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375",
"109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375",
"109,7595375", "109,7595375", "109,7595375", "109,7595375"),
Sharesissued = c("1901646", "1901646", "1901646", "1901646",
"1901646", "1901646", "1901646", "1901646", "1901646", "1901646",
"1901646", "1901646", "1901646", "1901646", "1901646", "1901646",
"1901646", "1901646", "1901646", "1901646")), .Names = c("date",
"Securityid", "Symbol", "name", "Securitytype", "Unadjusted",
"Event.adjusted", "Div.and.Event.adjusted", "Sharesissued"), row.names =
c(NA,
20L), class = "data.frame")
When I run the function:
data4<-merge(data,data3, by=c("name","date1"), all=T)
structure(list(name = c("A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen",
"A-pressen", "A-pressen"), date = c("01.02.1999", "01.02.2000",
"01.02.2001", "01.02.2002", "01.03.1999", "01.03.2000", "01.03.2001",
"01.03.2002", "01.04.2003", "01.06.1999", "01.06.2001", "01.07.1999",
"01.07.2002", "01.07.2003", "01.08.2000", "01.08.2001", "01.08.2002",
"01.08.2003", "01.09.1999", "01.09.2000"), id = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
compid = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), secid = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), period = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), enddate = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), div = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), ndivs = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), posdiv = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), ddiv2 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), ddiv3 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), ddiv4 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), ddiv5 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), ddiv6 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Securityid = c("15856",
"15856", "15856", "15856", "15856", "15856", "15856", "15856",
"15856", "15856", "15856", "15856", "15856", "15856", "15856",
"15856", "15856", "15856", "15856", "15856"), Symbol = c("APR",
"APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR",
"APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR",
"APR"), Securitytype = c("Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares",
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares"),
Unadjusted = c("120", "140", "160", "105", "110", "190",
"160", "112", "115", "120", "150", "127", "106,5", "154",
"155", "160", "111", "155", "127", "205"), Event.adjusted = c("120",
"140", "160", "105", "110", "190", "160", "112", "115", "120",
"150", "127", "106,5", "154", "155", "160", "111", "155",
"127", "205"), Div.and.Event.adjusted = c("111,4092308",
"129,9774359", "148,545641", "100,1538462", "102,1251282",
"176,3979487", "148,545641", "106,8307692", "109,6923077",
"111,4092308", "143,0769231", "117,9081026", "101,5846154",
"154", "143,9035897", "152,6153846", "105,8769231", "155",
"117,9081026", "190,3241026"), Sharesissued = c("8839643",
"8839643", "8854307", "8866191", "8839643", "8839643", "8854307",
"8867791", "8885537", "8839643", "8866191", "8839643", "8885537",
"8903842", "8854307", "8866191", "8885537", "8903842", "8839643",
"8854307")), .Names = c("name", "date", "id", "compid", "secid",
"period", "enddate", "div", "ndivs", "posdiv", "ddiv2", "ddiv3",
"ddiv4", "ddiv5", "ddiv6", "Securityid", "Symbol", "Securitytype",
"Unadjusted", "Event.adjusted", "Div.and.Event.adjusted", "Sharesissued"
), row.names = c(NA, 20L), class = "data.frame")
If I want to just keep the observations from "data" (14446 observations)
merged with "data3". Is it correct to use:
data4<-merge(data,data3, by=c("name","date1"), all=T)
or should I use:
data4<-merge(data,data3, by=c("name","date1"), all.x=F)?
(Then I get ca 14000 obs, but "NA" in all variables from "data3".
In advance, thank you.
--
View this message in context: http://r.789695.n4.nabble.com/merging-two-dataframes-tp3932869p3940157.html
Sent from the R help mailing list archive at Nabble.com.