Skip to content
Prev 318544 / 398503 Next

Conditional Weighted Average (ddply or any other function)

Hi John,

The sample size is huge involving 10,000 + firms. I have put a
representative sample using dput ( Name, ticker and country have been
changed so that firms cannot be identified due to proprietary data
set, also EPS is not required and removed from the dataset)

structure(list(NAME = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 7L,
7L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("CCC", "CTAX",
"INN", "NOB", "SH", "SZ", "WASH", "WILLSON"), class = "factor"),
    Ticker = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
    8L, 8L, 8L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
    6L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("CC13",
    "CT56", "INN12", "NB12", "SH12", "SZ12", "W12", "W15"), class = "factor"),
    Industry = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
    4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L,
    5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Commercial &
Professional Serv",
    "Energy", "Media", "Retail", "Transportation"), class = "factor"),
    Sector = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Consumer
Discretionary",
    "Energy", "Industrials"), class = "factor"), Country = structure(c(4L,
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L,
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L), .Label = c("Brazil", "China", "India", "UK"), class =
"factor"),
    FISCALYEAR = structure(c(3L, 2L, 1L, 4L, 5L, 6L, 3L, 2L,
    1L, 4L, 5L, 6L, 3L, 2L, 1L, 4L, 5L, 6L, 3L, 3L, 2L, 1L, 4L,
    5L, 6L, 3L, 2L, 1L, 4L, 5L, 6L, 3L, 2L, 1L, 4L, 5L, 6L, 3L,
    2L, 1L, 4L, 5L, 6L, 3L, 2L, 1L, 4L, 5L, 6L), .Label = c("FY-1",
    "FY-2", "FY-3", "FY0", "FY1", "FY2"), class = "factor"),
    ROE = c(0.026, 0.0656, 0.1621, 0.1885, 0.1968, 0.2126, 0.0207,
    0.0319, 0.0963, 0.0431, 0.066, 0.066, 0.0707, 0.0797, 0.0781,
    0.078, 0.098, 0.126, 0.0352, 0.2625, 0.3714, 0.2929, 0.3133,
    0.2509, 0.2398, 0.2779, 0.1109, 0.0509, 0.069, 0.1017, 0.1298,
    0.5842, 0.3953, 0.4429, 0.3616, 0.26, 0.2, 0.4472, 0.2912,
    0.21, 0.2849, 0.3553, 0.4347, 0.3289, 0.3846, 0.2643, 0.0458,
    0.1265, 0.28), MKT = c(2919236084, 836858582, 2015182617,
    3399344971, 4324821777, 4324821777, 7619453125, 3579844727,
    4132238281, 3712239990, 2879757813, 2879757813, 1525237793,
    700357605, 1814942993, 1858225342, 1242890503, 1242890503,
    1879700000, 557093400, 224900300, 1634700000, 1443200000,
    3582664735, 3582664735, 5830366211, 10660833984, 9024061523,
    7628660645, 9154108398, 9154108398, 7064532227, 1804380005,
    6331067871, 10445639648, 9153587891, 9153587891, 6231200000,
    4.078e+09, 10107500000, 12460300000, 17800051556, 17800051556,
    513478700, 260993500, 882575400, 1.151e+09, 855938413, 855938413
    )), .Names = c("NAME", "Ticker", "Industry", "Sector", "Country",
"FISCALYEAR", "ROE", "MKT"), class = "data.frame", row.names = c(NA,
-49L))

Thanks,
Punit