Hello
I have the following problem : group the lines of a dataframe when no
information change (Matricule, Nom, Sexe, DateNaissance, Contrat, Pays)
and when the value of Debut of lines i = value Fin of lines i-1
I can obtain it with a do loop. Is it possible to avoid the loop ?
The dataframe initial is df1
dput(df1)
structure(list(Matricule = c(1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L,
6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 934L, 934L, 934L, 934L,
934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L,
934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L,
934L, 934L, 934L, 934L), Nom = c("VERON", "VERON", "VERON", "BENARD",
"BENARD", "BENARD", "BENARD", "BENARD", "BENARD", "DALNIC", "DALNIC",
"DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI"), Sexe = c("F?minin", "F?minin", "F?minin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"F?minin", "F?minin", "F?minin", "F?minin", "F?minin", "F?minin",
"F?minin", "F?minin", "F?minin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin"), DateNaissance = c("02/09/1935",
"02/09/1935", "02/09/1935", "01/04/1935", "01/04/1935", "01/04/1935",
"01/04/1935", "01/04/1935", "01/04/1935", "19/02/1940", "19/02/1940",
"19/02/1940", "19/02/1940", "19/02/1940", "19/02/1940", "19/02/1940",
"19/02/1940", "19/02/1940", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961"), contrat = c("CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDD d?tach? ext. Cirad", "CDD d?tach? ext. Cirad",
"CDD d?tach? ext. Cirad", "CDD d?tach? ext. Cirad", "CDD d?tach? ext.
Cirad",
"CDD d?tach? ext. Cirad", "CDD d?tach? ext. Cirad", "CDD d?tach? ext.
Cirad",
"CDD d?tach? ext. Cirad", "CDI D?tach?s Autres", "CDI D?tach?s Autres",
"CDI D?tach?s Autres", "CDI D?tach?s Autres", "CDI D?tach?s Autres",
"CDI D?tach?s Autres", "CDI D?tach?s Autres", "CDI D?tach?s Autres",
"CDI D?tach?s Autres", "CDI D?tach?s Autres", "CDI D?tach?s Autres",
"CDI D?tach?s Autres", "CDI D?tach?s Autres", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun"), Pays = c("France", "France", "France", "Philippines",
"Philippines", "Philippines", "France", "France", "France", "France",
"France", "Martinique", "Martinique", "Martinique", "Martinique",
"Martinique", "Martinique", "Martinique", "Cameroun", "Cameroun",
"Cameroun", "Cameroun", "Cameroun", "Cameroun", "Cameroun", "Cameroun",
"Cameroun", "France", "France", "France", "France", "France",
"France", "France", "Congo", "Congo", "Congo", "Congo", "Congo",
"Congo", "Gabon", "Gabon", "Gabon", "Gabon", "Gabon", "Gabon",
"Congo", "Congo"), Debut = c("24/01/1995", "01/05/1997", "31/12/1997",
"02/02/1995", "28/02/1995", "01/03/1995", "13/03/1995", "01/01/1996",
"31/01/1996", "24/01/1995", "01/07/1995", "01/09/1995", "01/07/1997",
"01/01/1998", "01/08/1998", "01/01/2000", "17/01/2000", "29/02/2000",
"26/01/1995", "01/07/1996", "16/09/1997", "01/01/1998", "01/07/1998",
"04/11/1999", "01/01/2001", "01/04/2001", "31/08/2001", "01/09/2001",
"02/09/2001", "01/12/2001", "01/02/2003", "01/04/2003", "01/01/2004",
"01/03/2004", "01/09/2004", "01/01/2005", "01/04/2005", "28/10/2006",
"01/01/2007", "01/04/2007", "01/09/2007", "01/01/2009", "01/04/2009",
"01/01/2010", "01/01/2011", "01/04/2011", "05/09/2012", "01/01/2013"
), Fin = c("30/04/1997", "30/12/1997", "31/12/1997", "27/02/1995",
"28/02/1995", "12/03/1995", "30/06/1995", "30/01/1996", "31/01/1996",
"30/06/1995", "31/08/1995", "30/06/1997", "31/12/1997", "31/07/1998",
"31/12/1999", "16/01/2000", "28/02/2000", "29/02/2000", "30/06/1996",
"15/09/1997", "31/12/1997", "30/06/1998", "03/11/1999", "31/12/2000",
"31/03/2001", "30/08/2001", "31/08/2001", "01/09/2001", "30/11/2001",
"31/01/2003", "31/03/2003", "31/12/2003", "29/02/2004", "31/08/2004",
"31/12/2004", "31/03/2005", "27/10/2006", "31/12/2006", "31/03/2007",
"31/08/2007", "31/12/2008", "31/03/2009", "31/12/2009", "31/12/2010",
"31/03/2011", "04/09/2012", "31/12/2012", "31/12/4712")), .Names =
c("Matricule",
"Nom", "Sexe", "DateNaissance", "contrat", "Pays", "Debut", "Fin"
), class = "data.frame", row.names = c(NA, -48L))
The dataframe to be obtained is df2
dput(df2)
structure(list(Mat = c(1L, 6L, 6L, 6L, 8L, 8L, 934L, 934L, 934L,
934L, 934L), Nom = c("VERON", "BENARD", "BENARD", "BENARD", "DALNIC",
"DALNIC", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI"), Sexe = c("F?minin",
"Masculin", "Masculin", "Masculin", "F?minin", "F?minin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin"), DateNaissance =
c("02/09/1935",
"01/04/1935", "01/04/1935", "01/04/1935", "19/02/1940", "19/02/1940",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961"
), contrat = c("CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDD d?tach? ext. Cirad", "CDI D?tach?s
Autres",
"CDI D?tach?s Autres", "CDI commun", "CDI commun"), Pays = c("France",
"Philippines", "France", "France", "France", "Martinique", "Cameroun",
"France", "Congo", "Gabon", "Congo"), Debut = c("24/01/1995",
"02/02/1995", "13/03/1995", "01/01/1996", "24/01/1995", "01/09/1995",
"26/01/1995", "01/09/2001", "01/09/2004", "01/09/2007", "05/09/2012"
), Fin = c("31/12/1997", "12/03/1995", "30/06/1995", "31/01/1996",
"31/08/1995", "29/02/2000", "31/08/2001", "31/08/2004", "31/08/2007",
"04/09/2012", "31/12/4712")), .Names = c("Mat", "Nom", "Sexe",
"DateNaissance", "contrat", "Pays", "Debut", "Fin"), class =
"data.frame", row.names = c(NA,
-11L))
Thank you for your help