Check if there are duplicate rows in a dataframe based on grouped columns
check_duplicates.Rd
Check whether a data frame contains duplicate rows, either across the entire data frame or with respect to the columns named in `group_by_vars`.
Examples
# Load the dplyr package
library(dplyr)
#>
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#>
#> filter, lag
#> The following objects are masked from ‘package:base’:
#>
#> intersect, setdiff, setequal, union
# Create a sample data frame
data_df <- data.frame(
ID = c(1, 1, 2, 3, 4),
Age = c(25, 25, 30, 40, 50),
Income = c(50000, 50000, 60000, 70000, 80000)
)
# Call the function to check for duplicates in the entire data frame
check_duplicates(data_df)
#> [1] "Duplicates Exist:"
#>   ID Age Income
#> 1  1  25  50000
# Call the function to check for duplicates within the "ID" column
check_duplicates(data_df, group_by_vars = "ID")
#> [1] "Duplicates Exist:"
#>   ID Age Income
#> 1  1  25  50000
# Call the function to check for duplicates within the "ID" and "Age" columns
check_duplicates(data_df, group_by_vars = c("ID", "Age"))
#> [1] "Duplicates Exist:"
#>   ID Age Income
#> 1  1  25  50000