make_factors <- function(data, max_levels = 15) {
  # Convert every column of <data> that is not already a factor and that has
  # at most <max_levels> distinct non-missing values into a factor.
  # Numeric columns become ordered factors; all other columns become plain
  # (unordered) factors.
  #
  # Arguments:
  #   data        a data frame.
  #   max_levels  single number; columns with more distinct non-missing
  #               values than this are left untouched.
  #
  # Returns:
  #   <data> with the qualifying columns replaced by factors. Column names
  #   and column order are unchanged.
  stopifnot(
    is.data.frame(data),
    is.numeric(max_levels),
    length(max_levels) == 1L,
    !is.na(max_levels)
  )

  # Iterate by position rather than by name: with duplicated column names,
  # data[[name]] would always address the first matching column only.
  for (i in seq_along(data)) {
    column <- data[[i]]
    if (is.factor(column)) {
      next
    }

    # NA never becomes a factor level, so it must not count towards the limit.
    n_distinct <- length(unique(column[!is.na(column)]))
    if (n_distinct > max_levels) {
      next
    }

    data[[i]] <- if (is.numeric(column)) {
      ordered(column)
    } else {
      as.factor(column)
    }
  }

  data
}
# Demo: extend iris with two low-cardinality columns,
#   <baz> - character column with few distinct entries
#   <foo> - numeric column with few distinct entries
# and show how make_factors() treats each of them.
data <- iris
rounded_sepal_length <- round(iris[["Sepal.Length"]])
data$baz <- as.character(rounded_sepal_length)
data$foo <- rounded_sepal_length

# Frequency of each distinct value in <foo>:
table(data$foo)
## 4 5 6 7 8
## 5 47 68 24 6

# Column types before the conversion:
str(data)
## 'data.frame': 150 obs. of 7 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ baz : chr "5" "5" "5" "5" ...
## $ foo : num 5 5 5 5 5 5 5 5 4 5 ...

# Column types after the conversion: <baz> is now a factor and <foo> an
# ordered factor; the remaining columns are unchanged.
str(make_factors(data))
## 'data.frame': 150 obs. of 7 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ baz : Factor w/ 5 levels "4","5","6","7",..: 2 2 2 2 2 2 2 2 1 2 ...
## $ foo : Ord.factor w/ 5 levels "4"<"5"<"6"<"7"<..: 2 2 2 2 2 2 2 2 1 2 ...