Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR R

detect factors in r

make_factors <- function(data, max_levels=15) {
    # convert all columns in <data> that are not already factors
    # and that have fewer than <max_levels> distinct values into factors. 
    # If the column is numeric, it becomes an ordered factor.

    stopifnot(is.data.frame(data))
    for(n in names(data)){
        if(!is.factor(data[[n]]) && 
                length(unique(data[[n]])) <= max_levels) {
            data[[n]] <- if(!is.numeric(data[[n]])){
                 as.factor(data[[n]])
            } else {
                 ordered(data[[n]])
            }    
        }
    }
    data
}


# create dataset with one numeric column <foo> with few  distinct entries 
# and one character column <baz> with few  distinct entries :
data <- iris
data <- within(data, {
     foo <- round(iris[, 1])
     baz <- as.character(foo)
})   


table(data$foo)

## 4  5  6  7  8 
## 5 47 68 24  6 

str(data)

## 'data.frame':    150 obs. of  7 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ baz         : chr  "5" "5" "5" "5" ...
##  $ foo         : num  5 5 5 5 5 5 5 5 4 5 ...

str(make_factors(data))

## 'data.frame':    150 obs. of  7 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ baz         : Factor w/ 5 levels "4","5","6","7",..: 2 2 2 2 2 2 2 2 1 2 ...
##  $ foo         : Ord.factor w/ 5 levels "4"<"5"<"6"<"7"<..: 2 2 2 2 2 2 2 2 1 2 ...
Source by stackoverflow.com #
 
PREVIOUS NEXT
Tagged: #detect #factors
ADD COMMENT
Topic
Name
7+7 =