# Regex quantifier examples using str_extract(), which returns the first match
# of a pattern in each input string. str_extract() comes from the stringr
# package, so stringr must be attached before running this (e.g. with
# library(stringr) or library(tidyverse)).
# Lines starting with `#>` show the expected printed output of the call above.
x <- "1888 is the longest year in Roman numerals: MDCCCLXXXVIII"

# `?` makes the second "C" optional (zero or one), so at most two Cs are taken.
str_extract(x, "CC?")
#> [1] "CC"

# `+` means one or more, so after the first "C" the rest of the run is taken.
str_extract(x, "CC+")
#> [1] "CCC"

# A "C" followed by one or more characters drawn from the set {L, X}; the
# first two Cs are skipped because each is followed by another "C".
str_extract(x, 'C[LX]+')
#> [1] "CLXXX"