> Formatting strings > Detecting matches
Working with text data in R
# Format numbers with sprintf()
sprintf("%.3e", pi) # "3.142e+00"
# Highlight string matches in HTML widget with str_view_all() (deprecated since stringr 1.5.0; use str_view())
str_view_all(suits, "[ae]")
Clubs
# Substitute value in a string with an expression
Diamonds
Learn R online at www.DataCamp.com
glue('The answer is {ans}', ans = 30 + 10) # The answer is 40
Hearts
Spades
# Substitute values from data frame columns into a string with glue_data()
cards <- data.frame(value = c("8", "Queen", "Ace"),
# Detect if a regex pattern is present in strings with str_detect()
suit = c("Diamonds", "Hearts", "Spades"))
str_detect(suits, "[ae]") # FALSE TRUE TRUE TRUE
> Packages to install for this cheat sheet
cards %>% glue_data("{value} of {suit}")
# Find the index of strings that match a regex with str_which()
str_which(suits, "[ae]") # 2 3 4
# 8 of Diamonds
# Queen of Hearts
# Ace of Spades
Some functionality from this cheat sheet comes with base R, but the following packages are also used throughout this cheat sheet.
# Count the number of matches with str_count()
str_count(suits, "[ae]") # 0 1 2 2
library(stringr)
# Wrap strings across multiple lines
# Locate the position of matches within strings with str_locate()
library(snakecase)
str_wrap('The answer to the universe is 42', width = 25)
str_locate(suits, "[ae]")
library(glue)
# The answer to the
# universe is 42
Functions with names starting str_ are from stringr; those with names starting to_ are from snakecase; those with glue in the name are from glue.
#      start end
# [1,]    NA  NA
# [2,]     3   3
# [3,]     2   2
# [4,]     3   3
> Splitting strings
> Example data > Extracting matches
# Split strings into list of characters with str_split(pattern = "")
Throughout this cheat sheet, we’ll be using this vector containing the following strings.
str_split(suits, pattern = "")
# Extract matches from strings with str_extract()
suits <- c("Clubs", "Diamonds", "Hearts", "Spades")
# "C" "l" "u" "b" "s"
str_extract(suits, ".[ae].") # NA "iam" "Hea" "pad"
# "D" "i" "a" "m" "o" "n" "d" "s"
# "H" "e" "a" "r" "t" "s"
# Extract matches and capture groups with str_match()
str_match(suits, ".([ae])(.)")
> Get string lengths and substrings
# "S" "p" "a" "d" "e" "s"
# Split strings by a separator with str_split()
# [,1] [,2] [,3]
str_split(suits, pattern = "a")
# [1,] NA NA NA
# Get the number of characters with nchar()
# [2,] "iam" "a" "m"
nchar(suits) # Returns 5 8 6 6
# [3,] "Hea" "e" "a"
# "Clubs"
# [4,] "pad" "a" "d"
# Get substrings by position with str_sub()
# "Di" "monds"
stringr::str_sub(suits, 1, 4) # Returns "Club" "Diam" "Hear" "Spad"
# "He" "rts"
# Get subset of strings that match with str_subset()
# "Sp" "des"
str_subset(suits, "d") # "Diamonds" "Spades"
# Remove whitespace from the start/end with str_trim()
str_trim(" Lost in Whitespace ") # Returns "Lost in Whitespace"
# Split strings into matrix of n pieces with str_split_fixed()
str_split_fixed(suits, pattern = 'a', n = 2)
> Replacing matches
# Truncate strings to a maximum width with str_trunc()
str_trunc(suits, width = 5) # Returns "Clubs" "Di..." "He..." "Sp..."
# [,1] [,2]
# [1,] "Clubs" ""
# Pad strings to a constant width with str_pad()
# [2,] "Di" "monds"
str_pad(suits, width = 8) # Returns "   Clubs" "Diamonds" "  Hearts" "  Spades"
# Replace a regex match with another string with str_replace()
# [3,] "He" "rts"
str_replace(suits, "a", "4") # "Clubs" "Di4monds" "He4rts" "Sp4des"
# Pad strings on right with str_pad(side="right")
# [4,] "Sp" "des"
str_pad(suits, width = 8, side = "right", pad = "!")
# Remove a match with str_remove()
# Returns "Clubs!!!" "Diamonds" "Hearts!!" "Spades!!"
str_remove(suits, "s") # "Club" "Diamond" "Heart" "Spade"
> Joining or concatenating strings
# Replace a substring with `str_sub<-`()
str_sub(suits, start = 1, end = 3) <- c("Bi", "Al", "Yu", "Hi")
suits # Returns "Bibs" "Almonds" "Yurts" "Hides"
> Changing case
# Combine two strings with paste0()
paste0(suits, '5') # "Clubs5" "Diamonds5" "Hearts5" "Spades5"
# Convert to lowercase with tolower()
# Combine strings with a separator with paste()
tolower(suits) # Returns "clubs" "diamonds" "hearts" "spades"
paste(5, suits, sep = " of ") # "5 of Clubs" "5 of Diamonds" "5 of Hearts" "5 of Spades"
# Convert to uppercase with toupper()
toupper(suits) # Returns "CLUBS" "DIAMONDS" "HEARTS" "SPADES"
Learn R Online at
# Collapse character vector to string with paste() or paste0()
paste(suits, collapse = ", ") # "Clubs, Diamonds, Hearts, Spades"
www.DataCamp.com
# Convert to title case with to_title_case()
to_title_case("hello, world!") # Returns "Hello World" (snakecase drops punctuation)
# Duplicate and concatenate strings with str_dup()
# Convert to sentence case with to_sentence_case()
str_dup(suits, 2) # "ClubsClubs" "DiamondsDiamonds" "HeartsHearts" "SpadesSpades"
to_sentence_case("hello, world!") # Returns "Hello world" (snakecase drops punctuation)