stringr包介绍
来源:互联网 发布:linux dd 备份u盘 编辑:程序博客网 时间:2024/05/18 18:43
1. case用法:
str_to_upper(string, locale = "")
str_to_lower(string, locale = "")
str_to_title(string, locale = "")
string为要处理的字符串;locale为大小写转换所依据的语言区域(例如"en"为英语、"tr"为土耳其语),不同语言区域的大小写转换规则可能不同。
> dog <- "The quick brown dog"> str_to_upper(dog)[1] "THE QUICK BROWN DOG"> str_to_lower(dog)[1] "the quick brown dog"> str_to_title(dog)[1] "The Quick Brown Dog"> str_to_upper("i", "en") # english[1] "I"> str_to_upper("i", "tr") # Turkish[1] "İ"
2. str_c的用法:str_c(..., sep = "", collapse = NULL)
... 为一个或多个字符串向量;sep为插入各输入向量对应元素之间的分隔字符串;collapse不为NULL时,用它把结果向量再合并为单个字符串(默认为NULL,即不合并)。
> str_c("Letter", letters, sep = ": ")
 [1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e"
 [6] "Letter: f" "Letter: g" "Letter: h" "Letter: i" "Letter: j"
[11] "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o"
[16] "Letter: p" "Letter: q" "Letter: r" "Letter: s" "Letter: t"
[21] "Letter: u" "Letter: v" "Letter: w" "Letter: x" "Letter: y"
[26] "Letter: z"
> str_c(letters, collapse = "")[1] "abcdefghijklmnopqrstuvwxyz"> str_c(letters, collapse = ",")[1] "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z"> str_c(c("a", NA, "B"), "-d")[1] "a-d" NA "B-d"> str_c(str_replace_na(c("a", NA, "b")), "-d")
[1] "a-d" "NA-d" "b-d"
3. str_count的用法:
str_count(string, pattern = "")
string为字符串;pattern为寻找模式。
> fruit <- c("apple", "banana", "pear", "pineapple")> str_count(fruit, "a")[1] 1 3 1 1> str_count(fruit, "p")[1] 2 0 1 3> str_count(fruit, c("a","b","p","p")) # 对应每一个查找[1] 1 1 1 3> str_count(c("a.", "...", ".a.a"), ".") # 此处. 为正则表达式[1] 2 3 4> str_count(c("a.", "...", ".a.a"), fixed(".")) #fixed(".")为只查找.号,也可用"\\."[1] 1 3 2
4. str_detect的用法:
str_detect(string, pattern)
string与pattern如3.
> str_detect(fruit, "a") # 检测是否有a[1] TRUE TRUE TRUE TRUE> str_detect(fruit, "^a") # 检测字符串是否以a开头[1] TRUE FALSE FALSE FALSE> str_detect(fruit, "a$") # 检测字符串是否以a结尾[1] FALSE TRUE FALSE FALSE
5. str_extract/str_extract_all的用法:
str_extract(string, pattern)
str_extract_all(string, pattern, simplify = FALSE)
string,pattern如上;simplify:为FALSE时返回由字符串向量组成的列表,为TRUE时返回字符串矩阵。
> shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")> str_extract(shopping_list, "\\d") # \\d+ 更好一些 [1] "4" NA NA "2"
> str_extract(shopping_list, "[a-z]+")[1] "apples" "bag" "bag" "milk"
> str_extract(shopping_list, "[a-z]{1,4}")
[1] "appl" "bag"  "bag"  "milk"
> str_extract(shopping_list, "\\b[a-z]{1,4}\\b") # \\b 为单词边界
[1] NA     "bag"  "bag"  "milk"
> str_extract_all(shopping_list, "[a-z]+") # 由此看出str_extract与str_extract_all的不同
[[1]]
[1] "apples" "x"
[[2]]
[1] "bag"   "of"    "flour"
[[3]]
[1] "bag"   "of"    "sugar"
[[4]]
[1] "milk" "x"
> str_extract_all(shopping_list, "\\b[a-z]+\\b")
[[1]]
[1] "apples"
[[2]]
[1] "bag"   "of"    "flour"
[[3]]
[1] "bag"   "of"    "sugar"
[[4]]
[1] "milk"
> str_extract_all(shopping_list, "\\b[a-z]+\\b", simplify = TRUE) # 生成字符串矩阵 [,1] [,2] [,3] [1,] "apples" "" "" [2,] "bag" "of" "flour"[3,] "bag" "of" "sugar"[4,] "milk" "" "" > str_extract_all("This is, suprisingly, a sentence.", boundary("word"))# 以单词为边界[[1]][1] "This" "is" "suprisingly" "a" [5] "sentence"
6. str_match的用法:
str_match(string, pattern)
string与pattern用法如上。
> strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569",+ "387 287 6718", "apple", "233.398.9187 ", "482 952 3315",+ "239 923 8115 and 842 566 4692", "Work: 579-499-7527", "$1000",+ "Home: 543.355.3679")> strings [1] " 219 733 8965" "329-293-8753 " [3] "banana" "595 794 7569" [5] "387 287 6718" "apple" [7] "233.398.9187 " "482 952 3315" [9] "239 923 8115 and 842 566 4692" "Work: 579-499-7527" [11] "$1000" "Home: 543.355.3679" > phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" # 正则表达式的用法详见《正则表达式必知必会》> str_extract(strings, phone) [1] "219 733 8965" "329-293-8753" NA "595 794 7569" "387 287 6718" [6] NA "233.398.9187" "482 952 3315" "239 923 8115" "579-499-7527"[11] NA "543.355.3679"> str_match(strings, phone) [,1] [,2] [,3] [,4] [1,] "219 733 8965" "219" "733" "8965" [2,] "329-293-8753" "329" "293" "8753" [3,] NA NA NA NA [4,] "595 794 7569" "595" "794" "7569" [5,] "387 287 6718" "387" "287" "6718" [6,] NA NA NA NA [7,] "233.398.9187" "233" "398" "9187" [8,] "482 952 3315" "482" "952" "3315" [9,] "239 923 8115" "239" "923" "8115"[10,] "579-499-7527" "579" "499" "7527"[11,] NA NA NA NA [12,] "543.355.3679" "543" "355" "3679"
7. str_pad的用法:
str_pad(string, width, side = c("left", "right", "both"), pad = " ")
string为字符串;width为填充后字符串的最小宽度;side为填充字符添加的位置(左侧、右侧或两侧);pad为用于填充的单个字符,默认为空格。
> rbind(
+   str_pad("hadley", 30, "left"),
+   str_pad("hadley", 30, "right"),
+   str_pad("hadley", 30, "both")
+ )
     [,1]
[1,] "                        hadley"
[2,] "hadley                        "
[3,] "            hadley            "
> rbind(+ str_pad("hadley", 30, "left", pad = "."),+ str_pad("hadley", 30, "right", pad = "."),+ str_pad("hadley", 30, "both", pad = ".")+ ) [,1] [1,] "........................hadley"[2,] "hadley........................"[3,] "............hadley............"
8. str_replace的用法:
str_replace(string, pattern, replacement)
string为字符串;pattern为要被替换的内容(匹配模式),常为正则表达式;replacement为用来替换的字符串。str_replace只替换第一处匹配,str_replace_all替换所有匹配。
> fruits <- c("one apple", "two pears", "three bananas")> str_replace(fruits, "[aeiou]", "-")[1] "-ne apple" "tw- pears" "thr-e bananas"> str_replace_all(fruits, "[aeiou]", "-")[1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"
> str_replace(fruits, "([aeiou])", "")[1] "ne apple" "tw pears" "thre bananas"
> str_replace_all(str_c(fruits, collapse = "---"), c("one" = 1, "two" = 2, "three" = 3)) ## 如需对同一字符串同时应用多组"模式-替换",可向pattern传入命名向量:名称为要匹配的模式,值为对应的替换内容
[1] "1 apple---2 pears---3 bananas"
9. str_split的用法:
str_split(string, pattern, n = Inf, simplify = FALSE)
str_split_fixed(string, pattern, n)
string为字符串;pattern为分离模式;n为分割为多少块;simplify:FALSE的时候返回字符串向量列表,为TRUE的时候返回字符串矩阵。
10. str_sub的用法:
str_sub(string, start = 1L, end = -1L)
string为字符串;start和end分别为子串开始和结束的字符位置(负数表示从字符串末尾倒数,如-1为最后一个字符)。
11. str_subset的用法:
str_subset(string, pattern)
string与pattern用法如上。
> fruit <- c("apple", "banana", "pear", "pinapple")> str_subset(fruit, "a")[1] "apple" "banana" "pear" "pinapple"> str_subset(fruit, "^a")[1] "apple"> str_detect(fruit, "^a") # 对比str_detect与str_subset的用法[1] TRUE FALSE FALSE FALSE> str_subset(fruit, "a$")[1] "banana"> str_subset(fruit, "b")[1] "banana"> str_subset(fruit, "[aeiou]")[1] "apple" "banana" "pear" "pinapple"
> hw <- "Hadley Wickham"> str_sub(hw, 1, 6)[1] "Hadley"> str_sub(hw, end = 6)[1] "Hadley"> str_sub(hw, 8, 14)[1] "Wickham"> str_sub(hw, c(1,8), c(6,14))[1] "Hadley" "Wickham"> str_sub(hw, -1)[1] "m"> str_sub(hw, -7)[1] "Wickham"
> x <- "BBCDEF"
> str_sub(x, 1, 1)
[1] "B"
> str_sub(x, 1, 1) <- "A"
> x
[1] "ABCDEF"
> str_sub(x, -1, -1) <- "K"
> x
[1] "ABCDEK"
> str_sub(x, -2, -2) <- "GHIJ"; x
[1] "ABCDGHIJK"
> str_sub(x, 2, -2) <- ""; x
[1] "AK"
> fruits <- c(+ "apples and oranges and pears and bananas",+ "pineapples and mangos and guavas"+ )> fruits[1] "apples and oranges and pears and bananas" "pineapples and mangos and guavas" > str_split(fruits, "and")[[1]][1] "apples " " oranges " " pears " " bananas" [[2]][1] "pineapples " " mangos " " guavas" > str_split(fruits, "and", simplify = TRUE) [,1] [,2] [,3] [,4] [1,] "apples " " oranges " " pears " " bananas"[2,] "pineapples " " mangos " " guavas" ""
> str_split(fruits, "and", n=3)[[1]][1] "apples " " oranges " " pears and bananas"[[2]][1] "pineapples " " mangos " " guavas"
> str_split(fruits, "and", n=5)[[1]][1] "apples " " oranges " " pears " " bananas" [[2]][1] "pineapples " " mangos " " guavas" > str_split_fixed(fruits, "and", 3) [,1] [,2] [,3] [1,] "apples " " oranges " " pears and bananas"[2,] "pineapples " " mangos " " guavas" > str_split_fixed(fruits, "and", 4) # n大于分割的字符串时,多余的用空字符串表示 [,1] [,2] [,3] [,4] [1,] "apples " " oranges " " pears " " bananas"[2,] "pineapples " " mangos " " guavas" ""
> str_subset(c("a", "b", NA), ".") # 自动去掉缺失值[1] "a" "b"
12. word的用法:
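word(string, start = 1L, end = start, sep = fixed(" "))
string为字符串;start和end分别为要提取的起始和结束单词的位置(负数表示从末尾倒数);sep为单词之间的分隔符。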
> sentences <- c("Jane saw a cat", "Jane sat down")> sentences[1] "Jane saw a cat" "Jane sat down" > word(sentences, 1)[1] "Jane" "Jane"> word(sentences, 2)[1] "saw" "sat"> word(sentences, -1)[1] "cat" "down"> word(sentences, 2, -1)[1] "saw a cat" "sat down" > word(sentences[1], 1:3, -1)[1] "Jane saw a cat" "saw a cat" "a cat" > word(sentences[1], 1, 1:4)[1] "Jane" "Jane saw" "Jane saw a" "Jane saw a cat"> str <- 'abc.def..123.4568.999'> word(str, 1, sep = fixed('..'))#提取分隔后的第一个[1] "abc.def"> word(str, 2, sep = fixed('..')) #提取分隔后的第二个[1] "123.4568.999"> word(str, 1, -1, sep = fixed('..'))[1] "abc.def..123.4568.999"
0 0
- stringr包介绍
- R----stringr包介绍学习
- Stringr包简介
- 正则表达式 stringr包
- 用stringr包处理字符串
- R语言-用stringr包处理字符串
- stringr包字符处理函数简介
- 【R语言 字符串处理】stringr 包的强大之处
- stringr数据处理
- Learning R---stringr
- Hibernates包介绍
- spring jar包介绍
- j2ee jar包介绍
- Libnids开发包介绍
- OpenGL开发包介绍
- JAVA3D包功能介绍
- Android包、ADB介绍
- Delphi中的包介绍
- 向量自回归与结构向量误差修正模型
- tidyr之gather、separate、spread…
- 博为峰Java技术文章 ——JavaSE Swing JTabbedPane选项卡面板I
- 图论
- dplyr包介绍
- stringr包介绍
- [Educational Codeforces Round 17 F (762F)] Tree nesting
- poj 2965 枚举+DFS
- windows使用sublime text3编译lua
- java设计模式之命令模式
- HDU 1513 Palindrome (动态规划 & LCS)
- Android Studio导入项目问题
- codevs 高低位交换 5641
- List comprehension