R语言数据预处理
筛选、排序、分段、拆分、分组、合并、拼接、转置、索引、重塑
筛选
1 用which来筛选符合条件的值
# Keep the rows whose `female` value is 1. which() converts the logical test
# into row positions, so rows where the test is NA are dropped.
wage2_3 <- wage2_1[which(wage2_1$female == 1), ]
wage2_3
# Same approach for the rows whose `female` value is 0.
wage2_4 <- wage2_1[which(wage2_1$female == 0), ]
wage2_4
#2 用subset来筛选符合条件的值
# Rows with female == 0, keeping the columns from `wage` through `married`.
# The comparison is numeric, matching how `female` is tested with which().
wage2_5_male <- subset(wage2_1, female == 0, select = wage:married)
wage2_5_male
# Rows with female == 1, same column range.
wage2_6_female <- subset(wage2_1, female == 1, select = wage:married)
wage2_6_female
3 用dplyr 中的管道函数来筛选符合条件的值
library(dplyr)
# Rows with female == 0, selected through a dplyr pipeline. The condition is
# evaluated row by row, so no group_by() is needed; omitting it also avoids
# handing back a grouped result whose grouping would carry into later verbs.
wage2_7_male <- wage2_1 %>%
  filter(female == 0)
library(haven)
# Read the Stata file. The path must be a straight-quoted string, and forward
# slashes keep R from parsing "\U", "\1", ... in a Windows path as escapes.
# NOTE(review): the directory name "鏌ヨ" looks mis-encoded -- confirm it
# against the real folder name on disk before running.
WAGE1_DTA <- read_dta("C:/Users/13886/Desktop/鏌ヨ/R/WAGE1.DTA.dta")
View(WAGE1_DTA)
# Collect the six variables used below into a plain data frame.
# NOTE(review): the column name `tenture` is probably a typo for `tenure`;
# it is left unchanged in case other code refers to it.
wage2 <- data.frame(
  wage = WAGE1_DTA$wage,
  educ = WAGE1_DTA$educ,
  exper = WAGE1_DTA$exper,
  tenture = WAGE1_DTA$tenure,
  female = WAGE1_DTA$female,
  married = WAGE1_DTA$married
)
wage2
library(dplyr)
# separate() comes from tidyr, which this script never attaches, so it is
# called with an explicit namespace. The result is only printed, not stored.
# The first output name was misspelled "feamale" and is corrected here.
# NOTE(review): `female` is compared with 0/1 below, so there is presumably
# no delimiter to split on and the second column will be NA -- confirm this
# step is wanted at all.
wage2 %>% tidyr::separate(female, c("female", "male"))
# Split the sample by the `female` indicator. The filter is row-wise, so the
# group_by() that used to precede it is dropped; the results are ungrouped
# and keep the class of `wage2`.
wage2_male <- wage2 %>%
  filter(female == 0)
wage2_female <- wage2 %>%
  filter(female == 1)
wage2_male
wage2_female
# Natural log applied to every column of wage2 at once.
# NOTE(review): `female` is tested against 0 elsewhere in this script, so it
# presumably contains zeros, and log(0) is -Inf -- confirm that logging the
# indicator columns is intended.
wage3 <- log(wage2)
wage3
# Row-bind: the logged rows are stacked underneath the raw rows.
wage4 <- rbind(wage2, wage3)
wage4
# Column-bind: raw and logged columns side by side. Both inputs share the
# same column names, so the result has duplicated names.
wage5 <- cbind(wage2, wage3)
wage5
# Men with strictly between 20 and 40 years of experience. The condition is
# row-wise, so grouping by `exper` added nothing and left a tibble with one
# group per distinct value; ungroup() also clears any grouping inherited
# from `wage2_male`.
wage2_male_1 <- wage2_male %>%
  ungroup() %>%
  filter(exper > 20 & exper < 40)
wage2_male_1
# Everyone with more than 20 years of experience.
a <- wage2 %>%
  filter(exper > 20)
a
# Same selection as `wage2_male_1`, with the two conditions in the other order.
b <- wage2_male %>%
  ungroup() %>%
  filter(exper < 40 & exper > 20)
b
library(dplyr)
# Session-wide print setting; the score columns below end up as integers, so
# it mainly affects other numeric output printed later in the session.
options(digits = 0)
# Fixed seed so the random scores are reproducible.
set.seed(1)
# 12 students in 3 classes with uniform random scores in [0, 100).
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step; `df[] <-` keeps the
# data-frame structure and avoids a 1:ncol(df) index loop.
df[] <- lapply(df, as.integer)
查找与修改
查找
# Whole data frame.
df
# Row 2 (all columns).
df[2, ]
# Column 4 as a vector.
df[, 4]
# Column `Chinese` as a vector.
df$Chinese
# Column 3 as a one-column data frame.
df[3]
# The row whose ID is 4.
df[which(df$ID == 4), ]
# Single cell: row 3, column 4 -- by position, then by column name.
df[3, 4]
df[3, "Math"]
# Math score of the row(s) whose Chinese score is 57.
df[which(df$Chinese == 57), "Math"]
# English scores of everyone in class 2.
df[which(df$Class == 2), "English"]
修改行或者列
修改某一行或列
# Overwrite the whole first row (values in column order).
df[1, ] <- c(1, 2, 65, 59, 73)
# Overwrite the whole `English` column; 12 values, one per row.
df[, "English"] <- c(23, 45, 67, 87, 34, 46, 87, 95, 43, 76, 23, 94)
df
修改某一个值:直接将修改后的值
赋值给上述查询某个值的操作即可
# Raise every Chinese score below 20 up to 20 by assigning to the same
# expression that would look those cells up.
df[which(df$Chinese < 20), "Chinese"] <- 20
df
修改行列名
可用rownames()及colnames()得到数据框的行列名,
rownames(data.frame)[行号] 或 colnames(data.frame)[列号]
可得到指定位置的行名或者列名
若修改直接赋值给该变量即可
# All column names, then just the fourth one.
colnames(df)
colnames(df)[4]
# Rename a single column by position ...
colnames(df)[4] <- "math"
# ... or replace every name at once (this restores the original names).
colnames(df) <- c("ID", "Class", "Chinese", "Math", "English")
colnames(df)
删除
删除行或列,仅需要选出该数据框的部分行或列,然后将其赋给该变量即可,
其中在列号或行号前添加-表示不选该行或该列
在这里,为了方便接下来的操作,我们将选出后的数据框赋给其他变量
要实现删除操作应当将选出后的数据框赋给自己
# Keep columns 1, 3 and 5 only; the result goes to a scratch variable so
# `df` itself stays intact.
df.tmp <- df[c(1, 3, 5)]
df.tmp
# A negative index drops instead of selecting: every row except the third.
df.tmp <- df[-3, ]
df.tmp
添加
添加行 data.frame [新行号,] <- 行值
# Append a new row by assigning to the next free row index.
df[13, ] <- c(13, 2, 62, 19, 38)
df
# Row indices may repeat: row 1 twice, followed by rows 2-12. The result is
# stored in the scratch variable, because overwriting `df` here would change
# its row count and break the 13-value column assignments that follow.
df.tmp <- df[c(1, 1:12), ]
df.tmp
# Every one of rows 1-12 duplicated in place.
df.tmp <- df[rep(1:12, each = 2), ]
df.tmp
添加列 data.frame$新列名 <- 列值
# Add a column by name. 13 values are supplied, so `df` must have 13 rows
# at this point.
df$Physics <- c(23, 34, 67, 23, 56, 67, 78, 23, 54, 56, 67, 34, 50)
df
# Add a column by position; column 7 does not exist yet, so it is created.
df[, 7] <- 1:13
df
# Derived column built from existing ones. The operators between the column
# names were missing; `+` (a total score) is the presumed original -- confirm.
# The result is only printed, `df` is not modified.
mutate(df, Chemistry = Chinese + Math + English + Physics)
dplyr 包常用函数
library(dplyr)
# Session-wide print setting; the score columns below end up as integers, so
# it mainly affects other numeric output printed later in the session.
options(digits = 0)
# Fixed seed so the random scores are reproducible.
set.seed(1)
# 12 students in 3 classes with uniform random scores in [0, 100).
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step; `df[] <-` keeps the
# data-frame structure and avoids a 1:ncol(df) index loop.
df[] <- lapply(df, as.integer)
df
arrange()排序
# Sort rows by Chinese score, ascending.
df %>% arrange(Chinese)
# Sort by Chinese, breaking ties by Math.
df %>% arrange(Chinese, Math)
# desc() reverses the order: highest Chinese score first.
df %>% arrange(desc(Chinese))
distinct()函数 去重
# Build a data frame in which every row of `df` appears twice.
# seq_len() is used instead of 1:nrow(df) so that a zero-row `df` yields no
# indices rather than c(1, 0).
df1 <- df[rep(seq_len(nrow(df)), each = 2), ]
df1
# distinct() drops the duplicated rows again.
df1 <- distinct(df1)
df1
group_by()函数分组 summarise()函数概括
group_by()与summarise()函数常连用,
用于对不同的分组进行操作,在这里再介绍一个管道函数“%>%”,
其作用是把左件的值发送给右件的表达式,
并作为右件表达式函数的第一个参数
# One row per Class, holding that class's highest Chinese score in a column
# named `max`.
df %>%
  group_by(Class) %>%
  summarise(max = max(Chinese))
#filter()函数 筛选
filter(.data, ..., .preserve = FALSE)
选出符合条件的行(返回数据框格式)
# Within each Class, keep the full row(s) holding that class's top Chinese
# score; max() is evaluated per group, and tied rows are all returned.
df %>%
  group_by(Class) %>%
  filter(Chinese == max(Chinese))
select()函数 选择
# Keep only the listed columns, in the order given (Class is left out).
df %>% select(ID, Chinese, Math, English)
rbind()函数与cbind()函数 合并
# A one-row data frame with the same columns as `df` ...
df1 <- data.frame(ID = 13, Class = 2, Chinese = 65, Math = 26, English = 84)
df1
# ... appended as a new last row; the result is only printed.
rbind(df, df1)
library(dplyr)
# Session-wide print setting; the score columns below end up as integers, so
# it mainly affects other numeric output printed later in the session.
options(digits = 0)
# Fixed seed so the random scores are reproducible.
set.seed(1)
# 12 students in 3 classes with uniform random scores in [0, 100).
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step; `df[] <-` keeps the
# data-frame structure and avoids a 1:ncol(df) index loop.
df[] <- lapply(df, as.integer)
dplyr 包常用函数
library(dplyr)
# Session-wide print setting; the score columns below end up as integers, so
# it mainly affects other numeric output printed later in the session.
options(digits = 0)
# Fixed seed so the random scores are reproducible.
set.seed(1)
# 12 students in 3 classes with uniform random scores in [0, 100).
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step. This replaces a for loop
# that was missing its closing brace, which left the script unparseable.
df[] <- lapply(df, as.integer)
# Print the finished data frame once, after the conversion.
df
这篇好文章是转载于:学新通技术网
- 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
- 本站站名: 学新通技术网
- 本文地址: /boutique/detail/tanhggjbeb
-
photoshop保存的图片太大微信发不了怎么办
PHP中文网 06-15 -
《学习通》视频自动暂停处理方法
HelloWorld317 07-05 -
word里面弄一个表格后上面的标题会跑到下面怎么办
PHP中文网 06-20 -
Android 11 保存文件到外部存储,并分享文件
Luke 10-12 -
photoshop扩展功能面板显示灰色怎么办
PHP中文网 06-14 -
微信公众号没有声音提示怎么办
PHP中文网 03-31 -
excel下划线不显示怎么办
PHP中文网 06-23 -
excel打印预览压线压字怎么办
PHP中文网 06-22 -
TikTok加速器哪个好免费的TK加速器推荐
TK小达人 10-01 -
怎样阻止微信小程序自动打开
PHP中文网 06-13