データ解析の前準備 in R
平均やばらつきをみたり,何か解析手法を適用する前にデータ自体をある程度把握しておきたい.
難しそうでかっこいい手法を使うよりも,まずはデータの基本的な特徴をしっかりと眺めてみることが重要.
そこで,一般的にやることをRでさくっとやるためのテンプレート.
# Exploratory data analysis template.
# Generates small synthetic data sets and walks through the basic summaries
# and plots worth looking at before applying any modelling technique.
# Results differ between runs because no random seed is set.

# ---- Single variable ----
x <- rnorm(n = 100, mean = 0, sd = 3)  # test data

summary(x)  # basic statistics (min, quartiles, median, mean, max)
mean(x)     # a single statistic on its own
length(x)   # data size
var(x)      # unbiased variance (divides by n - 1)
sd(x)       # standard deviation, i.e. sqrt(var(x))
mean((mean(x) - x)^2)  # biased variance (divides by n)

plot(x, col = "blue")  # value against index
hist(x, col = "blue")  # histogram

# ---- Two variables ----
err <- rnorm(n = 100, mean = 0, sd = 1.0)
y <- 4.0 * x + err  # linear in x plus noise

plot(x, y)  # scatterplot
cor(y, x)   # correlation coefficient
lm(y ~ x)   # linear regression model

# ---- Multiple variables ----
x1 <- rnorm(n = 100, mean = 0, sd = 3)  # test data
x2 <- 3.0 * x1 + rnorm(n = 100, mean = 0, sd = 2)
x3 <- 3.0 * x1 + 2.0 * x2 + rnorm(n = 100, mean = 0, sd = 2)
z <- data.frame(x1, x2, x3)

cov(z)    # covariance matrix
cor(z)    # correlation matrix
pairs(z)  # scatterplot matrix

# ---- Multiple variables with a categorical factor: boxplots ----
sex <- c(rep("M", 10), rep("F", 10))  # 10 males followed by 10 females
math.score <- c(rnorm(10, mean = 80, sd = 10), rnorm(10, mean = 60, sd = 5))
eng.score <- c(rnorm(10, mean = 60, sd = 20), rnorm(10, mean = 80, sd = 5))
ave.score <- (math.score + eng.score) / 2.0
d <- data.frame(sex, math.score, eng.score, ave.score)

# Math score only.
boxplot(d$math.score, names = c("MATH"), col = "blue")

# All three scores side by side.
boxplot(
  d$math.score, d$eng.score, d$ave.score,
  names = c("MATH", "ENGLISH", "AVE."),
  col = "blue"
)

# Math score split by sex; groups are ordered alphabetically (F, then M),
# so "blue" is applied to F and "red" to M.
boxplot(math.score ~ sex, data = d, col = c("blue", "red"))