Implementing the Naive Bayes Algorithm in R
Naive Bayes is a commonly used classification method with a very wide range of applications, such as spam filtering and anti-fraud detection in e-commerce (spotting cheating sellers, and so on).
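The idea behind the algorithm is straightforward: Naive Bayes assumes the features are conditionally independent given the class, so for a feature vector (f_1, ..., f_n) the unnormalized posterior of class c is P(c) * P(f_1 | c) * ... * P(f_n | c), and the class with the larger value wins. The script below estimates each of these probabilities simply by counting rows in a small play-tennis data set.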
# The data set comes from Tom Mitchell's book "Machine Learning".
# Define the data matrix with matrix(vector, nrow=r, ncol=c,
#   byrow=logical_value,
#   dimnames=list(char_vector_rownames, char_vector_colnames))
# nrow     - number of rows
# ncol     - number of columns
# byrow    - whether the matrix is filled by row or by column
# dimnames - row labels and column labels
data <- matrix(c("sunny",    "hot",  "high",   "weak",   "no",
                 "sunny",    "hot",  "high",   "strong", "no",
                 "overcast", "hot",  "high",   "weak",   "yes",
                 "rain",     "mild", "high",   "weak",   "yes",
                 "rain",     "cool", "normal", "weak",   "yes",
                 "rain",     "cool", "normal", "strong", "no",
                 "overcast", "cool", "normal", "strong", "yes",
                 "sunny",    "mild", "high",   "weak",   "no",
                 "sunny",    "cool", "normal", "weak",   "yes",
                 "rain",     "mild", "normal", "weak",   "yes",
                 "sunny",    "mild", "normal", "strong", "yes",
                 "overcast", "mild", "high",   "strong", "yes",
                 "overcast", "hot",  "normal", "weak",   "yes",
                 "rain",     "mild", "high",   "strong", "no"),
               byrow = TRUE,
               dimnames = list(day = c(),
                               condition = c("outlook", "temperature",
                                             "humidity", "wind", "playtennis")),
               nrow = 14, ncol = 5);
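As a quick sanity check (my addition, not part of the original script), you can confirm that byrow = TRUE filled the matrix one training day per row:

dim(data)      # should print 14 5
data[1:3, ]    # the first three training examples, one per row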
# Compute the prior probabilities of yes and no
prior.yes = sum(data[,5] == "yes") / length(data[,5]);
prior.no  = sum(data[,5] == "no")  / length(data[,5]);
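In this data set 9 of the 14 days have playtennis = "yes" and 5 have "no", so prior.yes is 9/14 ≈ 0.643 and prior.no is 5/14 ≈ 0.357.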
# The input is a condition vector (outlook, temperature, humidity, wind)
###################################################
naive.bayes.prediction <- function(condition.vec)
{
  ###################################################
  # Calculate unnormalized posterior probability for playtennis = yes.
  playtennis.yes <-
    sum((data[,1] == condition.vec[1]) & (data[,5] == "yes")) /
      sum(data[,5] == "yes") *    # P(outlook = f_1 | playtennis = yes)
    sum((data[,2] == condition.vec[2]) & (data[,5] == "yes")) /
      sum(data[,5] == "yes") *    # P(temperature = f_2 | playtennis = yes)
    sum((data[,3] == condition.vec[3]) & (data[,5] == "yes")) /
      sum(data[,5] == "yes") *    # P(humidity = f_3 | playtennis = yes)
    sum((data[,4] == condition.vec[4]) & (data[,5] == "yes")) /
      sum(data[,5] == "yes") *    # P(wind = f_4 | playtennis = yes)
    prior.yes;                    # P(playtennis = yes)
  # Calculate unnormalized posterior probability for playtennis = no.
  playtennis.no <-
    sum((data[,1] == condition.vec[1]) & (data[,5] == "no")) /
      sum(data[,5] == "no") *     # P(outlook = f_1 | playtennis = no)
    sum((data[,2] == condition.vec[2]) & (data[,5] == "no")) /
      sum(data[,5] == "no") *     # P(temperature = f_2 | playtennis = no)
    sum((data[,3] == condition.vec[3]) & (data[,5] == "no")) /
      sum(data[,5] == "no") *     # P(humidity = f_3 | playtennis = no)
    sum((data[,4] == condition.vec[4]) & (data[,5] == "no")) /
      sum(data[,5] == "no") *     # P(wind = f_4 | playtennis = no)
    prior.no;                     # P(playtennis = no)
  return(list(post.pr.yes = playtennis.yes,
              post.pr.no  = playtennis.no,
              prediction  = ifelse(playtennis.yes >= playtennis.no,
                                   "yes", "no")));
}
naive.bayes.prediction(c("rain", "hot", "high", "strong"));
naive.bayes.prediction(c("sunny", "mild", "normal", "weak"));
naive.bayes.prediction(c("overcast", "mild", "normal", "weak"));
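As a hand check of the first call: among the 9 "yes" days, rain occurs 3 times, hot 2 times, high humidity 3 times and strong wind 3 times, so the unnormalized posterior for "yes" is (3/9)(2/9)(3/9)(3/9)(9/14) ≈ 0.00529; the corresponding product for "no" is (2/5)(2/5)(4/5)(3/5)(5/14) ≈ 0.02743, so the prediction is "no". These values match the output below.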
After running the script, the output is:
> naive.bayes.prediction(c("rain","hot","high","strong"));
$post.pr.yes
[1] 0.005291005
$post.pr.no
[1] 0.02742857
$prediction
[1] "no"

> naive.bayes.prediction(c("sunny","mild","normal","weak"));
$post.pr.yes
[1] 0.02821869
$post.pr.no
[1] 0.006857143
$prediction
[1] "yes"

> naive.bayes.prediction(c("overcast","mild","normal","weak"));
$post.pr.yes
[1] 0.05643739
$post.pr.no
[1] 0
$prediction
[1] "yes"
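Note that the last call reports post.pr.no = 0: "overcast" never occurs together with playtennis = "no" in the training data, so that single zero factor wipes out the whole product for the "no" class. A common remedy is add-one (Laplace) smoothing of the conditional probabilities. The following is a minimal sketch of that idea (my addition, not from the original post; naive.bayes.smoothed, cond.prob and post are hypothetical names, and k is the smoothing constant):

# Laplace-smoothed variant (sketch): add k to every count so no
# conditional probability can be exactly zero.
naive.bayes.smoothed <- function(condition.vec, k = 1)
{
  # Smoothed estimate of P(feature in column col = value | playtennis = label)
  cond.prob <- function(col, value, label) {
    n.levels <- length(unique(data[, col]));   # distinct values of this feature
    (sum(data[, col] == value & data[, 5] == label) + k) /
      (sum(data[, 5] == label) + k * n.levels);
  }
  # Unnormalized posterior: prior times the four smoothed conditionals
  post <- function(label, prior) {
    prior *
      cond.prob(1, condition.vec[1], label) *
      cond.prob(2, condition.vec[2], label) *
      cond.prob(3, condition.vec[3], label) *
      cond.prob(4, condition.vec[4], label);
  }
  yes <- post("yes", prior.yes);
  no  <- post("no",  prior.no);
  list(post.pr.yes = yes,
       post.pr.no  = no,
       prediction  = ifelse(yes >= no, "yes", "no"));
}
naive.bayes.smoothed(c("overcast", "mild", "normal", "weak"));

With smoothing, both posteriors are strictly positive, and for these three queries the predicted class is unchanged.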
This article is reposted from: http://blog.sina.com.cn/s/blog_61c4630901013qoj.html