# Simulate two well-separated Gaussian clusters (centres at -posi and +posi)
# and scatter-plot them, coloured by their class label.
set.seed(3)
n <- 30
posi <- 2
x <- c(rnorm(n, -posi), rnorm(n, posi))
y <- c(rnorm(n, -posi), rnorm(n, posi))
z <- as.factor(rep(c(-1, 1), each = n))
plot(y, x, col = z)
# Simulate a three-dimensional version of the same two-cluster setting:
# x, y and z are all mixtures of N(-posi, 1) and N(posi, 1), with `fact`
# holding the class labels.
set.seed(3.1)  # NOTE(review): set.seed() coerces to integer, so 3.1 is the same seed as 3 — confirm intent
n <- 30
posi <- 2
x <- c(rnorm(n, -posi), rnorm(n, posi))
y <- c(rnorm(n, -posi), rnorm(n, posi))
z <- c(rnorm(n, -posi), rnorm(n, posi))
fact <- as.factor(rep(c(-1, 1), each = n))
# Draw a 2x2 grid of scatter plots in which the two clusters move
# progressively closer together (separation 3, 2, 1, 0).
set.seed(3.14)  # NOTE(review): set.seed() coerces to integer, so 3.14 is the same seed as 3 — confirm intent
par(mfrow = c(2, 2))
n <- 40
for (sep in c(3, 2, 1, 0)) {
  x <- c(rnorm(n, -sep), rnorm(n, sep))
  y <- c(rnorm(n, -sep), rnorm(n, sep))
  z <- as.factor(rep(c(-1, 1), each = n))
  plot(x, y, col = z)
}
# Restore the default single-panel plotting layout after the 2x2 grid above.
par(mfrow = c(1,1))
上图所示, 为四组点列逐渐靠拢的情况. 在两类点有交叉时, 让我们看一下回判率如何.
# For each separation level, fit a (default RBF-kernel) SVM with
# kernlab::ksvm(), record the resubstitution accuracy, and plot the
# fitted decision regions.
set.seed(3.14)  # NOTE(review): set.seed() coerces to integer, so 3.14 is the same seed as 3 — confirm intent
n <- 40
ksvm.precision <- numeric(4)
for (sep in c(3, 2, 1, 0)) {
  x <- c(rnorm(n, -sep), rnorm(n, sep))
  y <- c(rnorm(n, -sep), rnorm(n, sep))
  z <- as.factor(rep(c(-1, 1), each = n))
  data.close <- data.frame(x, y, z)
  ksvm.model <- ksvm(z ~ ., data = data.close)
  ksvm.prediction <- predict(ksvm.model, data.close)
  # sep = 3 fills slot 1, sep = 2 slot 2, ..., sep = 0 slot 4
  ksvm.precision[4 - sep] <- sum(ksvm.prediction == data.close$z) / nrow(data.close)
  plot(ksvm.model, data = data.close)
}
# Print the recorded resubstitution accuracies, one per separation level
# (3, 2, 1, 0 in that order).
ksvm.precision
## [1] 1.0000 0.9875 0.9250 0.5750
可以看到, 无论是从数值上, 还是图形上, 随着两类点的不断靠近, ksvm模型的回判率逐渐下降.
不同的kernel对模型的影响
在ksvm函数中可以使用的kernel有很多, 最简单的线性核为vanilladot
# Fit a linear-kernel SVM: kernlab's "vanilladot" kernel on a freshly
# simulated two-cluster data set.
set.seed(3.141)  # NOTE(review): set.seed() coerces to integer, so 3.141 is the same seed as 3 — confirm intent
n <- 20
posi <- 2
x <- c(rnorm(n, -posi), rnorm(n, posi))
y <- c(rnorm(n, -posi), rnorm(n, posi))
z <- as.factor(rep(c(-1, 1), each = n))
data.kernel <- data.frame(x, y, z)
ksvm.kernel <- ksvm(z ~ x + y, data = data.kernel, kernel = "vanilladot")
# Simulate two mildly overlapping clusters, plot them, and annotate a
# point near the origin that ends up on the wrong side.
set.seed(3.1415)  # NOTE(review): set.seed() coerces to integer, so 3.1415 is the same seed as 3 — confirm intent
n <- 15
posi <- 1.5
x <- c(rnorm(n, -posi), rnorm(n, posi))
y <- c(rnorm(n, -posi), rnorm(n, posi))
z <- as.factor(rep(c(-1, 1), each = n))
data.c <- data.frame(x, y, z)
plot(y, x, col = z)
text(0, -0.3, "错误点")