Pcr&pls

5 minute read

Performing Principal Components Regression (PCR) and Partial Least Squares Regression (PLS) in R

For UBER’s dataset

library(tidyverse)
library(caret)
library(pls)

Principal Components Regression

# Exploratory PCR fit on the Uber training data.
# The seed fixes the random fold assignment so the CV results are repeatable.
set.seed(45)
pub1 <- train(
  price ~ .,
  data = trainub,
  method = "pcr",
  # Spelled out as FALSE: the alias `F` is an ordinary variable that can be
  # reassigned. The argument is forwarded to the underlying PCR fit.
  scale = FALSE,
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 10 # number of candidate ncomp values to evaluate
)

Plot model RMSE vs different values of components

# Cross-validated performance of the Uber PCR fit across the tuning grid.
plot(
  pub1,
  main = "Principal component regression: Uber"
)

Here’s the PCR plot for Uber:

alt

Print the best tuning parameter ncomp, i.e. the one that minimizes the cross-validation error (RMSE)

# Selected tuning value for the Uber PCR fit, then a summary of the final model.
pub1[["bestTune"]]
summary(pub1[["finalModel"]])

# Refit the Uber PCR model with a shorter tuning grid (tuneLength = 5);
# this is the model used for the test-set predictions.
set.seed(45)
pcr_pub2 <- train(
  price ~ .,
  data = trainub,
  method = "pcr",
  scale = FALSE, # FALSE rather than the reassignable alias `F`
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 5
)

Make predictions for the PCR

# Predict Uber test-set prices with the refitted PCR model.
pcr_ubpred <- predict(pcr_pub2, testub)

# Model performance metrics on the test set.
data.frame(
  RMSE = caret::RMSE(pred = pcr_ubpred, obs = testub$price),
  Rsquare = caret::R2(pred = pcr_ubpred, obs = testub$price)
)

Partial Least Squares Regression

# Exploratory PLS fit on the Uber training data.
# The seed fixes the random fold assignment so the CV results are repeatable.
set.seed(45)
plsub2 <- train(
  price ~ .,
  data = trainub,
  method = "pls",
  scale = FALSE, # FALSE rather than the reassignable alias `F`
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 10 # number of candidate ncomp values to evaluate
)

Plot model RMSE vs different values of components

# Cross-validated performance of the Uber PLS fit across the tuning grid.
plot(
  plsub2,
  main = "Partial least squares: Uber"
)

Here’s the PLS plot for Uber:

alt

Print the best tuning parameter ncomp, i.e. the one that minimizes the cross-validation error (RMSE)

# Selected tuning value for the Uber PLS fit, then a summary of the final model.
plsub2[["bestTune"]]
summary(plsub2[["finalModel"]])

# Refit the Uber PLS model with a shorter tuning grid (tuneLength = 6).
# Seeded like every other train() call in this post; without it the CV fold
# assignment, and hence the selected ncomp, can change from run to run.
# NOTE(review): despite the "ly" in its name, this model is fitted on the Uber
# training data (trainub). The name is kept because the prediction step that
# follows reads `plsly_sel`.
set.seed(45)
plsly_sel <- train(
  price ~ .,
  data = trainub,
  method = "pls",
  scale = FALSE, # FALSE rather than the reassignable alias `F`
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 6
)

Make predictions for the PLS

# Predict Uber test-set prices with the refitted PLS model.
predpls2 <- predict(plsly_sel, testub)

Model performance metrics

# Test-set RMSE and R-squared for the Uber PLS predictions.
data.frame(
  RMSE = caret::RMSE(pred = predpls2, obs = testub$price),
  Rsquare = caret::R2(pred = predpls2, obs = testub$price)
)

For LYFT’s dataset

Principal Components Regression

# Exploratory PCR fit on the Lyft training data.
# The seed fixes the random fold assignment so the CV results are repeatable.
set.seed(45)
pcrly <- train(
  price ~ .,
  data = trainly,
  method = "pcr",
  scale = FALSE, # FALSE rather than the reassignable alias `F`
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 10 # number of candidate ncomp values to evaluate
)

Plot model RMSE vs different values of components

# Cross-validated performance of the Lyft PCR fit across the tuning grid.
plot(
  pcrly,
  main = "Principal component regression: Lyft"
)

Here’s the PCR plot for Lyft:

alt

Print the best tuning parameter ncomp, i.e. the one that minimizes the cross-validation error (RMSE)

# Selected tuning value for the Lyft PCR fit, then a summary of the final model.
pcrly[["bestTune"]]
summary(pcrly[["finalModel"]])

# Refit the Lyft PCR model with a shorter tuning grid (tuneLength = 4);
# this is the model used for the test-set predictions.
# NOTE(review): this refit scales the predictors and uses 4-fold CV, whereas
# the exploratory fit `pcrly` used unscaled predictors and 10-fold CV, so the
# two are not directly comparable. Confirm the difference is intended.
set.seed(45)
pcrly_sel <- train(
  price ~ .,
  data = trainly,
  method = "pcr",
  scale = TRUE, # TRUE rather than the reassignable alias `T`
  trControl = trainControl(method = "cv", number = 4), # 4-fold CV
  tuneLength = 4
)

Make predictions for the PCR

# Predict Lyft test-set prices with the refitted PCR model.
pcr_predly <- predict(pcrly_sel, testly)

Model performance metrics

# Test-set RMSE and R-squared for the Lyft PCR predictions.
data.frame(
  RMSE = caret::RMSE(pred = pcr_predly, obs = testly$price),
  Rsquare = caret::R2(pred = pcr_predly, obs = testly$price)
)

Partial Least Squares

# Exploratory PLS fit on the Lyft training data.
# The seed fixes the random fold assignment so the CV results are repeatable.
set.seed(45)
plsly <- train(
  price ~ .,
  data = trainly,
  method = "pls",
  scale = FALSE, # FALSE rather than the reassignable alias `F`
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 10 # number of candidate ncomp values to evaluate
)

Plot model RMSE vs different values of components

# Cross-validated performance of the Lyft PLS fit across the tuning grid.
plot(
  plsly,
  main = "Partial least squares: Lyft"
)

Here’s the PLS plot for Lyft:

alt

Print the best tuning parameter ncomp, i.e. the one that minimizes the cross-validation error (RMSE)

# Selected tuning value for the Lyft PLS fit, then a summary of the final model.
plsly[["bestTune"]]
summary(plsly[["finalModel"]])

Make predictions for the PLS

# Refit the Lyft PLS model with a shorter tuning grid (tuneLength = 7),
# then predict prices for the Lyft test set.
set.seed(45)
plsly_sel <- train(
  price ~ .,
  data = trainly,
  method = "pls",
  scale = FALSE, # FALSE rather than the reassignable alias `F`
  trControl = trainControl(method = "cv", number = 10), # 10-fold CV
  tuneLength = 7
)
predplsly <- predict(plsly_sel, testly)

Model performance metrics

# Test-set RMSE and R-squared for the Lyft PLS predictions.
data.frame(
  RMSE = caret::RMSE(pred = predplsly, obs = testly$price),
  Rsquare = caret::R2(pred = predplsly, obs = testly$price)
)

# Show the Uber and Lyft PLS tuning curves side by side.
# caret's plot() method for train objects draws with lattice, which ignores
# par(mfrow). The panels are therefore positioned with print()'s `split`
# argument, c(column, row, n_columns, n_rows); `more = TRUE` keeps the page
# open for the second panel.
# par(mfrow) is retained because it still sets the 1 x 2 layout for any
# base-graphics plots drawn later in the same session.
par(mfrow = c(1, 2))
print(
  plot(plsub2, main = "Partial least squares: Uber"),
  split = c(1, 1, 2, 1),
  more = TRUE
)
print(
  plot(plsly, main = "Partial least squares: Lyft"),
  split = c(2, 1, 2, 1)
)

Plot predicted vs. observed prices (Uber PCR and Lyft PLS)

# Predicted vs. observed prices for the Uber PCR model on the test set.
# NOTE(review): caret's predict() for train objects appears to ignore `ncomp`
# and predict with the tuned final model. Confirm, or predict from
# pub1$finalModel directly if a fixed number of components is required.
pcr.pred <- predict(pub1, testub, ncomp = 4)
plot(
  testub$price, pcr.pred,
  main = "Uber PCR prediction", col = "blue",
  xlab = "observed", ylab = "PCR Predicted"
)
# Reference line y = x (intercept 0, slope 1): points on it are perfect
# predictions. The previous abline(1, 1) drew y = 1 + x instead.
abline(a = 0, b = 1, col = "red")

alt

# Predicted vs. observed prices for the Lyft PLS model on the test set.
# NOTE(review): caret's predict() for train objects appears to ignore `ncomp`
# and predict with the tuned final model. Confirm, or predict from
# plsly$finalModel directly if a fixed number of components is required.
pls.pred <- predict(plsly, testly, ncomp = 7)
plot(
  testly$price, pls.pred,
  main = "Lyft PLS prediction", col = "blue",
  xlab = "observed", ylab = "PLS Predicted"
)
# Reference line y = x (intercept 0, slope 1): points on it are perfect
# predictions. The previous abline(1, 1) drew y = 1 + x instead.
abline(a = 0, b = 1, col = "red")

alt

This is the end of part 2.

Twitter Facebook LinkedIn