# Extract data to R from YOU CANalytics ----
data <- read.csv(
  "http://ucanalytics.com/blogs/wp-content/uploads/2016/07/Regression-Analysis-Data.csv"
)

# Remove rows that contain any missing value
data_without_missing <- data[complete.cases(data), ]

# Tag predictor and response variables ----
numeric <- c(
  "Dist_Taxi", "Dist_Market", "Dist_Hospital",
  "Carpet", "Builtup", "Rainfall"
)
categoric <- c("Parking", "City_Category")
Target <- c("House_Price")

# Panel functions for the scatterplot matrix ----

#' Correlation panel for `pairs()`
#'
#' Writes the absolute correlation of `x` and `y` in the middle of the panel,
#' sized by the strength of the correlation, and adds a significance marker
#' derived from the `cor.test()` p-value in the upper-right area.
#'
#' @param x,y Numeric vectors supplied by `pairs()` for the current panel.
#' @param digits Number of significant digits used to format the coefficient.
#' @param prefix Text prepended to the formatted coefficient.
#' @param cex.cor Optional base magnification for the coefficient text. When
#'   omitted, a base of 4 is used.
#' @param ... Accepted so that extra arguments forwarded by `pairs()` do not
#'   raise an "unused argument" error; they are not used.
#' @return Called for its side effect of drawing on the current device.
panel.cor <- function(x, y, digits = 3, prefix = "", cex.cor, ...) {
  # Switch to unit coordinates for text placement and put the previous
  # coordinates back on exit. The named form `usr = ` is required for the
  # restore to actually set the parameter.
  old_usr <- par("usr")
  on.exit(par(usr = old_usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))

  r <- abs(cor(x, y))

  # Formatting together with a many-digit reference value keeps the number of
  # printed decimals consistent across panels.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)

  base_cex <- if (missing(cex.cor)) 4 else cex.cor

  # Map the p-value of the correlation test to the usual star notation.
  test <- cor.test(x, y)
  signif_mark <- symnum(
    test$p.value,
    corr = FALSE,
    na = FALSE,
    cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    symbols = c("***", "**", "*", ".", " ")
  )

  # Stronger correlations are printed larger.
  text(0.5, 0.5, txt, cex = base_cex * r^0.3, col = "dodgerblue")
  text(0.8, 0.9, signif_mark, cex = 4, col = 2)
}

#' Histogram panel for the diagonal of `pairs()`
#'
#' Draws a histogram of `x` with bar heights rescaled so the tallest bar is 1;
#' the vertical range is set to 0-1.5, leaving headroom above the bars.
#'
#' @param x Numeric vector supplied by `pairs()` for the current variable.
#' @param ... Further arguments passed on to `rect()`.
#' @return Called for its side effect of drawing on the current device.
panel.hist <- function(x, ...) {
  # Keep the horizontal range chosen by pairs(), replace the vertical one, and
  # restore the previous coordinates on exit.
  old_usr <- par("usr")
  on.exit(par(usr = old_usr), add = TRUE)
  par(usr = c(old_usr[1:2], 0, 1.5))

  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  n_breaks <- length(breaks)
  heights <- h$counts / max(h$counts)

  rect(breaks[-n_breaks], 0, breaks[-1], heights, col = "orange", ...)
}

# Matrix plot with outliers ----
pairs(
  data_without_missing[, c(numeric, Target)],
  upper.panel = panel.cor,
  diag.panel = panel.hist,
  cex.labels = 2
)

# Matrix plot without outliers (house prices below 10^8 only) ----
data_without_outliers <-
  data_without_missing[data_without_missing$House_Price < 10^8, ]
pairs(
  data_without_outliers[, c(numeric, Target)],
  upper.panel = panel.cor,
  diag.panel = panel.hist,
  cex.labels = 2
)