# Data preparation ----
# Load the housing data set, drop every row that contains a missing value,
# then drop rows whose House_Price is 10^8 or more (treated as outliers).
data <- read.csv(
  "http://ucanalytics.com/blogs/wp-content/uploads/2016/07/Regression-Analysis-Data.csv"
)
data_without_missing <- data[complete.cases(data), ]
data_without_outliers <-
  data_without_missing[data_without_missing$House_Price < 10^8, ]

# Box plot and pairwise t-test ----
# House price (scaled to millions) per city category; axis labels are set
# explicitly, so they do not depend on how the formula is written.
boxplot(
  House_Price / 10^6 ~ City_Category,
  data = data_without_outliers,
  xlab = "City Category",
  ylab = "House Price (in Millions)",
  ylim = c(1, 13),
  main = "House Price by City Category",
  col = c("Orange", "cornflowerblue", "grey")
)

# Run the pairwise t-test once and reuse its p-value matrix. For three groups
# the matrix is 2 x 2 and lower-triangular: rows are the 2nd and 3rd group,
# columns are the 1st and 2nd group, i.e.
#            1st      2nd
#   2nd   P(1 = 2)    NA
#   3rd   P(1 = 3)  P(2 = 3)
# Index it by [row, column]: linear indices 2 and 4 are P(1 = 3) and P(2 = 3),
# which is the reverse of what the "B = C" / "A = C" labels need.
# NOTE(review): the hard-coded labels assume exactly three City_Category
# groups that sort in A, B, C order -- confirm against the data.
pairwise_p <- pairwise.t.test(
  data_without_outliers$House_Price,
  data_without_outliers$City_Category
)$p.value

text(1, 13, paste("P(A = B) = ", round(pairwise_p[1, 1], 2)), col = "red")
text(2, 13, paste("P(B = C) = ", round(pairwise_p[2, 2], 2)), col = "red")
text(3, 13, paste("P(A = C) = ", round(pairwise_p[2, 1], 2)), col = "red")