#..............................
# Simple Linear Regression with R ----
# adapted from http://r-statistics.co/Linear-Regression.html

#..Data ----
# We will use the cars dataset that comes with base R
?cars
head(cars)
plot(cars)

#..Estimation ----
# Let's run a simple regression model (one predictor variable):
# stopping distance (dist) explained by speed, estimated on the full data.
linearMod <- lm(dist ~ speed, data = cars)

# Let's get information on the fitted model
summary(linearMod)  # get a summary of the estimation
coef(linearMod)     # get only the coefficients

modelSummary <- summary(linearMod)                  # capture model summary as an object
modelCoeffs <- modelSummary$coefficients            # coefficient table (one row per term)
beta.estimate <- modelCoeffs["speed", "Estimate"]   # beta estimate for speed
std.error <- modelCoeffs["speed", "Std. Error"]     # standard error for speed
t_value <- modelCoeffs["speed", "t value"]          # t statistic for speed

# The F statistic is a component of the summary object, not of the lm fit
# itself: `linearMod$fstatistic` is NULL, so read it from `modelSummary`.
f_statistic <- modelSummary$fstatistic[1]           # F statistic value
f <- modelSummary$fstatistic                        # value, numerator df, denominator df
model_p <- pf(f[1], f[2], f[3], lower.tail = FALSE) # overall model p-value

#..Accuracy of prediction ----
# We create a training and a test data set; we do an estimation and a
# prediction; we check prediction accuracy.

# Create Training and Test data
set.seed(100)  # setting seed to reproduce results of random sampling
trainingRowIndex <- sample(seq_len(nrow(cars)), 0.8 * nrow(cars))  # row indices for training data
trainingData <- cars[trainingRowIndex, ]   # model training data
testData <- cars[-trainingRowIndex, ]      # test data (all remaining rows)

# Build the model on training data
lmMod <- lm(dist ~ speed, data = trainingData)  # build the model
distPred <- predict(lmMod, testData)            # predict distance on the test rows

# Check accuracy: put together actual and predicted values, then correlate them
actuals_preds <- data.frame(cbind(actuals = testData$dist, predicteds = distPred))
cor(actuals_preds)

#..............................
# Multiple Linear Regression with R ----
# data from https://onlinecourses.science.psu.edu/stat501/node/284

# ..Data ----
# Read the whitespace-delimited data file; its first row holds the column names.
# The models below use the columns PIQ, Brain, Height and Weight.
# NOTE(review): the file is fetched over the network -- confirm the URL still resolves.
iq.data <- read.table(
  "https://onlinecourses.science.psu.edu/stat501/sites/onlinecourses.science.psu.edu.stat501/files/data/iqsize.txt",
  header = TRUE
)

# Scatterplot matrix with a smoother in each panel; lower.panel = NULL
# suppresses the lower triangle so each pair is drawn only once.
pairs(iq.data, lower.panel = NULL, panel = panel.smooth)

# ..Estimation ----
# Full model: PIQ on all three predictors
iq.model <- lm(PIQ ~ Brain + Height + Weight, data = iq.data)
summary(iq.model)

# Reduced model: Weight dropped
iq.model2 <- lm(PIQ ~ Brain + Height, data = iq.data)
summary(iq.model2)

#..............................
# Advanced Regression Model with R ----
# see http://r-statistics.co/adv-regression-models.html
# Robust, Logit, Probit, Multinomial, etc models