#============================================================================== # Exploring the Property Value Data #============================================================================== # This version combines various earlier scripts and is by Matt Holian 10/17/2018 # original scripts by Bill Sundstrom and Michael Kevane #============================================================================== # 1. Settings, packages, and options #============================================================================== # You should generally run all of the commands in this Section 1 # Clear the working space rm(list = ls()) # Set working directory (edit for YOUR folder) # For windows - setwd("C:/Users/mkevane/courses/econ42/data") # For Mac - setwd("/Users/mkevane/courses/econ42/data") setwd("/Users/mattholian/Desktop/R") # Load the packages even if you won't need all of them library(ggplot2) library(stargazer) library(sandwich) library(car) # turn off scientific notation except for big numbers options(scipen = 9) # function to calculate corrected SEs for regression cse <- function(reg) { rob = sqrt(diag(vcovHC(reg, type = "HC1"))) return(rob) } #============================================================================== # 2. Data section #============================================================================== # Read data from csv file puma8511 = read.csv("mergedPUMA8511.csv",header=TRUE,sep=",") # generate annual hours and hourly wage variables puma8511$hours = (puma8511$wkhp)*50 puma8511$hourlywage = puma8511$wagp / puma8511$hours # generate a varaible which is a recoded version of the SEX variable; requires the "car" package puma8511$female = recode(puma8511$sex,"1=0;2=1") # generate a varaible which is a recoded version of the SCHL variable puma8511$college = recode(puma8511$schl,"1:20=0;21:24=1") # create a new data frame which is a subset of individuals with wkhp>0 and hourlywage>0 to use in creating first summary statistics table in analysis section puma8511_wkhp = subset(puma8511,wkhp>0 & hourlywage>0) # create a new data frame same as above but females only puma8511_wkhp_female = subset(puma8511,wkhp>0 & hourlywage>0 & female==1) # create a new data frame, males only puma8511_wkhp_male = subset(puma8511,wkhp>0 & hourlywage>0 & female==0) #============================================================================== # 3. Analysis section #============================================================================== # Calculate summary statistics for the full sample; uses the WKHP subset created above stargazer(puma8511_wkhp[c("agep", "college","wkhp", "hours", "female", "hourlywage")], summary.stat=c("n", "mean", "sd", "min", "max" ),type="text", digits=2, title="PUMA 0608511, Summary Statistics, Full Sample") # Calculate summary statistics for females stargazer(puma8511_wkhp_female[c("agep", "college","wkhp", "hours", "female", "hourlywage")], summary.stat=c("n", "mean", "sd", "min", "max" ),type="text", digits=2, title="PUMA 0608511, Summary Statistics, Female Subsample") # Calculate summary statistics for males stargazer(puma8511_wkhp_male[c("agep", "college","wkhp", "hours", "female", "hourlywage")], summary.stat=c("n", "mean", "sd", "min", "max" ),type="text", digits=2, title="PUMA 0608511, Summary Statistics, Male Subsample") # difference in means test t.test(hourlywage~female, data=puma8511_wkhp, FUN=c(mean), na.rm=TRUE) # running a simple regression; reg1 = lm(hourlywage~female, data=puma8511_wkhp) summary(reg1) # Report regression results in a nice table, with corrected standard errors, using stargazer stargazer(reg1, se=list(cse(reg1)), title="Average Wage and Gender in California PUMA 8511", type="text", df=FALSE, digits=3)