SCS: Seminars 2008-09: Propensity scores R script

From MathWiki

#
#  3D illustration of propensity score
#

# 1. show conditional vs marginal effect in nice linear case
# 2. show quad in Z and X simusoidal in Z
#     show conditional
#          incorrect linear
#          correct model
#          correct pscore estimate


# source("/R/fun.R")
# source("/R/Plot3d.R")

install.packages('rgl')

source("http://www.math.yorku.ca/~georges/R/fun.R")
source("http://www.math.yorku.ca/~georges/R/Plot3d.R")

#
# Linear example:
# 6 groups with different values of Z:
#    - each group has a range of values of X around the value of Z
#    - same conditional effect of X (-1)
# This makes it easy to define a clear conditional effect of X
# We can then consider which models in X and Z recover that
# conditional effect and which do not.
#

        dd <- expand.grid( Z = 1:6, del = seq(-1,1,1/2))
        dd$X <- dd$Z + dd$del
        dd$Y <- 2*dd$Z - dd$X + .1*rnorm( length( dd$X))

        Init3d(family='serif',cex=1.2,font=2)
        Plot3d( Y ~ Z + X | factor(Z), dd)

        Fit3d( lm( Y ~ X,dd) )    # marginal model

        cond <- lm( Y ~ X + factor(Z), dd)
        cond

        pred.cond <- expand.grid( Z = 1:6, del = seq( -1.5, 1.5, 1))
        pred.cond$X <- with( pred.cond, del + Z )
        pred.cond$Y.cond <- predict( cond, pred.cond)

        for ( z in unique( pred.cond$Z) ) {
              dc <- pred.cond[ pred.cond$Z == z,]
              Lines3d( z = dc$X, x = z, y = dc$Y.cond, col = 'red', lwd = 3)
        }

# we can get conditional eff of X with a linear model:

        fit.lin <- lm( Y ~ X + Z, dd)
        fit.lin
        Fit3d( fit.lin, col = 'red')

# so additive model controls for Z same as 'within level of Z' model

# What happens if effect of Z is non-linear

        dd$Y2 <- 1*dd$Z + 10* sin(2*pi*(dd$Z-1)/5) - dd$X + .1*rnorm( length( dd$X))

        Init3d(family='serif', cex = 1.2, font = 2)
        Plot3d( Y2 ~ Z + X | factor(Z), dd)
        Axes3d()

        Fit3d( lm( Y2 ~ Z + X, dd))

# Try something else:

        dd   <- expand.grid( Z = 1:6, del = seq(-1,1,1/2))

        dd$X <- dd$Z^4/6^3 + dd$del     # non-linear relationshiop betwen X and Z

        dd$Y2 <- 1*dd$Z + 10* sin(2*pi*(dd$Z-1)/5) - dd$X + .1*rnorm( length( dd$X))

        Plot3d( Y2 ~ Z + X | factor(Z), dd)
        
        fit.lin <- lm ( Y2 ~ X + Z, dd)
        Fit3d( fit.lin )

        fitY <- lm( Y2 ~ X + Z + sin(2*pi*(Z-1)/5), dd)       # non-linear function of Z
        Fit3d( fitY, col = 'red')

        fitP <- lm( Y2 ~ X + Z + I(Z^4), dd)
        Fit3d( fitP, col = 'gray')

        library(car)
        ?avp
        avp( fit.lin, 'X')
        avp( fitY, 'X')
        avp( fitP, 'X')

##
## Example
##

data(Prestige)
head(Prestige)

ds <- Prestige
(fit.mr <- lm( income ~ women + prestige + education, ds))
ds$x.avp <- resid( lm ( women ~ prestige + education , ds))
ds$y.avp.mr <- resid( lm( income ~ prestige + education, ds))


ds$women.hat <- predict( lm( women ~ prestige + education, ds))
ds$women.res <- ds$women - ds$women.hat




(fit.dev <- lm( income ~ women.res, ds))
ds$y.avp.dev <- resid( lm( income ~ 1, ds))

(fit.prop <- lm( income ~ women + women.hat, ds))
ds$y.avp.prop <- resid( lm( income ~ women.hat, ds))

(fit.some <- lm( income ~ women + women.hat + education, ds))
ds$y.avp.some <- resid( lm( income ~ women.hat + education, ds))

plot( y.avp.dev ~ x.avp, ds)
lines( with( ds, dell( x.avp, y.avp.dev)))
lines( abline( lm( y.avp.dev ~ x.avp, ds)))

points( y.avp.mr ~ x.avp, ds,col ='red')
lines( with( ds, dell( x.avp, y.avp.mr)),col ='red')
lines( abline( lm( y.avp.mr ~ x.avp, ds)),col ='red')

points( y.avp.prop ~ x.avp, ds,col ='green')
lines( with( ds, dell( x.avp, y.avp.prop)),col ='green')
lines( abline( lm( y.avp.prop ~ x.avp, ds)),col ='green')

points( y.avp.some ~ x.avp, ds,col ='blue')
lines( with( ds, dell( x.avp, y.avp.some)),col ='blue')
lines( abline( lm( y.avp.some ~ x.avp, ds)),col ='blue')