MATH 2565 W 2007 Section M Week 6 R Script for Distributions

From MathWiki

  #
  #  MATH 2565 Week 6
  #  Random Variables and distributions
  #  Chapter 4-3, 5
  #
  
  # Example of probability calculations with R
  x <- c( 1, 2, 3, 4)
  prob <- c( 4, 3, 2, 1) / 10   # card example
  prob
  cbind( x, prob)
  sum(prob)
  
  x*prob
  
  EX <- sum( x*prob)
  EX
  (x - EX)
  (x - EX)^2
  (x - EX)^2 * prob
  
   Mean <- function(x,p) {
       if(sum(p) != 1) stop("Error: probabilites must add to 1")
       sum(x*p)
   }


  VarX <- sum(   (x - EX)^2 * prob )
  VarX
  SDX <- sqrt(VarX)
  
  Var
  Var <- function( x, p ) sum( (x-Mean(x,p)) ^ 2 * p)
  
  SD <- function(x, p) sqrt( Var(x,p) )
  
  EXs <- function( x, p ){
       c(Mean = Mean(x,p), Var = Var(x,p), SD = sqrt(Var(x,p)) )
  }
  EXs ( x, prob)
  
  
  # Example using frequencies
  
  dd <- read.table( stdin(), header = T)
  X  Freq
  0  450
  1  300
  2  200
  3   50
  4   40
  5   10
  
   dd
   dd$p <- dd$Freq/ sum( dd$Freq )
   dd
   # cute:
   library(MASS)
   fractions(dd$p)
   EXs ( dd$X, dd$p )
   
   # plot of probability function
   
   plot( p ~ X, dd, type = 'h', ylim = c(0,1))


   #
   # Standardizing a variable
   #
   #


   Std <- function (x, p) (x - Mean(x,p))/SD(x,p)
   Xs <- Std( dd$X, dd$p)
   EXs(Xs, dd$p)
   cbind(dd, Xs)


   ##
   ##
   ## Covariance between two variables
   ##  X be score on card
   ##  Y be 0 if black 1 if red
   ##
   
   Cards <- read.table(stdin(), header = T)
   X Y p
   1 0 .2
   1 1 .2
   2 0 .1
   2 1 .2
   3 0 .1
   3 1 .1
   4 0 .1
   4 1 0
   
   Cards
   
   Cov
   cov


   Cov <- function( x, y, p) {
       sum( (x-Mean(x,p)) * (y-Mean(y,p)) * p)
   }
   
   Cov( Cards$X, Cards$Y, Cards$p)
   with( Cards, Cov(X, Y, p))   # could also use attach(Cards) ... but
   
   # what happens is I try:
   
   #
   #  Correlation: Standardized Covariance
   #
   
   Corr <- function( x, y, p) {
       Cov(x,y,p) / ( SD( x,p) * SD(y,p))
   }
   
   with(Cards, Corr(X,Y,p))
   #  Exercise:
   #   Try Cov( Std(y,p), Std(x,p), p).
   #   What do you think you will get
   #


   ###
   ###
   ###   Chapter 5
   ###
   ###
   ## Binomial distribution
   
   
   # Functions for distributions in R
   # some distribution names:  binom, pois, norm
   # function prefixes:
   #       p - cumulative probability: P( X <= x)
   #       q - quantile                          x such that P( X <= x) = p
   #       r - generate random numbers
   #       d - density or probability function   P( X = x)
   
   ?rbinom
   
   #
   # Probability of 0 Jacks in 5 tries
   #
   
   dbinom( 0, 5, 1/10)
   # can get prob for more than one x at a time
   
   dbinom( 0:5, 5, 1/10)
   dbinom( 0:10, 5, 1/10)
   # Note:
   p <- dbinom( 0:5, 5, 1/10)
   p
   sum(p)
   Mean( 0:5, p)
   Var( 0:5, p)
   5 * (1/10) * (9/10)
   
   # Probability of exactly 3 Js
   
   dbinom(3, 5, 1/10)
   
   # Probability of 3 or less (cumulative probability function)
   
   pbinom(3, 5, 1/10)
   
   pbinom(3, 5, 1/10) - pbinom( 2, 5, 1/10)  # what should this be??
   # What if you don't have cards?
   
   rbinom( 1, 5, 1/10)
   # Do this lots of times
   
   sam <- rbinom( 1000, 5, 1/10)
   table(sam)    # frequeny
   
   table(sam) / length(sam)    # relative frequency
   dbinom( 0:5, 5, 1/10)    # probabilities: what's the connection
   
   sam <- rbinom( 1000000, 5 , 1/10)
   
   table(sam)
   table(sam) / length( sam )
   
   table(sam) / length( sam ) - dbinom(0:5, 5, 1/10)  # diff between rel.freq and prob.
   
   
   
   ##
   ##
   ##  The Poisson distribution
   ##
   ##
   
   # Murder per week? Suppose E(X) = 0.9
   
   p <- dpois(0:10, 0.9)
   dbinom( 0:10, 1000, 0.9/1000)
   plot( 0:10, p,type = 'h')
   
   plot( 0:10, dpois( 0:10, 1.5), type='h')
   
   plot( 0:10, dpois( 0:10, 5.0), type='h')


   #
   #  Example:
   #  If   number of students who need advising has mean of 3.5 / hour
   #  What is prop that more than 15 will show up in a 3 hour shift
   
   #  X in 3=hour shift ~ Poisson( 3 * 3.5 )
   
   p.15orless <- ppois( 15, 3* 3.5)
    1 -  p.15orless   # p of more than 15
    
    
   # What is probability that between 10 to 12 inclusive will show up
   
   dpois(10, 3*3.5) +  dpois(11, 3*3.5) + dpois(12, 3*3.5)
   
   dpois( c(10,11,12), 3*3.5)
   
   sum( dpois( c(10,11,12), 3*3.5) )
   
   ppois( 12, 3*3.5) - ppois( 9, 3*3.5)
   
   
   ###
   ###
   ###  Normal distribution (continuous)
   ###
   ###
   
   
   dnorm(0 , 0 , 1)     # not prob of a 0 but approx prob of 0 +/- 0.5, i.e. a density
   
   
   # Standard Normal
   
   x <- seq(-4,4,.01)
   
   plot( x, dnorm(x, 0, 1), type = 'l')
   pnorm( 1, 0, 1) - pnorm( -1, 0, 1)
   abline(v = c(-1,1))
   text( 0, .2,  round( pnorm( 1, 0, 1) - pnorm( -1, 0, 1), 4))
   
   # IQ
   
   x <- seq( 40, 160, .1)
   plot( x, dnorm(x, 100, 15), type = 'l')