MATH 2565 W 2007 Section M Week 2 R Script for random sampling

From MathWiki

Home: MATH 2565 W 2007 Section M#Week 2:_January_10
Output: MATH 2565 W 2007 Section M Week 2 R Script for random sampling: output
   #
   #
   #  MATH 2565 Week 2
   #  Using R for sampling
   #  Chapter 1-Sampling.R
   #
   
   #  Sampling frame: List of members in the population
   #  We'll suppose the population is listed in an Excel file that
   #     we've saved as a .csv file.
   
   
   #  Make up a sample data frame of 550 names with 2 genders from 20 neighbourhoods
   
   
   
       #  Build the made-up sampling frame: one row per person with an id,
       #  two random initials, a sex, and one of 20 neighbourhoods ('a' to 't')
       population <- data.frame(
                       id = 1 : 550,
                       # paste0() glues the two random letters together with
                       # no separator between them
                       init = paste0( sample(LETTERS, 550, replace = TRUE),
                                      sample(LETTERS, 550, replace = TRUE) ),
                       sex = sample( c("M","F"), 550, replace = TRUE),
                       nbhd = sample( letters[1:20], 550, replace = TRUE)
                       )

       head(population)   # print out first 6

       #  Cross-tabulate: how many of each sex in each neighbourhood
       table( population$sex, population$nbhd)
   
   ##
   ## Simple random sample of 30 with replacement
   ##
   
       ?sample
   
       sample.rows.rep <- sample( 1:550, 30, replace = T)
   
       table( sample.rows.rep)  # what is the probability of no repeats?
   
       sample.data.rep <- population[ sample.rows.rep , ]
   
       sample.data.rep
   
       write.csv( sample.data.rep, file = "c:/sampleworksheetrep.csv")
   
   
   ##
   ## Simple random sample of 30 without replacement (guaranteed no repeats)
   ##
   
       #  Draw 30 distinct row numbers from the sampling frame.  'replace' is
       #  FALSE by default, so no row can be chosen twice.  The range of row
       #  numbers comes from the data frame itself rather than a typed-in 550.
       sample.rows <- sample( seq_len(nrow(population)), 30 )

       sample.data <- population[ sample.rows , ]

       sample.data

       #  How many of each sex ended up in the sample?
       table( sample.data$sex)

       #  Save the sample as a worksheet; the path points at the root of drive C:
       write.csv( sample.data, file = "c:/sampleworksheet.csv")
   
   ##
   ## Stratified sample: stratified by sex: 15 from each sex
   ## -- Example of selecting from a vector
   ## -- Only works with few levels for stratification variable
   ##
   
       #  Row numbers of each stratum.  'which()' returns the positions where
       #  the condition is TRUE and leaves out rows where 'sex' is missing (NA),
       #  so no NA row numbers can slip into the sample.
       rows.male   <- which( population$sex == "M" )
       rows.female <- which( population$sex == "F" )

       #  Simple random sample of 15 within each stratum
       sample.rows.male   <- sample(rows.male, 15)
       sample.rows.female <- sample(rows.female, 15)

       #  Combine the two strata (this overwrites any earlier 'sample.rows')
       sample.rows <- c(sample.rows.male, sample.rows.female)

       sample.data.strat <- population[ sample.rows, ]

       sample.data.strat
       
       
   ##
   ## Stratified sample: easier but fancier way that uses 'split' and 'lapply'
   ## -- Can be used for a stratification variable with many levels
   ## -- Illustrates use of lists, 'split()' and 'lapply()' ... powerful programming tools
   ##
   
             split( seq_len(nrow(population)) , population$sex)

        # The line above prints a list with one vector of row numbers per sex.

        # 'sample(x, size)' reads a single number x as "sample from 1:x", so a
        # stratum holding just one row number would be sampled from the wrong
        # rows.  Picking positions with 'sample.int()' and indexing the row
        # numbers avoids that, and gives the same draws as 'sample()' for
        # every stratum with two or more rows.
        sample.stratum <- function(rows, size) {
            rows[ sample.int(length(rows), size) ]
        }

        lapply( split( seq_len(nrow(population)) , population$sex) ,
                sample.stratum, size = 15)

        # This 'applies' the 'sample.stratum()' function to each element of the
        # list and adds the 'size = 15' argument

        sample.rows <-  lapply( split( seq_len(nrow(population)) , population$sex) ,
                                sample.stratum, size = 15)

        sample.rows # Note: a list with two vectors

        sample.rows <- unlist(sample.rows)

        sample.rows    # Note: a single vector

        sample.data.strat <- population[ sample.rows , ]
   
   
   ###
   ###  Cluster sample: sample neighbourhoods and then 1 person from each sampled neighbourhood
   ###
   
        #  Show each neighbourhood label that occurs in the population, once each
        unique( population[["nbhd"]] )
    
    
   ###
   ###  Advanced Exercise:
   ###  Work out the details and an example for cluster sampling
   ###