# MATH 2565 W 2007 Section M Week 2 R Script for random sampling

Home: MATH 2565 W 2007 Section M#Week 2:_January_10
Output: MATH 2565 W 2007 Section M Week 2 R Script for random sampling: output
```   #
#
#  MATH 2565 Week 2
#  Using R for sampling
#  Chapter 1-Sampling.R
#

#  Sampling frame: a list of the members of the population
#  We'll suppose the population is listed in an Excel file that
#     we've saved as a .csv file.

#  Make up a sample data frame of 550 names with 2 genders from 20 neighbourhoods

# Build a made-up sampling frame of 550 people. Every column is drawn at
# random, so the contents differ on each run unless set.seed() is called first.
#   id   - row identifier, 1 to 550
#   init - two-letter initials: two independently drawn capital letters
#   sex  - "M" or "F"
#   nbhd - one of 20 neighbourhoods, labelled "a" to "t"
population <- data.frame(
  id = 1:550,
  # paste0() joins the two letters with no separator, e.g. "Q" and "Z" -> "QZ"
  init = paste0(
    sample(LETTERS, 550, replace = TRUE),
    sample(LETTERS, 550, replace = TRUE)
  ),
  sex = sample(c("M", "F"), 550, replace = TRUE),
  nbhd = sample(letters[1:20], 550, replace = TRUE)
)

head(population)   # print out first 6

# Cross-tabulate sex (rows) by neighbourhood (columns)
table(population$sex, population$nbhd)

##
## Simple random sample of 30 with replacement
##

?sample   # open the help page for sample()

# Draw 30 row numbers with replacement: the same row may be picked more than
# once. The range comes from the sampling frame itself rather than a
# hard-coded 550, so it stays correct if the frame changes size.
sample.rows.rep <- sample(seq_len(nrow(population)), 30, replace = TRUE)

table(sample.rows.rep)  # what is the probability of no repeats?

# Pull the sampled rows out of the sampling frame; a row number drawn twice
# yields that row twice
sample.data.rep <- population[sample.rows.rep, ]

sample.data.rep

# Save the sample as a .csv file
write.csv(sample.data.rep, file = "c:/sampleworksheetrep.csv")

##
## Simple random sample of 30 without replacement (guaranteed no repeats)
##

# Draw 30 distinct row numbers from the sampling frame
sample.rows <- sample(seq_len(nrow(population)), 30)   # replace = FALSE by default

# Pull the sampled rows out of the sampling frame
sample.data <- population[sample.rows, ]

sample.data

# Count how many of each sex ended up in the sample
table(sample.data$sex)

# Save the sample as a .csv file
write.csv(sample.data, file = "c:/sampleworksheet.csv")

##
## Stratified sample: stratified by sex: 15 from each sex
## -- Example of selecting from a vector
## -- Only works with few levels for stratification variable
##

# Row numbers of each stratum: index the vector of all row numbers with a
# logical vector that is TRUE for the rows belonging to that sex
rows.male <- seq_len(nrow(population))[population$sex == "M"]
rows.female <- seq_len(nrow(population))[population$sex == "F"]

# Simple random sample of 15 row numbers, without replacement, in each stratum
sample.rows.male <- sample(rows.male, 15)
sample.rows.female <- sample(rows.female, 15)

# Combine the two strata: 15 male rows followed by 15 female rows
sample.rows <- c(sample.rows.male, sample.rows.female)

sample.data.strat <- population[sample.rows, ]

sample.data.strat

##
## Stratified sample: easier but fancier way that uses 'split' and 'lapply'
## -- Can be used for a stratification variable with many levels
## -- Illustrates use of lists, 'split()' and 'lapply()' ... powerful programming tools
##

# Row numbers grouped by sex: a list with one vector of row numbers per level
split(seq_len(nrow(population)), population$sex)

# Draw 15 row numbers from each group (printed only, not saved)
lapply(split(seq_len(nrow(population)), population$sex), sample, size = 15)

# This 'applies' the 'sample()' function to each element of the list and
# adds the 'size = 15' argument

sample.rows <- lapply(
  split(seq_len(nrow(population)), population$sex),
  sample,
  size = 15
)

sample.rows # Note: a list with two vectors

# Flatten the list into one vector of row numbers
sample.rows <- unlist(sample.rows)

sample.rows    # Note: a single vector

sample.data.strat <- population[sample.rows, ]

###
###  Cluster sample: sample neighbourhoods and then 1 from each neighbourhood
###

# List the distinct neighbourhood labels that occur in the sampling frame
unique(population$nbhd)

###