# R programming howto

# R programming notes and how to do.

## Split data frame by month

s <- split(airquality, airquality$Month)

# This returns a list of data frames: one data.frame per month.

## Applying a mean function to data split by month

Take `s` from the section above:

sapply(s, function(x) colMeans(x[, c("Ozone", "Solar.R", "Wind")]))

This is the same as using tapply - and thus a good way to understand how tapply works.

For example with the iris data supplied by R :

library(datasets)  # package that ships the built-in example datasets
data(iris)  # load the iris data frame into the workspace
tapply(iris$Sepal.Length, iris$Species, mean)  # mean sepal length per species

It will first split in a list the rows by species, then calculate the mean on each group.

## Split data frame by multiple factors : Month and Wind

s <- split(airquality, list(airquality$Month, airquality$Wind), drop = TRUE)

OK, the example above is not very useful for this dataset, but the same approach can be useful for another one!

You can then apply a mean function to all those groups of factors :

sapply(s, function(x) colMeans(x[, c("Ozone", "Solar.R", "Wind")]))

## Compute the mean of specific columns in a data frame

apply(iris[, 1:4], 2, mean)

## Create vectors

using c() :

myvector <- c("a", "b")

## Create matrices

> m <- matrix(1:4, nrow = 2, ncol = 2)

> m

     [,1] [,2]
[1,]    1    3
[2,]    2    4

> dimnames(m) <- list(c("a","b"), c("y","z"))

> m
  y z
a 1 3
b 2 4

## Remove na values from vector x

> bad <- is.na(x)
> x[!bad]

## Pick a few random elements from a list

picks <- sample(list, amount-to-sample)

## Load CSV data

data <- read.csv("input.csv")

## dput & dget

### Save variables / loaded data to file

dput(data, "SaveFile.R")

### Load saved data that was dput'ed

data <- dget("SaveFile.R")

## dump & source

`dump()` and `source()` can write multiple variables to a file and read them back.

assuming x and y are set

dump (c("x","y"), file = "data.R")

then load back with :

source("data.R")

## Miscellaneous functions on data frames

**Variable inspection**
summary(f) -- shortcut to print summary information about a data frame
str(f) -- print the internal structure of a variable

`nrow(f)` -- number of rows in the data frame

head(f, n) -- print the first n rows of f

`tail(f, n)` -- print the last n rows of f

**Conditional selection**

`newf <- subset(f, RowName > 5)` -- keep only the rows of f where the condition holds (here, column `RowName` greater than 5)

Useful function : column mean

#' Compute the mean of every column of a data frame or matrix.
#'
#' @param f A data frame or matrix; each column is passed to `mean()`.
#' @return A numeric vector with one mean per column, in column order.
#'   `NA` values are ignored. A zero-column input yields `numeric(0)`.
columnmean <- function(f) {
  nc <- ncol(f)
  means <- numeric(nc)  # preallocate one slot per column
  for (i in seq_len(nc)) {  # seq_len() is empty when nc == 0, unlike 1:nc
    means[i] <- mean(f[, i], na.rm = TRUE)
  }
  means
}

## File operations

list.files("diet_data")

## Recent Comments