##------------------------------------------------------------------------##
## Script for Bio271: Introduction to R                                   ##
## Maoying Wu                                                             ##
## Fall 2010                                                              ##
##------------------------------------------------------------------------##

### 1. Download R from
##     http://cbb.sjtu.edu.cn/~mywu/bi217/R-2.11.1-win32.exe
### 2. Install R on your computer
### 3. Start R GUI Environment
# To change working directories:
#   on Windows, go to File menu, Change Directory
#   on UNIX, just start R from the directory you want
#   or use the command setwd("/where/you/want/to/go")

### 4. Help topic
# To load the help file for the solve command, use help(solve) or its
# shorthand
?solve
# To search for all help files that discuss solve, use
help.search("solve")
# At the bottom of the help file for solve, there are some examples.
# To execute these examples for the solve command, just use:
example(solve)

### 5. Assignments
# R has two types of elementary commands.
# 1. We can just compute a value and display the value, for instance,
1 / 3
# 2. We can store the value of a computation in a variable, for instance,
z <- 1 / 3
# In case 2 above, the value of the computation won't be displayed.
# If we want to see what value the computation had, we type the name:
z
# Assignment operators work to the left or to the right.
# So we could type
x <- 1 / 2
# or we could type
1 / 2 -> x
# or we could type
assign("x", 1 / 2)
# With any of these three ways, displaying the value of x gives the same
# thing.
x

### 6. Variables
# To see a list of the currently-defined variables we can type
objects()
# If we define one more variable (which was not previously defined)...
y <- 37
# ... now it will show up in our list of objects
objects()
# So objects() shows us a list of all variables in the workspace,
# i.e., everything we're working with.
# The command ls() does the same thing as objects().
ls()
# The command rm() allows us to remove objects, e.g.,
rm(y)
### 7. VECTORS
# The following will concatenate values into a vector,
# and store the result into x.
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
# We can display the vector x, which has length 5.
x
# We can also concatenate vectors, as follows,
y <- c(x, 0, x)
# so that y is a new vector of length 11.
# In fact, y contains the entries of x in positions 1 through 5,
# and then 0 in the sixth entry,
# and then the entries of x in positions 7 through 11.
# Let's display y just to make sure.
y
# So at this point x is a vector of length 5
# and y is a vector of length 11.

### 8. Additive Rule for vectors (the recycling rule)
# Let's form a linear combination of the vectors x and y,
# and also add a 1 to each entry of the result. We do this with
v <- 3 * x + y + 1
# It seems a little funny to add vectors of different lengths.
# Here, 3*x is a vector of length 5,
# y is a vector of length 11,
# and 1 is a vector of length 1.
# The "recycling rule" is used to perform the addition.
# Recycling means, whenever a vector isn't long enough, it is repeated
# as many times as necessary. So x is repeated 2.2 times,
# and 1 is repeated 11 times. This gives us
v
# R gives a warning if the longest vector's length
# is not an integer multiple of each shorter vector length
# (as happens here: 11 is not a multiple of 5).
# We can verify that
# v[1]  equals 3*x[1] + y[1]  + 1
# v[2]  equals 3*x[2] + y[2]  + 1
# v[3]  equals 3*x[3] + y[3]  + 1
# v[4]  equals 3*x[4] + y[4]  + 1
# v[5]  equals 3*x[5] + y[5]  + 1
# v[6]  equals 3*x[1] + y[6]  + 1
# v[7]  equals 3*x[2] + y[7]  + 1
# v[8]  equals 3*x[3] + y[8]  + 1
# v[9]  equals 3*x[4] + y[9]  + 1
# v[10] equals 3*x[5] + y[10] + 1
# v[11] equals 3*x[1] + y[11] + 1
# The elements of x are repeated until x is as long as y;
# in effect y is added to 3 * c(x, x, x[1]) + 1.

### 9. Concatenate two or more vectors
# Let's do another example. Let's make y a vector of length 10,
# and z a vector of length 2.
y <- c(x, x)
y
z <- c(1, 2)
z
# Then we'll add the two vectors.
# Add y (length 10) and z (length 2), both defined above.
y + z
# We see z is recycled, i.e. repeated a total of 5 times in a row,
# and then is added to y. We get a vector of length 10.
# We can verify that
# The 1st component of y + z equals y[1] + z[1]
# The 2nd component of y + z equals y[2] + z[2]
# The 3rd component of y + z equals y[3] + z[1]
# The 4th component of y + z equals y[4] + z[2]
# The 5th component of y + z equals y[5] + z[1]
# The 6th component of y + z equals y[6] + z[2]
# The 7th component of y + z equals y[7] + z[1]
# The 8th component of y + z equals y[8] + z[2]
# The 9th component of y + z equals y[9] + z[1]
# The 10th component of y + z equals y[10] + z[2]

### 10. Mathematical operations and functions
# Lots of the usual mathematical symbols make sense in R. For instance,
# +, -, *, /, ^, %%, %/%
y - z
37 * 1000
10 / 87
2^3
# many common arithmetic functions
sin(3.14)
sin(3.1415)
pi
sin(pi)
# We don't get exactly 0. We're doing stuff numerically here.
# The help pages for related functions are given on the same help page.
# For instance,
?sin
# also displays the help for cos, tan, acos, etc.
?cos
# gives the same help page!
# Similarly,
?log
# and
?exp
# give the same help page.
# Other functions include abs, acos, acosh, asin, asinh, atan, atanh,
# ceiling, cos, cosh, cummax, cummin, cumprod, cumsum, digamma,
# exp, expm1 [see exp], floor, gamma, lgamma, log, log1p [see log], log10,
# round, sign, signif, sin, sinh, sqrt, tan, tanh, trigamma, trunc

#### 12. Summary of the vectors
# For a vector x,
x
# we can also compute functions of a vector too, for example
# The length() command is one of the more frequent commands we use
length(x)
max(x)
min(x)
range(x)
prod(x)
sum(x)
mean(x)
var(x)
sort(x)
# order is used to return the permutation needed to put x in order, e.g.,
x <- c(1, 1, 3, 2, 1, 1, 2, 3, 4, 3)
order(x)
# sort.list(x) does the same thing
sort.list(x)
# order(x, y) puts x in order but uses y to break ties among the
# elements of x
# order(x, y, z) is similar.
# When calling order(x, y, z), the vectors x, y, z should all have the
# same length.

# pmax and pmin return vectors, with each component being the largest or
# smallest of the corresponding components of the argument vectors.
# For example
x <- c(1, 2, 3, 5, 7)
y <- c(37, 22, 3, 9, 2)
z <- c(6, -3, 5, 1, 6)
# Then the ith component of pmax(x,y,z) is equal to the largest of the
# ith components of x, y, and z
pmax(x, y, z)
# The following gives NaN and a warning
sqrt(-17)
# but the following gives the square root as a complex number
sqrt(-17 + 0i)

### 13. SEQUENCES
# The following is the same as c(1,2,3,4,5,.......), but easier to type
1:30
# We first give list c(1,2,3,......), then multiply by 2
2 * 1:15
# Notice the order of precedence here, and the result:
# ":" binds tighter than "-", so this is (1:10) - 1, i.e. 0:9.
1:10 - 1
# backwards sequence
30:1
# We demonstrate some variations on the seq command:
seq(2, 10)
# Note that the seq command has 5 named arguments
# (from, to, by, length.out, along.with)
?seq
# Try the following
seq(1, 41, by = 2)
seq(1, 41, length.out = 5)
y <- 1:5
# so y has length 5
# Here we only write the name of the last argument, since it is out of
# order:
seq(1, 41, along.with = y)
# We could just have written the names of all the arguments:
seq(from = 1, to = 41, along.with = y)
# We can even use a fractional amount of space in between the numbers.
seq(1, 41, by = 2.5)
# We can use any phrase which uniquely identifies a parameter from the
# start (partial matching); for instance, len, length, etc., can be used
# for length.out. This is handy interactively, but spelling out the full
# name is clearer in scripts.
seq(1, 41, len = 5)
seq(1, 41, length = 5)
seq(1, 41, length.out = 5)
# I emphasize how useful it is to read the manual (help) pages:
?seq
seq(1, 41, 5)
seq(1, 41, by = 5)
seq(-5, 5, by = .2)
seq(length = 51, from = -5, by = .2)
# The manual has an ERROR. It says "The fifth parameter may be
# named along=vector, which if used must be the only parameter,
# and creates a sequence 1, 2, ..., length(vector ), or the
# empty sequence if the vector is empty (as it can be)."
# CORRECTION to the manual: It is perfectly OK to use along=vector
# with some other parameters.
# For instance, seq() accepts along.with together with from and to:
y <- 1:5
seq(1, 41, along.with = y)
# this gives the same result as the following two commands:
seq(1, 41, by = 10)
seq(1, 41, leng = 5)

#### 15. repeat
# OK, on to the rep() command. First, let's read the manual for it.
?rep
x <- c(10, 2, 7, 62, -5)
# The rep() command repeats an entire vector.
rep(x, times = 5)
# We can repeat each element a different number of times, by assigning
# a vector to the times parameter. Then we have
# x[1] repeated times[1] times
# x[2] repeated times[2] times
# etc. Let's try it.
y <- c(3, 1, 5, 3, 15)
# We should get 3 copies of x[1], 1 copy of x[2], 5 copies of x[3], etc.
rep(x, y)
# Serkan asked about what happens if the times vector is not long enough.
# Does it get recycled? We experimented.
# It turns out that the times vector has to be exactly the same length
# as the vector we are operating on. The y vector is not long enough here.
# (The failing calls are wrapped in try() so that the error is reported
# but the rest of the script still runs when it is sourced.)
y <- c(2, 6, 7)
try(rep(x, y))
# If the times vector is too long, we also get an error:
y <- c(2, 6, 7, 22, 16, 3, -7)
try(rep(x, y))
# So the length of the times vector has to be just the same length
# as the length of the vector we are operating on. We learn by trying!
# Here's another example that works correctly:
y <- c(2, 6, 7, 22, 16)
rep(x, y)
# The moral of story: the times vector must be the same length as x.
# Here's one more example, with the times vector defined inside the rep():
x <- 1:10
rep(x, times = c(2, 2, 2, 2, 1, 1, 1, 3, 3, 3))
# We can also repeat each element of a vector
rep(x, each = 5)
# This next example uses the "recycling rule". The vector x,
# which has length 10, is repeated 2.3 times, so that the result has
# length 23
rep(x, length.out = 23)
# Now let's run the examples from the rep() command help file.
example(rep)

### 16. LOGICAL VECTORS
x <- c(1, 5, 3, 7, 9, 2, 6, 2)
# If we evaluate a logical comparison, like x > 4, then R tests every
# element of x and outputs a TRUE for each element that is bigger than 4.
# The output for the other elements of x will be FALSE.
# x is the vector c(1, 5, 3, 7, 9, 2, 6, 2) defined at the start of this
# section.
x > 4
# We can store the resulting vector of Boolean values into a vector.
myboolvalues <- x > 4
myboolvalues
# Let's check to see that we know Boolean algebra. First the "and":
TRUE & TRUE
TRUE & FALSE
# Then the "or":
TRUE | FALSE
FALSE | FALSE
# We also have a negation operator:
a <- TRUE
!a
# We can make Boolean expressions with variables
b <- FALSE
a & b
# HELPFUL HINT: In arithmetic, TRUE evaluates to 1 and FALSE evaluates to 0
# There are 4 TRUE's here, so we get a value of 4.
TRUE + TRUE + FALSE + TRUE + FALSE + TRUE
# Another possible value besides TRUE and FALSE is NA, not available.
# Various functions are available to help us process NA's.
# For example, consider:
x <- c(1, 5, 3, 7, 9, 2, 6, 2, 6, NA, 37, -12, NA, 102)
is.na(x)
# We often use it this way, to flag the elements that are not NA's:
!is.na(x)
# Someone (Brian, I think) wanted to know if, in the expression is.na,
# is the period "." an operator? In other words, does the period have
# some special meaning?
# Unfortunately, no. The period is just part of the name of the function.
# The period isn't doing anything. For instance, the period doesn't
# do something special to stuff afterward.... it's just part of the name.
# The following would just give an error, because no function called
# is.hello exists (wrapped in try() so the script keeps running):
try(is.hello(x))
# The "dot" is just part of the name. Oh well....
# Let's recall the value of x
x
# Other operators are available, like > < <= >= == != ...
# We can compare the values in x to the values in another vector.
x
x == c(1, 10, 3, 7, 18, 222, 6, 0, 0, NA, 3, -12, 38, 103)
# R can't tell if NA's match, i.e., if they are equal. R just doesn't know,
# so any comparison involving an NA gives NA.
# If we try to compare to a shorter vector, R uses recycling.
x == c(1, 5)
# This is especially important in the following example. When the c(1,5,2)
# vector is recycled as c(1,5,2,1,5,2,1,5,2,1,5,2,1,5), the 6th element
# is "2", which will match the 6th value of x. This might be unexpected.
# x still holds the 14-element vector (with two NA's) defined above.
x
x == c(1, 5, 2)
# So recycling is happening in this example with the c(1,5,2)
# (R also warns, because 14 is not a multiple of 3).
# In class, I did not want to interrupt the flow of lecture, but
# someone asked about changing the values of Boolean variables to numbers.
# This can be done as follows:
as.numeric(TRUE)
as.numeric(FALSE)
# Some things are not a number, denoted by NaN. This is different than NA.
# Values could be classified as NaN because we can't tell if they are
# infinite, or finite, or an oscillating expression, etc.
# Here are two examples:
0 / 0
Inf - Inf
# I think that we just summarized freshman calculus.
# There are some built-in functions for working with NA and NaN
is.na(NA)
is.na(NaN)
is.nan(NA)
is.nan(NaN)
# It is probably worthwhile to read the help pages
?NA
?NaN
# These help pages also describe the is.na() and is.nan() functions.

### 17. CHARACTER VECTORS
# We can store a sequence of characters into a variable, with either
# double quotes or single quotes, for instance:
x <- "hi there"
x
x <- 'hi there'
x
# We need backslashes for certain "escape sequences":
# \n for newline
# \t for tab
y <- "hi there\n\n\n mom"
# It is a little bit disappointing that R displays y on just one line.
# We might have expected R to display 3 newlines. (Printing shows the
# escape sequences as typed; cat(y) would render them.) We will talk more
# about this later in the course, when we are doing string processing.
y
y <- "hi there \n \n \n mom"
y
# Make sure that the entire string is inside the quotes.
# Otherwise, there will be an error. For instance, typing the following
# line at the prompt gives a syntax error (it is left commented out here,
# because a syntax error would prevent the whole script from being parsed):
# y <- "hi there"\n
# The help file is in ?Quotes
?Quotes
# One helpful command is paste
?paste
# The following will paste 1 after a, then 2 after b, then 3 after c,
# and then use recycling to paste 1 after d and 2 after e:
paste(c("a", "b", "c", "d", "e"), c(1, 2, 3))
# We can change the separator to HH instead of a single space
paste(c("a", "b", "c", "d", "e"), c(1, 2, 3), sep = "HH")
# or we can eliminate the space altogether.
# Paste with an empty separator, so that nothing is inserted between the
# pieces (newer versions of R also offer paste0() as a shorthand for this).
paste(c("a", "b", "c", "d", "e"), c(1, 2, 3), sep = "")

### INDEX VECTORS
# Index vectors are really useful for accessing various parts of a vector
# There are four styles of index vectors. The first is a logical vector.
x <- c(3, NA, 7, 0, 5, 1, 8)
!is.na(x)
x > 0
(!is.na(x)) & x > 0
# This will grab all of the elements of x that are not NA's and that
# are > 0
x[(!is.na(x)) & x > 0]
# We could also do the same operation on the vector x + 1 instead:
# [In class, I forgot to add the "+1" throughout, but I fixed it here.
# J.T. gets 10 cents at the registrar for noticing my error.]
# Here is the correct version:
(x + 1)[(!is.na(x + 1)) & (x + 1) > 0]
# The second style of index vectors is to use positive integral quantities:
# This grabs the 3rd component of x:
x[3]
# We could grab the 3rd through the 5th elements of x too:
x[3:5]
# or the first four elements of x
x[1:4]
# many other variations are possible. The indices don't have to be in
# order. We could grab the 3rd element 5 times in a row, then the 4th
# element, then the 1st element three times in a row, then the 6th
# element 3 times.
x[c(3, 3, 3, 3, 3, 4, 1, 1, 1, 6, 6, 6)]
# The third style of index vectors is negative indices. Negative indices
# are used to grab all of the vector EXCEPT for the negative indices.
# For instance, we could grab all of x except for the 2nd element
x[-2]
# Or grab all of x except for the 6th element
x[-6]
x
# So negative indices are not grabbed, and everything else is grabbed.
# A question: What happens if we grab something out of bounds?
x[23]
# Answer: We get NA (not an error). No recycling is performed in such
# a case.
# The fourth style of index vectors is using a names attribute:
fruit <- c(5, 10, 1, 20)
names(fruit) <- c("orange", "banana", "apple", "peach")
fruit
# We can refer to the elements of fruit by its associated "names" vector
# now
fruit[c("apple", "orange")]
# We can store the result in our lunch, so that we don't go hungry.
lunch <- fruit[c("apple", "orange")]
# What does our lunch contain?
# lunch was created above from fruit[c("apple", "orange")].
lunch
# We can use an indexed expression to receive values too.
# For instance, recall the values stored in x:
x
# We could replace all the NA's in the vector x by 0's
is.na(x)
x[is.na(x)]
x[is.na(x)] <- 0
# Here is the new vector x, which no longer contains any NA's.
x
# So each NA got replaced by 0.
# Now let's redefine vector x.
x <- c(3, NA, 7, 0, 5, 1, 8)
# We could replace the NA's by something else (i.e., the 0 was not
# special)! Let's replace each NA by the value 502.
x[is.na(x)] <- 502
# Now let's give x some negative values too.
x[3] <- -9
x[5] <- -37
x
# We can check to see which values of x are negative
x < 0
x[x < 0]
# and then replace those values with the negative of the values
# Since the negative of a negative is positive, this is equivalent to
# removing any negatives and just making them positive instead.
x[x < 0] <- -x[x < 0]
# So the -9 in x[3] gets replaced by 9
# and the -37 in x[5] gets replaced by 37
x

### cat
# cat() writes the raw text (without a trailing newline); print() shows
# the quoted R representation.
cat("hello")
print("hello")
# stop() signals an error. It is wrapped in try() here so that the error
# message is shown but the remaining lines of the script still run.
try(stop("error: stop"))

#### EXERCISE
# Turn to page 14 of the book "usingR" and finish the exercises.

# Now we will type q() to quit R
q()