R小私集
Fibonacci函数:
fibonacci = function(n)
{
## Author: Ryan Rosario & Jason Fan
## Launch Date: 2010-07-27
## Update Date: 2012-01-20
## Purpose: calculate Fibonacci sequence
if(n < 1) stop(“Input must be an integer >= 1”)
if (n == 1 | n == 2) 1
else fibonacci(n – 1) + fibonacci(n – 2)
}
> sapply(1:20, fibonacci)
[1] 1 1 2 3 5 8 13 21 34 55 89 144 233 377
[15] 610 987 1597 2584 4181 6765
读取Excel 2007文件的函数:
readXLSX = function(file.path, sheet.name)
{
## Author: Jason Fan
## Launch Date: 2011-11-01
## Update Date: 2012-01-10
## Purpose: read .xlsx format file
library(RODBC)
ch.xlsx = odbcConnectExcel2007(file.path)
XLSX = SQLFetch(ch.xlsx, sheet.name, as.is = T)
close(ch.xlsx)
detach(“package:RODBC”)
XLSX
}
读取数据库数据的函数:
readDB = function(sql.code)
{
## Author: Edmund Lu & Jason Fan
## Launch Date: 2011-11-01
## Update Date: 2012-01-10
## Purpose: read data from a database
library(RODBC)
ch.db = odbcConnect(dsn = ‘××××’, uid = ‘××××’, pwd = ‘××××’)
DB = sqlQuery(ch.db, sql.code, as.is = T)
close(ch.db)
detach(‘package:RODBC’)
DB
}
反秩函数:
revRank = function(x, na.last = ‘keep’, ties.method = ‘average’)
{
## Author: Jason Fan
## Launch Date: 2011-12-01
## Update Date: 2012-02-20
## Purpose: opposite to rank() function
## Note: read rank() help file for arguments na.last and ties.method
rank(-x, na.last, ties.method)
}
> set.seed(20120220)
> x = sample((-2):7, replace = T)
> x
[1] 1 1 -1 5 6 -2 4 3 -1 0
> rank(x)
[1] 5.5 5.5 2.5 9.0 10.0 1.0 8.0 7.0 2.5 4.0
> revRank(x)
[1] 5.5 5.5 8.5 2.0 1.0 10.0 3.0 4.0 8.5 7.0
> names(x) = letters[1:10]
> x
a b c d e f g h i j
1 1 -1 5 6 -2 4 3 -1 0
> rank(x)
a b c d e f g h i j
5.5 5.5 2.5 9.0 10.0 1.0 8.0 7.0 2.5 4.0
> revRank(x)
a b c d e f g h i j
5.5 5.5 8.5 2.0 1.0 10.0 3.0 4.0 8.5 7.0
月跨度函数:
monthSpan = function(from.date, to.date)
{
## Author: Jason Fan & Yishuo Deng
## Launch Date: 2011-09-15
## Update Date: 2012-02-01
## Purpose: Count the number of months between 2 dates
date1 = paste(substr(from.date, 1, 7), “01”, sep = “-“)
date2 = paste(substr(to.date, 1, 7), “01”, sep = “-“)
number.of.months = length(seq(from = as.Date(date1), to = as.Date(date2), by = “month”))
number.of.months
}
> monthSpan(from.date = “2010-03-25”, to.date = “2010-06-28”)
[1] 4
找重函数:
findRep = function(x)
{
## Author: Jason Fan
## Launch Date: 2012-02-29
## Update Date: 2012-07-25
## Purpose: find the repeated element(s) in a vector
return(sort(unique(x[duplicated(x)])))
}
> set.seed(123)
> x1 = sample(10, replace = T)
> x2 = sample(letters, 10, replace = T)
> x3 = c(T, F, T)
> x1
[1] 3 8 5 9 10 1 6 9 6 5
> x2
[1] “y” “l” “r” “o” “c” “x” “g” “b” “i” “y”
> x3
[1] TRUE FALSE TRUE
> findRep(x1)
[1] 5 6 9
> findRep(x2)
[1] “y”
> findRep(x3)
[1] TRUE
荟萃运算函数:
> (A <- c(sort(sample(1:20, 9)),NA))
[1] 8 10 11 12 14 15 16 18 20 NA
> (B <- c(sort(sample(3:23, 7)),NA))
[1] 4 6 9 10 13 17 21 NA
>
> # A∩B
> intersect(A, B)
[1] 10 NA
>
> # A∪B
> union(A, B)
[1] 8 10 11 12 14 15 16 18 20 NA 4 6 9 13 17 21
>
> # A-B & B-A
> setdiff(A, B)
[1] 8 11 12 14 15 16 18 20
> setdiff(B, A)
[1] 4 6 9 13 17 21
>
> # A∪B = (A-B)∪(A∩B)∪(B-A)
> setequal(union(A, B), c(setdiff(A, B), intersect(A, B), setdiff(B, A)))
[1] TRUE
怎么利用tapply
> set.seed(20120507)
> FactorData = data.frame(stock_id = rep(1:4, 4),
+ report_date = rep(paste(rep(2008:2011, each = 4), “12-31”, sep = “-“)),
+ net_profit = sample(1e8:2e8, 16),
+ industry_character = rep(LETTERS[1:4], 4))
> FactorData
stock_id report_date net_profit industry_character
1 1 2008-12-31 143053699 A
2 2 2008-12-31 143445207 B
3 3 2008-12-31 147623078 C
4 4 2008-12-31 167865770 D
5 1 2009-12-31 102574966 A
6 2 2009-12-31 101809770 B
7 3 2009-12-31 150244678 C
8 4 2009-12-31 112340973 D
9 1 2010-12-31 198702380 A
10 2 2010-12-31 162393928 B
11 3 2010-12-31 141121733 C
12 4 2010-12-31 120825197 D
13 1 2011-12-31 189642339 A
14 2 2011-12-31 157312958 B
15 3 2011-12-31 165180411 C
16 4 2011-12-31 118997537 D
> FactorMat = tapply(FactorData$net_profit, list(FactorData$report_date, FactorData$stock_id), function(x) x)
> FactorMat
1 2 3 4
2008-12-31 143053699 143445207 147623078 167865770
2009-12-31 102574966 101809770 150244678 112340973
2010-12-31 198702380 162393928 141121733 120825197
2011-12-31 189642339 157312958 165180411 118997537
> FactorSum = tapply(1:nrow(FactorData), list(FactorData$industry_character), function(x) sum(FactorData[x, “net_profit”]))
> FactorSum
A B C D
633973384 564961863 604169900 520029477
怎么在原数据文件中添加新记录
## Existing Data
(X = data.frame(date = format(Sys.Date() – 4:0, “%Y-%m-%d”), V1 = 1:5, V2 = LETTERS[1:5], stringsAsFactors = FALSE))
write.table(X, “Test.txt”, row.names = FALSE, quote = FALSE)
## New Records
(X_Plus = data.frame(date = format(Sys.Date() + 1:2, “%Y-%m-%d”), V1 = 6:7, V2 = LETTERS[6:7], stringsAsFactors = FALSE))
## Add New Records
write(t(X_Plus), “Test.txt”, ncolumns = ncol(X_Plus), append = TRUE)
## Check
(X_New = read.table(“Test.txt”, stringsAsFactors = FALSE, header = TRUE))
## Delete Test File
unlink(“Test.txt”, recursive = T)
怎么求导
### 操作函数D()求解一阶及高阶导数
## 求解一阶导数
a = 2
expr = paste(“sin(“, a, ” * x) + cos(x^”, a, “)”, sep = “”)
expr
D(expr, “x”) # 错误
D(expression(expr), “x”) # 错误
D(expression(sin(2 * x) + cos(x^2)), “x”) # 正确
D(parse(text = expr), “x”) # 正确
x = 2 # 将详细值代入导函数
eval(D(parse(text = expr), “x”))
## 求解高阶导数的函数
DD = function(expr, name, order = 1)
{
if(order < 1) stop(“‘order’ must be >= 1”)
if(order == 1) D(expr, name)
else DD(D(expr, name), name, order – 1)
}
DD(parse(text = expr), “x”, 1) # 一阶导数
DD(parse(text = expr), “x”, 2) # 二阶导数
DD(parse(text = expr), “x”, 3) # 三阶导数
### 操作函数deriv()求解一阶偏导数
(dx = deriv(parse(text = expr), “x”))
eval(dx)
eval(parse(text = expr)) # 原函数的值
dxy = deriv(expression(sin(cos(x + y^2))), c(“x”, “y”))
y = 1
eval(dxy)
## 与上面的功效一样
eval(expression(sin(cos(x + y^2)))) # 原函数的值
eval(D(expression(sin(cos(x + y^2))), “x”)) # 关于x的偏导
eval(D(expression(sin(cos(x + y^2))), “y”)) # 关于y的偏导