Dealing with Date in R language

In R there is a class called Date. If we want to get the components of a date variable or date string, we can use the following function to do it.

toNumerics <- function(Date)
{
stopifnot(inherits(Date, c(“Date”, “POSIXt”)))
day <- as.numeric(strftime(Date, format = “%d”))
month <- as.numeric(strftime(Date, format = “%m”))
year <- as.numeric(strftime(Date, format = “%Y”))
list(year = year, month = month, day = day)
}

The following code uses the above function to deal with date and datestr.

today <- Sys.Date()
mydatecom <- toNumerics(today)
day <- mydatecom$day
month <- mydatecom$month
year <- mydatecom$year
mydatestr <- paste(year,formatC(month, width=2, flag=”0″),formatC(day, width=2, flag=”0″), sep=”-“)
data1 <- subset(data, as.POSIXct(data$Date)>=as.POSIXct(mydatestr))

mydatestr likes like “2017-03-10”. By using as.POSIXct can convert the datestr to the class POSIXct and then used in the comparison.

Share
Posted in R, R/S-Plus | Leave a comment

Search turning points in the trading history of a stock

Stock prices fluctuate everyday. It is impossible to pinpoint the bottom or peak of a stock. But by analyzing the trading history of a stock, we can figure a some unusual signals in the trading history of a stock. By analyzing the time series data of the stock trading history, we can easily figure out major turning points. Through analyzing stock data daily, we may be able to watch out when a major turning point appears. The following R program to analyze and visualize turning points in stock trading history. The program is used with MySQL database. The results of data analysis will be stored into the MySQL database and be summarized in the MySQL. After that, summary information will be loaded to R for plotting.  We only present the R script here. The MySQL database structure will not be displayed.

library(tseries) # used for runs.test
library(DBI) # required for RMySQL
library(RMySQL) # for interacting with MySQL database

#=========== functions =============
# execute query on a MySQL database
dbQuery <- function(Query)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  dbSendStatement(con,Query)
  dbDisconnect(con)
  #return(res)
}

# read a table from the MySQL database
dbRead <- function(tablename)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  myresult <- dbReadTable(con, tablename)
  dbDisconnect(con)
  return(myresult)
}

# read stock list from the MySQL database
dbReadStockList <- function(tablename)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  mystocks <- dbReadTable(con, tablename)
  dbSendQuery(con,"call drop_stocktables();")
  dbDisconnect(con)
  return(mystocks[,2])
}

# write analysis result to the MySQL database
dbWrite <- function(tablename, myvar)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  dbWriteTable(con, tablename, myvar, overwrite=TRUE)
  dbDisconnect(con)
}

# conduct the runs test for a given time series
myruns <- function(vobs, n1, n2) 
{
  n <- nrow(vobs)
  c3 <- rep(-1,n1)
  c4 <- rep(1,n2)

  rank <- c(c3,c4)
  rank <- as.factor(rank)
  
  dt <- data.frame(vobs,rank)
  dt <- dt[order(dt$vobs),]
  
  res <- runs.test(dt$rank)
  return(data.frame(statistic=res$statistic, p.value=res$p.value))
}

# do multiple runs test for a given data set 
# by setting different break point
mytests <- function(obs)
{
  res <- data.frame(varn=NA, nobs=NA, ndays=NA, date1=NA, date2=NA, varname=NA, varvalue=NA, standardN=NA, p.value=NA)
  nvar <- length(obs)
  nobs <- nrow(obs)
  for (i in 2:(nvar-1))
  {
    vobs <- obs[,i]
    #for (n1 in (as.integer(nobs/2 - nobs/4)+1):(as.integer(nobs/2)))
    for (n1 in 3:5)
    {  
      test <- myruns(vobs, n1, nobs-n1)
      res <- rbind(res,data.frame(varn=i, nobs=nrow(obs), ndays=n1, date1=obs$Date[1], date2=obs$Date[n1], varname=colnames(obs[i]), varvalue=vobs[n1], standardN=unname(test$statistic), p.value=unname(test$p.value)))
    }
  }
  res <- res[-1,]
  return(res)
}

myanalysis <- function(stocksymbol)
{
  stockdata <- read.csv(paste(stocksymbol,"csv",sep="."))
  stockdata1 <- subset(stockdata, as.POSIXct(stockdata$Date)>=as.POSIXct("2016-10-01"))

  # set data frame for output
  myres <- data.frame(varn=NA, nobs=NA, ndays=NA, date1=NA, date2=NA, varname=NA, varvalue=NA, standardN=NA, p.value=NA)
  # construct data set
  data <- stockdata[1:(nrow(stockdata)-1),]
  data1 <- stockdata[2:nrow(stockdata),]
  data2 <- data
  # compute the up or down values
  for (i in 2:length(data)){ 
    data2[,i] <- data[,i] - data1[,i]
  }
  data <- data2
  # 
  while (nrow(data) >= 10)
  {
    ngroups = nrow(data)
    if (ngroups > 20) { ngroups = 20 }
    for (j in seq(10,ngroups,by=2))
    {
      mo <- data[1:j,]
      res1 <- mytests(mo)
      myres <- rbind(myres, res1)
    }
  # romove the latest one 
  data <- data[-1,]
  }

  # remove the first row that includes NAs
  myres <- myrses[-1,]
  # write to csv file
  write.csv(myres,"runstest.csv")
  # write to MySQL data base
  dbWrite(stocksymbol, myres)
}

#========== Main program ==============
# Search turning point of any stocks automatically 
setwd("C:\\data\\stocks")
# import a stock list from the MySQL database
mystocks <- dbReadStockList("_stocks")
# conduct analysis for all stocks
for (stocksymbol in mystocks) 
{
	print(stocksymbol)
	myanalysis(stocksymbol)
}

# summarize turningpoint data
dbQuery("call summarize_turningpoint();")

# look at the general trend of turning point
tpcount <- dbRead("view_tpcount")
tpcount$tp_date <- as.Date(tpcount$tp_date, "%Y-%m-%d")
plot(tpcount$counts ~ tpcount$tp_date, xaxt="n", type="l")
axis(1, tpcount$tp_date, format(tpcount$tp_date, "%b %d"), cex.axis = .7)


# look at the turning point of each company
tpcompanycount <- dbRead("view_tpcompanycount")
tpcompanycount$tp_date <- as.Date(tpcompanycount$tp_date)
tpcompanycount$stocksymbol <- as.factor(tpcompanycount$stocksymbol)
#plot(tpcompanycount[,2:4])
# https://www.r-bloggers.com/conditioning-and-grouping-with-lattice-graphics/
library(lattice)
xyplot(countsum ~ tp_date | factor(stocksymbol), data=tpcompanycount, pch=19,
       main="Turning Point Significance Count Sum", xlab="date",  ylab="count",layout=c(3,1),type=c("p","g"))

Share
Posted in MySQL, R/S-Plus, Tutorial | Tagged , , , , | Leave a comment

Data Analysis of Field Experiments in Agriculture with R

I wrote a series of lectures about analyzing data from agricultural experiments with Minitab long time ago. Since then I learned using SAS and R to analyzing experimental data. In order to share my experiences in data management and analysis in the three software environments I started to put together data analysis, experimental design, and advanced data analysis in SAS, Minitab and R. Since the original series of lectures were written in Chinese, I will put all thing in Chinese first and then in English.

Share
Posted in R/S-Plus, Tips, Tutorial | Tagged , , , , , | Leave a comment

R import MySQL data tables

R language is free. Its functions are very powerful. It can be used to do all kinds of data analysis and plot drawing. I always use MySQL database to manage my data. R has the capability to read data from MySQL databases. Below is a simple example to demonstrate the process.

1. Install database import packages

In R console to enter and execute the following command:
install.packages(“RMySQL”)

2. Try to connect to MySQL database and list all data tables in the database.

library(RMySQL) # import package
con<-dbConnect(dbDriver(“MySQL”),dbname=”ddddd”,username=”zzzz”,password=”ppppp”) # create connection
dbListTables(con) # list data tables

3. Deal with the UTF8 encoding

Visit https://github.com/rstats-db/RMySQL/issues/2 to get detailed information for dealing with UTF8 character set.

Another article in Chinese (http://www.cnblogs.com/xiongchang18/p/4955813.html) also provides solution to solve the encoding problem.

Share
Posted in R/S-Plus, Tips | Tagged , | Leave a comment

MySQL transpose function of aggregation

An article published at the following providing a lot of examples.

http://www.artfulsoftware.com/infotree/queries.php

At the stackoverflow website there is an example as below. It helps me a lot.

The following text from the site.

Question:

Here is what my current mysql table looks like:

PunchID EmpID PunchEvent PunchDateTime
1 0456 clockin 5/14/2013 8:36:26 AM
48 0456 breakout 5/14/2013 12:01:29 PM
53 0456 breakin 5/14/2013 12:28:31 PM
54 0456 clockout 5/14/2013 2:28:33 PM
57 0456 clockin 5/15/2013 7:38:34 AM
58 0456 breakout 5/15/2013 7:38:39 AM
59 0456 breakin 5/15/2013 7:38:41 AM
60 0456 clockout 5/15/2013 7:38:42 AM
Now I want to return a result set that is grouped based on the day of the week like so:

Day ClockIn BreakOut BreakIn ClockOut
Tuesday 8:36:26 AM 12:01:29 PM 12:28:31 PM 2:28:33 PM
Wednesday 7:38:34 AM etc, etc…
This is my current query. But it only returns the first punch for each day.

Answer:

SELECT DATE_FORMAT(PunchDateTime, ‘%W’) DAY,
MAX(CASE WHEN PunchEvent = ‘ClockIn’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) ClockIn,
MAX(CASE WHEN PunchEvent = ‘BreakOut’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) BreakOut,
MAX(CASE WHEN PunchEvent = ‘BreakIn’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) BreakIn,
MAX(CASE WHEN PunchEvent = ‘ClockOut’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) ClockOut
FROM tableName
WHERE EmpID = 456
GROUP BY DATE_FORMAT(PunchDateTime, ‘%W’)
ORDER BY PunchDateTime
SQLFiddle Demo
OUTPUT

╔═══════════╦═════════════╦═════════════╦═════════════╦═════════════╗
║ DAY ║ CLOCKIN ║ BREAKOUT ║ BREAKIN ║ CLOCKOUT ║
╠═══════════╬═════════════╬═════════════╬═════════════╬═════════════╣
║ Tuesday ║ 08:36:26 AM ║ 12:01:29 PM ║ 12:28:31 PM ║ 02:28:33 PM ║
║ Wednesday ║ 07:38:34 AM ║ 07:38:39 AM ║ 07:38:41 AM ║ 07:38:42 AM ║
╚═══════════╩═════════════╩═════════════╩═════════════╩═════════════╝

Share
Posted in MySQL | Tagged , , , , | Leave a comment