Posts Tagged MySQL

Use MySQL database in Python

PyMySQL its a pure python client library, you can install using pip install PyMySQL (find documentation at https://pymysql.readthedocs.io/en/latest/).

The following is a simple example to demonstrate how to use the library.

#!/usr/bin/env python
from __future__ import print_function

import pymysql
conn = pymysql.connect(host='localhost', port=3306, user='root', passwd='', db='mysql')
cur = conn.cursor()
cur.execute("SELECT Host,User FROM user")
print(cur.description)
print()
for row in cur:
    print(row)

cur.close()
conn.close()
Share

Tags: , ,

Search turning points in the trading history of a stock

Stock prices fluctuate everyday. It is impossible to pinpoint the bottom or peak of a stock. But by analyzing the trading history of a stock, we can figure a some unusual signals in the trading history of a stock. By analyzing the time series data of the stock trading history, we can easily figure out major turning points. Through analyzing stock data daily, we may be able to watch out when a major turning point appears. The following R program to analyze and visualize turning points in stock trading history. The program is used with MySQL database. The results of data analysis will be stored into the MySQL database and be summarized in the MySQL. After that, summary information will be loaded to R for plotting.  We only present the R script here. The MySQL database structure will not be displayed.

library(tseries) # used for runs.test
library(DBI) # required for RMySQL
library(RMySQL) # for interacting with MySQL database

#=========== functions =============
# execute query on a MySQL database
dbQuery <- function(Query)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  dbSendStatement(con,Query)
  dbDisconnect(con)
  #return(res)
}

# read a table from the MySQL database
dbRead <- function(tablename)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  myresult <- dbReadTable(con, tablename)
  dbDisconnect(con)
  return(myresult)
}

# read stock list from the MySQL database
dbReadStockList <- function(tablename)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  mystocks <- dbReadTable(con, tablename)
  dbSendQuery(con,"call drop_stocktables();")
  dbDisconnect(con)
  return(mystocks[,2])
}

# write analysis result to the MySQL database
dbWrite <- function(tablename, myvar)
{
  mysql <- dbDriver("MySQL")
  con<-dbConnect(mysql,dbname="stocks",username="username",password="password") # create connection
  dbWriteTable(con, tablename, myvar, overwrite=TRUE)
  dbDisconnect(con)
}

# conduct the runs test for a given time series
myruns <- function(vobs, n1, n2) 
{
  n <- nrow(vobs)
  c3 <- rep(-1,n1)
  c4 <- rep(1,n2)

  rank <- c(c3,c4)
  rank <- as.factor(rank)
  
  dt <- data.frame(vobs,rank)
  dt <- dt[order(dt$vobs),]
  
  res <- runs.test(dt$rank)
  return(data.frame(statistic=res$statistic, p.value=res$p.value))
}

# do multiple runs test for a given data set 
# by setting different break point
mytests <- function(obs)
{
  res <- data.frame(varn=NA, nobs=NA, ndays=NA, date1=NA, date2=NA, varname=NA, varvalue=NA, standardN=NA, p.value=NA)
  nvar <- length(obs)
  nobs <- nrow(obs)
  for (i in 2:(nvar-1))
  {
    vobs <- obs[,i]
    #for (n1 in (as.integer(nobs/2 - nobs/4)+1):(as.integer(nobs/2)))
    for (n1 in 3:5)
    {  
      test <- myruns(vobs, n1, nobs-n1)
      res <- rbind(res,data.frame(varn=i, nobs=nrow(obs), ndays=n1, date1=obs$Date[1], date2=obs$Date[n1], varname=colnames(obs[i]), varvalue=vobs[n1], standardN=unname(test$statistic), p.value=unname(test$p.value)))
    }
  }
  res <- res[-1,]
  return(res)
}

myanalysis <- function(stocksymbol)
{
  stockdata <- read.csv(paste(stocksymbol,"csv",sep="."))
  stockdata1 <- subset(stockdata, as.POSIXct(stockdata$Date)>=as.POSIXct("2016-10-01"))

  # set data frame for output
  myres <- data.frame(varn=NA, nobs=NA, ndays=NA, date1=NA, date2=NA, varname=NA, varvalue=NA, standardN=NA, p.value=NA)
  # construct data set
  data <- stockdata[1:(nrow(stockdata)-1),]
  data1 <- stockdata[2:nrow(stockdata),]
  data2 <- data
  # compute the up or down values
  for (i in 2:length(data)){ 
    data2[,i] <- data[,i] - data1[,i]
  }
  data <- data2
  # 
  while (nrow(data) >= 10)
  {
    ngroups = nrow(data)
    if (ngroups > 20) { ngroups = 20 }
    for (j in seq(10,ngroups,by=2))
    {
      mo <- data[1:j,]
      res1 <- mytests(mo)
      myres <- rbind(myres, res1)
    }
  # romove the latest one 
  data <- data[-1,]
  }

  # remove the first row that includes NAs
  myres <- myrses[-1,]
  # write to csv file
  write.csv(myres,"runstest.csv")
  # write to MySQL data base
  dbWrite(stocksymbol, myres)
}

#========== Main program ==============
# Search turning point of any stocks automatically 
setwd("C:\\data\\stocks")
# import a stock list from the MySQL database
mystocks <- dbReadStockList("_stocks")
# conduct analysis for all stocks
for (stocksymbol in mystocks) 
{
	print(stocksymbol)
	myanalysis(stocksymbol)
}

# summarize turningpoint data
dbQuery("call summarize_turningpoint();")

# look at the general trend of turning point
tpcount <- dbRead("view_tpcount")
tpcount$tp_date <- as.Date(tpcount$tp_date, "%Y-%m-%d")
plot(tpcount$counts ~ tpcount$tp_date, xaxt="n", type="l")
axis(1, tpcount$tp_date, format(tpcount$tp_date, "%b %d"), cex.axis = .7)


# look at the turning point of each company
tpcompanycount <- dbRead("view_tpcompanycount")
tpcompanycount$tp_date <- as.Date(tpcompanycount$tp_date)
tpcompanycount$stocksymbol <- as.factor(tpcompanycount$stocksymbol)
#plot(tpcompanycount[,2:4])
# https://www.r-bloggers.com/conditioning-and-grouping-with-lattice-graphics/
library(lattice)
xyplot(countsum ~ tp_date | factor(stocksymbol), data=tpcompanycount, pch=19,
       main="Turning Point Significance Count Sum", xlab="date",  ylab="count",layout=c(3,1),type=c("p","g"))

Share

Tags: , , , ,

R import MySQL data tables

R language is free. Its functions are very powerful. It can be used to do all kinds of data analysis and plot drawing. I always use MySQL database to manage my data. R has the capability to read data from MySQL databases. Below is a simple example to demonstrate the process.

1. Install database import packages

In R console to enter and execute the following command:
install.packages(“RMySQL”)

2. Try to connect to MySQL database and list all data tables in the database.

library(RMySQL) # import package
con<-dbConnect(dbDriver(“MySQL”),dbname=”ddddd”,username=”zzzz”,password=”ppppp”) # create connection
dbListTables(con) # list data tables

3. Deal with the UTF8 encoding

Visit https://github.com/rstats-db/RMySQL/issues/2 to get detailed information for dealing with UTF8 character set.

Another article in Chinese (http://www.cnblogs.com/xiongchang18/p/4955813.html) also provides solution to solve the encoding problem.

Share

Tags: ,

MySQL transpose function of aggregation

An article published at the following providing a lot of examples.

http://www.artfulsoftware.com/infotree/queries.php

At the stackoverflow website there is an example as below. It helps me a lot.

The following text from the site.

Question:

Here is what my current mysql table looks like:

PunchID EmpID PunchEvent PunchDateTime
1 0456 clockin 5/14/2013 8:36:26 AM
48 0456 breakout 5/14/2013 12:01:29 PM
53 0456 breakin 5/14/2013 12:28:31 PM
54 0456 clockout 5/14/2013 2:28:33 PM
57 0456 clockin 5/15/2013 7:38:34 AM
58 0456 breakout 5/15/2013 7:38:39 AM
59 0456 breakin 5/15/2013 7:38:41 AM
60 0456 clockout 5/15/2013 7:38:42 AM
Now I want to return a result set that is grouped based on the day of the week like so:

Day ClockIn BreakOut BreakIn ClockOut
Tuesday 8:36:26 AM 12:01:29 PM 12:28:31 PM 2:28:33 PM
Wednesday 7:38:34 AM etc, etc…
This is my current query. But it only returns the first punch for each day.

Answer:

SELECT DATE_FORMAT(PunchDateTime, ‘%W’) DAY,
MAX(CASE WHEN PunchEvent = ‘ClockIn’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) ClockIn,
MAX(CASE WHEN PunchEvent = ‘BreakOut’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) BreakOut,
MAX(CASE WHEN PunchEvent = ‘BreakIn’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) BreakIn,
MAX(CASE WHEN PunchEvent = ‘ClockOut’ THEN DATE_FORMAT(PunchDateTime, ‘%r’) END) ClockOut
FROM tableName
WHERE EmpID = 456
GROUP BY DATE_FORMAT(PunchDateTime, ‘%W’)
ORDER BY PunchDateTime
SQLFiddle Demo
OUTPUT

╔═══════════╦═════════════╦═════════════╦═════════════╦═════════════╗
║ DAY ║ CLOCKIN ║ BREAKOUT ║ BREAKIN ║ CLOCKOUT ║
╠═══════════╬═════════════╬═════════════╬═════════════╬═════════════╣
║ Tuesday ║ 08:36:26 AM ║ 12:01:29 PM ║ 12:28:31 PM ║ 02:28:33 PM ║
║ Wednesday ║ 07:38:34 AM ║ 07:38:39 AM ║ 07:38:41 AM ║ 07:38:42 AM ║
╚═══════════╩═════════════╩═════════════╩═════════════╩═════════════╝

Share

Tags: , , , ,

GROUP domain from url in MySql

Problem

I have a large database that contains many urls, there are many domains repeating and i;m trying to get only the domain. eg:

http://example.com/someurl.html
http://example.com/someurl_on_the_same_domain.html
http://example.net/myurl.php
http://example.org/anotherurl.php

and i want to get only domains, eg:

http://example.com
http://example.net
http://example.org

My query is:

SELECT idsite FROM table GROUP BY site ORDER BY id DESC LIMIT 50

Solution

SELECT
    SUBSTR(site, 1 , IF(LOCATE('/', site, 8), LOCATE('/', site, 8)-1, LENGTH(site)))
        as OnlyDomain
    FROM tablename
    GROUP BY OnlyDomain
    ORDER BY id DESC LIMIT 50

The posted was from the following URL:
http://stackoverflow.com/questions/3590908/group-domain-from-url-in-mysql
Share

Tags: , ,