Commit bbe079e5 authored by Imanol Perez
Browse files

Upload new file

parent 431e94ee
import numpy as np
import sigLearn
import pandas as pd
import pandas.io.data as web
import datetime
import time
import matplotlib.dates as mdates
from tickers import *
from random import shuffle
class Stock:
    """
    Bundles what is known about one stock: its data stream, the country it
    belongs to, and the point that represents that country.
    """

    def __init__(self, data, country):
        # Keep the stream as a float32 array.
        self.data = np.array(data, dtype=np.float32)
        # Remember the country of the stock.
        self.country = country
        # The model is trained against vector outputs, so the country is
        # also stored as the point that country_to_point assigns to it.
        self.point = country_to_point(country)
def country_to_point(country):
    """
    Map a country code to the point that stands for it.

    Known codes are "US", "UK" and "DE"; any other key raises KeyError.
    """
    points_by_country = dict(US=(1, 0), UK=(-1, 0), DE=(0, 1))
    return points_by_country[country]
def string2datenum(s, f):
    """
    Converts a string date in format f to a matplotlib date number.

    Arguments:
        s: string, date that has to be converted to a number
        f: string, strptime format of s

    Returns:
        The value matplotlib.dates.date2num gives for the parsed datetime.

    Raises:
        ValueError: if s does not match the format f.
    """
    # Parse straight into a datetime. Going through time.mktime() and
    # datetime.fromtimestamp() would route the value through the local
    # timezone, which can shift times around DST transitions, discards
    # fractional seconds and can fail for dates the platform's mktime()
    # does not support.
    parsed = datetime.datetime.strptime(s, f)
    return mdates.date2num(parsed)
def getData(ticker, start, end):
    """
    Downloads the "Close" and "Volume" columns of a ticker from the
    "google" source for the period between start and end.

    Returns the rows as an array whose first column (the former index,
    presumably the trading date - confirm against the data source) has
    been replaced by the number string2datenum produces for it.
    """
    frame = web.DataReader(ticker, "google", start, end)
    rows = frame[["Close", "Volume"]].reset_index().values
    # Each row is a view into the array, so assigning to row[0] rewrites
    # the first column in place.
    for row in rows:
        row[0] = string2datenum(str(row[0]), "%Y-%m-%d %H:%M:%S")
    return rows
def findMin(p, A):
    """
    Finds the point in A that is closest to p (Euclidean distance).

    Arguments:
        p: sequence of numbers, the query point
        A: iterable of points with as many coordinates as p

    Returns:
        The element of A nearest to p, exactly as it is stored in A. When
        several elements are equally close the earliest one is returned.
        If A is empty, (0, 0) is returned.
    """
    # Convert the query point once instead of once per candidate.
    target = np.asarray(p)

    def distance(candidate):
        return np.linalg.norm(np.asarray(candidate) - target)

    candidates = list(A)
    if not candidates:
        # Historical fallback value for an empty candidate set.
        return (0, 0)
    # min() keeps the first of several equally small keys, which matches a
    # strict "smaller than the best so far" scan.
    return min(candidates, key=distance)
def accuracy(predictions, y):
    """
    Given a list of predictions and a list of correct values y,
    it calculates the accuracy of the predictions.

    Every prediction is snapped to the closest of the three label points
    (1, 0), (-1, 0) and (0, 1) with findMin and then compared with the
    entry of y at the same position.

    Arguments:
        predictions: indexable collection of predicted points, holding at
            least as many entries as y
        y: sequence of the correct label points

    Returns:
        The fraction of correct guesses as a float between 0.0 and 1.0
        (not a percentage). 0.0 is returned when y is empty.
    """
    points = [[1, 0], [-1, 0], [0, 1]]
    total = len(y)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    # Coordinates are compared in order. Comparing them as sets would make
    # (1, 0) and (0, 1) look identical, since both collapse to {0, 1}.
    guesses = sum(
        1
        for i, expected in enumerate(y)
        if tuple(findMin(predictions[i], points)) == tuple(expected)
    )
    return guesses / float(total)
# We will consider data from 2016.
start = datetime.datetime(2016, 1, 1)
end = datetime.datetime(2017, 1, 1)

# Load data from each company; tickers maps a country to its tickers.
data = []
for country in tickers:
    print("Loading companies from "+country+"...")
    for company in tickers[country]:
        companyData = getData(company, start, end)
        # If the company doesn't have any data, ignore it.
        if len(companyData) == 0:
            continue
        data.append(Stock(companyData, country))
    # NOTE(review): assumed to be printed once per country, pairing with
    # the "Loading companies from ..." message above - confirm.
    print("Done.")

# We randomly divide the dataset into two subsets: the training_set,
# which has 70% of the data, and the testing_set, with the remaining 30%.
shuffle(data)
split_index = int(0.7*len(data))
training_set = data[:split_index]
# Everything after the split point is the test set. Slicing gives the same
# partition as filtering out the training companies one by one, without
# scanning the training list for every company.
testing_set = data[split_index:]

# The inputs and outputs to train the model are constructed.
inputs = [company.data for company in training_set]
outputs = [company.point for company in training_set]

# Inputs and outputs to test the model are built.
inputsTEST = [company.data for company in testing_set]
outputsTEST = [company.point for company in testing_set]

# We apply the model for signature orders 1 to 4.
for signature_order in range(1, 5):
    # The model is trained.
    model = sigLearn.sigLearn(order=signature_order)
    model.train(inputs, outputs)
    # We calculate the predictions.
    predictions = model.predict(inputsTEST)
    # We check the accuracy of our predictions, and print it then.
    print(accuracy(predictions, outputsTEST))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment