Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Imanol Perez
identifying-which-country-is-a-stock-from
Commits
bbe079e5
Commit
bbe079e5
authored
Jun 15, 2017
by
Imanol Perez
Browse files
Upload new file
parent
431e94ee
Changes
1
Hide whitespace changes
Inline
Side-by-side
classify_stocks.py
0 → 100644
View file @
bbe079e5
import
numpy
as
np
import
sigLearn
import
pandas
as
pd
import
pandas.io.data
as
web
import
datetime
import
time
import
matplotlib.dates
as
mdates
from
tickers
import
*
from
random
import
shuffle
class Stock:
    '''
    Bundles one company's data stream with the country it is labelled with.

    Attributes set by the constructor:
        data: the stream passed in, copied into a float32 numpy array.
        country: the country key given by the caller.
        point: the target vector for that country, i.e. the value
            returned by country_to_point(country).
    '''

    def __init__(self, data, country):
        # Keep the raw label around.
        self.country = country

        # The stream is stored as a float32 array.
        stream = np.array(data, dtype='float32')
        self.data = stream

        # The model is trained against a vector output, so the label is
        # also stored as the point that country_to_point assigns to it.
        target = country_to_point(country)
        self.point = target
def country_to_point(country):
    '''
    Returns the 2-D point that represents a country.

    The known keys are "US", "UK" and "DE"; looking up any other key
    raises KeyError.
    '''
    points_by_country = {}
    points_by_country["US"] = (1, 0)
    points_by_country["UK"] = (-1, 0)
    points_by_country["DE"] = (0, 1)
    return points_by_country[country]
def string2datenum(s, f):
    '''
    Converts a string date in format f to a number

    Arguments:
        s: string, date that has to be converted
        f: string, strptime-style format of s

    Returns:
        The value mdates.date2num produces for the parsed datetime.

    Raises:
        ValueError: if s does not match the format f.
    '''
    # Parse straight into a naive datetime. The previous route
    # (time.strptime -> time.mktime -> datetime.fromtimestamp) went
    # through the local-time epoch, which discards fractional seconds,
    # can fail for dates the platform's mktime cannot represent, and
    # makes the result depend on the local timezone's DST rules.
    parsed = datetime.datetime.strptime(s, f)
    return mdates.date2num(parsed)
def getData(ticker, start, end):
    '''
    Fetches the "Close" and "Volume" columns of a ticker between start
    and end from the "google" source of web.DataReader.

    Returns the values of the frame after reset_index(), with the first
    column of every row (the former index) replaced by the number that
    string2datenum gives for its string form.
    '''
    frame = web.DataReader(ticker, "google", start, end)
    rows = frame[["Close", "Volume"]].reset_index().values

    # Each row is a view into `rows`, so the assignment writes in place.
    for row in rows:
        row[0] = string2datenum(str(row[0]), "%Y-%m-%d %H:%M:%S")

    return rows
def findMin(p, A):
    '''
    Returns the element of A whose Euclidean distance to p is smallest.

    When several elements are equally close, the first one wins. If A
    is empty, (0, 0) is returned.
    '''
    nearest = (0, 0)
    nearest_dist = None  # None until the first candidate has been seen

    for candidate in A:
        candidate_dist = np.linalg.norm(candidate - np.array(p))
        if nearest_dist is None or candidate_dist < nearest_dist:
            nearest_dist = candidate_dist
            nearest = candidate

    return nearest
def accuracy(predictions, y):
    '''
    Given a list of predictions and a list of correct values y,
    it calculates the accuracy of the predictions, as the fraction
    of correct guesses (a float between 0 and 1).

    Each prediction is first snapped to the closest of the three class
    points with findMin, and the snapped point is then compared with
    the corresponding entry of y.

    Arguments:
        predictions: indexable, predictions[i] is the predicted point
            for the i-th sample
        y: sequence of the correct points, one per sample

    Raises:
        ZeroDivisionError: if y is empty.
    '''
    points = [[1, 0], [-1, 0], [0, 1]]

    guesses = 0
    for i, expected in enumerate(y):
        nearest = findMin(predictions[i], points)
        # Compare coordinate by coordinate, in order. Comparing the two
        # as sets treats (1, 0) and (0, 1) as equal, because both
        # collapse to {0, 1}, and so counts wrong guesses as correct.
        # tuple() is used because findMin returns a list from `points`
        # while the expected value may be a tuple.
        if tuple(nearest) == tuple(expected):
            guesses += 1

    return float(guesses) / len(y)
# We will consider data from 2016.
start = datetime.datetime(2016, 1, 1)
end = datetime.datetime(2017, 1, 1)

# Load data from each company. `tickers` is iterated by country, and
# tickers[country] gives the companies of that country.
data = []
for country in tickers:
    print("Loading companies from " + country + "...")
    for company in tickers[country]:
        companyData = getData(company, start, end)

        # If the company doesn't have any data, ignore it.
        if len(companyData) == 0:
            continue

        data.append(Stock(companyData, country))

    # NOTE(review): the original indentation was lost; "Done." is assumed
    # to be printed once per country -- confirm against the real file.
    print("Done.")

# We randomly divide the dataset into two subsets: the training_set,
# which has the 70% of the data, and testing_set, with the remaining
# 30%.
shuffle(data)
n_train = int(0.7 * len(data))
training_set = data[:n_train]
# The test set is simply the rest of the shuffled list. Slicing gives
# the same elements, in the same order, as filtering `data` with
# `not in training_set`, without the quadratic membership scan.
testing_set = data[n_train:]

# The inputs and outputs to train the model are constructed.
inputs = [company.data for company in training_set]
outputs = [company.point for company in training_set]

# Inputs and outputs to test the model are built.
inputsTEST = [company.data for company in testing_set]
outputsTEST = [company.point for company in testing_set]

# We apply the model for signature orders 1 to 4.
for signature_order in range(1, 5):
    # The model is trained.
    model = sigLearn.sigLearn(order=signature_order)
    model.train(inputs, outputs)

    # We calculate the predictions.
    predictions = model.predict(inputsTEST)

    # We check the accuracy of our predictions, and print it then.
    print(accuracy(predictions, outputsTEST))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment