-
Notifications
You must be signed in to change notification settings - Fork 0
/
update.py
190 lines (153 loc) · 6.1 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
### UPDATE SCRIPT
###
### PURPOSE
### The purpose of this file is to check for updated dividend data and manually extract the updates from the
### www.newsweb.no website in an efficient manner. This way of manually working with the data is a prototype solution
### until an efficient and fail-proof automated solution has been designed.
###
### WHAT DATA TO STORE
### | Ticker | Last run date (dd.mm.yyyy) | Number of entries |
### | Date (dd.mm.yyyy) | Type (Dividend / Info) | Amount (float / X) |
###
### @Author: Fredrik Bakken
### Email: fredrik.bakken(at)gmail.com
### Website: https://www.fredrikbakken.no/
### Github: https://github.com/FredrikBakken
###
### Last update: 22.09.2017
'''
import os
import math
import time
import datetime
import contextlib
from bs4 import BeautifulSoup
from urllib.request import urlopen
from dividend import store_dividends
from db import db_id_stocks, db_number_of_stocks
def file_exist(filename):
    """Return True if *filename* exists and is a regular file.

    The previous implementation opened the file to probe for existence and
    never closed the handle, leaking a file descriptor on every successful
    check. ``os.path.isfile`` answers the same question without opening
    anything (``os`` is already imported at module level).

    :param filename: path to the file to check
    :return: bool -- True when the path exists and is a regular file
    """
    return os.path.isfile(filename)
def get_url(page, ticker, from_date, today):
    """Build (and print) the newsweb.no search URL for one result page.

    :param page: 1-based result-page number to request
    :param ticker: issuer sign (stock ticker) to search for
    :param from_date: start of the date range, ``dd.mm.yyyy``
    :param today: end of the date range, ``dd.mm.yyyy``
    :return: str -- the fully assembled search URL
    """
    # Category id 1101 is the "ex-date" announcement category on newsweb.
    category_ex_date = '1101'
    query_parts = (
        'headerSearch=',
        'searchCriteria.categoryIds=',
        'selectedPagenumber=' + str(page),
        'searchSubmitType=searchtype',
        'searchtype=full',
        'searchCriteria.issuerSign=' + ticker,
        'searchCriteria.instrumentShortName=',
        'searchCriteria.categoryId=' + category_ex_date,
        'searchCriteria.fromDate=' + from_date,
        'searchCriteria.toDate=' + today,
        'searchCriteria.exchangeCode=',
        '_searchCriteria.activeIssuersOnly=',
        'searchCriteria.activeIssuersOnly=true',
    )
    url = 'http://www.newsweb.no/newsweb/search.do?' + '&'.join(query_parts)
    print(url)
    return url
def update() -> bool:
    """Check newsweb.no for new dividend (ex-date) announcements per ticker.

    For every stock in the local database, scrapes the newsweb.no search
    results since the last recorded run, prepends placeholder rows
    (``date,type,amount``) to ``data/div-split/<ticker>.json``, and collects
    tickers with unfilled placeholders into ``data/updates.txt``. If nothing
    needs manual editing, hands off to ``store_dividends``; otherwise it
    prompts the user to fix the files and recursively re-runs itself.

    :return: True once no manual updates remain
    """
    updates_filename = 'data/updates.txt'
    number_of_stocks = db_number_of_stocks()
    # Start by deleting the updates.txt file (absence is tolerated)
    with contextlib.suppress(FileNotFoundError):
        os.remove(updates_filename)
    # Execute one ticker at the time
    for x in range(number_of_stocks):
        # Reset lines values to avoid copy to new stock issue
        lines = ''
        # Get today's date
        today = datetime.datetime.today().strftime('%d.%m.%Y')
        # Get ticker from db_stocks.json (db ids are 1-based)
        stock_id = (x + 1)
        ticker = db_id_stocks(stock_id)
        filename = 'data/div-split/' + ticker + '.json'
        # Check if file exist
        exist = file_exist(filename)
        first_line_split = ''
        # Find which date last updates are from
        if not exist:
            # First run for this ticker: search the full history
            from_date = '01.01.1900'
            previous_entries = 0
            # Write first line to files: "ticker,last-run-date,entry-count"
            with open(filename, 'a') as f:
                f.write(ticker + ',' + today + ',' + str(previous_entries) + '\n')
        else:
            # Header line carries the last-run date and the running entry count
            with open(filename, 'r') as f:
                first_line = f.readline()
                lines = f.readlines()
                first_line_split = first_line.split(",")
                from_date = first_line_split[1]
                previous_entries = first_line_split[2]
        # Get number of pages to check and number of new entries
        url = get_url(1, ticker, from_date, today)
        web_content = urlopen(url).read()
        soup = BeautifulSoup(web_content, "html.parser")
        hits = str(soup.find_all('div', attrs={'class': 'hits'}))
        # NOTE(review): extracts ALL digits from the hits markup — assumes the
        # only digits present are the hit count; verify against live markup.
        entries = int(''.join(x for x in hits if x.isdigit()))
        # newsweb paginates at 25 results per page
        pages = int(math.ceil(entries / 25))
        print("Number of updates: " + str(entries))
        # Update entries
        total_entries = (int(previous_entries) + int(entries))
        new_line = (ticker + ',' + today + ',' + str(total_entries) + '\n')
        check_date = ''
        # Get latest dividend entry date (first data row = newest entry)
        if (len(lines) > 0):
            date_line = lines[0]
            date_line_split = date_line.split(",")
            check_date = date_line_split[0]
        date_list = []
        # Go through all pages and extract rows
        for page in range(pages):
            time.sleep(1)  # Avoid DDoS
            page_number = (page + 1)
            dividend_url = get_url(page_number, ticker, from_date, today)
            dividend_page = urlopen(dividend_url).read()
            soup_table = BeautifulSoup(dividend_page, "html.parser")
            table = soup_table.find('table', attrs={'class': 'messageTable'})
            rows = table.find_all('tr')
            # Get update dates (skip the table header row)
            for row in rows[1:]:
                cols = row.find_all('td')
                cols = [ele.text.strip() for ele in cols]
                # First cell is "date time"; keep only the date part
                date = cols[0].split(' ')[0]
                # Stop-date guard: skip entries already recorded last run
                if not date == check_date:
                    # Placeholder row; "type"/"amount" are filled in manually
                    element = date + ',type,amount\n'
                    if not element in date_list:
                        date_list.append(element)
        # Overwrite file with new data: fresh header, new placeholders, old rows
        with open(filename, 'w') as f:
            # NOTE(review): first_line is only bound in the 'else' branch above —
            # on the first run for a new ticker this likely raises NameError.
            # (The .replace(first_line, new_line) always yields new_line anyway.)
            f.write(first_line.replace(first_line, new_line))
            for element in date_list:
                f.write(element)
            for line in lines:
                f.write(line)
        # Check every file for updates (placeholders still containing
        # the literal "type"/"amount" markers have not been filled in)
        missing_updates = 0
        with open(filename, 'r') as f:
            for line in f:
                if ("type" in line) or ("amount" in line):
                    missing_updates = missing_updates + 1
        # Append all missing data to updates file ('ticker' : number of missing updates)
        with open(updates_filename, 'a') as upd:
            if missing_updates > 0:
                upd.write(ticker + ': ' + str(missing_updates) + '\n')
    updates_size = os.stat(updates_filename).st_size
    # If the updates.txt is empty
    if updates_size == 0:
        store_dividends()
    # There are new dividend updates which has not been fixed
    else:
        print("\nOBS!\nThere are new dividend updates which has to be manually updated.\n"
              "Please check in the '/data/updates.txt' for more information.")
        input("Press enter to continue...")
        # Re-run from the top after the user has edited the files by hand
        update()
    return True
# Script entry point: run the update routine only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    update()