Skip to content

Commit

Permalink
Merge pull request #24 from Jaime-alv/gui
Browse files Browse the repository at this point in the history
Gui
  • Loading branch information
Jaime-alv committed Oct 2, 2021
2 parents 3f818bc + 6c86be1 commit c68f876
Show file tree
Hide file tree
Showing 4 changed files with 279 additions and 76 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,3 @@ __pycache__/

# other files
social_preview.jpg
modules/gui.py
46 changes: 21 additions & 25 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,23 @@
import filecmp
import json
import logging
from modules.add_url import NewUrl
from modules import setup
import shutil


class CompareUrl:
def __init__(self, stored_url):
print('Running...')
def __init__(self, root, stored_url):
self.root = root
self.list_of_saved_url = stored_url
logging.basicConfig(filename=f'{self.root}\\logs\\log.txt', level=logging.DEBUG,
format='%(levelname)s - %(message)s')
pathlib.Path(f'{self.root}\\logs\\log.txt').open('w')
for each_url in self.list_of_saved_url:
self.file_name = self.list_of_saved_url[each_url]['file_name']
self.css_selector = self.list_of_saved_url[each_url]['css_selector']
self.charset = self.list_of_saved_url[each_url]['encoding']
self.url = each_url
self.path = f'storage\\url_data\\{self.file_name}.txt'
self.path = f'{self.root}\\url_data\\{self.file_name}.txt'
logging.info(f'url = {self.url}')
logging.info(f'file_name = {self.file_name}')
logging.info(f'selector = {self.css_selector}')
Expand All @@ -30,17 +32,17 @@ def __init__(self, stored_url):
def compare_url(self):
new_url = requests.get(self.url)
if self.css_selector is not None:
temp_file = pathlib.Path('storage\\temp.txt').open('w', encoding=self.charset)
temp_file = pathlib.Path(f'{self.root}\\temp.txt').open('w', encoding=self.charset)
bs4_object = bs4.BeautifulSoup(new_url.text, features="html.parser")
parsed_element = bs4_object.select(self.css_selector)
temp_file.write(str(parsed_element[0].get_text()))
temp_file.close()
elif self.css_selector is None:
temp_file = pathlib.Path('storage\\temp.txt').open('wb')
temp_file = pathlib.Path(f'{self.root}\\temp.txt').open('wb')
for chunk in new_url.iter_content(10000):
temp_file.write(chunk)
temp_file.close()
compare_files = filecmp.cmp('storage\\temp.txt', self.path, shallow=False)
temp_file.close()
compare_files = filecmp.cmp(f'{self.root}\\temp.txt', self.path, shallow=False)
if compare_files:
logging.warning(f"{self.url} Equal to stored one")
elif not compare_files:
Expand All @@ -50,7 +52,7 @@ def compare_url(self):

def save_url(self):
logging.warning(f'Updating file with {self.url} in {self.path}')
shutil.move(self.path, f'storage\\url_data\\backup\\{self.file_name}_backup.txt')
shutil.move(self.path, f'{self.root}\\url_data\\backup\\{self.file_name}_backup.txt')
if self.css_selector is not None:
new_url = requests.get(self.url)
open_old_url = pathlib.Path(self.path).open('w', encoding=self.charset)
Expand All @@ -67,27 +69,21 @@ def save_url(self):
open_url.close()


def main():
if __name__ == "__main__":
directory = 'storage'
try:
with pathlib.Path('storage\\url_list.txt').open('r') as file:
logging.basicConfig(filename=f'{directory}\\logs\\log.txt', level=logging.DEBUG,
format='%(levelname)s - %(message)s')
pathlib.Path('storage\\logs\\log.txt').open('w')
with pathlib.Path(f'{directory}\\url_list.txt').open('r') as file:
list_of_saved_url = json.load(file)
if len(list_of_saved_url) == 0:
print('List is empty')
NewUrl('storage', list_of_saved_url)
else:
CompareUrl(list_of_saved_url)
logging.debug(pathlib.Path.cwd())
logging.debug('main function')
print('Running...')
CompareUrl(directory, list_of_saved_url)
except FileNotFoundError:
logging.error('Running setup.py')
setup.setup('storage')


if __name__ == "__main__":
try:
logging.basicConfig(filename='storage\\logs\\log.txt', level=logging.DEBUG,
format='%(levelname)s - %(message)s')
pathlib.Path('storage\\logs\\log.txt').open('w')
except FileNotFoundError:
setup.setup('storage')
logging.debug(pathlib.Path.cwd())
logging.debug('main function')
main()
144 changes: 94 additions & 50 deletions modules/add_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,64 +10,58 @@


class NewUrl:
def __init__(self, root, list_of_saved_url):
def __init__(self, root, list_of_saved_url, add_url, add_css):
self.url = add_url
self.css_selector = add_css
self.root = root
self.list_of_saved_url = list_of_saved_url
print('Add desired url.')
print('Url needs to start with http:// or https://')
self.url = input('@: ')
print('Add unique css selector.')
css = input('css: ')
if css != '':
self.css_selector = css
else:
self.css_selector = None
logging.basicConfig(filename=f'{self.root}\\logs\\log.txt', level=logging.DEBUG,
format='%(levelname)s - %(message)s')
self.main()

def main(self):
logging.critical(f'passed url: {self.url}')
try: # check if given url is valid or not
requests.get(self.url).raise_for_status()
# check if given url is already in json file
if self.list_of_saved_url.get(self.url, None) is None:
response = requests.get(self.url)
pass_charset = response.headers['Content-Type']
domain, header = self.domain_name()
enc_charset = get_charset(pass_charset)

if header is None:
name = domain
else:
name = domain + '_' + header

logging.warning(f'New file with name {name}.txt')
additional_info = {}
self.list_of_saved_url.setdefault(self.url, additional_info)
self.list_of_saved_url[self.url].setdefault('file_name', name)
self.list_of_saved_url[self.url].setdefault('encoding', enc_charset)
logging.info(f"{name} with encoding {enc_charset}")

if self.css_selector is not None:
new_file = pathlib.Path(f'{self.root}\\url_data\\{name}.txt').open('w', encoding=enc_charset)
self.list_of_saved_url[self.url].setdefault('css_selector', self.css_selector)
with pathlib.Path(f'{self.root}\\url_list.txt').open('w') as f:
json.dump(self.list_of_saved_url, f)
bs4_object = bs4.BeautifulSoup(response.text, features="html.parser")
parsed_element = bs4_object.select(self.css_selector)
new_file.write(str(parsed_element[0].get_text()))

elif self.css_selector is None:
new_file = pathlib.Path(f'{self.root}\\url_data\\{name}.txt').open('wb')
self.list_of_saved_url[self.url].setdefault('css_selector', None)
for chunk in response.iter_content(10000):
new_file.write(chunk)
logging.debug(f'Stored url in json file {self.list_of_saved_url}')
print(f"Everything ok with {name}")
response = requests.get(self.url)
pass_charset = response.headers['Content-Type']
domain, header = self.domain_name()
enc_charset = get_charset(pass_charset)

if header is None:
name = domain
else:
name = domain + '_' + header

logging.warning(f'New file with name {name}.txt')
additional_info = {}
self.list_of_saved_url.setdefault(self.url, additional_info)
self.list_of_saved_url[self.url].setdefault('file_name', name)
self.list_of_saved_url[self.url].setdefault('encoding', enc_charset)
logging.info(f"{name} with encoding {enc_charset}")

if self.css_selector is not None:
new_file = pathlib.Path(f'{self.root}\\url_data\\{name}.txt').open('w', encoding=enc_charset)
self.list_of_saved_url[self.url].setdefault('css_selector', self.css_selector)
with pathlib.Path(f'{self.root}\\url_list.txt').open('w') as f:
json.dump(self.list_of_saved_url, f)
bs4_object = bs4.BeautifulSoup(response.text, features="html.parser")
parsed_element = bs4_object.select(self.css_selector)
new_file.write(str(parsed_element[0].get_text()))

elif self.css_selector is None:
new_file = pathlib.Path(f'{self.root}\\url_data\\{name}.txt').open('wb')
self.list_of_saved_url[self.url].setdefault('css_selector', None)
with pathlib.Path(f'{self.root}\\url_list.txt').open('w') as f:
json.dump(self.list_of_saved_url, f)
for chunk in response.iter_content(10000):
new_file.write(chunk)
logging.debug(f'Stored url in json file {self.list_of_saved_url}')
logging.info(f"Everything ok with {name}")
except Exception:
logging.error(f"Something went wrong with {self.url}")
response = requests.get(self.url)
logging.error(f"Response from request = {response}")
print('Error!')

def domain_name(self):
name = re.compile(r'(http(s)?://)?(www\.)?(?P<domain>.*)\.([a-zA-Z]+)(/(?P<header>[a-zA-Z_\-]+)(/.*)?)?')
Expand All @@ -81,6 +75,44 @@ def get_charset(charset):
return search_charset.group('charset')


class ModifyCssGUI:
    """Overwrite the stored css selector for one tracked url and persist the change.

    An empty ``modify_css`` string is recorded as ``None`` (meaning: compare the
    raw page instead of a selected element).
    """

    def __init__(self, root, list_of_saved_url, url, modify_css):
        self.root = root
        self.list_of_saved_url = list_of_saved_url
        self.url = url
        self.modify_css = modify_css
        # Same log file destination the rest of the application uses.
        logging.basicConfig(filename=f'{self.root}\\logs\\log.txt', level=logging.DEBUG,
                            format='%(levelname)s - %(message)s')

        # Empty input means "no selector": fall back to None.
        self.modified_css = self.modify_css if self.modify_css != '' else None
        entry = self.list_of_saved_url[url]
        logging.warning(f"New css selector for {entry['file_name']}")
        logging.info(f"old: {entry['css_selector']}, new: {self.modify_css}")
        entry['css_selector'] = self.modified_css
        # Persist the whole url registry so the change survives restarts.
        with pathlib.Path(f'{self.root}\\url_list.txt').open('w') as overwrite:
            json.dump(self.list_of_saved_url, overwrite)


class DeleteUrlGUI:
    """Remove a batch of tracked urls: delete their data and backup files,
    drop them from the registry, and rewrite ``url_list.txt`` once."""

    def __init__(self, root, list_of_saved_url, for_delete):
        self.root = root
        self.list_of_saved_url = list_of_saved_url
        self.for_delete = for_delete
        for url in for_delete:
            stem = list_of_saved_url[url]['file_name']
            data_file = pathlib.Path(f'{self.root}\\url_data\\{stem}.txt')
            backup = pathlib.Path(f'{self.root}\\url_data\\backup\\{stem}_backup.txt')
            # Either file may never have been written; only unlink what exists.
            if data_file.exists():
                data_file.unlink()
            if backup.exists():
                backup.unlink()
            del self.list_of_saved_url[url]
        # One registry write after all deletions are applied.
        with pathlib.Path(f'{self.root}\\url_list.txt').open('w') as overwrite:
            json.dump(self.list_of_saved_url, overwrite)


class DeleteUrl:
def __init__(self, root, list_of_saved_url):
self.root = root
Expand All @@ -103,9 +135,11 @@ def delete_stored_url(self):
if url_number.isdigit() and 0 < int(url_number) <= (index - 1):
pathing = self.list_of_saved_url[order[(int(url_number) - 1)]]['file_name']
file = pathlib.Path(f'{self.root}\\url_data\\{pathing}.txt')
backup_file = pathlib.Path(f'{self.root}\\url_data\\backup\\{pathing}.txt')
pathlib.Path.unlink(file)
pathlib.Path.unlink(backup_file)
backup_file = pathlib.Path(f'{self.root}\\url_data\\backup\\{pathing}_backup.txt')
if file.exists():
pathlib.Path.unlink(file)
if backup_file.exists():
pathlib.Path.unlink(backup_file)
del self.list_of_saved_url[order[(int(url_number) - 1)]]
with pathlib.Path(f'{self.root}\\url_list.txt').open('w') as overwrite:
json.dump(self.list_of_saved_url, overwrite)
Expand Down Expand Up @@ -174,7 +208,17 @@ def modify(self):
""")
answer = input('#: ')
if answer == '1':
NewUrl('..\\storage', stored_url)
print('Add desired url.')
print('Url needs to start with http:// or https://')
new_url = input('@: ')
print('Add unique css selector.')
css = input('css: ')
if css != '':
css_selector = css
else:
css_selector = None
if stored_url.get(new_url, None) is None:
result = NewUrl('..\\storage', stored_url, new_url, css_selector)
elif answer == '2':
ModifyCss('..\\storage', stored_url)
elif answer == '3':
Expand Down
Loading

0 comments on commit c68f876

Please sign in to comment.