From cbf4365036bca65d09ed51212eeea8f81f2f1648 Mon Sep 17 00:00:00 2001 From: ibidyouadu Date: Thu, 5 Aug 2021 16:31:21 -0400 Subject: [PATCH] fix pd read_csv bug and add reading pg info from config.ini --- emailing.py | 30 ++++++------- exception_handling.py | 22 +++++----- main.py | 97 ++++++++++++++++++++++++++++--------------- postgres.py | 44 ++++++++++---------- 4 files changed, 112 insertions(+), 81 deletions(-) diff --git a/emailing.py b/emailing.py index 5aea0d9..f068e3f 100644 --- a/emailing.py +++ b/emailing.py @@ -16,17 +16,17 @@ def ask_email(window_title, prompt): root.withdraw() inp = simpledialog.askstring(window_title, prompt, parent=root) root.destroy() - + return inp def ping(subject, body): - + # get email login info from config.ini cfg = configparser.ConfigParser() cfg.read('config.ini') - bot_email = cfg.get('email', 'address') - password = cfg.get('email', 'password') - + bot_email = cfg.get('email', 'bot_email') + password = cfg.get('email', 'bot_password') + port = 465 context = ssl.create_default_context() from_address = bot_email @@ -36,18 +36,18 @@ def ping(subject, body): msg['To'] = to_address msg['Subject'] = subject msg.attach(MIMEText(body, "plain")) - + with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server: server.login(from_address, password) server.sendmail(from_address, to_address, msg.as_string()) - + def email_results(email, num_flagged, flagged_path, flagged_fname, images): - """ + """ Send an email with the following information: 1. All the information from the flagged records (so the whole rows) 2. unit_price vs weight/count log-plots in the form of attachments - + Parameters ---------- email (str) @@ -68,8 +68,8 @@ def email_results(email, num_flagged, flagged_path, flagged_fname, images): # get email login info from config.ini cfg = configparser.ConfigParser() cfg.read('config.ini') - bot_email = cfg.get('email', 'address') - password = cfg.get('email', 'password') + bot_email = cfg.get('email', 'bot_address') + password = cfg.get('email', 'bot_password') # components of the email message to put into MIMEMultipart object from_address = bot_email @@ -79,7 +79,7 @@ def email_results(email, num_flagged, flagged_path, flagged_fname, images): body = """ Good day! We counted %d record(s) from yesterday flagged as potential outlier(s). Please take a look at the csv/plot data, attached. As a reminder, the plot data is in a shifted log scale, so -1 on the graph means that the value is actually 0. - + Have a nice day! Outlier Bot """ % num_flagged @@ -109,12 +109,12 @@ def email_results(email, num_flagged, flagged_path, flagged_fname, images): with open(img_path, 'rb') as f: # initiate MIME object for img attachment mime = MIMEBase('image', 'png', filename=img_fname) - + # add metadata mime.add_header('Content-disposition', 'attachment', filename=img_fname) mime.add_header('X-attachment-id', x_attach_id) mime.add_header('Content-ID', content_id) - + # prepare the object in format compatible w msg, then attach to msg mime.set_payload(f.read()) encoders.encode_base64(mime) @@ -122,4 +122,4 @@ def email_results(email, num_flagged, flagged_path, flagged_fname, images): with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server: server.login(from_address, password) - server.sendmail(from_address, receivers, msg.as_string()) \ No newline at end of file + server.sendmail(from_address, receivers, msg.as_string()) diff --git a/exception_handling.py b/exception_handling.py index 409afbd..4398da1 100644 --- a/exception_handling.py +++ b/exception_handling.py @@ -8,14 +8,14 @@ import tkinter as tk import tkinter.messagebox as messagebox def send_error_log(logpath): - """ + """ Send an email to Angel with Python error log. """ cfg = configparser.ConfigParser() cfg.read('config.ini') - bot_email = cfg.get('email', 'address') - password = cfg.get('email', 'password') - adu_email = cfg.get('email', 'adu') + bot_email = cfg.get('email', 'bot_email') + password = cfg.get('email', 'bot_password') + adu_email = cfg.get('email', 'adu_email') port = 465 #for conntecting to gmail server # create secure SSL context ie security configuration @@ -28,29 +28,29 @@ def send_error_log(logpath): body = """ AAAarrRrGGgghH! *cough* I;m dying *cooough* soomething went wrong... The error log, please... look for answers,.. in..side........ - + Have a nice day! Outlier Bot (RIP) - """ + """ # build the MIMEMultipart object which will later be converted to text msg = MIMEMultipart() msg['From'] = from_address msg['To'] = to_address msg['Subject'] = subject - + # add the message body msg.attach(MIMEText(body, "plain")) - + # attach csv file fname = str(logpath.name) with open(logpath, 'rb') as f: msg.attach(MIMEApplication(f.read(), Name=fname)) - + with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server: server.login(from_address, password) server.sendmail(from_address, to_address, msg.as_string()) - + def print_error_message(reason=None): root = tk.Tk() root.withdraw() @@ -63,4 +63,4 @@ def print_error_message(reason=None): message = """Could not ping the database. Maybe login info is wrong? Please enter your login details again.""" messagebox.showwarning(window_title, message) - root.destroy() \ No newline at end of file + root.destroy() diff --git a/main.py b/main.py index cb555f9..c4d62c9 100644 --- a/main.py +++ b/main.py @@ -1,19 +1,21 @@ -import psycopg2 -import postgres -import data_clean -import clean_fish -import algorithm -import emailing -import exception_handling +import time from pathlib import Path import traceback +import psycopg2 import schedule -import time -import pandas as pd import numpy as np +import pandas as pd +import algorithm +import clean_fish +import data_clean +import emailing +import exception_handling +import postgres +import configparser def timestamp(msg): - now = np.datetime64('now') - np.timedelta64(5, 'h') # EST timezone + # define now as 12am EST + now = np.datetime64('now') - np.timedelta64(1, 'h') print(now, msg) def main(host, db, user, password, email, first_run): @@ -22,10 +24,11 @@ def main(host, db, user, password, email, first_run): subject = 'opened' body = "I'm up!!" emailing.ping(subject, body) - + # get yesterday's date date = str(np.datetime64('today') - np.timedelta64(1, 'D')) timestamp("checking for outliers...") + try: # get today's records and clean pg_data = postgres.query_data(host, db, user, password, date) @@ -37,11 +40,13 @@ def main(host, db, user, password, email, first_run): else: return True data = postgres.clean_postgres_data(pg_data) - df = data_clean.main(data) # 'lite' version of `data` - + # we'll use a 'lite' version of `data` called `df` + df = data_clean.main(data) + # load up existing dataset archive_path = Path('./data/pg_data_clean.csv') archive_df_path = Path('./data/clean_catch_data.csv') + try: archive = pd.read_csv(str(archive_path)) archive_df = pd.read_csv(str(archive_df_path)) @@ -49,13 +54,14 @@ def main(host, db, user, password, email, first_run): # without changing the data we just extracted archive.append(data).to_csv(archive_path, index=False) archive_df.append(df).to_csv(archive_df_path, index=False) - except FileNotFoundError: # either first time or file was deleted + # in case its the first run or file was deleted: + except FileNotFoundError: pg_archive = postgres.query_data(host, db, user, password) archive = postgres.clean_postgres_data(pg_archive) archive.to_csv(archive_path, index=False) archive_df = data_clean.main(archive) archive_df.to_csv(archive_df_path, index=False) - + # load fish data; eventually set this up like catch data where # the pg server is queried and the raw data is cleaned fishpath = Path('./data/fishdata_buyingunit_clean.csv') @@ -85,10 +91,12 @@ def main(host, db, user, password, email, first_run): emailing.email_results(email, num_flagged, flagged_path, flagged_fname, images) else: timestamp("I found nothing fishy in yesterday's data!") + subject = 'daily ping' body = 'still alive!' emailing.ping(subject, body) # daily check if code is live or not - + + # in case something goes wrong anywhere in the program, send error log and quit: except Exception as e: logpath = Path('./logs/'+date+'.log') with open(logpath, 'w') as logf: @@ -96,39 +104,60 @@ def main(host, db, user, password, email, first_run): exception_handling.send_error_log(logpath) exception_handling.print_error_message() quit() + timestamp("I'm done for today. Zzzz.....") + if first_run: return schedule.CancelJob -# get postgres info -host = None -db = None -user = None -password = None +# for the beginning of the program, initialize things like postgres and email info + login_errors = (psycopg2.errors.InFailedSqlTransaction, psycopg2.OperationalError) # for some reason, a wrong password will not cause a problem, # data can be queried just fine... not a problem for now I guess -while (host is None) or (db is None) or (user is None) or (password is None): - host, db, user, password = postgres.login() - try: - postgres.query_data(host, db, user, password, 'test') - except login_errors: - exception_handling.print_error_message('login') - host = None - db = None - user = None - password = None + +cfg = configparser.ConfigParser() +cfg.read('config.ini') +host = cfg.get('postgres', 'host_address') +db = cfg.get('postgres', 'db_name') +user = cfg.get('postgres', 'user') +password = cfg.get('postgres', 'password') + +try: + # test ping the postgres server + postgres.query_data(host, db, user, password, 'test') +except login_errors: + # config.ini info is wrong, prompt user until we have correct info + exception_handling.print_error_message('login') + host = None + db = None + user = None + password = None + while (host is None) or (db is None) or (user is None) or (password is None): + host, db, user, password = postgres.login() + try: + postgres.query_data(host, db, user, password, 'test') + except login_errors: + exception_handling.print_error_message('login') + host = None + db = None + user = None + password = None # get email address -email = None +email = cfg.read('email', 'user_email') while email is None: window_title = "Email", prompt = "Please enter the email address where you would like notifications to go to." email = emailing.ask_email(window_title, prompt) - + +# scan for outliers in all data up til now as part of the first run schedule.every().second.do(main, host, db, user, password, email, True) + +# now just scan for outliers once a day schedule.every().day.at("00:00").do(main, host, db, user, password, email, False) + while True: schedule.run_pending() - time.sleep(1) \ No newline at end of file + time.sleep(1) diff --git a/postgres.py b/postgres.py index 9c82389..f14b9fc 100644 --- a/postgres.py +++ b/postgres.py @@ -18,13 +18,13 @@ def prompt_user(window_title, prompt, for_password=False): else: inp = simpledialog.askstring(window_title, prompt, parent=root) root.destroy() - + return inp def login(): """ Prompt user for pg login info using tkinter gui - + Returns: user (str) pg user name to use to connect to pg server @@ -36,7 +36,7 @@ def login(): db = prompt_user("Postgres login", "Please enter the name of the database. (e.g. ourfish)") - + user = prompt_user("Postgres Login", "Please enter your user name.") @@ -55,7 +55,7 @@ def query_data(host, db, user, password, date=None): database=db, user=user, password=password) - + cur = conn.cursor() if date is None: # if there is no archived data already date = '2019-01-01' @@ -72,15 +72,15 @@ def query_data(host, db, user, password, date=None): WHERE date::date = \'{}\'""".format(date) copy_sql = "COPY ("+sql+") TO STDOUT WITH CSV HEADER" csv_path = Path('./data/postgres_dump.csv') - - + + with open(str(csv_path), 'w') as f: cur.copy_expert(copy_sql, f) - + cur.close() conn.close() - - return pd.read_csv(str(csv_path)) + + return pd.read_csv(str(csv_path), engine='python') def unravel(row_data): """ @@ -93,7 +93,7 @@ def unravel(row_data): for ii in range(len(row_data)): row_data[ii] = row_data[ii].split(': ') row_data[ii][0] = row_data[ii][0].replace('"', '') - + val = row_data[ii][1] if val == '""': val = np.nan @@ -104,7 +104,7 @@ def unravel(row_data): else: val = ast.literal_eval(val) row_data[ii][1] = val - + return row_data def clean_postgres_data(pg_data): @@ -117,11 +117,13 @@ def clean_postgres_data(pg_data): that it contains. """ pg_data['data'] = pg_data['data'].apply(unravel) - - cols = [record[0] for record in pg_data.iloc[0]['data']] + + cols = ['name', 'count', 'weight', 'weight_units', \ + 'price_currency', 'unit_price', 'total_price'] + # this will contain lists that will turn into new columns of pg_data data_dict = {} - + for col in cols: data_dict[col] = [] @@ -130,16 +132,16 @@ def clean_postgres_data(pg_data): keys = [record[0] for record in data] vals = [record[1] for record in data] row_dict = {k: v for (k,v) in zip(keys, vals)} - + for col in cols: data_dict[col].append(row_dict[col]) - + for col in cols: pg_data[col] = data_dict[col] - + pg_data = pg_data.drop(columns='data') # no longer need that poorly formatted col pg_data = pg_data.rename(mapper={'name':'buying_unit'}, axis=1) - + # use currencies to create country col country_currency_dict = { 'IDR': 'IDN', @@ -154,7 +156,7 @@ def clean_postgres_data(pg_data): # create country column pg_data['country'] = pg_data['price_currency'].apply(lambda x: country_currency_dict[x]) - + # fix weight_units in the same manner that is done in clean_fish.py pg_data['weight_units'] = pg_data['weight_units'].apply(fix_weight_units) @@ -176,7 +178,7 @@ def clean_postgres_data(pg_data): lbs_conv = np.array(lbs_conv) pg_data['weight_kg'] = pg_data['weight']*kg_conv pg_data['weight_lbs'] = pg_data['weight']*lbs_conv - + pg_data = pg_data[~pg_data.duplicated()] - return pg_data \ No newline at end of file + return pg_data -- 2.43.0