root.withdraw()
inp = simpledialog.askstring(window_title, prompt, parent=root)
root.destroy()
-
+
return inp
def ping(subject, body):
-
+
# get email login info from config.ini
cfg = configparser.ConfigParser()
cfg.read('config.ini')
- bot_email = cfg.get('email', 'address')
- password = cfg.get('email', 'password')
-
+ bot_email = cfg.get('email', 'bot_email')
+ password = cfg.get('email', 'bot_password')
+
port = 465
context = ssl.create_default_context()
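+ # (465 is Gmail's implicit-SSL SMTP port; create_default_context() returns a context that verifies the server certificate)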
from_address = bot_email
msg['To'] = to_address
msg['Subject'] = subject
msg.attach(MIMEText(body, "plain"))
-
+
with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
server.login(from_address, password)
server.sendmail(from_address, to_address, msg.as_string())
-
+
def email_results(email, num_flagged, flagged_path, flagged_fname, images):
- """
+ """
Send an email with the following information:
1. All the information from the flagged records (i.e., the full rows)
2. unit_price vs weight/count log-plots, attached as image files
-
+
Parameters
----------
email (str)
# get email login info from config.ini
cfg = configparser.ConfigParser()
cfg.read('config.ini')
- bot_email = cfg.get('email', 'address')
- password = cfg.get('email', 'password')
+ bot_email = cfg.get('email', 'bot_email')
+ password = cfg.get('email', 'bot_password')
# components of the email message to put into MIMEMultipart object
from_address = bot_email
body = """
Good day! We counted %d record(s) from yesterday flagged as potential outlier(s).
Please take a look at the attached CSV and plot data. As a reminder, the plots use a shifted log scale, so -1 on the graph means the value is actually 0.
-
+
Have a nice day!
Outlier Bot
""" % num_flagged
with open(img_path, 'rb') as f:
# initiate MIME object for img attachment
mime = MIMEBase('image', 'png', filename=img_fname)
-
+
# add metadata
mime.add_header('Content-disposition', 'attachment', filename=img_fname)
mime.add_header('X-attachment-id', x_attach_id)
mime.add_header('Content-ID', content_id)
-
+
# prepare the object in format compatible w msg, then attach to msg
mime.set_payload(f.read())
encoders.encode_base64(mime)
with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
server.login(from_address, password)
- server.sendmail(from_address, receivers, msg.as_string())
\ No newline at end of file
+ server.sendmail(from_address, receivers, msg.as_string())
import tkinter.messagebox as messagebox
def send_error_log(logpath):
- """
+ """
Send an email to Angel with the Python error log.
"""
cfg = configparser.ConfigParser()
cfg.read('config.ini')
- bot_email = cfg.get('email', 'address')
- password = cfg.get('email', 'password')
- adu_email = cfg.get('email', 'adu')
+ bot_email = cfg.get('email', 'bot_email')
+ password = cfg.get('email', 'bot_password')
+ adu_email = cfg.get('email', 'adu_email')
port = 465 # for connecting to the Gmail server
# create a secure SSL context, i.e., the security configuration
body = """
AAAarrRrGGgghH! *cough* I;m dying *cooough* soomething went wrong...
The error log, please... look for answers,.. in..side........
-
+
Have a nice day!
Outlier Bot (RIP)
- """
+ """
# build the MIMEMultipart object which will later be converted to text
msg = MIMEMultipart()
msg['From'] = from_address
msg['To'] = to_address
msg['Subject'] = subject
-
+
# add the message body
msg.attach(MIMEText(body, "plain"))
-
+
# attach csv file
fname = str(logpath.name)
with open(logpath, 'rb') as f:
msg.attach(MIMEApplication(f.read(), Name=fname))
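+ # MIMEApplication wraps the raw log bytes into a MIME part named after the log file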
-
+
with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
server.login(from_address, password)
server.sendmail(from_address, to_address, msg.as_string())
-
+
def print_error_message(reason=None):
root = tk.Tk()
root.withdraw()
message = """Could not ping the database. Maybe login info is wrong?
Please enter your login details again."""
messagebox.showwarning(window_title, message)
- root.destroy()
\ No newline at end of file
+ root.destroy()
-import psycopg2
-import postgres
-import data_clean
-import clean_fish
-import algorithm
-import emailing
-import exception_handling
+import time
from pathlib import Path
import traceback
+import psycopg2
import schedule
-import time
-import pandas as pd
import numpy as np
+import pandas as pd
+import algorithm
+import clean_fish
+import data_clean
+import emailing
+import exception_handling
+import postgres
+import configparser
def timestamp(msg):
- now = np.datetime64('now') - np.timedelta64(5, 'h') # EST timezone
+ # define now as 12am EST
+ now = np.datetime64('now') - np.timedelta64(1, 'h')
print(now, msg)
def main(host, db, user, password, email, first_run):
subject = 'opened'
body = "I'm up!!"
emailing.ping(subject, body)
-
+
# get yesterday's date
date = str(np.datetime64('today') - np.timedelta64(1, 'D'))
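+ # e.g. if this runs on 2021-03-02, `date` is the string '2021-03-01'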
timestamp("checking for outliers...")
+
try:
# get today's records and clean
pg_data = postgres.query_data(host, db, user, password, date)
else:
return True
data = postgres.clean_postgres_data(pg_data)
- df = data_clean.main(data) # 'lite' version of `data`
-
+ # we'll use a 'lite' version of `data` called `df`
+ df = data_clean.main(data)
+
# load up existing dataset
archive_path = Path('./data/pg_data_clean.csv')
archive_df_path = Path('./data/clean_catch_data.csv')
+
try:
archive = pd.read_csv(str(archive_path))
archive_df = pd.read_csv(str(archive_df_path))
# without changing the data we just extracted
archive.append(data).to_csv(archive_path, index=False)
archive_df.append(df).to_csv(archive_df_path, index=False)
- except FileNotFoundError: # either first time or file was deleted
+ # in case it's the first run or the file was deleted:
+ except FileNotFoundError:
pg_archive = postgres.query_data(host, db, user, password)
archive = postgres.clean_postgres_data(pg_archive)
archive.to_csv(archive_path, index=False)
archive_df = data_clean.main(archive)
archive_df.to_csv(archive_df_path, index=False)
-
+
# load fish data; eventually set this up like catch data where
# the pg server is queried and the raw data is cleaned
fishpath = Path('./data/fishdata_buyingunit_clean.csv')
emailing.email_results(email, num_flagged, flagged_path, flagged_fname, images)
else:
timestamp("I found nothing fishy in yesterday's data!")
+
subject = 'daily ping'
body = 'still alive!'
emailing.ping(subject, body) # daily check if code is live or not
-
+
+ # if anything goes wrong anywhere in the program, send the error log and quit:
except Exception as e:
logpath = Path('./logs/'+date+'.log')
with open(logpath, 'w') as logf:
exception_handling.send_error_log(logpath)
exception_handling.print_error_message()
quit()
+
timestamp("I'm done for today. Zzzz.....")
+
if first_run:
return schedule.CancelJob
-# get postgres info
-host = None
-db = None
-user = None
-password = None
+# at program start, load the Postgres and email settings from config.ini
+
login_errors = (psycopg2.errors.InFailedSqlTransaction,
psycopg2.OperationalError)
# note: for some reason a wrong password does not trigger these errors;
# data can still be queried just fine, so this is not a problem for now
-while (host is None) or (db is None) or (user is None) or (password is None):
- host, db, user, password = postgres.login()
- try:
- postgres.query_data(host, db, user, password, 'test')
- except login_errors:
- exception_handling.print_error_message('login')
- host = None
- db = None
- user = None
- password = None
+
+cfg = configparser.ConfigParser()
+cfg.read('config.ini')
+host = cfg.get('postgres', 'host_address')
+db = cfg.get('postgres', 'db_name')
+user = cfg.get('postgres', 'user')
+password = cfg.get('postgres', 'password')
+
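+# A sketch of the expected config.ini layout (key names taken from the cfg.get calls
+# here and in the emailing / exception_handling modules; the values are placeholders):
+#
+#   [postgres]
+#   host_address = ...
+#   db_name = ...
+#   user = ...
+#   password = ...
+#
+#   [email]
+#   bot_email = ...
+#   bot_password = ...
+#   adu_email = ...
+#   user_email = ...
+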
+try:
+ # test ping the postgres server
+ postgres.query_data(host, db, user, password, 'test')
+except login_errors:
+ # the config.ini info is wrong; prompt the user until the login succeeds
+ exception_handling.print_error_message('login')
+ host = None
+ db = None
+ user = None
+ password = None
+ while (host is None) or (db is None) or (user is None) or (password is None):
+ host, db, user, password = postgres.login()
+ try:
+ postgres.query_data(host, db, user, password, 'test')
+ except login_errors:
+ exception_handling.print_error_message('login')
+ host = None
+ db = None
+ user = None
+ password = None
# get email address
-email = None
+email = cfg.get('email', 'user_email', fallback=None)  # prompt below if not set in config.ini
while email is None:
window_title = "Email"
prompt = "Please enter the email address where you would like notifications to go to."
email = emailing.ask_email(window_title, prompt)
-
+
+# on the first run, scan all of the data collected so far for outliers
schedule.every().second.do(main, host, db, user, password, email, True)
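+# (main returns schedule.CancelJob when first_run is True, so this job runs exactly once)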
+
+# now just scan for outliers once a day
schedule.every().day.at("00:00").do(main, host, db, user, password, email, False)
+
while True:
schedule.run_pending()
- time.sleep(1)
\ No newline at end of file
+ time.sleep(1)
else:
inp = simpledialog.askstring(window_title, prompt, parent=root)
root.destroy()
-
+
return inp
def login():
"""
Prompt the user for Postgres login info using a tkinter GUI
-
+
Returns:
user (str)
pg user name to use to connect to pg server
db = prompt_user("Postgres login",
"Please enter the name of the database. (e.g. ourfish)")
-
+
user = prompt_user("Postgres Login",
"Please enter your user name.")
database=db,
user=user,
password=password)
-
+
cur = conn.cursor()
if date is None: # if there is no archived data already
date = '2019-01-01'
WHERE date::date = \'{}\'""".format(date)
copy_sql = "COPY ("+sql+") TO STDOUT WITH CSV HEADER"
csv_path = Path('./data/postgres_dump.csv')
-
-
+
+
with open(str(csv_path), 'w') as f:
cur.copy_expert(copy_sql, f)
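+ # copy_expert streams the COPY ... TO STDOUT output from the server straight into the local CSV file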
-
+
cur.close()
conn.close()
-
- return pd.read_csv(str(csv_path))
+
+ return pd.read_csv(str(csv_path), engine='python')
def unravel(row_data):
"""
for ii in range(len(row_data)):
row_data[ii] = row_data[ii].split(': ')
row_data[ii][0] = row_data[ii][0].replace('"', '')
-
+
val = row_data[ii][1]
if val == '""':
val = np.nan
else:
val = ast.literal_eval(val)
row_data[ii][1] = val
-
+
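+ # e.g. (assuming the record format implied above) '"count": 3' becomes ['count', 3]
+ #  and '"total_price": ""' becomes ['total_price', nan]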
return row_data
def clean_postgres_data(pg_data):
that it contains.
"""
pg_data['data'] = pg_data['data'].apply(unravel)
-
- cols = [record[0] for record in pg_data.iloc[0]['data']]
+
+ cols = ['name', 'count', 'weight', 'weight_units',
+         'price_currency', 'unit_price', 'total_price']
+
# this will contain lists that will turn into new columns of pg_data
data_dict = {}
-
+
for col in cols:
data_dict[col] = []
keys = [record[0] for record in data]
vals = [record[1] for record in data]
row_dict = {k: v for (k,v) in zip(keys, vals)}
-
+
for col in cols:
data_dict[col].append(row_dict[col])
-
+
for col in cols:
pg_data[col] = data_dict[col]
-
+
pg_data = pg_data.drop(columns='data') # no longer need that poorly formatted col
pg_data = pg_data.rename(mapper={'name':'buying_unit'}, axis=1)
-
+
# use currencies to create country col
country_currency_dict = {
'IDR': 'IDN',
# create country column
pg_data['country'] = pg_data['price_currency'].apply(lambda x: country_currency_dict[x])
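+ # e.g. rows priced in 'IDR' get country 'IDN'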
-
+
# fix weight_units in the same manner that is done in clean_fish.py
pg_data['weight_units'] = pg_data['weight_units'].apply(fix_weight_units)
lbs_conv = np.array(lbs_conv)
pg_data['weight_kg'] = pg_data['weight']*kg_conv
pg_data['weight_lbs'] = pg_data['weight']*lbs_conv
-
+
pg_data = pg_data[~pg_data.duplicated()]
- return pg_data
\ No newline at end of file
+ return pg_data