From e127de87f53e417ed1f29e0e76eb4b0dc60100b3 Mon Sep 17 00:00:00 2001 From: ibidyouadu <60790401+ibidyouadu@users.noreply.github.com> Date: Sun, 10 May 2020 23:22:31 -0400 Subject: [PATCH] Delete BERT_neighbors_classification.ipynb --- BERT_neighbors_classification.ipynb | 719 ---------------------------- 1 file changed, 719 deletions(-) delete mode 100644 BERT_neighbors_classification.ipynb diff --git a/BERT_neighbors_classification.ipynb b/BERT_neighbors_classification.ipynb deleted file mode 100644 index 93d6c6f..0000000 --- a/BERT_neighbors_classification.ipynb +++ /dev/null @@ -1,719 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The first few cells will import some libraries and a script. For the most part this is identical to [xhlulu's Kaggle code.](https://www.kaggle.com/xhlulu/disaster-nlp-keras-bert-using-tfhub) Only difference is the first cell, which you only need to run if using google colab, and the importing of re and nltk in cell 3. " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 196 - }, - "colab_type": "code", - "id": "z2EOfe64FcUh", - "outputId": "fb8d8809-4d89-40b4-833f-7836feff4577" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting sentencepiece\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/98/2c/8df20f3ac6c22ac224fff307ebc102818206c53fc454ecd37d8ac2060df5/sentencepiece-0.1.86-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", - "\r", - "\u001b[K |▎ | 10kB 21.6MB/s eta 0:00:01\r", - "\u001b[K |▋ | 20kB 6.7MB/s eta 0:00:01\r", - "\u001b[K |█ | 30kB 7.7MB/s eta 0:00:01\r", - "\u001b[K |█▎ | 40kB 8.5MB/s eta 0:00:01\r", - "\u001b[K |█▋ | 51kB 7.7MB/s eta 0:00:01\r", - "\u001b[K |██ | 61kB 8.5MB/s eta 0:00:01\r", - "\u001b[K |██▏ | 71kB 8.9MB/s eta 0:00:01\r", - "\u001b[K |██▌ | 81kB 9.3MB/s eta 0:00:01\r", - "\u001b[K |██▉ | 92kB 9.6MB/s eta 0:00:01\r", - "\u001b[K |███▏ | 102kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███▌ | 112kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███▉ | 122kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████ | 133kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████▍ | 143kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████▊ | 153kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████ | 163kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████▍ | 174kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████▊ | 184kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████ | 194kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████▎ | 204kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████▋ | 215kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████ | 225kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████▎ | 235kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████▋ | 245kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████▉ | 256kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████▏ | 266kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████▌ | 276kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████▉ | 286kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████▏ | 296kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████▌ | 307kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████▊ | 317kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 327kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████▍ | 337kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████▊ | 348kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 358kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████▍ | 368kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████▋ | 378kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 389kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████▎ | 399kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████▋ | 409kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 419kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████▎ | 430kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████▌ | 440kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████▉ | 450kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████▏ | 460kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████▌ | 471kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████▉ | 481kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████▏ | 491kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████▍ | 501kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████▊ | 512kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 522kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████▍ | 532kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████▊ | 542kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 552kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▎ | 563kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▋ | 573kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 583kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▎ | 593kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▋ | 604kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 614kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▏ | 624kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▌ | 634kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▉ | 645kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▏ | 655kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▌ | 665kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▉ | 675kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▏ | 686kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▍ | 696kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▊ | 706kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 716kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▍ | 727kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▊ | 737kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 747kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▎ | 757kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▋ | 768kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 778kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▎ | 788kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 798kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 808kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 819kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 829kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 839kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 849kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 860kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 870kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 880kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 890kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 901kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 911kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 921kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 931kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 942kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 952kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 962kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 972kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 983kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 993kB 10.0MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 1.0MB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 1.0MB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 1.0MB 10.0MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 1.0MB 10.0MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 1.0MB 10.0MB/s \n", - "\u001b[?25hInstalling collected packages: sentencepiece\n", - "Successfully installed sentencepiece-0.1.86\n", - "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", - "[nltk_data] Unzipping corpora/stopwords.zip.\n", - "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", - "[nltk_data] Unzipping corpora/wordnet.zip.\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 1, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "# Use the following if you need the libraries. If working in google colab, you will need these lines.\n", - "\n", - "# !pip install sentencepiece\n", - "# import nltk\n", - "# nltk.download('wordnet')" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "gR6-jumvFhr9" - }, - "outputs": [], - "source": [ - "!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "colab_type": "code", - "id": "75PqQCwcFlji", - "outputId": "41863890-3b6e-4b60-9b60-e4d5158a73dd" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DONE\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import tensorflow as tf\n", - "from tensorflow.keras.layers import Dense, Input\n", - "from tensorflow.keras.optimizers import Adam\n", - "from tensorflow.keras.models import Model\n", - "from tensorflow.keras.callbacks import ModelCheckpoint\n", - "import tensorflow_hub as hub\n", - "import re\n", - "from nltk.stem import WordNetLemmatizer\n", - "from nltk.corpus import stopwords\n", - "\n", - "\n", - "import tokenization" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The following four cells are unchanged from the Kaggle code" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "ULdRsYGtFnro" - }, - "outputs": [], - "source": [ - "def bert_encode(texts, tokenizer, max_len=512):\n", - " all_tokens = []\n", - " all_masks = []\n", - " all_segments = []\n", - "\n", - "\n", - " for text in texts:\n", - " text = tokenizer.tokenize(text)\n", - " \n", - " text = text[:max_len-2]\n", - " input_sequence = [\"[CLS]\"] + text + [\"[SEP]\"]\n", - " pad_len = max_len - len(input_sequence)\n", - " \n", - " tokens = tokenizer.convert_tokens_to_ids(input_sequence)\n", - " tokens += [0] * pad_len\n", - " pad_masks = [1] * len(input_sequence) + [0] * pad_len\n", - " segment_ids = [0] * max_len\n", - " \n", - " all_tokens.append(tokens)\n", - " all_masks.append(pad_masks)\n", - " all_segments.append(segment_ids)\n", - " \n", - " return np.array(all_tokens), np.array(all_masks), np.array(all_segments)" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mo7LXhGLFrCz" - }, - "outputs": [], - "source": [ - "def build_model(bert_layer, max_len=512):\n", - " input_word_ids = Input(shape=(max_len,), dtype=tf.int32, name=\"input_word_ids\")\n", - " input_mask = Input(shape=(max_len,), dtype=tf.int32, name=\"input_mask\")\n", - " segment_ids = Input(shape=(max_len,), dtype=tf.int32, name=\"segment_ids\")\n", - "\n", - " _, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])\n", - " clf_output = sequence_output[:, 0, :]\n", - " out = Dense(1, activation='sigmoid')(clf_output)\n", - " \n", - " model = Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)\n", - " model.compile(Adam(lr=2e-6), loss='binary_crossentropy', metrics=['accuracy'])\n", - " \n", - " return model" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 53 - }, - "colab_type": "code", - "id": "ukl7WfAiFtqL", - "outputId": "94997e5e-b150-4d00-ba59-ba515d56a169" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9.67 s, sys: 2.01 s, total: 11.7 s\n", - "Wall time: 13 s\n" - ] - } - ], - "source": [ - "%%time\n", - "module_url = \"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1\"\n", - "bert_layer = hub.KerasLayer(module_url, trainable=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "l-N0Z4J_62Nh" - }, - "outputs": [], - "source": [ - "vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()\n", - "do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()\n", - "tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This next cell is what separates this notebook from the Kaggle code. The function used to generated neighborhoods is defined. A small lemmatization function is defined and used in the generator function in order to extract a keyword index. The basic BERT tokenizer in conjunction with some regex filtering is used to clean and tokenize the text, again to find the keyword index. Once the keyword index is found, the neighborhood is generated by finding how many tokens to the left and how many tokens to the right to use as the neighborhood. These tokens are joined together with spaces and returned as output." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "x66XjPOMFvMd" - }, - "outputs": [], - "source": [ - "def lemmatize(x):\n", - " lemmatizer = WordNetLemmatizer()\n", - " return lemmatizer.lemmatize(lemmatizer.lemmatize(x,pos='v'))\n", - "\n", - "kwds = 'blackout duke dukeenergy electric electricity fpl outage power'.split()\n", - "basic_tok = tokenization.BasicTokenizer()\n", - "\n", - "def neighborhood_generator(text,kwd):\n", - " parse_seq = r'[-_+<=>\\[\\]{}`&;\\/()#!@,.\\n?]|\\x80|\\:\\/\\/.*$'\n", - " split_tokens = text.lower().split()\n", - " stop_words = set(stopwords.words('english'))\n", - " kwd_idx = 0\n", - "\n", - " def tok_process(tok): #remove special chars, apply basic BERT tokenizer, lemmatize, and remove stop words\n", - " parse_tok = re.sub(parse_seq, ' ', tok)\n", - " tok_tokens = basic_tok.tokenize(parse_tok) #tokens of the token!\n", - " tok_lemmatized_tokens = [lemmatize(tok_tok) for tok_tok in tok_tokens]\n", - " # tok_lemmatized_tokens = [tok_tok for tok_tok in tok_lemmatized_tokens if tok_tok not in stop_words]\n", - " return tok_lemmatized_tokens\n", - "\n", - " if kwd in split_tokens: #see if we can get away without going through the text processing\n", - " kwd_idx = [idx for idx in range(len(split_tokens)) if split_tokens[idx]==kwd][0]\n", - " else:\n", - " for idx in range(len(split_tokens)):\n", - " tok = split_tokens[idx]\n", - " tok_lemmatized_tokens = tok_process(tok)\n", - " if kwd in tok_lemmatized_tokens:\n", - " kwd_idx = idx\n", - " break\n", - "\n", - " neighborhood_radius = 3\n", - " before_kwd = split_tokens[:kwd_idx]\n", - " after_kwd = split_tokens[kwd_idx+1:]\n", - "\n", - " before_idx = [idx for idx in range(kwd_idx)][::-1]\n", - " after_idx = [idx for idx in range(kwd_idx+1,len(split_tokens))]\n", - " start_tok_idx = 0 #if search extends beyond startin index, use the first token\n", - " end_tok_idx = len(split_tokens) #if search extends beyond last index, use the last index\n", - " before_sig_toks = 0\n", - " after_sig_toks = 0\n", - "\n", - " for idx in before_idx:\n", - " tok = split_tokens[idx]\n", - " tok_lemmatized_tokens = tok_process(tok)\n", - " if len(tok_lemmatized_tokens) > 0: # significant token\n", - " before_sig_toks += 1\n", - " if before_sig_toks == neighborhood_radius:\n", - " start_tok_idx = idx\n", - " break\n", - " \n", - " for idx in after_idx:\n", - " tok = split_tokens[idx]\n", - " tok_lemmatized_tokens = tok_process(tok)\n", - " if len(tok_lemmatized_tokens) > 0:\n", - " after_sig_toks += 1\n", - " if after_sig_toks == neighborhood_radius:\n", - " end_tok_idx = idx\n", - " break\n", - " \n", - " neighborhood = split_tokens[start_tok_idx:end_tok_idx+1]\n", - " neighborhood = ' '.join(neighborhood)\n", - " \n", - " return neighborhood" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Iterate through all the tweets to generate the neighborhoods." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "UBn2jMFuBC27" - }, - "outputs": [], - "source": [ - "town = pd.DataFrame(columns=['text']) # a town is a collection of neighborhoods\n", - "neighborhood_vals = []\n", - "\n", - "data = pd.read_csv('irma_power_tweets.csv')\n", - "\n", - "for idx in data.index:\n", - " row = data.iloc[idx]\n", - " text = row.text\n", - " kwd = row.kwd\n", - " new_row = neighborhood_generator(text,kwd)\n", - " neighborhood_vals.append(new_row)\n", - "town.text = neighborhood_vals\n", - "town['original_twt'] = data.text\n", - "town = town[['original_twt', 'text']]\n", - "\n", - "from sklearn.model_selection import train_test_split\n", - "train, test, train_labels, test_labels = train_test_split(town.text, data.topic_related,\n", - " test_size=0.2,\n", - " random_state=42)\n", - "\n", - "train_input = bert_encode(train, tokenizer, max_len=160)\n", - "test_input = bert_encode(test, tokenizer, max_len=160)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "gu5PX-saIhSQ" - }, - "source": [ - "The next three cells are unchanged from the Kaggle code. These blocks build the model and predict the outputs for the data." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 410 - }, - "colab_type": "code", - "id": "OYjF80BvIjId", - "outputId": "327fa3e6-2fd1-4f53-bca1-8bc2456ca391" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"model_1\"\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_word_ids (InputLayer) [(None, 160)] 0 \n", - "__________________________________________________________________________________________________\n", - "input_mask (InputLayer) [(None, 160)] 0 \n", - "__________________________________________________________________________________________________\n", - "segment_ids (InputLayer) [(None, 160)] 0 \n", - "__________________________________________________________________________________________________\n", - "keras_layer (KerasLayer) [(None, 768), (None, 109482241 input_word_ids[0][0] \n", - " input_mask[0][0] \n", - " segment_ids[0][0] \n", - "__________________________________________________________________________________________________\n", - "tf_op_layer_strided_slice_1 (Te [(None, 768)] 0 keras_layer[1][1] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 1) 769 tf_op_layer_strided_slice_1[0][0]\n", - "==================================================================================================\n", - "Total params: 109,483,010\n", - "Trainable params: 109,483,009\n", - "Non-trainable params: 1\n", - "__________________________________________________________________________________________________\n" - ] - } - ], - "source": [ - "model = build_model(bert_layer, max_len=160)\n", - "model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 124 - }, - "colab_type": "code", - "id": "g3JP8ENtIry4", - "outputId": "6360a6df-3da4-4087-ed43-23fc52a2c20e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/3\n", - "94/94 [==============================] - 947s 10s/step - loss: 0.3609 - accuracy: 0.8203 - val_loss: 0.2471 - val_accuracy: 0.9200\n", - "Epoch 2/3\n", - "94/94 [==============================] - 942s 10s/step - loss: 0.1991 - accuracy: 0.9332 - val_loss: 0.2080 - val_accuracy: 0.9227\n", - "Epoch 3/3\n", - "94/94 [==============================] - 910s 10s/step - loss: 0.1244 - accuracy: 0.9639 - val_loss: 0.2133 - val_accuracy: 0.9253\n" - ] - } - ], - "source": [ - "train_history = model.fit(\n", - " train_input, train_labels,\n", - " validation_split=0.2,\n", - " epochs=3,\n", - " batch_size=16\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "S3KrM3vnUaru" - }, - "outputs": [], - "source": [ - "test_pred = model.predict(test_input)\n", - "train_pred = model.predict(train_input)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After producing outputs, we're interested in seeing how well the model has done. So the following block defines a scoring function to output some metrics for both the training and test sets. Then a function for drawing a color map is defined." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "l9C1RyC0mYLF" - }, - "outputs": [], - "source": [ - "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score\n", - "from sklearn.metrics import confusion_matrix\n", - "import itertools\n", - "from matplotlib import pyplot as plt\n", - "plt.rcParams['figure.figsize'] = [10,10]\n", - "\n", - "\n", - "def scoring(Ytrain, Ytrain_pred, Ytest, Ypred):\n", - " \n", - "\n", - " acc_train = accuracy_score(Ytrain, Ytrain_pred)\n", - " prec_train = precision_score(Ytrain, Ytrain_pred)\n", - " rec_train = recall_score(Ytrain, Ytrain_pred)\n", - " f1_train = f1_score(Ytrain, Ytrain_pred)\n", - " \n", - " acc = accuracy_score(Ytest, Ypred)\n", - " prec = precision_score(Ytest, Ypred)\n", - " rec = recall_score(Ytest, Ypred)\n", - " f1 = f1_score(Ytest, Ypred)\n", - "\n", - " scores = {'accuracy': acc,\n", - " 'precision': prec,\n", - " 'recall': rec,\n", - " 'f1 score': f1}\n", - "\n", - " scores_train = {'accuracy': acc_train,\n", - " 'precision': prec_train,\n", - " 'recall': rec_train,\n", - " 'f1 score': f1_train}\n", - "\n", - " print('Ytrain vs Ytrain_pred')\n", - " for metric in scores_train.keys():\n", - " print(f'{metric}: {scores_train[metric]:.2f}')\n", - " \n", - " print('\\nYtest vs Ypred')\n", - " for metric in scores.keys():\n", - " print(f'{metric}: {scores[metric]:.2f}')\n", - " \n", - "\n", - "\n", - "def plot_confusion_matrix(cm, labels: list, normalize=False, title='Confusion Matrix', cmap=plt.cm.Oranges):\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " plt.figure(figsize=(10,10))\n", - " plt.imshow(cm, interpolation='nearest',cmap=cmap)\n", - " plt.title(title,size=24)\n", - " plt.colorbar(aspect=4)\n", - " tick_marks = np.arange(len(labels))\n", - " plt.xticks(tick_marks, labels, size=14)\n", - " plt.yticks(tick_marks, labels, size=14)\n", - " \n", - " fmt = '.2f' if normalize else 'd' # format of decimal precision to display\n", - " thresh = cm.max()/2 # threshold to change color of text depending on color of cell\n", - " for i,j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): # realistically, cm.shape[0] = cm.shape[1] = len(labels)\n", - " plt.text(j, i, format(cm[i,j], fmt), fontsize=20, # i,j rev bc of diff btwn matrix and list of lists indexing\n", - " horizontalalignment='center',\n", - " color='white' if cm[i,j] > thresh else 'black') # if the cell color is too dark, make the text white\n", - " plt.grid(False)\n", - " plt.tight_layout()\n", - " plt.ylabel('True label', size=16)\n", - " plt.xlabel('Predicted label', size=16)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's see the results." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 759 - }, - "colab_type": "code", - "id": "YoE6nWXFquEK", - "outputId": "8c0d739f-b7fd-40c2-dd16-5e90dae6061f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ytrain vs Ytrain_pred\n", - "accuracy: 0.97\n", - "precision: 0.97\n", - "recall: 0.99\n", - "f1 score: 0.98\n", - "\n", - "Ytest vs Ypred\n", - "accuracy: 0.94\n", - "precision: 0.94\n", - "recall: 0.98\n", - "f1 score: 0.96\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtsAAAIhCAYAAACfeXbXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdd5hcVfnA8e8bCDWQSgk1VAFBWugIhCJNpSiCIhBBsSDSVIryo4kiICBSJCAGUGkK0ntHpItKB+md0GtCkvf3x71LJpsts9md3ZnJ9/M895mZe88999yZJbxz5j3nRGYiSZIkqef16+sGSJIkSc3KYFuSJEmqEYNtSZIkqUYMtiVJkqQaMdiWJEmSasRgW5IkSaoRg21J6mMR8YWIuCEi3o6IyRGRETG6D9qxQXntZ3r72pqan4XUPAy2pQYTEWPL/wm33iZFxJsRcXtE7BsRs7dz/oh2zm9re7uN89sqNzEixkXEreW156gof2gXrtd622A63p/BEfHjiLguIl6IiI8j4r2IeDwi/hwRW0fEzF2tt1Yi4vPAVcCGwADgdeBV4KO+bFejaPX3cmkV5S+uPKeH27J1+fe+QU/WK6mx1c3/cCR12SfAmxWvZwMGA+uU224RsUFmvt5BHW8BEzo4/k4Hx95lSkA4KzAU+Hy5fSci1s/M14D3KYLH1mYp2wswDpjURpmO2jaNiPg2cCwwsFU7ZwaWKrdvAI9HxHaZ+Z+u1F8jP6Lo+LgAGJ2ZfRlkfwg8BrzYh23ojs0iYp72/uYjYiiwZQ2vvzWwS/n85m7W1eifhaSSPdtS47ojM+ev2AYBg4AfA5OB5YCjOqlj21Z1tN4+08G5e1WUGwwMAf4PSGAZ4CSAzDy2rbqBbSvqWq2d699R7ZsREQcDp1ME2neX9c+dmQMzc05gXmBn4AFgaWCVauuusc+Wj+f0caBNZt6dmctk5kZ92Y7p9BzQH/h6B2W+XpZ5tlda1A0N/llIqmCwLTWRzHwnM38D/KHc9aVevPZbmXkEcEa5a5uImKs3rh0RmwGHlS/PANbKzIsz872K9r2emedQBNl70cVe8xpqSfd5v09b0fj+VD7u1EGZnVuVlaSaM9iWmlNLesScfXDta8vHltSN3nA0EMC/gO9n5uT2CmbhRODc1sciYu4y5/bfEfF+uf0nIg6LiIFtVFeZkz62fL1LRNxV5om/GxE3RcQmbZzXkjM8otx1U0Uu8c1lmRHRSW5xRwPpImKWiNgrIu4oB19+EhGvlvd3ckSsVW1dFWVGRcRFEfFKREwoHy+OiA07OKflvkZExCIRcXqZTz8+Ip6OiGMjYu72zq/S7cDTwMiIWLaNNiwDrEbRq31LB22dKSI2j4jTIuK+8v2aEBEvtXefLe8bU1JIDmk9/qB12Zb3uLzWVRHxWhSDY/duq1zF+buX+z+OiOXbuYfTyjLPRcSgDt81STVnsC01pxXKxyf74NpR8Xymml8sYm2m3O9RmTmxmvMyc6oANiKWpPiScgjwOYr7iLLu/wP+ExEdfnmIiDOAscCqFKk8cwEbAFdHxFdaFX+13Fq+GLxVse9NuqkcBHotcAKwFjA3Re/5UIr7+wFFD39X6vwFcCOwDUVazgfl49bADRHxq06qWJHiC9G3y/b0o/iysV95fv+utKeVZEqP9c5tHG/Z9+eybHuWBa4Edqf4FWQ2il9BhjPlPg9sdc4Eis/t4/L1B0z5LFu2aUTEfuW1NqVIb2n3S2KLzBwDXE4xTuJPETFLqzq/WLY9KcYATDPIWVLvMtiWmkjZM7s3RTADcHwfNOMLFc+f7oXrjSofJwFXTE8FZcDyN2BR4HmKexhQbhtT5AMvAlwcEbO2U81WwI7A9ylzxYHFgVsp/q39XVTMglKRu/58uasyf35buu8bwPoUA+12AuYoc+tnLe/zh8C/q60sInYAfla+PAmYt6xvHuB35f4DIuKbHVQzliJnfoXMnJvi/d0NGA+MBL5TbXvacXb5uGNEfPr/t4gIis+mskx7JgBnUgTAA8uc/wHAfMDBFH9nR0bEGi0nZOYd5Wd5frlrmnEKbVxnPuDXwCnA8PK9HAD8tYr73A14jeLLyy8q7nMepqRxHZ+ZN1ZRl6QaM9iWGtfa5U/4LdvbFLOHHE8RRO2cmWM7qeOiVnW03g6vtjFRTLn3c4pAAODqzBw3PTfWRS0pA//LzA+ms47tKXp7PwG2yMzrcoobgC3KY59lStDW2iDg25n5+8z8ECAzn6YYlNfSM7r2dLZveqxZPp6dmX/KzI/LNk3KzOcy8+TM7KwnGvg0WD2ifHleZu7Z8tlm5huZ+SOmpOUcURnotvIixfv7YHnu+Mw8k2JgK8BXu3SHrWTmk8A/gYUpflFoMYriy9LdmflYJ3U8npm7Zea1mfluxf7XMvMXFGMDAvhed9pK0WN+QWbukZmvltf4ODNf6OzEcpafli/U+0XE+uXzMRRB/IPAQd1sn6QeYrAtNa7+FP9jbdkqc4qHAPOWQVJHBreqo/XWUR7tbyuC8rcoUh+OoAhEnqH7wUi1hpaP3Um9aAnyLmkJBCtl5kNM6XH8Wjt1PAf8pY1zX6KYHQWgzRzbGmkJFIf3QF0rAUuWz3/RTpmWAaojgNXbKXNcZo5vY//fy8eeeH9aeq4rU0l2bnWsOy4rH9fpgbqOmd4TM/MyiuC6H3B2ROxDkeYyAfhmO++zpD5gsC01rlsyM1o2igGJi1Pk4g6gmG/6jI4qAEZV1tHGtncH587NlKC8chDWVRRpAnU/vVqFlmkAb+qgTMtP8u1NGXhv6zzwCi1zJQ9u53gtXFU+bhURl0bEtlHMMz09Wu759fKLxzTKHuMXW5Vv7Z529vfk+3M+RVrKVyJijigWWPoKRRB6XjUVRMTsEbFPRNxcDlz8pGKg47/KYgt0s50f0YU0nnbsSzEuYxHguHLfwZnZ3Xol9SCDbalJlOkBT2fmqUxJddg1Itat0SW/VRHoD6OY1/ppYHNg/xpdsy1vlI9DulHHPOVjRwuItPy8P7SdXwzea2Nfi5aBc90ZANglmXkLxcDOiRRTQP4NGBcRj5Szf3Rlpphq3h+Y8h7N087x9t6jlven2wutZeZbFAMIB1AM5Ny2fH5lZr7R0bkAETGcIq/8OIqc93kogveWlT1bUqO6O9PPGx3NmlONMm1qj4pdd1J8yZZURwy2pSaUmdcAr5Qv20t76MnrvZGZF1MMLPwQ+HlEbFHr65YeKR+XiIjuBkCzdbcx9aSc93xp4EDgGorUkmUoZv94OCLamrWjI43y/lSmknQ1heQEivfsKYoe8SGZOSAz5y0HOq7Z4dnVa2vF1Omxa8XzpSh+aZJURwy2peb1XPm4eG9dsByg9pvy5QmVs2/UUEvqx0xM/1LcLct7L9JBmYXKxzc6SBfpaZ9OYxgR7QW6bc7/3aL8teOozNyMovd/FMUMKTMDp0TEvFW0o+X9WbiTci3vUZvLpfeiqyh6oDcGNqLI5+90pppyVpqtypc7ZuZFZU95pboJZiNiR4rBvRMplnYfSjGTiqQ6YrAtNa8Fy8dPevm6x1PMM7wUMLrWF8tiSfeWQY0HVBvgt0oFub98HNVW2VLLYib3d1Cmp1XOkbxQO2VWq7ayMtXoZuCLFH8Xc1JMudeZlnueMyLaHPwYEUsz5W+uN9+jaWTmJxT52f3K7fzMrGbF0GEUUyPClNzs1jbu4PyWtJDOBiZ3W0QsTDEFI8DhFIMjPwI2i4gf1Pr6kqpnsC01oYhYhz4KfMqewJaBmQdERM0XtqHIEU9gZYre2nb/bYvCnhRT8rVomWlk84hYuY1zPsuUGUsu6Jkmdy4z36eY2QWm9Lh+qhzw+O3W+8tjs7S1vzSBKWkM7c0bXukBpiyQ1N6UcoeWj88wZfaVvnQyxa8sv2FKUNqZ95iy4M0KrQ+W+dx7dnB+ywwwNV21sfyiOLa8zp3ALzPzUaaMlTim/PIjqQ4YbEtNpJxFYWumzHn8IX3zs/LxFD9tL8HUQW1NZOaVTJkH+jvAHRGxdUQMaCkTEfNExE7AfcCJQGUwej5Tlrj/e0Rs3NLzHREbUazy1x94iGIFwt7UEtz/PCK+3NJzHxFrAtcz9X1UOjsi/hgRm0bEXC07I2IEcBZF/vVHwG2dNaBMm/l5+XKriPhdy8wmETE0Ik5kyuf88+4O/OsJmfloZv643B6u8pz3KIJXgDMjYiWAiOhX/h3cQse91i0ztWxWBua1sg/FLy0fADtlZssXp5Mo/ibmAM7ppTQuSZ3wP0Spca0dEa9UvJ6J4mfwFh8AO2RmRzNIXBQRnf28vlpmPt9Jmalk5rMRcQHFKoYHRcRfah2AZeYhEfEScDSwBnAxQES8QxEoz1FR/EEqpqHLzAnlcurXU6yueB3wYRlvt5z3HMUqj709f/FRFL3qiwOXAOMjYiJFCshzwI+Ac9o4bzaKfN7RQJbvwyxMuZ9JwHerXXgoM8+PiBUoVpH8IfCDss6BTOm4OSoze/vLSE/bh2IcwArAvyLiA4r7m50i93tXpswL3trFFJ/X0sALEfEaxUwmZOaInmhcRCwP/LJ8uV85ToLyGhkRo4H/Usx1/nOm/OIgqY/Ysy01rtaL2gwD3qfoof0N8NnMvLyTOjpb1GY+iiB+ehxdPi5LMatDzWXmacBiFD+n3wi8TBEkJfAE8CeKafBWaj1fdBm0rEiR/1q5sM2DFL3mn8vMx2t9D62VaTlrUyxg8hLFv9tvUCyRvgpTpttr7QDgp8DVFDNrzELxWf4P+COwSma2FaR31JafUww4vIRiAOKAsi2XAhtn5oFdqa8eZeZdwFoUAfVbFP+dvQacRrG4T7tzWJdfXEYBF1EMEp2H4svboj3RtjI16E8UqT9XlH/vrdvwIlOmA/xZezn2knpP9N6gekmSJGnGYs+2JEmSVCMG25IkSVKNGGxLkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk1YrAtSZIk1YjBtiRJklQjBtuS1AQiYqaIuDcistwOreKc2SNi74i4PSJej4iPI+LZiLgqIvbr4LzhEXF0RPwnIt6PiAkR8VJEXBoRX+7RG5OkBudy7ZLUBCJib+D4il2HZeahHZRfDrgMWLzc9QnwITCwfD0pM2du47w1gSuBwS3lyvPmqih2NjA6/R+MJNmzLUmNLiIWAo4AngVeraL8wsBNFIH2P4D1gNkycxAwoHz9uzbO6w+cTxFoPwVsUp43NzAcOKUsujOwU/fuSpKag8G2JDW+31EEyT8CPq6i/O+BeYGbgQ0z87bMnAyQmR+Ur/dp47x1gUXK56Mz8/rMnFie90pm7gHcUh7fdrrvRpKaiMG2JFUpIp4p86E3iIghEXFcRDwdEeMj4sWIOD0ihvdym74MbA1cnpmXVlF+RWCL8uUPMnNCFy43X8Xzf7VT5r7ycc4u1CtJTctgW5K6biHgfmAfih7iBBYAvg3cERGDOzi3x0TEnMBJwEfAnlWetmP5+EBmPtLFSz5T8XzldsqsWj7e38W6JakpGWxLUtf9DngLWDsz56RI4dgKeBsYARzYS+04AlgY+GVmPlPlOWuVj/+KiEERcWxF7/wrEXFRRKzbzrl3A/8un4+NiI0jYmaAiJg/Ik4C1gdeAo6drjuSpCZjsC1JXTce2Dgz/wmQmRPLFI5flMe/Wlk4IkZXTMnX1W1sWw2IiJUpcrQfB47uQtuXqnh+L7AfsCDFjCLzAdsAt7Y19V+Z170t8BDF4MrrgI8j4l3gZWBX4Bxg9cx8vQttkqSmZbAtSV03JjPfaGP/38vHxcoUjxYfUcwSMj3bO60vEhH9gNOAmYAfdjHvelD5uAvFYMc9gLkzczCwGHAFEMAxEbF+65Mz8ylgY+DactdMTJn2rz9FL3+vpNFIUiOYZg5VSVKn7mln/4sVzwcBHwBk5vkUU+b1lD2A1YALMvO6Lp7br+Lxl5nZMl0fmflMRHyVord8YWB/pswuAkBEfAn4CzAB+B5wNfAmsCxwMEXP+EYRsXFmtvc+SdIMw55tSeq699ramZmV0+71r8WFI2IBinSV9ygGaHbV+xXPf9v6YHkPp5YvN4iImSquvRjwV4qZRrbJzNMy89nMfC8z787MLwE3AHPTxjzdkjQjMtiWpMbyK4pg9mjg3YgYULlRpIAAzFKxr9JL5eObmTmunWs8Vj7ODgyt2P99YBbgvsy8tZ1zTygf14iI+au8J0lqWgbbklRjEbF9OdPH9Gyte58XLR+PoOjdbr21LDpzYMW+Sg92sfmVS64vWz4+3UH5pyqej+jitSSp6ZizLUm1NztTLwjTFQN7siHA9cD2wJCIGNZO7/Yy5eN7QOVA0Mnl4yK0b9GK522m20jSjMSebUmqscwcm5kxndvoVnVt0FF54Nmy6GEV+ypdzJS87b1btzUiZqMY+AhwTcsy7qWWObZXLacebMt3ysd3gEc7fXMkqckZbEvSDKScsvBX5cufRsQPygCbiFgUuJBiJpIJTJk3vMWZFHOMzwxcEhFbVZy7cEScQTEbCcApmTmptncjSfXPNBJJmvH8iiL/+pvAycAJEfE+U+bHngDskpn/rjypnBpwF+AsioD878DkiPiIYoaSFpcDh9b0DiSpQdizLUkzmCzsBOwA3EiRWz0n8BzwR2DlzDyvnXPPBz5HEaQ/DHwMzEqxAM9VwDeAL3dxoR1JalqRmZ2XkiRJktRl9mxLkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk14jzbqplhgwbkiOFD+7oZUvObradXdJfUlvse+O+4zJynr9uhxmKwrZoZMXwod591YF83Q2p6sfTmfd0EaYbQb/CIZ/u6DWo8ppFIkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk1YrAtSZIk1YjBtiRJklQjBtuSJElSjRhsS5IkSTVisC1JkiTViMG2JEmSVCMG25IkSVKNGGxLkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk1YrAtSZIk1YjBtiRJklQjBtuSJElSjRhsS5IkSTVisC1JkiTViMG2JEmSVCMG25IkSVKNGGxLkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk1YrAtSZIk1YjBtiRJklQjBtuSJElSjRhsS5IkSTVisC1JkiTViMG2JEmSVCMG25IkSVKNGGxLkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk1YrAtSZIk1YjBtiRJklQjBtuSJElSjRhsS5IkSTVisC1JkiTViMG2JEmSVCMG25IkSVKNGGxLkiRJNWKwLUmSJNWIwbYkSZJUIwbbkiRJUo0YbEuSJEk1MnNfN0CSJEnTWnLOfvnhpOzrZjS8l8dzTWZu1lfXN9iWJEmqQx9OSr47wlCtuw59bOKwvry+aSSSJElSjfh1SZIkqQ5FFJsam8G2JElSnTIFofEZbEuSJNUpe7Ybn1+YJEmSpBqxZ1uSJKlO2bHd+Ay2JUmS6lBgGkkzMNiWJEmqU+b7Nj6DbUmSpDplz3bj8wuTJEmSVCP2bEuSJNUpO7Ybn8G2JElSHXKAZHMw2JYkSapTxtqNz5xtSZIkqUbs2ZYkSapHAf3s2m54BtuSJEl1yli78RlsS5Ik1SEHSDYHg21JkqQ6Zazd+BwgKUmSJNWIPduSJEl1ql9kXzdB3WSwLUmSVKdMI2l8BtuSJEl1KDDYbgbmbEuSJEk1Ys+2JElSnXLqv8ZnsC1JklSnjLUbn8G2JElSnXK59sZnsC1JklSHHCDZHBwgKUmSJNWIPduSJEn1KBwg2QwMtiVJkuqUsXbjM9iWJEmqUw6QbHzmbEuSJEk1Ys+2JElSHXI2kuZgsC1JklSnHCDZ+Ay2JUmS6pSxduMz2JYkSapT9mzXVkTMBtwKzEoRF/81Mw+JiMWA84ChwH3ATpk5ISJmBc4GVgXeALbPzGc6uoYDJCVJkjSjGg9smJkrAisBm0XEmsCvgeMzc0ngLWC3svxuwFvl/uPLch0y2JYkSapDQRGouXVv60gW3i9f9i+3BDYE/lruPwvYuny+Vfma8vhGER3//mAaiTQDyEzOuOQfnHnpP3joqZfJTJYdMT+7brUOu2+9Lv36Tf3P0bvvf8TJf72ZC6+/n2defoPMZJH5h7DVeiuy5/ajmGfwXH10J1L9euPNt7j48mu48tob+e/Dj/Hiy68wS/9ZWGG5zzB6x+341o7bTfXf2hP/e5qLLruaa2+8lSf+9wyvvj6OwYMGsubIldjr+7sy6vNr9+HdqF6YRlJ7ETETRarIksDJwP+AtzNzYlnkBWDB8vmCwPMAmTkxIt6hSDUZ1179BtvSDGCnQ/7Iudfcw7yD52KHL4xkjlln4fq7H2GPX5/LP//zFGcdOvrTsu+8/xFrfusoHn/uNUYuuyijv7gWALf96wmO/ONVnHXFndw99gDmGzp3H92NVJ8u/PsV/GC/nzN8/nkZte5aLLzQArz6+jguvuxqvvOj/bn6+pu5YOwptHSC/d+Rv+H8iy9nuWWWYvNNNmDI4EE8/uRTXHrV9Vx61fWccNQh/Oi73+rju1JfM9buEcMi4t6K12Myc0zLi8ycBKwUEYOAi4FlevLiBttSk7v45gc495p7WGyBodz5xwMYNmgAABM+mchXDxjDn666i63WX5FtR60MwOkX38bjz73G6C+uxR8O3nmqur51+FmcfcWdjLn4Ng7+9pa9fi9SPVt6ycW55C9nsOWmG07Vg/3Lg3/CGhttzd8uvYqLLruar3x5cwA23Xh9frr391j5c8tPVc8t/7iTL2yzEz/9v1+x3VZbMnz+eXv1PqQmNC4zR3ZWKDPfjoibgLWAQRExc9m7vRDwYlnsRWBh4IWImBkYSDFQsl3mbEtN7u83PwDAvt/Y+NNAG2CW/jNz+He/BMApF9786f6nXip+Cfvi5z83TV1fKve9/vb70xyTZnQbrrc2X9p842nSsuafb16++60dAbj59js/3T/6G9tNE2gDrL/Ommyw7ppMmDCBO+6+r7aNVl2LKJZrd+ve1vF7HPOUPdpExOzAJsAjwE3AV8tiuwCXlM8vLV9THr8xM7OjaxhsS03ulTfeBWCxBYdNc2zxBYp9tz3wJBM+KVLTPrv4AgBc+Y//TlP+ituLfRut1qO/sElNr3//4ofkmWeeqbryM3etvJpXuHV768Rw4KaI+A9wD3BdZl4O7A/sGxFPUuRk/6Es/wdgaLl/X+CAzi5gGonU5IYNmhOAZ16a9leull7siZMm89SL41hmxPzs9uV1OO/aezjz0jt48H8vsfbnlgDg9gee5OGnX+YX3/8yW62/Yu/dgNTgJk6cyDnnXQTAZhut32n5Z597gRtu/QdzzDE76629Rq2bpzrXWc+suicz/wOs3Mb+p4DV29j/MbBdV65hsC01uS3WWYHzrr2X48+9ge03GcmQgUXw/cnESRw65vJPy7313ocAzDZrf64/eW/2Pu5Cxlx8G3c/9MynZb6y4Spstd5Kvdp+qdEdcNivefCRx9hik1Fs2kmwPX78eL65+96MHz+BXx92IIMHDeylVqoeBaYgNAODbanJ7bDJSP501V1ce+fDLL/D4Xx5vc8x2yz9ueGeR3n5jXdYZP4hPPfKm/QrZ0h445332e6AMTz6zKv85Re7sfHqRcrI9Xc/yj7HXchau/2a607am9U/O6IP70pqDCee9keOO+l0lll6Cc4+7fgOy06aNImdv7cv/7jrXrbf5ov8eM/de6mVkmrJL0xSk5tppn5c+psf8Ks9tmaewQM4+8o7OfvKO1ly4Xm5/fSfMNccswIw75Bi7uwf//Zv3HL/E/z+wG+w/SYjGTpwAEMHDmD7TUZy6gHf4P0Px3PASRf15S1JDeGkMWex9wGHsdwyS3HjpecyZPCgdstOmjSJnXbfmwv/fgVf22ZLzhlzAp2sk6EZRIRbd7e+Zs92N0TEzcCDmfnDXr7uCOBpYLXMvLfj0jW5/o+BH2bmiN6+tqZP/5ln4qc7b8pPd950qv0fj/+EJ55/nWGDBrBYOViyZRDkqFU/M009o1ZdGoD7Hn2uxi2WGtsJp/6BfQ86guWX/QzXX/Jn5p1n2gHKLT755BO+WQba3/jqVpz1++OYaSYHRqpgr2jj69XPMCLGRkRGxMGt9m9Q7m//X6Np67o5Ik7qqXLTaVvgwBrV3aPK9/7yzktqRnLedfcy4ZOJ7PCFKdOPji9nJXn97femKd8y5d8sM/s9XWrPr084lX0POoKVVliOGy87t8NAe8KECXxt9A+48O9XsPMO23L2accbaGsqfd0r3AxbX+uLL0wfAz+JiHn64No9KjPfzMxpI5LpFBH9yiVDpR717vsfTbPvgcefZ//fXcTguedg/4oe73VXXBKAw8+4gsmTJ3+6f9KkyRx6evF9bcPVpu31lgRHHHMiBx72a1ZdaQWuv+QvDBs6pN2y48ePZ9tvfpdLrryOXXfanjNPPnaaObolNb6+6J66iWIlnoOBH7VXKCLWA44BVgTeAf4C7J+ZEyJiLLA+sH5E7FGeslhmPtOqjnbLdVR/ee7NwKPAeKBlGb0zyjKTK8p8mkYSEbMAhwI7AvNTrDJ0Qmae2M49jgZOAr4GHE2xPOhKEfE4cERZzxDgIeDnmXlNO/XMBIwBNiyv+wJwOnBsZk6OiEMpJ2CPiJaJ10dl5s0RsSDwG6Al2roD2Dszn6io/6cUc0kOAC4CnmqrHapfm/7oRGaftT+fXXwB5ppjNh555hWu/Md/mX3WWbjkN99ngXmm5JIe9cNt+Od/n+KcK+/i/kefY9TIIrC+8Z7HePjplxk2aABHfn+rvroVqW6dde5fOeSXRQrIumutxomn/XGaMiMWWYjR3yhmDfv+vj/jyutuYtjQISw4fD4OP/q305TfYN012WDdtWredtWnAPpFh+ulqAH0RbA9mWIC8L9HxG8z83+tC5QB4FXAOcBoYAmKQHcysB+wF7A0RTB8UHna621cq81yVdTfYkdgLMWynZ+jCGBfBo5r597OAj5fXvdfwKIUS3p2ZDaKLx7fLe/hZeCPZQpPyrwAACAASURBVJu+QRE4bwFcFhGrZea/26ijH0Vg/7WyjtUpgu83KCZfPxZYliJw36k8582ImIPiy88dFF9KJgA/Bq6PiGUz88OI+BrwC2DPsux2FBO9v9nJfamOfGXUypx/3b38+eq7+Wj8Jyw4z0C+s/W6HLDLZiw03+Cpyq6w5ILcd85BHH32tVx/9yOMufh2ImDheQezx3YbsP/Om7LgvO0P9JJmVE8/+zxQDHb87alntllm/XXW+DTYbik/7o03OeLoNvtkAAy2Z3D+1tH4opMVJnv2YkVP87DM/GK59vyrmblDRGxAEcjNk5njIuJIisDxMxW9yKOB04DBZRB4M1UMTmyrXBfqX6Ask2WZnwPfy8yFWtcdEUsBjwObZ+bVVb4foykC65GZeV+5bwngCWBEZj5XUfbvwEuZ+YNqBkhGxFFlvRuXr8dSvvcVZXalyDlfuuIeZwJeA76fmRdExB3AQ5n5nYrzrgeWbGuAZETsDuwOsMj8Q1Z9+pIjq3krJHVDLL15XzdBmiH0Gzzivswc2XnJnrHUXJEnrmS43V1b3D65Vz+31vryE9wf2C4iVm3j2LLAnS2BcOl2YBZgyR64drX139lqvft/AgtGxNxt1LkyRc/4TW1dMCIeioj3y+2qikMTgQcqXq9C8cvRwxXl3we2pOjtblNEfC8i7o2I18vy+wCLtFe+tCqwGPBexXXeAQZXXGvZ8r4rtX79qcwck5kjM3PkPIMGdHJ5SZLUnqAI1Ny6t/W1PptSIDPvjoi/UeQqH9GVU2vUpFrXvwXQv3xeOVptfGZOqnjdr2zDasAnreqYdpQbEBHbAydQpIDcAbwL7AFs00mb+lEE+ju0ccw0EUmSpG7q6/m7DgIeBjZrtf8R4GsR0a+i93ldipzilhzvCUA1M3e0Va6a+gHWiIio6N1ekyKV4902rvMARfA6CpgmjSQzn62irVDkegcwf2a22UvehnWBuzLz0ykOy3SUSm29D/cDXwfGZebb7dT9CMV9VyYgrllluyRJUjfUw9R16p4+7V3PzCcpBvLt1erQKRT50qdExLIRsSVwFHBSZn5YlnkGWD0iRkTEsIho717aKldN/ZRlToiIz0TEV4GfAG2ut5uZjwMXAGdExFciYrGI+HxE7NRW+Q7ek8eBPwNjI+KrEbF4RIyMiB9HxLbtnPY4sEpEbB4RS5XzmK/fxvuwfHkvwyKif3mdV4FLImL9ss3rRcRvyhx0gN8Cu0TEd8q6DwTW6Mo9SZKk6dPXKRjNsPW1emjD4RR5y5/KzBeBzSnyoB+g6FU9lykzikAxw8YEip7x12k/P3maclXWD0UwOhNwF8VMJH+gnWC7tDPFFIInUsyAMhYY2EH59nyLYuDk0WU9lwPrAe31jp9GEej/BbgHGEExnV+l0yl6qe+leB/WKb9YrEcxld+F5bXOosjZfgsgM8+nmM7wSIpe9xVofzYWSZLUQ4qp/9y6u/W1Xp2NpJH01VLszWTksovm3Wc1xAKbUkNzNhKpd/T2bCSfmSvy96vWQ79oY9vwlhl3NhJJkiSpqfX1AElJkiS1w17Rxmew3Y7M3KCv2yBJkmZcgbORNAODbUmSpDplz3bj8zOUJEmSasSebUmSpHpUJ1PXqXsMtiVJkupQlJsam8G2JElSnbJnu/EZbEuSJNUpY+3G5wBJSZIkqUbs2ZYkSapDgWkkzcBgW5IkqU71i+zrJqibDLYlSZLqlB3bjc+cbUmSJKlG7NmWJEmqQ+ZsNweDbUmSpDplrN34DLYlSZLqkcu1NwWDbUmSpDoUOLiuGfgZSpIkSTViz7YkSVKdCtNIGp7BtiRJUp0yZ7vxGWxLkiTVKWPtxmfOtiRJklQj9mxLkiTVoQDCpO0ekH16dYNtSZKkOmWs3fhMI5EkSapHRde2W3e3jt7iiIUj4qaIeDgiHoqIvcr9h0bEixHxQLltUXHOgRHxZEQ8FhGbdvYx2rMtSZJUp+zZrrmJwH6ZeX9EzAXcFxHXlceOz8xjKwtHxHLADsBngQWA6yNi6cyc1N4F7NmWJEnSDCkzX87M+8vn7wGPAAt2cMpWwHmZOT4znwaeBFbv6Br2bEuSJNUpB0j2nogYAawM3AWsA/wwInYG7qXo/X6LIhC/s+K0F+g4ODfYliRJqk9hsN0zhkXEvRWvx2TmmMoCETEA+Buwd2a+GxGnAkdQTGVyBPAbYNfpubjBtiRJUj0KTPjtGeMyc2R7ByOiP0Wg/efMvAggM1+tOH46cHn58kVg4YrTFyr3tcuPUJIkSTOkKH46+APwSGYeV7F/eEWxbYAHy+eXAjtExKwRsRiwFHB3R9ewZ1uSJKkOuahNr1gH2An4b0Q8UO47CPh6RKxEkUbyDPBdgMx8KCIuAB6mmMlkj45mIgGDbUmSpLplrF1bmXk7xfea1q7s4JwjgSOrvYbBtiRJUp2yZ7vxGWxLkiTVo6DtPlc1FAdISpIkSTViz7YkSVKdMo2k8RlsS5Ik1Slj7cZnsC1JklSHnPqvOZizLUmSJNWIPduSJEl1KcwjaQIG25IkSfXIWLspGGxLkiTVKXO2G5/BtiRJUp0y1m58DpCUJEmSasSebUmSpHpl13bDM9iWJEmqU8bajc9gW5IkqQ5FOECyGZizLUmSJNWIPduSJEl1yp7txmewLUmSVKeMtRufwbYkSVJdcgnJZmCwLUmSVKeMtRufAyQlSZKkGrFnW5IkqR459V9TMNiWJEmqQ4FpJM3AYFuSJKleGW03PINtSZKkOmUaSeNzgKQkSZJUI/ZsS5Ik1Sk7thufwbYkSVI9cjaSpmCwLUmSVK+MtRueOduSJElSjdizLUmSVIeCIPrZL9roDLYlSZLqlTnbDa/dYDsiJgNZZT2ZmQbukiRJPcUlJJtCRwHy4VQfbEuSJKlHBRGmkTS6doPtzDy0F9shSZIkNZ0up35ExABgKPBSZn7S802SJEkSYBpJE6j6t4mI+GJE3A+8AzwFrFDuPyMivlGj9kmSJM24Ity6u/WxqoLtiNgauAQYB+zP1FOsPw3s0vNNkyRJmrFFhFs3t75Wbc/2IcAfM/MLwAmtjj0ILN+jrZIkSZKaQLU528sCPy2ft56h5C2KHG5JkiT1lAhwNpKGV22w/S4wrJ1jI4DXe6Q1kiRJ+lT06/s0iGYWEQsDZwPzUXQoj8nM30bEEOB8ijj3GeBrmflWFHkpvwW2AD4ERmfm/R1do9qvS9cBB0bEoIp9GRGzAj8Erqr6riRJklSdvh5c2AxbxyYC+2XmcsCawB4RsRxwAHBDZi4F3FC+BtgcWKrcdgdO7ewC1fZs/wy4G3gMuJIi8j8A+BwwENi6ynokSZJULdNIaiozXwZeLp+/FxGPAAsCWwEblMXOAm6mmCRkK+DszEzgzogYFBHDy3raVNUnmJnPAKsAlwObAJOA9YA7gTUy86Wu3pwkSZLUC4ZFxL0V2+5tFYqIEcDKwF3AfBUB9CsUaSZQBOLPV5z2QrmvXVUvapOZLwC7VVtekiRJ3VAnU9c1gXGZObKjAuWijX8D9s7Mdyvf98zMiGg9QUjVpmcFyQUoIvgX7dGWJEmqIYPtmouI/hSB9p8z86Jy96st6SERMRx4rdz/IrBwxekLlfva1ZUVJHeOiKcpus7vBJ6PiKcj4pvV1iFJkqQu6OvBhc2wdfj2RgB/AB7JzOMqDl3KlEUbd6FY3LFl/85RWBN4p6N8baiyZzsifgicCFwPHAG8SpG78nXgrIgYmJknV1OXJEmSVCfWAXYC/hsRD5T7DgKOAi6IiN2AZ4GvlceupJj270mKqf++1dkFqk0j2Q8Ym5m7ttp/ZkSMBX4MGGxLkiT1kADC2UhqKjNvp3ir27JRG+UT2KMr16g22J4fOK+dY39hSrQvSZKkHlHVPNGqc9UG2/8Flmjn2FLAgz3THEmSJAFFrO0Kkg2v2mB7L+C8iBgHXJSZkyJiJuArwE+AHWrVQEmSpBmWaSQNr91gOyKep1gpssVAilSSSRHxFjAYmAl4n2Lt+EVr2E5JkiSp4XTUs30DUwfbkiRJ6k3mbDe8doPtzBzdi+2QJEnSVFxBshl0eQVJSZIk9YJi7r++boW6qUvBdkSsCHwGmK31scw8u6caJUmSJDWDaleQHARcAazZsqt8rMzpNtiWJEnqSc5G0vCq7dn+JTAUWA+4DdgGeAfYFVgLp/6TJEnqceZsN75qg+1NgcOAO8vXL2TmfcDNEXEqxTzcO9egfZIkSTOoABe1aXjVBtvDgafKxWw+BuaqOHYR7S/lLkmSpOkREKaRNLxqP8FXgEHl82cpUkdaLNmjLZIkSZKaRLU927dTDI68HDgHOCQiRgATgV2AS2vROEmSpBmaOdsNr9pg+zBggfL5MRSDJbcH5qAItPfs+aZJkiTN4Ay2G15VwXZm/g/4X/n8E2C/cpMkSVINhCtINgWz7iVJkqQaabdnOyL+rwv1ZGYe0QPtkSRJUgtnI2l4HaWRHNqFehIw2JYkSeopgTnbTaDdYDsz/SolSZLUh8zZbnzVzkYidd3sg+m3/Ff7uhVS0zt0leF93QRJNRHQz77PRucnKEmSJNWIPduSJEn1yjSShmewLUmSVI8CZyNpAgbbkiRJdSns2W4Cfl2SJEmSaqRLPdsR8TlgPWAocFpmvhIRSwKvZuZ7tWigJEnSDMs0koZXVbAdEbMCfwK2pcggSuAy4BXgaOBx4IAatVGSJGnGZBpJw6u2Z/tIYGNgJ+A64NWKY1cBP8BgW5IkqQeFPdtNoNpg++vAzzPzLxExU6tjTwMjerRVkiRJMzqXa28K1X5dGgo80kEds/ZMcyRJkqTmUW3P9tPAWsCNbRxbHXisx1okSZKkgmkkDa/aYPts4KCIeAb4W7kvI2IUsA9waM83TZIkaQZnGknDqzbYPhpYETgHOKPcdzswG3BeZv6uBm2TJEmagbmoTTOoKtjOzEnADhFxMrApMC/wBnB1Zt5Sw/ZJkiTNuEwjaXhdWtQmM28DbqtRWyRJkqSm0qVgW5IkSb3Eqf+aQrUrSE6mWDWyXZnZev5tSZIkTTcXtWkG1fZsH860wfZQ4AsUc2yP7cE2SZIkCezZbgLVDpA8tK395WqSlwHv9GCbJEmSpF4REWcCXwRey8zly32HAt8BXi+LHZSZV5bHDgR2AyYBP8rMazqqv1s525k5KSJOAU4CTuhOXZIkSWrFNJLeMJYilj271f7jM/PYyh0RsRywA/BZYAHg+ohYupy5r009MUByVmBID9QjSZKkTznPdm/IzFsjYkSVxbeiWGNmPPB0RDxJsZr6P9s7odoBkou0sXsWYHngKODeKhsoSZKkagT2bPetH0bEzhRx7n6Z+RawIHBnRZkXyn3tqrZn+xnano0kgP8Be1RZjyRJkqplz3ZPGBYRlR3DYzJzTCfnnAocQRH/HgH8Bth1ei5ebbD9rTb2fQw8C9zTUZ6KJEmS1IfGZebIrpyQma+2PI+I04HLy5cvAgtXFF2o3NeuToPtcsaRB4CXMvP1zspLkiSpJzjPdl+JiOGZ+XL5chvgwfL5pcBfIuI4igGSSwF3d1RXNT3bSZGrsiVw7XS1WJIkSV1nGknNRcS5wAYU6SYvAIcAG0TEShRx8DPAdwEy86GIuAB4GJgI7NFZhkenwXZmTo6I54E5u3EfkiRJ6goHSPaKzPx6G7v/0EH5I4Ejq62/2k/wNGDviJil2oolSZKkGV21AyTnApYAnoqIq4GXmXp2kszMQ3q6cZIkSTM000gaXrvBdkQ8BWyTmf8GDqo41Na0J0mR3yJJkqQe4QDJZtBRz/YIitUhyUw/aUmSpN5mz3bD64nl2iVJklQL9mw3vM4+wbZWjZQkSZJUhc56tg+LiHFV1JOZuUtPNEiSJEkUKSSmkTS8zoLtlYDxVdRjD7gkSVJPM42k4XUWbG+dmR0uQSlJkqQa6WfPdqPz65IkSZJUI85GIkmSVK/M2W54BtuSJEn1KFzUphm0G2y7kI0kSVIfs2e74dmzLUmSVK/s2W54foKSJElSjdizLUmSVJfM2W4GBtuSJEn1ymC74RlsS5Ik1aPAAZJNwK9LkiRJUo3Ysy1JklSXzNluBgbbkiRJ9cpgu+EZbEuSJNUrc7YbnsG2JElSXTKNpBn4CUqSJEk1Ys+2JElSPQrs2W4CBtuSJEl1yTSSZmCwLUmSVK8cINnw/LokSZIk1Yg925IkSfXKNJKGZ7AtSZJUl8zZbgYG25IkSfUogH4G243OYFuSJKleOUCy4fl1SZIkSaoRe7YlSZLqkjnbzcBgW5IkqV4ZbDc8g21JkqR6FJiz3QT8uiRJkiTViD3bkiRJdcmc7WZgsC1JklSvDLYbnsG2JElSvTLYbnh+gpIkSXUpigGSbt3bOnuXI86MiNci4sGKfUMi4rqIeKJ8HFzuj4g4MSKejIj/RMQqndVvsC1JkqQZ2Vhgs1b7DgBuyMylgBvK1wCbA0uV2+7AqZ1VbhqJJElSPQpMI+kFmXlrRIxotXsrYIPy+VnAzcD+5f6zMzOBOyNiUEQMz8yX26vfYFuSJKleGWz3lfkqAuhXgPnK5wsCz1eUe6HcZ7AtSZLUWKrLOVanhkXEvRWvx2TmmGpPzsyMiJzeixtsS5IkqZmNy8yRXTzn1Zb0kIgYDrxW7n8RWLii3ELlvnYZbEuSJNUr00j6yqXALsBR5eMlFft/GBHnAWsA73SUrw0G25IkSfXLYLvmIuJcisGQwyLiBeAQiiD7gojYDXgW+FpZ/EpgC+BJ4EPgW53Vb7AtSZJUj8Ll2ntDZn69nUMbtVE2gT26Ur/BtiRJUr3q5wDJRufXJUmSJKlG7NmWJEmqV6aRNDyDbUmSpLpkznYzMNiWJEmqRy7X3hQMtiVJkuqSK0g2A78uSZIkSTViz7YkSVLdsme70RlsS5Ik1StzthuewbYkSVK9Mme74fl1SZIkSaoRe7YlSZLqUmC/aOMz2JYkSapXppE0PINtSZKkehQYbDcBg21JkqS6ZBpJM/ATlCRJkmrEnm1JkqR6ZRpJwzPYliRJqlcG2w3PYFsSADfcdCsnnfYH/nnXPbz19jsMHTKYFT67LHv9YHe22GyTvm6eVFc23u+XLLD8qgwdsRRzDB7GxI8/4u2XnuPRGy7h7j+fwkdvv9nh+V8+4jRW2W5XAE78wjK8+dz/pjq+5LpfYKkNtmDRVddl4AKL0H+22Xn7pWd58tZruG3Mr/ngjddqdm+qN2b8NjqDbUn89GeHccwJJ7HQggvw5S03Y9jQIbw+7g3u+9e/ufm2Owy2pVbW2mUvXn74Xzx1xw188MZr9J9jThZacQ1G7XkIq37t25yx/bq8+8oLbZ679KgtWWW7XRn/wXvMOudc0xyfeZZZ+eYZVzBxwnievec2nvrnDUS/mVhszVGsucuPWH6Lr3HmN0fx5rNP1vo2JfUAg21pBnf6H8/hmBNOYpcdt2fMSccxyyyzTHX8k08+6aOWSfXrVyOHMHHC+Gn2b7j34az3vQP5/O77c8Xhe05zfI7Bw/jy4b/nwSvOZ8A88zNi9fWnKTN58iRuOP5g7jn393z87tuf7o8ItjzkJEbusDubHnAM535/m569KdWhMI2kCfjbhDQDGz9+PD879JcssvBCbQbaAP379++Dlkn1ra1AG+Chq/4KwJBFl2zz+JcOPxWAK474Ubt1T544kdtOO2qqQBsgM7nllF8AtBmkq0lFP7fubn3Mnm1pBnbdjbfw+rhx7L3Hd+nXrx9XXH0tDz70KLPNNiurj1yFtdZYra+bKDWUz4zaEoBXH//vNMdW2mZnlt1ka879wbad5nS3Z1L5S9PkiROnv5FqMPZsNzqDbWkGds99/wJgttlmZeW1NuTBhx+Z6vh6667FX/90JvPMM6wvmifVvbV33YdZ5hjArAMGssDyq7LoyHV55dH/cPuYo6cqN3CBRdjsoOP49yV/5rEbL5vu6638ldEAPHn7Nd1pthqFK0g2BYNtaQb22uvjADjmhJNZbpnPcNt1l7HS55bn6Wee48cHHcK1N9zMdjvtxs1XX9LHLZXq09rf2pcB88z/6esnbr2avx+4Gx++Ne7TfRHBNkedyYQP3+eqI/ee7mstsPxINtjjYMa//y43nnBIt9otqfcYbEszsMmTJwMw88wzc+mF5zBi0UUAWGH55bj4vLP4zEprccttd/DPu+4xpURqw7GfXxiAOYfOy8Irr8XG+x3J9y6+h798b2tefrj45WjN0XsxYvX1+fPuX5omD7taQ0csxTdOvZh+M/fnr/vuyFvPP9Vj96B6FnWRc6zu8ROUZmCDBg4EYOUVV/g00G4xxxxzsOnGowC4+977e71tUiP54I3XePT6Szhnty2YfdBQtjnqTKAIkjfa+wj+9bexPHHr1dNV99ARS7HLWdcx+8Ah/HW/HXnspst7sumqcxHh1s2tr9mz3Q0RkcB2mfnXXr7uaOCkzBzQm9etuP7lwLjMHN0X11fP+cxSSwAwaODcbR4fPGgQAB999HGvtUlqZO+89ByvP/kIw5dbiTkGDWWeJZZl5llnY+WvjP4037q1H137KADn7fEVHr3h0qmODVt8GXYZew2zDxrKBXvv0K18bzUq+0UbXdMG22Ug3JGzeiBYHA681c06ekVEPEMRoB/b121R/dho1HpEBA8/+jiTJ0+mX7+p/1FvGTC52IhF2jpdUhvmmnc4UMyX/faLz3L/hWe2WW6p9TdnrnmH89BVFzL+/fd4+8Vnpzo+79LLs/OZVzPbXAM5f8/teOKWq2redkk9r2mDbYpAuMUXgdNb7fuouxfIzFe6W0eliJglMyf0ZJ1SRxZdZGG+tMWmXHrF1fz25DHss+f3Pj127fU3cc31NzFo0EA222SjPmylVF+GjliK98e9yvj3351qf0Qwaq/DGDBsPp67/w4+fvdtXnn3bS49+Ltt1jP67OuZa97h3HD8wdMs1z7/Miuy8x+vpv9sc3DuHtvyv9uvq9n9qJ65qE0zaNpguzIQjoi329j33Yj4CbAI8Bzw68w8veJ4AnsCmwOjgNeBn2Xmn1qV+TSNJCIWAI4BNgNmBx4H9snMm9pqY0SMBYYBt5XXmgWYNyIWBH4DbFoWvQPYOzOfaKeeJYDjgDWAuYDHgP/LzMvL4zcDiwLHRMQx5XsR5bG1gV8Bq1H00l8K7J+Z75bH5wBOAb4KfAD8tq02qHGdfPyv+de//8u+BxzMFVdfx8orrsDTzz7L3y+7iplmmokzTj6ege2kmUgzoqXW25yN9v0Fz933D95+8Rk+fPsNBgydj0VX+zxDFlmC9157mcsO/l7nFbVjtrkHsfPYa5hj0FCeuuMGFl5pTRZeac1pyt151ol8/N473bkVNQKD7YbXtMF2RyJiG+AkYB/gWoqg9pSIeCUzKxPiDgMOKsttB5wdEY9m5r1t1DkncAvwGrA18BKwYhXNWR94hyJAjzK4vYkiwF4fmAD8GLg+IpbNzA/bqGMAcBXwc4oe++2BiyLic5n5KLAt8G/gTODUijavUN7/IcC3gSHACWW5r5bFjgU2Ab4CvFiWXQ+4qIp7UwNYaMEFuO/26zn8qGO59IpruPUf/2TuuefiS1tsyoE/3ovVR67S102U6spT/7yBIX9bgkVWWYfhy63EbHMNYsJHH/DGM09w80mHc9c5J/HRO9OfYTjbXAOZY9BQABZfeyMWX7vtX5YeuPhsg+0ZgjnbjS4yO0ttbnwR8VXgwore3H8Aj2XmrhVlxgJLZua65esEzsjM71SUuR54JTO/WVFmu8z8a0R8h6J3ebHMnDLBasftGgtsCSyUmeP/v717D7OsqO81/n4HJYRBE3WAIBEhgIo3bnMUlZuHR8W7ECR4CxcDKsrJUSEIhjgScwTUgAkHAUUHHiFGwwEBHxUxoqCCELlkwk3BITACclUG5KZ1/qhqWGx29/Q0s+jdPe/nedaz91qrVlWt1Wt3/3Z1Va22bS/gIOA5pf1wkqxCDeLfV0r56mQGSCa5ADirlPKJtr6YgT7bSU4CHiylvLuzbTPgEmBt4F7gdmCvUsrJbf8awI3A6cP6vCfZB9gHYL1n/emW1191yWQuhaTHYcEW6yw7kaTH7eNXP/QfpZT5T1R581/83HLRGcc/UcXNWnM22P4J/bk9pvzpKniabQL8cGDb+cDzB7b9eMj6YJoxmwOXDwu0k6yXZGlnObize9FYoN1sCWwA3D2Wntry/TRgw2EFJ5mb5IgkVyS5sx0zn9pFZiJbAu/s1o1HrsuGbVmVznUopSwFHvsc4kf2H19KmV9Kmb/mvGcso3hJkqTZbaXsRjKBvpr5fwls1lm/o/P+noG0c4BLgd2G5HPHkG1Qu3rsSO1u8jNqi/RJ1EB5InOALwBHDtm3BHjOMo6XJEm9cYDkbLCyBttXAq8ATuhs2xq4YiDdVtT+y931K8fJ8xLgXUnmDbZul1IeAn4+ybr9FHgbdR7ryT5qbGvgpFLKqQBJVqO2Sl/TSfMAsMqQsl5QShlatyTXAg9Sz/u6tm0u8ELg2mHHSJKkFWll7YQwe6ysP8FPUQPj9yfZOMl+wDuAIwbS7Zxk75bmIGAH6gDCYU6h9qv+epJtkvxZkjcleeVy1u1k4JaWz3ZJNkiybZLPJNl4nGOuAXZKskUb9PhlYLWBNIuBbZKsm2Re23Y48JIkxybZPMlGSd6Q5Dh4uMvICcDhSV6V5AXULx+DQbskSepD4vJ4l2m2UgbbpZTTqVPtfZDamv3XwL4DM5EALKDOwnE58D5gz1LKRePkeQ919pAbgTOBRdTZTJara0qbbWRbakvy14CrgBOpfbbHG97+IWqgfx51VpIL2vuuvwOeRW2RvrWVdXkra33qTCqXUacBvKVz3P7U2VFOa6+LgB8szzlJkiStrFaK2UimYroexT6bzN9is3Lx+edMdzWkWc/Zq81GXAAAEwpJREFUSKQnxhM/G8km5aJvDH8CqSZvznovn9bZSFbWPtuSJEkzwPR3g9DjY7AtSZI0qkagz/Fs155FcjfwO+ChUsr8JE8H/pXa1XYxsGspZUpPqzLYHsfYA3AkSZKmRYCslMPrpsMrB2aT+wjw3VLKYUk+0tYPnErG/gQlSZKkR3szdYIK2utbppqRLduSJEkjaTSmrlsJFODsNjnGcaWU44G1Syk3tf03A2tPNXODbUmSpJFlsL0CzEtycWf9+BZQj9m6lLIkyVrAd5Jc1T24lFJaID4lBtuSJEmjyj7bK8JtE039V0pZ0l5/leQ04CXALUnWKaXclGQd6vNMpsSfoCRJklZKSeYmecrYe+DV1Af4nQHs3pLtDnx9qmXYsi1JkjSy7EbSs7WB01L7xj8JOKWU8q0kFwFfTfJu4Hpg16kWYLAtSZI0khwg2bdSynXApkO23w7ssCLKMNiWJEkaWQbbM53BtiRJ0qiyZXvGc4CkJEmS1BODbUmSJKkndiORJEkaRcFuJLOAwbYkSdLIMtie6exGIkmSJPXElm1JkqSR5Dzbs4HBtiRJ0sgy2J7pDLYlSZJGlS3bM57BtiRJ0sgy2J7pHCApSZIk9cSWbUmSpFFlN5IZz2BbkiRpJAW7kcx8BtuSJEmjypbtGc9gW5IkaWQZbM90DpCUJEmSemKwLUmSJPXEbiSSJEmjKBD7bM94BtuSJEkjy2B7prMbiSRJktQTW7YlSZJGUpz6bxYw2JYkSRpZBtszncG2JEnSqLJle8Yz2JYkSRpZBtsznQMkJUmSpJ7Ysi1JkjSq7EYy4xlsS5IkjaRgN5KZz2BbkiRpFAVbtmcB+2xLkiRJPbFlW5IkaWTZsj3TGWxLkiSNKmPtGc9gW5IkaWQZbc90BtuSJEmjygGSM54DJCVJkqSe2LItSZI0kpxnezYw2JYkSRpVdiOZ8Qy2JUmSRpbB9kxnsC1JkjSC/uOSy76duWvOm+56zAK3TWfhBtuSJEkjqJSy43TXQY+fs5FIkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktQTg21JkiSpJwbbkiRJUk8MtiVJkqSeGGxLkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktQTg21JkiSpJwbbkiRJUk8MtiVJkqSeGGxLkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktQTg21JkiSpJwbbkiRJUk8MtiVJkqSeGGxLkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktQTg21JkiSpJwbbkiRJUk8MtiVJkqSeGGxLkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktQTg21JkiSpJwbbkiRJUk8MtiVJkqSeGGxLkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktQTg21JkiSpJwbbkiRJUk8MtiVJkqSeGGxLkiRJPTHYliRJknpisC1JkiT1xGBbkiRJ6onBtiRJktSTlFKmuw6apZLcClw/3fXQcpsH3DbdlZBWAn7WZp5nl1LWnO5KaGYx2Jb0KEkuLqXMn+56SLOdnzVp5WA3EkmSJKknBtuSJElSTwy2JQ06frorIK0k/KxJKwH7bEuSJEk9sWVbkiRJ6onBtrSSSnJukqOnodz1k5Qk0zILQ5L9kyyejrI1s7X7dpdpKHePJEuf6HI75Z+VZOF0lS/NdAbb0iQlWdj+2B4ysH37tn3ecuQ1qUC354B4Z+CgnvJeodq1P2u666HR1T6DEy0LV0Ax6wBnroB8epdkcZL9p7sekuBJ010BaYa5DzggybGllFunuzKPRynljhWZX5I51HEgv1uR+UqTtE7n/RuAzw9s++3jLaCUcvPjzaMryaqllAdWZJ6SRo8t29Ly+R6wGDhkokRJtk1yYZL7ktyS5Mgkq7Z9C4HtgPd3Wt3WH5LHuOkmyr/tPzfJsUk+m+TOtnyqBcTdNEd31ldN8n+SXJ/k/iTXJflfE5zjHkmWJnldkkXAA8AmLZ/Dk9yY5N4kFyV5zQT5rJLkhCS/SPLbJD9L8jdjdU2yANgdeH3nOmzf9q2b5Cudc/xGko0H8v+bJDe3up4ErDFeXTRzlVJuHluAu4Zs2y3Jz5M80F737h7f7qsPtHvo3vY5eOeQNLt01p+Z5OQkt7djLk3yyvHqOPYfmiQHJrkRuLFtX+Z9PJDPhkm+3u7re5L8NMkbOvvPBZ4NfGrsM9PZ9/Ik32/1XZLkc0me2tm/eqvn0va75eBlXXtJEzPYlpbP74GPAO9NsuGwBEnWBb4JXAJsDrwbeBvwyZbkr4EfA1+itrytA9wwJKuh6SaR/5h3UD/jLwPeA+wD/O8Jzu1E4C+BDwGbtHzvmiA9wGrULx7vAZ4PXN/qux3wduCFLd8zk2w6Th5zgCXArq3cjwIHA3u2/Z8GvgqcwyPX4UdJVqd++bmvlfcy4CbgnLaPJLsCnwA+BmwBXN3OTyuRJDsBRwNHUe/JzwLHJHnjQNKPA2cAm1Gn5Tsp44wtSDIX+D6wPvAW4EXAoZOoznbAi4EdgR0mcx8PsQb1d8CrgE2BU4H/l+R5bf/O1ED+UB75zJDkRcDZ7Rw3bek2A77YyfvTLd8/B3ag/o7ZdhLnJWk8pRQXF5dJLMBC4Kz2/nvAV9r77YECzGvr/wD8DJjTOXYP4H5g9bZ+LnD0JMp8TLrlyP8a2vSebdvfAjcOyxvYuJ3DjstxPfZox2zZ2bYh9QvJegNpTweOae/Xb8fNnyDvw4Bzhl37zra92nXonuMqwO3Arm39R8DnB447B1g83feTS38LsEv98/bw+g+BLw6kWQic31kv49wrXx5Is0t7vzdw99jnfpL1WgjcCvxBZ9tk7uM9gKXLyPsC4G8764uB/QfSnAScMLBts3Zea1GD+PuBd3T2r0H90r1wun+uLi4zdbFlW5qaA4G3JtlyyL5NgAtKKb/vbDsfWBXYaAWUPdn8LyildCfS/zGwbvdfxh2bU4Pk7w0rMMl/tX8rL03yzc6uh4BLO+tbAAGu6KRfCryeGogPleS9SS5OcmtL/0FgvfHSN1sCGwB3d8r5NfC0TlmbtPPuGlzX7LcJNeDuOp/635iuYffKYJoxmwOXl1JuG9yRZL3u/T/QFWNRKeX+zvpk7uPB/OcmOSLJFa3byVJgPpP7zLxz4LM5dl02bMuqdK5DKWUp8J/LyFfSBBwgKU1BKeUnSU4FjgD+fnkO7alKfef/OuDJ7X13oNn95dEDIue0OvwP4MGBPIYOUEvyF9R/7+9PbYn+DfB+YKdl1GkONdDfbci+FTr4U7NWX5+XX1JbjMd078d7BtJO5T7+NLUbyv7UVvF7qa3Wq46TvlvWF4Ajh+xbAjxnGcdLmgKDbWnqDgauoP7R67oS2DXJnE7r89bUAYTXtvUHqP8qXpZh6SaTP8BLk6TTur0V8MtSym+GlHMp9Q/xK4FvDe4spVw/ibpC7Uce4E9KKUNbyYfYGriwlNIdrDnYojfsOvyU2lf9tlLKeH3Lr6Sed7dP6laTrJdmjyuBVwAndLZtTf38dg27V64cJ89LgHclmTfYul1KeQj4+STrNpn7eNDWwEmllFMBkqxGbZW+ppNmvM/MC0opQ+uW5Frql+StgOvatrnUfu7XDjtG0rLZjUSaovYH63jqQMauY4BnUgdgbZLk9dQ+yEeXUu5taRYDL0l9wMu8dGYJGTAs3WTyp6U5Kslz2wwKBzC8RYtSyjXUQYhfSPLnSTZIsk2Sdy3nNbkGOBlYmGSXJH+WZH7qg2R2Huewa4Atkrw2ycap85hvN+Q6vLCdy7wkT27l3AJ8Pcl2rc7bJvlMZyaHzwK7J9m75X0Q8NLlOSfNCp+iBsbvb/fBftQBxEcMpNt54F7Zgfpfl2FOAX5Fvf+2aff6mzLBbCTjmMx9POgaYKckW7RBj1+mDlbuWgxskzrTydgzAA6n/j45NsnmSTZK8oYkx8HDXUZOAA5P8qokL6B++ZhMw4CkcRhsS4/PodR+yw8rpSwBXkvt03kp9Y/Vv1Bbwsd8mtrydAV1wNR4fS0fk26S+UP9I74KcCF1zuETGCfYbv6SGkD8E3AVdTDXH02Qfjx7UmckOaLlcxZ1NoPxWsePowb6pwAXUQdQfmYgzeepLYwXU6/DK9oXi22pLXBfa2WdSO3reidAKeVfgQXUQaWXUGeM+McpnJNmsFLK6cB+1LEAV1C/IO9bShl8QM0C6iwclwPvA/YspVw0Tp73UL8U3kh90M0i6mwmy9U1ZTL38RAfogb651FnJbmgve/6O+BZ1BbpW1tZl7ey1qfOpHIZdRajWzrH7U8du3Fae10E/GB5zknSo+XR46ckzQap8+wuKqV8YLrrIs0EqXNRv7WU8m/TXRdJs4st25IkSVJPDLYlSZKkntiNRJIkSeqJLduSJElSTwy2JUmSpJ4YbEuSJEk9MdiWNCsl2SNJ6Sx3J7ksyQeS9Pr03PYQopJkj862hUkWL2c+2ydZMMFDj6ZavwVtqrtlpVucZOFU819R17nzs1x/ReQnSU8kg21Js91bgZdRH1byE+CfqQ/8eKL9PbDTch6zPfAx/F0tSTNWr607kjQCLi2l/Ly9PzvJRtQnCA4NuNuj4B8qK3iqplLKtSsyP0nSzGBriaSVzUXAU5Os1enusW+SI5L8Ergf+GOAJDsnuSDJvUnuSvK1JOt1M0uyepJjktyeZGmSM4A/HSx0WDeSJHOTHJbk2iT3J7k5yalJ1k6ygNqqDfDgWHeYgXIPT/KLJA+0148OdjlJsnmS85Lcl2RJkkOATOXCJVkzyXFJrmnX5IYkpyRZd5xDNknyvZb2piSHDqnfmkmObXW7P8lVSfaZSv0kaRTZsi1pZbMB8DtgKbB62/ZRahC+D7AKcF+S9wKfA74EHAo8BVgAfD/Ji0spd7djjwP+Avh4y+NVwCnLqkSSVYHvAJsChwEXAH8EvAZ4GvAFatD+bmDrVuexY58EfBt4PrV7yn8CWwGHAE8HPtzSzQP+HbgZ2J36ReIA4FFfGJbD04H7gIOAW4FntrJ+mOR5pZT7BtKfDnwR+GQ7r0OA31OvI0meCpwP/GHb9ouW7nNJ/qCU8s9TrKckjQyDbUmz3SotOH0KsCuwM3BmKeXe5OEG3luAnca6jiRZAzgc+FIpZa+xREl+AlxNDYCPSvJc4O3AR0sph7VkZ7fj37uMer2T2pf8zaWUMzrb/61T3o3t7YWllIc6ad5GDcC3K6X8oG37bjufjyU5vJTyK+CDwFzg1aWUG1qe3wGuX0bdhiqlXE3tgjNWv1WAHwL/DbwWOG3gkM8PXJenAh9OclQp5a6W17OBF5VSftbSnZPkj9t5fG7gvCVpxrEbiaTZ7irgQeAO4BjgZGCvgTSnD/TRfhnwVODkJE8aW4AbWn7btnQvpf4e/epAfl+ZRL1eDdw8EGhP1o7UgPlHA/U7G3gytZV77DwuGAu0AUop9wBnTqFMAJK8r83qshR4iBpoAzx3SPJh12UN4IWd87gQ+MXAeXwbeAa15V6SZjRbtiXNdjsBNwJ3A9cP6eoAcNPA+lrt9Zxx8ryzva7TXm8Z2D+4PswzgCWTSDfMWtQW4QcnyBtq/RYN2T+Z+j1Gkv2AfwL+kdod5U7ql40LgNUmUc7Y+lgf77WAjVj2eUjSjGWwLWm2W9SZjWQ8gzOP3N5e9wD+a0j6sf7aY0H62sB1nf1rT6Jet/FIC+/yup3av3nXcfYvbq83jVOXydRvmN2A75ZSPjy2IckGE6Qf77qMfcm4HfgVna4pA66eYj0laWQYbEvSY/2IGlBvVEo5cYJ0F1IH/O1KHeQ4ZrdJlHE2sFuSN5ZSxuvWcX97/UMeCfABvkWdN3xpKeWqCcr4MXBAkmd1+mzPBd44ifoNszrwm4Fte06Qfth1WUod0An1PPYD/rv1MZekWcdgW5IGlFJ+k+QA4P8mWRP4JvBraveH7YBzSymnlFKuTnIKMDal3UXUvtivm0QxXwb2Bv4lySepgftTqLNxHNWC6Cta2g8n+Sbwu1LKxdR+53tSB0V+BrgMWBXYEHgT8JZSyr3AkcC+1MGJC3hkNpLfTvHSfAs4MMnB1AcE/U9glwnS7925Lq8B/gpYUEr5ddt/JHUml/OSHEltyZ4LPA/YppTy5inWU5JGhsG2JA1RSjkuyQ3U4PTt1N+XS4DzgEs7Sd9Dba3dnxrw/ntLf/4y8n8wyaupc2nv015vp87ucUdLdhZ1UOe+1IfwBEg79jXAR9qxGwD3ANcC3wAeaGXclmQH4LPAiS3/Y9u5TOUpmodS5yD/ILWP9vepQfR146R/M/WJnYdQv6x8gjpV4dg1+HWSl7e6HEj9MnMXNeg+dQr1k6SRkxX8kDRJkiRJjVP/SZIkST0x2JYkSZJ6YrAtSZIk9cRgW5IkSeqJwbYkSZLUE4NtSZIkqScG25IkSVJPDLYlSZKknhhsS5IkST35/xaUerTyxdDrAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light", - "tags": [] - }, - "output_type": "display_data" - } - ], - "source": [ - "round_train_pred = np.round(train_pred)\n", - "round_test_pred = np.round(test_pred)\n", - "scoring(train_labels, round_train_pred, test_labels, round_test_pred)\n", - "cm = confusion_matrix(test_labels, round_test_pred)\n", - "cm_labels = ['Not topic-related', 'Topic-related']\n", - "plot_confusion_matrix(cm, cm_labels, normalize=False,title=f'BERT Confusion Matrix\\nn={len(test_labels)}')" - ] - } - ], - "metadata": { - "colab": { - "machine_shape": "hm", - "name": "2nd Copy of BERT final.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} -- 2.43.0