# Friday, 30 June 2017
#
# Learning ChatBot - python

import re
import sqlite3
from collections import Counter
from string import punctuation
from math import sqrt
import random
import socket
import wikipedia

# initialize the connection to the database
connection = sqlite3.connect('brain1.sqlite')
cursor = connection.cursor()

# create the tables needed by the program
# IF NOT EXISTS makes the setup idempotent across runs without having to
# swallow exceptions, so genuine failures (locked file, bad SQL, full disk)
# are reported instead of being silently ignored.
create_table_request_list = [
    'CREATE TABLE IF NOT EXISTS words(word TEXT UNIQUE)',
    'CREATE TABLE IF NOT EXISTS sentences(sentence TEXT UNIQUE, used INT NOT NULL DEFAULT 0)',
    'CREATE TABLE IF NOT EXISTS associations (word_id INT NOT NULL, sentence_id INT NOT NULL, weight REAL NOT NULL)',
]
for create_table_request in create_table_request_list:
    cursor.execute(create_table_request)

def get_id(entityName, text):
    """Return the rowid of ``text`` in the table for ``entityName``.

    ``entityName`` is the singular entity name ('sentence' or 'word'); the
    table is that name with an 's' appended and the column is the name
    itself. If no row holds ``text`` yet, one is inserted and the rowid of
    the new row is returned. Uses the module-level ``cursor``.
    """
    table = entityName+'s'
    column = entityName

    lookup_sql = 'SELECT rowid FROM '+table+' WHERE '+column+' = ?'
    found = cursor.execute(lookup_sql, (text,)).fetchone()
    if found is not None:
        return found[0]

    # Not stored yet: add it and report the freshly assigned rowid.
    insert_sql = 'INSERT INTO '+table+' ('+column+') VALUES (?)'
    cursor.execute(insert_sql, (text,))
    return cursor.lastrowid


def get_words(text):
    """Count the tokens present in a given string of text.

    The text is lowercased, then split into tokens: each token is either a
    run of word characters or a run of ASCII punctuation characters.
    Whitespace and any other characters are dropped.

    The return value is a view of (token, count) pairs, where count is the
    number of times the token occurs in the text. The view can be iterated
    more than once.
    """
    # Raw strings: '\w' in a plain literal is an invalid escape sequence.
    token_pattern = r'(?:\w+|[' + re.escape(punctuation) + r']+)'
    tokens = re.findall(token_pattern, text.lower())
    return Counter(tokens).items()


# incoming and outgoing static responses
# q* lists hold user inputs to recognise; a* lists hold candidate replies.
qName = ["what is your name", "who are you", "whats your name"]
aName = ["iam called Jarvis", "my name is Jarvis"]
qGreeting = ["hi", "hi there", "hello", "hi Jarvis", "hello Jarvis"]
aGreeting = ["greetings", "hi", "hi there", "hello"]
# random picking up from here
qCommon1 = ["how are you", "how are you doing"]
aCommon1 = ["Iam doing alright", "Iam fine", "doing great, and you?"]
qCommon2 = ["how was your day", "hows your day"]
aCommon2 = ["it was great", "just fine"]
qOnline = ["look for"]
# messages shown when the internet cannot be reached
interEro = [
    "No internet",
    "i can't go online",
    "connection is not available",
    "check internet connection",
    "check internet first",
    "no connection",
    "sorry internet is not available",
]

# internet search function
def online():
    """Answer one question from Wikipedia, if the network is reachable.

    Connectivity is probed by resolving google.com and opening a TCP
    connection to port 80 with a 2 second timeout. When the probe succeeds,
    the user is prompted for a query and the Wikipedia summary for it is
    printed. When the network is unavailable, one of the ``interEro``
    messages is printed instead. Returns None in every case.
    """
    probe_host = "google.com"
    try:
        host = socket.gethostbyname(probe_host)
        # The socket only proves reachability; close it instead of leaking it.
        socket.create_connection((host, 80), 2).close()
    except OSError:
        # DNS failures (socket.gaierror) and timeouts are OSError subclasses
        # but not ConnectionError subclasses, so catching ConnectionError
        # alone would let them crash the bot.
        print("> "+random.choice(interEro))
        return

    print("I am online now ask me anything\n")
    query = input('> ').strip().lower()
    print("Search...")
    try:
        ans = wikipedia.summary(query)
    except OSError:
        # NOTE(review): presumably covers transport errors raised by the
        # HTTP layer under the wikipedia package - confirm.
        print("> "+random.choice(interEro))
        return
    except wikipedia.exceptions.WikipediaException as error:
        # Unknown or ambiguous page titles should not end the conversation.
        print("> "+str(error))
        return
    print("> "+ans)


# the bot opens the conversation with a random greeting
B = random.choice(aGreeting)
while True:
    # ask for user input; if blank line, exit the loop
    print(":~# "+B)
    H = input(':~# ').strip()
    if H == "":
        print(":~# sorry can't get it")
        # persist the last usage-counter update before leaving
        connection.commit()
        connection.close()
        raise SystemExit

    # fire up internet function
    # NOTE(review): this dispatch appears to have been commented out in the
    # original script and is kept disabled here; as written, its final
    # branch would print B a second time in the same turn.
    # if H in qName:
    #     print(":~# "+random.choice(aName))
    # elif H in qGreeting:
    #     print(":~# "+random.choice(aGreeting))
    # elif H in qCommon1:
    #     print(":~# "+random.choice(aCommon1))
    # elif H in qCommon2:
    #     print(":~# "+random.choice(aCommon2))
    # elif H == "wiki":
    #     online()
    # else:
    #     print(":~# "+B)

    # store the association between the bot's message words and the user's response
    words = get_words(B)
    # total number of characters over all token occurrences in B
    words_length = sum([n * len(word) for word, n in words])
    sentence_id = get_id('sentence', H)
    for word, n in words:
        word_id = get_id('word', word)
        weight = sqrt(n / float(words_length))
        cursor.execute('INSERT INTO associations VALUES (?, ?, ?)', (word_id, sentence_id, weight))
    connection.commit()

    # retrieve the most likely answer from the database
    cursor.execute('CREATE TEMPORARY TABLE results(sentence_id INT, sentence TEXT, weight REAL)')
    words = get_words(H)
    words_length = sum([n * len(word) for word, n in words])
    for word, n in words:
        weight = sqrt(n / float(words_length))
        # One results row per stored association of this word; dividing by
        # (4 + used) lowers the score of sentences that were already used.
        cursor.execute(
            'INSERT INTO results '
            'SELECT associations.sentence_id, sentences.sentence, '
            '?*associations.weight/(4+sentences.used) '
            'FROM words '
            'INNER JOIN associations ON associations.word_id=words.rowid '
            'INNER JOIN sentences ON sentences.rowid=associations.sentence_id '
            'WHERE words.word=?',
            (weight, word))
    # if matches were found, give the best one
    cursor.execute(
        'SELECT sentence_id, sentence, SUM(weight) AS sum_weight FROM results '
        'GROUP BY sentence_id ORDER BY sum_weight DESC LIMIT 1')
    row = cursor.fetchone()
    cursor.execute('DROP TABLE results')
    # otherwise, just randomly pick one of the least used sentences
    if row is None:
        # H was stored through get_id above, so sentences is never empty here.
        cursor.execute(
            'SELECT rowid, sentence FROM sentences '
            'WHERE used = (SELECT MIN(used) FROM sentences) ORDER BY RANDOM() LIMIT 1')
        row = cursor.fetchone()
    # tell the database the sentence has been used once more, and prepare the sentence
    B = row[1]
    cursor.execute('UPDATE sentences SET used=used+1 WHERE rowid=?', (row[0],))
    connection.commit()
# No comments:
#
# Post a Comment