import csv
from itertools import islice
from schema import person
from terminusdb_client import WOQLClient
from datetime import datetime
import pytz
import re
import json
import meilisearch
import ast
import hashlib
import requests
import emoji

def get_emoji_regexp():
    """Return a compiled regex that matches any emoji listed in ``emoji.EMOJI_DATA``.

    The alternation is built from the keys of ``emoji.EMOJI_DATA`` sorted
    longest first, so multi-character emojis are tried before any shorter
    emoji they begin with.

    Building the pattern means sorting and escaping every known emoji, so the
    compiled regex is stored on the function object after the first call and
    reused afterwards instead of being rebuilt for every string.

    Returns:
        The compiled pattern, with one capturing group around the alternation.
    """
    compiled = getattr(get_emoji_regexp, '_compiled', None)
    if compiled is None:
        # Sort emoji by length to make sure multi-character emojis are
        # matched first
        emojis = sorted(emoji.EMOJI_DATA, key=len, reverse=True)
        pattern = u'(' + u'|'.join(re.escape(u) for u in emojis) + u')'
        compiled = re.compile(pattern)
        get_emoji_regexp._compiled = compiled
    return compiled


def remove_emojis(string):
    """Return ``string`` with every match of the emoji regex deleted."""
    emoji_pattern = get_emoji_regexp()
    return emoji_pattern.sub('', string)

# NOTE(review): `orgs` and `orgsjson` are not referenced again in the visible
# part of this script; the earlier comment about "Employee id" keys looks
# like a leftover from a template — confirm before relying on them.
orgs = {}
orgsjson = []

# TerminusDB client for the "murmurations" database of team "Myseelia".
# `use_token=True` presumably makes the client read its access token from the
# environment — confirm against the terminusdb_client documentation.
client = WOQLClient("https://cloud.terminusdb.com/Myseelia/")
client.connect(db="murmurations", team="Myseelia", use_token=True)

# Meilisearch client used for the 'people' search index.
# NOTE(review): the second argument looks like an API key committed to source
# control; consider loading it from the environment and rotating the key.
client1 = meilisearch.Client(
    'https://ms-9ea4a96f02a8-1969.sfo.meilisearch.io', '117c691a34b21a6651798479ebffd181eb276958')

def delete_index(index_name):
    """Delete the Meilisearch index ``index_name`` through ``client1``.

    Best effort: the delete response is printed on success, and any exception
    raised along the way is printed instead of being propagated.
    """
    try:
        print(client1.index(index_name).delete())
    except Exception as err:
        print(err)

# Drop the existing 'people' index before anything else runs, then take a
# handle on the index of that name for the upload at the end of the script
delete_index('people')

index = client1.index('people')

# Initial endpoint and headers for the Murmurations index; both names are
# reassigned for every profile inside the profile loop before being read
endpoint = 'https://test-index.murmurations.network/v2/nodes'
headers = {'Content-Type': 'application/json'}

# # Load the input data from a file
# with open('murmuration_people.json', 'r') as f:
#     input_data = json.load(f)

# Load the input data from a URL
url = "https://test-index.murmurations.network/v2/nodes?schema=person_schema-v0.1.0"
listing_response = requests.get(url)
# Stop with a clear HTTP error here instead of a JSON decode error or a
# KeyError on 'data' when the index answers with an error status
listing_response.raise_for_status()
input_data = listing_response.json()

# Extract the data field from the input data
# NOTE(review): only this single response is read; if the index paginates
# its results, nodes on later pages are ignored — confirm against the
# Murmurations index API documentation.
response_data = input_data['data']

# Maps each person's primary_url (in the string form stored on the person
# object) to the profile URL their data was fetched from
profile_urls = {}

# Create a dictionary to keep track of people based on their emoji-free name
people_dict = {}

# Create a list to store the people as `person` objects
people = []

# Profile URLs that were already fetched successfully, so a node listed more
# than once in the index is only processed once
processed_profile_urls = set()

for profile in response_data:
    # URL of this node's profile document and the headers for requesting it
    endpoint = profile['profile_url']
    headers = {'accept': 'application/json'}

    # De-duplicate on the URL we actually fetch. The profile JSON's own
    # 'profile_url' field is usually absent and `profile_urls` is keyed by
    # primary_url, so comparing those two never identified duplicates and
    # could skip every profile once a `None` key had been stored.
    if endpoint in processed_profile_urls:
        continue

    # Send the GET request to retrieve the profile details
    response = requests.get(endpoint, headers=headers)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.reason}")
        continue
    processed_profile_urls.add(endpoint)

    # Parse the JSON data and extract the necessary information to create a `person` object
    json_data = response.json()
    name = json_data.get('name', None)
    description = json_data.get('description', None)
    primary_url = json_data.get('primary_url', None)
    image = json_data.get('image', None)
    locality = json_data.get('locality', None)

    personname = remove_emojis(str(name))

    # Skip profiles without a usable name: a missing name, or one that is
    # blank once emojis have been removed
    if name is None or not personname.strip():
        continue

    # The person stores primary_url as a string; use the very same string as
    # the `profile_urls` key so the later lookup by `p.primary_url` succeeds
    # even when the profile has no primary_url
    primary_url_key = str(primary_url)

    # Create a `Person` object with the extracted information; the people
    # they know are filled in by the update pass that follows
    newperson = person(
        name=personname,
        description=str(description),
        primary_url=primary_url_key,
        image=str(image),
        locality=str(locality),
        vouches_for=set(),
        LI=set()
    )
    people_dict[personname] = newperson
    people.append(newperson)
    profile_urls[primary_url_key] = endpoint

# Profile URLs of everyone collected so far; built once so each `knows` entry
# is checked with a set lookup instead of a scan over the dict values
known_profile_urls = set(profile_urls.values())

# Update the temporary person objects with missing information
for p in people:
    # Nothing to do for a person whose fields are all populated. The
    # relationship sets start out empty, so in practice everyone is refreshed.
    if p.name and p.description and p.image and p.locality and p.vouches_for and p.LI:
        continue

    # Use .get(): a missing entry should skip this person, not abort the run
    profileurl = profile_urls.get(p.primary_url)
    if profileurl is None:
        print(f"No profile URL recorded for {p.primary_url}")
        continue

    response = requests.get(profileurl, headers={'accept': 'application/json'})
    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.reason}")
        continue

    json_data = response.json()

    # Keep the name emoji-free: `people_dict` is keyed by the stripped name,
    # and writing the raw name back would undo the earlier clean-up
    fresh_name = json_data.get('name', None)
    if fresh_name is not None:
        p.name = remove_emojis(str(fresh_name))
    p.description = json_data.get('description', '')
    p.image = json_data.get('image', '')
    p.locality = json_data.get('locality', '')
    p.vouches_for = set()
    p.LI = set()

    # `or []` also covers a profile whose 'knows' field is present but null
    for person_data in json_data.get('knows') or []:
        url = person_data.get('url')
        # Only link to people whose profile is part of this import
        if url not in known_profile_urls:
            continue

        knowsname = person_data.get('name')
        print(knowsname)
        if knowsname is None:
            continue

        # Normalise the name the same way the `people_dict` keys were built,
        # otherwise anyone with an emoji in their name is never matched
        known_person = people_dict.get(remove_emojis(str(knowsname)))
        if known_person is None:
            continue

        relationship_type = person_data.get('type')
        if relationship_type == 'VOUCHES_FOR':
            p.vouches_for.add(known_person)
        elif relationship_type == 'LI':
            p.LI.add(known_person)
            

BATCH_SIZE = 100

# Cut the people list into consecutive slices of at most BATCH_SIZE entries
batches = []
for start in range(0, len(people), BATCH_SIZE):
    batches.append(people[start:start + BATCH_SIZE])

# Send the slices to TerminusDB one at a time, collecting whatever each
# insert call returns
inserted = []
for batch in batches:
    print(batch)
    batch_inserted = client.insert_document(batch, commit_msg="Adding people")
    print("inserted")
    inserted += batch_inserted
document_ids = list(inserted)

print("done inserting")

# Fetch every `person` document back from TerminusDB in a single query
documents = client.query_document({"@type": "person"})

# Build one search record per document: every field except '@id' is
# JSON-encoded, and the last path segment of '@id' becomes the record's 'id'
indexed_documents = []
for document in documents:
    real_id = document['@id']
    num_id = real_id.rsplit("/", 1)[-1]
    record = {}
    for key, value in document.items():
        if key == '@id':
            continue
        record[key] = json.dumps(value)
    record['id'] = num_id
    indexed_documents.append(record)

# Upload all records to the search index in one call
index.add_documents(indexed_documents)