# Script to grab the zip file from Google Drive and populate the database
# with the data. Phase 1 of the project, since we won't have user entries yet.
# Not all of the buildings, images and fichas will be uploaded due to
# formatting discrepancies in the directories; this
# will be fixed later. For now we just upload the ones we can.


import mysql.connector
import zipfile
from sys import argv
from os import listdir, mkdir
from os.path import isdir, isfile, abspath, join, basename

# using copy for now instead of move so I can delete the folders and not have to set everything up
from shutil import move, copy
# import module from current directory
import ficha_script
import json


# making a simple connection to the database
# this will be changed later to a more secure connection
def connect_db():
    """Open and return a MySQL connection using the hard-coded local settings.

    Returns:
        The connection object produced by ``mysql.connector.connect``.
    """
    # NOTE: credentials are hard-coded for local development only.
    settings = {
        'user': 'root',
        'password': '',
        'host': 'localhost',
        'port': '3306',
        'database': 'fotoexploratorio',
    }
    return mysql.connector.connect(**settings)


def create_tables(connection):
    """Create the ``buildings`` and ``images`` tables if they do not exist.

    Args:
        connection: An open database connection exposing ``cursor()`` and
            ``commit()``.

    The cursor is always closed, even when one of the statements fails; the
    commit only happens when both statements succeed.
    """
    cursor = connection.cursor()
    try:
        cursor.execute('''CREATE TABLE IF NOT EXISTS buildings (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) CHARACTER SET utf8 UNIQUE NOT NULL
    )''')

        # images.building_id references buildings.id, so buildings must be
        # created first.
        cursor.execute('''CREATE TABLE IF NOT EXISTS images (
    id INT AUTO_INCREMENT PRIMARY KEY,
    building_id INT,
    path VARCHAR(255) CHARACTER SET utf8 UNIQUE NOT NULL,
    year INT,
    medium VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
    author VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
    recovery_method VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
    recovery_date DATE DEFAULT NULL,
    FOREIGN KEY (building_id) REFERENCES buildings(id)
    )''')

        # TODO: authors may eventually move to their own table; for now the
        # author stays a nullable column on images because some fichas have
        # no author.

        connection.commit()
    finally:
        cursor.close()


# receives the path of the zip file
def unzip(zip):
    """Extract the archive into the current directory unless already present.

    Args:
        zip: Path of the zip file to extract.

    Returns:
        Absolute path of the 'FotoExploratorio 2022 2023' entry in the
        current working directory.
    """
    extracted_name = 'FotoExploratorio 2022 2023'

    # An existing entry with that name means a previous run already extracted
    # the archive, so the zip file is not even opened.
    if extracted_name in listdir():
        return abspath(extracted_name)

    with zipfile.ZipFile(zip, 'r') as archive:
        archive.extractall()
        print('unziped')

    return abspath(extracted_name)


def get_edificios(path, img_reject, connection):
    """Collect building folders under ``EDIFICIOS`` and insert their names.

    A folder directly under ``EDIFICIOS`` counts as a building when it holds
    an ``imágenes`` sub-folder that contains no directories. When ``imágenes``
    does contain directories, each of those directories is treated as a
    building instead of the parent folder.

    Args:
        path: Root of the extracted archive (the folder that contains
            ``EDIFICIOS``).
        img_reject: List that is extended in place with the path of every
            file found directly inside a folder under ``EDIFICIOS``.
        connection: Open database connection used for the inserts.

    Returns:
        Absolute paths of the accepted building folders, excluding any path
        that contains the substring ``'Fotos'``.
    """
    edificio_root = join(path, 'EDIFICIOS')
    edificios = []

    for building_name in listdir(edificio_root):
        building_dir = join(edificio_root, building_name)

        # Stray files directly under EDIFICIOS cannot be listed as folders.
        if not isdir(building_dir):
            continue

        nested_folder = False

        for entry in listdir(building_dir):
            entry_path = join(building_dir, entry)

            # Loose files are recorded; the path is built with join so the
            # separator between the folders is not lost.
            if isfile(entry_path):
                img_reject.append(entry_path)

            # Only the 'imágenes' folder decides what counts as a building.
            if not (isdir(entry_path) and entry == 'imágenes'):
                continue

            for item in listdir(entry_path):
                # Normalise before the duplicate check so it compares like
                # with like (the list stores absolute paths).
                nested_path = abspath(join(entry_path, item))
                if isdir(nested_path):
                    # A folder inside 'imágenes' replaces the parent folder
                    # as the building.
                    nested_folder = True
                    if nested_path not in edificios:
                        edificios.append(nested_path)

            if not nested_folder:
                building_abs = abspath(building_dir)
                if building_abs not in edificios:
                    edificios.append(building_abs)

    # The filter looks at the whole path, not only at the folder name.
    edificios = [edificio for edificio in edificios if 'Fotos' not in edificio]

    cursor = connection.cursor()
    try:
        for edificio in edificios:
            # The stored name is the folder name without the '_uprrp' marker.
            name = basename(edificio).replace('_uprrp', '')
            cursor.execute('INSERT IGNORE INTO buildings (name) VALUES (%s)', (name,))
        connection.commit()
    finally:
        cursor.close()

    return edificios

def get_images(edificios_list, connection):
    """Copy every building image into ``accepted_images`` and register it.

    For each building folder, files inside its ``imágenes`` sub-folder and
    files lying directly in the building folder are copied to
    ``accepted_images/`` (relative to the current working directory) and
    inserted into the ``images`` table under the building's id.

    Args:
        edificios_list: Paths of the building folders to scan.
        connection: Open database connection.

    Returns:
        A list of ``{'path': <file name>, 'building_id': <id>}`` dicts, one
        per copied file. ``path`` is the bare file name, so prefixing it with
        ``accepted_images/`` gives the path stored in the database.
    """
    if not isdir('accepted_images'):
        mkdir('accepted_images')

    images = []
    cursor = connection.cursor(buffered=True)
    try:
        for edificio in edificios_list:
            building_name = basename(edificio.replace('_uprrp', ''))
            cursor.execute('SELECT id FROM buildings WHERE name = %s', (building_name,))
            row = cursor.fetchone()
            if row is None:
                # Without a building row there is nothing to attach images to.
                print('building not found in database, skipped: ' + building_name)
                continue
            building_id = row[0]

            for entry in listdir(edificio):
                entry_path = join(edificio, entry)

                if isdir(entry_path) and entry == 'imágenes':
                    candidates = [join(entry_path, name) for name in listdir(entry_path)]
                else:
                    # Due to formatting issues some images sit directly in
                    # the building folder.
                    candidates = [entry_path]

                for source in candidates:
                    # Directories cannot be copied as images.
                    if not isfile(source):
                        continue
                    img = basename(source)
                    # Keep only the file name so later lookups line up with
                    # the 'accepted_images/<name>' path stored below.
                    images.append({'path': img, 'building_id': building_id})
                    copy(source, join('accepted_images', img))
                    cursor.execute(
                        'INSERT IGNORE INTO images (path, building_id) VALUES (%s, %s)',
                        ('accepted_images/' + img, building_id),
                    )

            connection.commit()
    finally:
        cursor.close()

    return images

def get_fichas(path, images_list, connection):
    """Update image rows with the metadata found in the ficha JSON files.

    Runs ``ficha_script.run_script(path)`` and then reads every file in
    ``fichas_jsons/``. A ficha is applied to the first image whose name (the
    part of ``img['path']`` before the first '.') equals the ficha's
    'Código de la imagen' value; fichas with no matching image are skipped.

    Args:
        path: Root of the extracted archive, passed through to
            ``ficha_script.run_script``.
        images_list: Dicts with a ``'path'`` key, as returned by
            ``get_images``.
        connection: Open database connection.
    """
    # Presumably this writes the JSON files read below - confirm in
    # ficha_script.
    ficha_script.run_script(path)
    fichas_path = 'fichas_jsons/'

    # Index images by name once; setdefault keeps the first occurrence so the
    # result is the same as scanning the list and stopping at the first match.
    images_by_code = {}
    for img in images_list:
        images_by_code.setdefault(img['path'].split('.')[0], img['path'])

    cursor = connection.cursor(buffered=True)
    try:
        for ficha in listdir(fichas_path):
            with open(fichas_path + ficha, encoding='utf-8') as ficha_file:
                data = json.load(ficha_file)

            key = data.get('Código de la imagen')
            if not isinstance(key, str) or key not in images_by_code:
                continue

            img_path = 'accepted_images/' + images_by_code[key]

            # Missing ficha fields become NULL parameters, which IFNULL turns
            # into "keep the current column value".
            cursor.execute(
                '''UPDATE images SET
                year = IFNULL(%s, year),
                author = IFNULL(%s, author),
                medium = IFNULL(%s, medium),
                recovery_method = IFNULL(%s, recovery_method),
                recovery_date = IFNULL(%s, recovery_date)
                WHERE path = %s''',
                (
                    data.get('Año'),
                    data.get('Autor de la fotografía o dibujo'),
                    data.get('Medio'),
                    data.get('Recuperado'),
                    data.get('Fecha de recuperación de la imagen'),
                    img_path,
                ),
            )
            connection.commit()
    finally:
        cursor.close()
    

def main():
    """Run the import: unzip, create tables, load buildings, images, fichas.

    The zip path is taken from the first command-line argument and defaults
    to 'FotoExploratorio 2022 2023.zip'. After the database work, the
    rejected files and the accepted building paths are written to
    ``rejected_images.txt`` and ``edificios.txt``.
    """
    # Only "already exists" is expected here; other errors should surface.
    try:
        mkdir('fichas_jsons')
    except FileExistsError:
        pass

    zip_path = argv[1] if len(argv) > 1 else 'FotoExploratorio 2022 2023.zip'
    path = unzip(zip_path)

    # Filled in place by get_edificios.
    img_reject = []

    connection = connect_db()
    try:
        create_tables(connection)
        edificios = get_edificios(path, img_reject, connection)
        images = get_images(edificios, connection)
        get_fichas(path, images, connection)
    finally:
        # Release the connection even when one of the steps fails.
        connection.close()

    with open('rejected_images.txt', 'w', encoding='utf-8') as f:
        f.writelines(img + '\n' for img in img_reject)
    # TODO: also write a rejected_fichas.txt once rejected fichas are tracked.

    # Written out for manual review because some entries are not buildings.
    with open('edificios.txt', 'w', encoding='utf-8') as f:
        f.writelines(edificio + '\n' for edificio in edificios)

    print('done')
    

if __name__ == '__main__':
    main()