123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295 |
- # script to grab the zip file from the google drive and populate the database
- # with the data. Phase 1 of the proyect since we wont have user entries yet
- # Not all of the buildings and images and fichas will be uploaded due to the
- # formating discrepencies in the directories, this
- # will be fixed later, for now we will just upload the ones we can
-
-
- import mysql.connector
- import zipfile
- from sys import argv
- from os import listdir, mkdir
- from os.path import isdir, isfile, abspath, join, basename
-
- # using copy for now instead of move so I can delete the folders and not have to set everything up
- from shutil import move, copy
- # import module from current directory
- import ficha_script
- import json
-
-
- # making a simple connection to the database
- # this will be changed later to a more secure connection
- def connect_db():
- user = 'root'
- password = ''
- host = 'localhost'
- port = '3306'
- database = 'fotoexploratorio'
- connection = mysql.connector.connect(user=user, password=password, host=host, port=port, database=database)
-
- return connection
-
-
- def create_tables(connection):
-
- # create the tables if they dont exist
- cursor = connection.cursor()
-
- cursor.execute('''CREATE TABLE IF NOT EXISTS buildings (
- id INT AUTO_INCREMENT PRIMARY KEY,
- name VARCHAR(255) CHARACTER SET utf8 UNIQUE NOT NULL
- )''')
-
- cursor.execute('''CREATE TABLE IF NOT EXISTS images (
- id INT AUTO_INCREMENT PRIMARY KEY,
- building_id INT,
- path VARCHAR(255) CHARACTER SET utf8 UNIQUE NOT NULL,
- year INT,
- medium VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
- author VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
- recovery_method VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
- recovery_date DATE DEFAULT NULL,
- FOREIGN KEY (building_id) REFERENCES buildings(id)
- )''')
-
- # thinking separating authors into a seperate table eventually but some FICHAS dont have authors
- # so I will leave it like this for now
- # cursor.execute('CREATE TABLE IF NOT EXIST authors (id INT NOT NULL AUTO_INCREMENT), name VARCHAR(255) encoding="utf8", PRIMARY KEY (id))')
-
- connection.commit()
- cursor.close()
-
-
-
- # reciveve the path of the zip file
- def unzip(zip):
-
- # if already exists, skip this step
- if not 'FotoExploratorio 2022 2023' in listdir():
- with zipfile.ZipFile(zip, 'r') as zip_ref:
- zip_ref.extractall()
- print('unziped')
-
-
- path = abspath('FotoExploratorio 2022 2023')
- return path
-
-
-
-
- def get_edificios(path, img_reject, connection):
- # search for the directory called EDIFICIOS
- edificio_folder = join(path, 'EDIFICIOS')
- # get the list of edificios
- edificios = []
- # recurse through the EDIFICIOS folder
- for edificio_folders in listdir(edificio_folder):
- nested_folder = False
-
-
- # recurse throgh the folders inside said edificio folder
- for objects in listdir(join(edificio_folder, edificio_folders)):
-
-
- # if its a file put in the image reject list
- if isfile(edificio_folder + edificio_folders + '/' + objects):
- img_reject.append(edificio_folder + edificio_folders + '/' + objects)
-
- # if its a folder called imagenes, then we will add the images from there so it goes to the editios list
- if isdir(join(edificio_folder, edificio_folders, objects)) and objects == 'imágenes':
- path_images = join(edificio_folder, edificio_folders, objects)
-
- for items in listdir(path_images):
-
- # if its a folder, then we will not add the folder name to the edificios list
- if isdir (join(path_images, items)):
- # since this indicates theres an indented folder, I will not write the folder name of
- # the upper directory
- nested_folder = True
-
- # check if the folder is already in the list
- if join(path_images, items) not in edificios:
- edificios.append(abspath(join(path_images, items)))
-
- # just skips the folders that are not imagenes:
- else:
- continue
-
-
- # if nested_folder is false then will add the folder name to the edifcio list
- if not nested_folder:
- # check if thats its not already in the list
- if join(edificio_folder, edificio_folders) not in edificios:
- edificios.append(abspath(join(edificio_folder, edificio_folders)))
-
- # enter the edificios in the list into the database
- edificios = [edificio for edificio in edificios if 'Fotos' not in edificio]
-
- for edificio in edificios:
- # remove the path
- edificio = basename(edificio)
-
- # remove the _uprrp from the name
- edificio = edificio.replace('_uprrp', '')
-
- # enter the edificio into the database
- cursor = connection.cursor()
- cursor.execute('INSERT IGNORE INTO buildings (name) VALUES (%s)', (edificio,))
- connection.commit()
- cursor.close()
-
- return edificios
-
- def get_images(edificios_list, connection):
-
- # make a folder to store the images
- if not 'accepted_images' in listdir():
- mkdir('accepted_images')
-
-
- images = []
- #recurse the edificios folder
- for edificio in edificios_list:
- # append the images in the folder to the list, if the folder has an imagenes folder
- # then we will add the image from there
- cursor = connection.cursor(buffered=True)
- cursor.execute('SELECT id FROM buildings WHERE name = %s', (basename(edificio.replace('_uprrp', '')),))
- building_id = cursor.fetchone()[0]
- cursor.close()
- for folders in listdir(edificio):
-
- # if its a folder called imagenes, then we will add the images from there so it goes to the editios list
- if isdir(join(edificio, folders)) and folders == 'imágenes':
- for img in listdir(join(edificio, folders)):
- cursor = connection.cursor(buffered=True)
- # append the image to the list
- img = basename(img)
- images.append({'path': folders + img, 'building_id': building_id})
- copy(join(edificio, folders, img), join('accepted_images', img))
- cursor.execute('INSERT IGNORE INTO images (path, building_id) VALUES (%s, %s)', ('accepted_images/' + img, building_id))
- connection.commit()
- cursor.close()
-
- # due to formatting issues, some images are in the edificio folder, so we will add to the database from here
- if isfile(join(edificio, folders)):
- cursor = connection.cursor(buffered=True)
- img = basename(folders)
- images.append({'path': img, 'building_id': building_id})
- copy(join(edificio, folders), join('accepted_images', img))
- cursor.execute('INSERT IGNORE INTO images (path, building_id) VALUES (%s, %s)', ('accepted_images/' + img, building_id))
- connection.commit()
- cursor.close()
-
-
-
-
-
-
- return images
-
- def get_fichas(path, images_list, connection):
-
- fichas = []
- fichas_path = path + '/FICHAS/FICHAS_PRELIMINARES/'
-
- # recurse the fichas folder
- ficha_script.run_script(path)
- fichas_path = 'fichas_jsons/'
-
-
- # recurse the fichas json folder, read the image reference and try and find it in the images list
- for ficha in listdir(fichas_path):
- # read the json file for the "Código de imagen" key
- with open(fichas_path + ficha, encoding='utf-8') as ficha_file:
- key = json.load(ficha_file)['Código de la imagen']
-
- # the key may not have a path so we need to search the key inside the edificio folder
- for img in images_list:
-
- # grab the image name only, remove file extension
- img_name = img['path'].split('.')[0]
- # if the image name is the same as the key, then we will add the ficha to the list
- if img_name == key:
- # fichas.append(fichas_path + ficha)
- # get the data from the json and insert it into the database
- with open(fichas_path + ficha, encoding='utf-8') as ficha_file:
- img_path = 'accepted_images/' + img['path']
- data = json.load(ficha_file)
- cursor = connection.cursor(buffered=True)
- # turn on autocommit
- # insert the data into the database
-
- # update the image table with the data from the ficha
- cursor.execute('''UPDATE images SET
- year = IFNULL(%s, year),
- author = IFNULL(%s, author),
- medium = IFNULL(%s, medium),
- recovery_method = IFNULL(%s, recovery_method),
- recovery_date = IFNULL(%s, recovery_date)
- WHERE path = %s''',
- (data['Año'], data['Autor de la fotografía o dibujo'], data['Medio'], data['Recuperado'], data['Fecha de recuperación de la imagen'], img_path))
- connection.commit()
- cursor.close()
-
- break
-
-
-
-
-
-
- def main():
-
- try:
- mkdir('fichas_jsons')
- except:
- pass
-
- try:
- zip_path = argv[1]
- except:
- zip_path = 'FotoExploratorio 2022 2023.zip'
-
- path = unzip(zip_path)
-
- # create the list of rejected images and fichas
- img_reject = []
-
-
- # connection = connect_db()
- connection = connect_db()
-
- # create the tables
- create_tables(connection)
-
- # get the list of edificios
- edificios = get_edificios(path, img_reject, connection)
- # get the list of images
- images = get_images(edificios, connection)
- # get the list of fichas
- get_fichas(path, images, connection)
-
- # write the list of rejected images and fichas to a file
- with open('rejected_images.txt', 'w', encoding='utf-8') as f:
- for img in img_reject:
- f.write(img + '\n')
- # with open('rejected_fichas.txt', 'w', encoding='utf-8') as f:
- # for ficha in fichas_reject:
- # f.write(ficha + '\n')
-
- # write edificio list to see the buildings listed because some of them are not buildings
- with open('edificios.txt', 'w', encoding='utf-8') as f:
- for edificio in edificios:
- f.write(edificio + '\n')
-
-
- # end the program
- print('done')
-
-
-
-
- if __name__ == '__main__':
- main()
|