|
@@ -0,0 +1,295 @@
|
|
1
|
+# script to grab the zip file from the google drive and populate the database
|
|
2
|
+# with the data. Phase 1 of the project, since we won't have user entries yet
|
|
3
|
+# Not all of the buildings and images and fichas will be uploaded due to the
|
|
4
|
+# formatting discrepancies in the directories; this
|
|
5
|
+# will be fixed later, for now we will just upload the ones we can
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+import mysql.connector
|
|
9
|
+import zipfile
|
|
10
|
+from sys import argv
|
|
11
|
+from os import listdir, mkdir
|
|
12
|
+from os.path import isdir, isfile, abspath, join, basename
|
|
13
|
+
|
|
14
|
+# using copy for now instead of move so I can delete the folders and not have to set everything up
|
|
15
|
+from shutil import move, copy
|
|
16
|
+# import module from current directory
|
|
17
|
+import ficha_script
|
|
18
|
+import json
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+# making a simple connection to the database
|
|
22
|
+# this will be changed later to a more secure connection
|
|
23
|
def connect_db():
    """Open a connection to the project's MySQL database.

    Every setting keeps the local development default used so far, but can
    now be overridden through an environment variable so the script can be
    pointed at another server without editing the source:

    * ``FOTOEXPLORATORIO_DB_USER``      (default ``'root'``)
    * ``FOTOEXPLORATORIO_DB_PASSWORD``  (default ``''``)
    * ``FOTOEXPLORATORIO_DB_HOST``      (default ``'localhost'``)
    * ``FOTOEXPLORATORIO_DB_PORT``      (default ``'3306'``)
    * ``FOTOEXPLORATORIO_DB_NAME``      (default ``'fotoexploratorio'``)

    Returns:
        The connection object returned by ``mysql.connector.connect``.
    """
    # Function-scope import: the module only imports selected names from os.
    from os import environ

    settings = {
        'user': environ.get('FOTOEXPLORATORIO_DB_USER', 'root'),
        'password': environ.get('FOTOEXPLORATORIO_DB_PASSWORD', ''),
        'host': environ.get('FOTOEXPLORATORIO_DB_HOST', 'localhost'),
        'port': environ.get('FOTOEXPLORATORIO_DB_PORT', '3306'),
        'database': environ.get('FOTOEXPLORATORIO_DB_NAME', 'fotoexploratorio'),
    }
    return mysql.connector.connect(**settings)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
def create_tables(connection):
    """Create the ``buildings`` and ``images`` tables if they do not exist.

    Args:
        connection: an open database connection exposing ``cursor()`` and
            ``commit()``.

    The cursor is closed even when one of the statements raises, and the
    commit only happens after both statements succeeded.
    """
    cursor = connection.cursor()
    try:
        cursor.execute('''CREATE TABLE IF NOT EXISTS buildings (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255) CHARACTER SET utf8 UNIQUE NOT NULL
        )''')

        cursor.execute('''CREATE TABLE IF NOT EXISTS images (
            id INT AUTO_INCREMENT PRIMARY KEY,
            building_id INT,
            path VARCHAR(255) CHARACTER SET utf8 UNIQUE NOT NULL,
            year INT,
            medium VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
            author VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
            recovery_method VARCHAR(255) CHARACTER SET utf8 DEFAULT NULL,
            recovery_date DATE DEFAULT NULL,
            FOREIGN KEY (building_id) REFERENCES buildings(id)
        )''')

        # Authors may eventually move to their own table; some fichas have no
        # author, so for now the author stays a nullable column on images.
        connection.commit()
    finally:
        cursor.close()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+# receives the path of the zip file
|
|
66
|
def unzip(zip):
    """Extract the archive into the working directory unless already done.

    Args:
        zip: path of the zip file to extract.

    Returns:
        The absolute path of the ``FotoExploratorio 2022 2023`` folder in the
        current working directory.
    """
    extracted_name = 'FotoExploratorio 2022 2023'
    already_extracted = extracted_name in listdir()

    # An entry with that name in the working directory means a previous run
    # already extracted the archive, so the extraction is skipped.
    if not already_extracted:
        with zipfile.ZipFile(zip, 'r') as archive:
            archive.extractall()
        print('unziped')

    return abspath(extracted_name)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
def get_edificios(path, img_reject, connection):
    """Collect the building folders under ``EDIFICIOS`` and register them.

    For every folder directly under ``<path>/EDIFICIOS``:

    * files lying directly in the folder are appended to ``img_reject``;
    * if the folder has an ``imágenes`` sub-folder that itself contains
      folders, each of those nested folders is treated as a building and the
      parent folder is not;
    * otherwise the folder itself is treated as a building.

    Folders whose absolute path contains ``'Fotos'`` are dropped. The name of
    each remaining folder (without ``_uprrp``) is inserted into the
    ``buildings`` table with ``INSERT IGNORE``.

    Args:
        path: root folder of the extracted archive.
        img_reject: list that receives the paths of the rejected files; it is
            modified in place.
        connection: open database connection.

    Returns:
        The list of absolute paths of the accepted building folders.
    """
    edificio_folder = join(path, 'EDIFICIOS')
    edificios = []

    for edificio_name in listdir(edificio_folder):
        edificio_path = join(edificio_folder, edificio_name)

        # A stray file directly under EDIFICIOS is not a building, and
        # listing it as a directory would raise.
        if not isdir(edificio_path):
            continue

        nested_folder = False
        for entry in listdir(edificio_path):
            entry_path = join(edificio_path, entry)

            # The path is built with join(); the previous string
            # concatenation dropped the separator between the two folder
            # names, so the test never matched and nothing was rejected.
            # NOTE(review): get_images still imports files found directly in
            # a building folder - confirm which of the two is intended.
            if isfile(entry_path):
                img_reject.append(entry_path)
                continue

            # Only the folder called "imágenes" is inspected.
            if not (isdir(entry_path) and entry == 'imágenes'):
                continue

            for item in listdir(entry_path):
                # Compare and store the same (absolute) form of the path so
                # the duplicate check works for relative roots as well.
                item_path = abspath(join(entry_path, item))
                if isdir(item_path):
                    # Nested folders are the buildings; the parent folder
                    # name must then not be recorded.
                    nested_folder = True
                    if item_path not in edificios:
                        edificios.append(item_path)

        if not nested_folder:
            edificio_abs = abspath(edificio_path)
            if edificio_abs not in edificios:
                edificios.append(edificio_abs)

    # The filter is applied to the full path, exactly as before.
    edificios = [edificio for edificio in edificios if 'Fotos' not in edificio]

    cursor = connection.cursor()
    try:
        for edificio in edificios:
            # Keep only the folder name and strip the _uprrp marker.
            name = basename(edificio).replace('_uprrp', '')
            cursor.execute('INSERT IGNORE INTO buildings (name) VALUES (%s)', (name,))
        connection.commit()
    finally:
        cursor.close()

    return edificios
|
|
143
|
+
|
|
144
|
def get_images(edificios_list, connection):
    """Copy every building's images into ``accepted_images`` and record them.

    For each building folder the matching row id is read from ``buildings``.
    Files inside the folder's ``imágenes`` sub-folder, and files lying
    directly in the building folder, are copied to ``accepted_images`` in the
    working directory and inserted into ``images`` (``INSERT IGNORE``) with
    the path ``accepted_images/<file name>``.

    Args:
        edificios_list: paths of the building folders.
        connection: open database connection.

    Returns:
        A list of ``{'path': <file name>, 'building_id': <id>}`` dicts, one
        per copied file.
    """
    if not isdir('accepted_images'):
        mkdir('accepted_images')

    images = []
    cursor = connection.cursor(buffered=True)
    try:
        for edificio in edificios_list:
            building_name = basename(edificio.replace('_uprrp', ''))
            cursor.execute('SELECT id FROM buildings WHERE name = %s', (building_name,))
            building_id = cursor.fetchone()[0]

            for entry in listdir(edificio):
                entry_path = join(edificio, entry)

                if isdir(entry_path) and entry == 'imágenes':
                    sources = [join(entry_path, name) for name in listdir(entry_path)]
                elif isfile(entry_path):
                    # Due to formatting issues some images sit directly in
                    # the building folder; they are imported from there.
                    sources = [entry_path]
                else:
                    continue

                for source in sources:
                    # copy() only handles regular files; skip sub-folders.
                    if not isfile(source):
                        continue
                    img = basename(source)
                    # Record the bare file name for both locations. The
                    # "imágenes" branch used to prepend the folder name
                    # without a separator, producing a path that matched
                    # neither the copied file nor the stored database path.
                    images.append({'path': img, 'building_id': building_id})
                    copy(source, join('accepted_images', img))
                    cursor.execute(
                        'INSERT IGNORE INTO images (path, building_id) VALUES (%s, %s)',
                        ('accepted_images/' + img, building_id),
                    )

            # One commit per building keeps finished buildings stored even if
            # a later one fails.
            connection.commit()
    finally:
        cursor.close()

    return images
|
|
190
|
+
|
|
191
|
def get_fichas(path, images_list, connection):
    """Copy the ficha metadata onto the matching rows of ``images``.

    ``ficha_script.run_script(path)`` is called first; the JSON files are then
    read from ``fichas_jsons/`` (presumably written there by that script -
    confirm against ficha_script). For each JSON file the value under
    ``'Código de la imagen'`` is compared with the image file names, taken up
    to their first dot. When an image matches, its row in ``images`` is
    updated; a field missing from the ficha, or null, leaves the stored
    value untouched.

    Args:
        path: root folder of the extracted archive, passed to ficha_script.
        images_list: dicts with a ``'path'`` key holding the image file name.
        connection: open database connection.
    """
    ficha_script.run_script(path)
    fichas_path = 'fichas_jsons/'

    # Index the images by code once instead of scanning the list per ficha.
    # setdefault keeps the first image for a code, like the original scan
    # that stopped at the first match.
    images_by_code = {}
    for img in images_list:
        images_by_code.setdefault(img['path'].split('.')[0], img)

    cursor = connection.cursor(buffered=True)
    try:
        for ficha in listdir(fichas_path):
            with open(fichas_path + ficha, encoding='utf-8') as ficha_file:
                data = json.load(ficha_file)

            key = data.get('Código de la imagen')
            if not isinstance(key, str):
                # No usable image code: this ficha cannot be matched.
                continue
            img = images_by_code.get(key)
            if img is None:
                continue

            img_path = 'accepted_images/' + img['path']
            # .get() yields None for a missing field, and IFNULL then keeps
            # the value already stored instead of aborting on a KeyError.
            cursor.execute(
                '''UPDATE images SET
                    year = IFNULL(%s, year),
                    author = IFNULL(%s, author),
                    medium = IFNULL(%s, medium),
                    recovery_method = IFNULL(%s, recovery_method),
                    recovery_date = IFNULL(%s, recovery_date)
                    WHERE path = %s''',
                (
                    data.get('Año'),
                    data.get('Autor de la fotografía o dibujo'),
                    data.get('Medio'),
                    data.get('Recuperado'),
                    data.get('Fecha de recuperación de la imagen'),
                    img_path,
                ),
            )
            connection.commit()
    finally:
        cursor.close()
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
|
|
242
|
def main():
    """Run the whole import: unzip, create tables, load buildings/images/fichas.

    The zip path is taken from the first command-line argument and defaults
    to ``'FotoExploratorio 2022 2023.zip'``. After the import, the rejected
    files are written to ``rejected_images.txt`` and the accepted building
    folders to ``edificios.txt``.
    """
    # The folder may be left over from a previous run; only that case is
    # ignored, any other failure is reported.
    try:
        mkdir('fichas_jsons')
    except FileExistsError:
        pass

    zip_path = argv[1] if len(argv) > 1 else 'FotoExploratorio 2022 2023.zip'
    path = unzip(zip_path)

    # Filled by get_edificios with the files it rejects.
    img_reject = []

    connection = connect_db()
    try:
        create_tables(connection)
        edificios = get_edificios(path, img_reject, connection)
        images = get_images(edificios, connection)
        get_fichas(path, images, connection)
    finally:
        # The connection was previously left open until interpreter exit.
        connection.close()

    with open('rejected_images.txt', 'w', encoding='utf-8') as f:
        for img in img_reject:
            f.write(img + '\n')
    # TODO: also write a rejected_fichas.txt once rejected fichas are tracked.

    # Written out for manual review because some listed folders are not
    # actual buildings.
    with open('edificios.txt', 'w', encoding='utf-8') as f:
        for edificio in edificios:
            f.write(edificio + '\n')

    print('done')


if __name__ == '__main__':
    main()
|
|
295
|
+
|