1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
import json
import os
import sys

from bs4 import BeautifulSoup
-
# Each course occupies this many consecutive <tr> rows after the header row.
ROWS_PER_COURSE = 3

# Comment attached to a course whose credits cell contains "**".
NOT_OFFERED_COMMENT = "Cursos no se ofreceran en el proximo semestre"


def parse_course(row_texts):
    """Build one course record from the text of its three table rows.

    Each row text is split on newlines and the resulting cells are read by
    position:

    * row 0: cells 1-6 become code, name, type, days, times and room;
      days/times/room form the first schedule entry.
    * row 1: cell 1 is the professor (text after the last "Prof."), cell 2
      the "cupo" (text after the last ":"); cells 3-5 are an optional second
      days/times/room schedule entry, used only when the days cell is
      non-blank.
    * row 2: the first whitespace-separated token of cell 1 is the integer
      credit count; a "**" anywhere in that cell sets the "not offered"
      comment.

    NOTE(review): the nesting of the second schedule entry was reconstructed
    from a copy of the script whose indentation had been lost -- confirm that
    times/room/append all belong under the "days is non-blank" check.

    Args:
        row_texts: sequence of exactly three strings (the ``.text`` of each
            ``<tr>``).

    Returns:
        dict with keys "code", "name", "type", "schedule", "cupo", "prof",
        "creds" and "comm".

    Raises:
        IndexError: if a row has fewer cells than the positions read above.
        ValueError: if there are not exactly three rows, or the credits token
            is not an integer.
    """
    header, detail, credits_row = [text.split("\n") for text in row_texts]

    schedule = [{
        "days": header[4].strip(),
        "times": header[5].strip(),
        "room": header[6].strip(),
    }]

    second_days = detail[3].strip()
    if second_days:
        schedule.append({
            "days": second_days,
            "times": detail[4].strip(),
            "room": detail[5].strip(),
        })

    comment = NOT_OFFERED_COMMENT if "**" in credits_row[1] else None

    # Key order matches the original output so the emitted JSON is unchanged.
    return {
        "code": header[1].strip(),
        "name": header[2].strip(),
        "type": header[3].strip(),
        "schedule": schedule,
        "cupo": detail[2].strip().split(":")[-1].strip(),
        # Deliberately not stripped after the split: keeps the value
        # byte-identical to what the script has always emitted.
        "prof": detail[1].strip().split("Prof.")[-1],
        "creds": int(credits_row[1].strip().split()[0]),
        "comm": comment,
    }


def parse_rows(row_texts, source=None):
    """Parse the text of every ``<tr>`` of one file into course records.

    The first row is discarded (the original script popped it before
    parsing); the remaining rows are consumed in groups of
    ``ROWS_PER_COURSE``. An empty list, or a list holding only the first
    row, yields no courses. A trailing group with fewer than
    ``ROWS_PER_COURSE`` rows is skipped with a warning on stderr instead of
    aborting the whole run with an IndexError.

    Args:
        row_texts: list of row text strings, first row included.
        source: optional label (e.g. the file path) used in the warning.

    Returns:
        list of dicts as produced by ``parse_course``.
    """
    body = row_texts[1:]
    leftover = len(body) % ROWS_PER_COURSE
    if leftover:
        sys.stderr.write(
            "warning: ignoring %d trailing row(s) that do not form a "
            "complete course%s\n"
            % (leftover, " in %s" % source if source else "")
        )
    usable = len(body) - leftover
    # Stepped slicing replaces repeated list.pop(0), which is O(n) per call.
    return [
        parse_course(body[start:start + ROWS_PER_COURSE])
        for start in range(0, usable, ROWS_PER_COURSE)
    ]


def parse_file(path):
    """Read one HTML file and return the course records found in its rows."""
    with open(path) as fp:
        # NOTE(review): no parser is named, so bs4 picks whichever one is
        # installed (and warns about it). Left as-is to avoid changing parse
        # results; pin one once the intended parser is confirmed.
        soup = BeautifulSoup(fp)
    return parse_rows([row.text for row in soup.find_all("tr")], source=path)


def main(argv):
    """Collect courses from every "RBA*" file in argv[1] and print them.

    Writes a single line ``data= <json>`` to stdout, the same bytes the
    original Python 2 ``print "data=", json.dumps(...)`` produced, using a
    form that also runs on Python 3.

    Args:
        argv: command-line argument list; argv[1] is the directory to scan.

    Returns:
        Process exit status: 0 on success, 2 when the directory is missing
        from the command line.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s DIRECTORY\n" % (argv[0] if argv else "script"))
        return 2

    directory = argv[1]
    courses_list = []
    for filename in os.listdir(directory):
        # Only files whose name starts with "RBA" are processed.
        if not filename.startswith("RBA"):
            continue
        courses_list.extend(parse_file(os.path.join(directory, filename)))

    sys.stdout.write("data= %s\n" % json.dumps(courses_list))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
-
|