Oferta académica

ofertatojson.py 1.9KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
import json
import os
import sys

from bs4 import BeautifulSoup
  5. files = os.listdir(sys.argv[1])
  6. courses_list = []
  7. for file in files:
  8. # Extract the college in the file
  9. if not file[0:3] == "RBA":
  10. continue
  11. with open(sys.argv[1]+"/"+file) as fp:
  12. soup = BeautifulSoup(fp)
  13. rows = soup.find_all("tr")
  14. rows.pop(0)
  15. while rows:
  16. c_name = None
  17. c_code = None
  18. c_type = None
  19. c_days = None
  20. c_times = None
  21. c_room = None
  22. c_schedule = []
  23. prof = None
  24. c_cupo = None
  25. creditos = None
  26. comment = None
  27. for i in range(3):
  28. row = rows.pop(0)
  29. cols = row.text.split("\n") #, row.getText(), dir(row)
  30. if i == 0:
  31. c_code = cols[1].strip()
  32. c_name = cols[2].strip()
  33. c_type = cols[3].strip()
  34. c_days = cols[4].strip()
  35. c_times = cols[5].strip()
  36. c_room = cols[6].strip()
  37. c_schedule.append({"days":c_days, "times": c_times, "room": c_room})
  38. elif i == 1:
  39. prof = cols[1].strip().split("Prof.")[-1]
  40. c_cupo = cols[2].strip().split(":")[-1].strip()
  41. c_days = cols[3].strip()
  42. if c_days:
  43. c_times = cols[4].strip()
  44. c_room = cols[5].strip()
  45. c_schedule.append({"days":c_days, "times": c_times, "room": c_room})
  46. elif i == 2:
  47. #print cols
  48. creditos = int(cols[1].strip().split()[0])
  49. if cols[1].find("**") >=0:
  50. comment = "Cursos no se ofreceran en el proximo semestre"
  51. courses_list.append({"code": c_code, "name": c_name, "type": c_type, "schedule": c_schedule, "cupo": c_cupo, "prof": prof, "creds": creditos, "comm": comment})
  52. print "data=",json.dumps(courses_list)