Skip to content
Snippets Groups Projects
Commit b39ab445 authored by Adrian Block's avatar Adrian Block
Browse files

new feature mapper and cli json

parent 60d8d9c9
No related branches found
No related tags found
1 merge request!3Dev
Pipeline #
# Pauline Backend
Pauline is a planner for your semester courses at Paderborn University. With our simple interface, you can easily find the best courses for you in your weekly schedule.
......@@ -4,10 +4,29 @@ import typer
from app import scraper, schemas
from app.data.storage import PlainJSONStorage
from app.scraper import scrape_newest_semester, scrape_specific_semester
# Typer sub-application that groups all scraper-related CLI commands
# (registered on the main CLI app elsewhere in the project).
scraper_command = typer.Typer()
@scraper_command.command("json", help="Parse the newest semester and save it to a JSON file.")
def json(file: str = typer.Option("scrape.json", help="File name"),
         name: str = typer.Option(None, help="Name of the semester")):
    """
    Scrape a semester from PAUL and write it to a JSON file.

    :param file: Path of the output JSON file.
    :param name: Exact semester name to scrape; when omitted, the newest
        semester is scraped instead.
    """
    # Entry page of PAUL's public course catalogue (Paderborn University).
    url = "https://paul.uni-paderborn.de" \
          "/scripts/mgrqispi.dll?APPNAME=CampusNet&PRGNAME=EXTERNALPAGES&ARGUMENTS=-N000000000000001,-N000442,-Avvz"
    if name:
        semester = asyncio.run(scrape_specific_semester(url, name))
    else:
        semester = asyncio.run(scrape_newest_semester(url))
    # Explicit UTF-8 so course titles with umlauts serialize identically on
    # every platform (the default encoding is locale-dependent).
    with open(file, "w", encoding="utf-8") as f:
        f.write(semester.json())
@scraper_command.command()
def catalogue(
site_path: str = "/scripts"
......
......@@ -27,6 +27,8 @@ def map_course(course: schemas.Course) -> models.Course:
description=course.description,
small_groups=[map_small_group(small_group) for small_group in course.small_groups],
appointments=[map_appointment(appointment) for appointment in course.appointments],
instructors=course.instructors,
ou=course.ou,
)
......
from .paul import parse_courses_on_site, parse_semesters, scrape_newest_semester, find_and_parse_courses
from .paul import parse_courses_on_site, parse_semesters, scrape_newest_semester, find_and_parse_courses, \
scrape_specific_semester
......@@ -111,7 +111,11 @@ async def parse_courses(links: List[str]) -> List[schemas.Course]:
title = soup.find('form', attrs={'name': 'courseform'}).find('h1').text.strip()
split_title = title.splitlines()
instructors_entry = soup.find('span', attrs={'id': 'dozenten'}).text.strip()
instructors_entry = None
instructors_element = soup.find('span', attrs={'id': 'dozenten'})
if instructors_element:
instructors_entry = instructors_element.text.strip()
ou_entry = soup.find('span', attrs={'name': 'courseOrgUnit'}).text.strip()
parsed_courses.append(schemas.Course(
......@@ -181,6 +185,14 @@ async def scrape_newest_semester(course_catalogue_url: str) -> schemas.Semester:
return schemas.Semester(name=newest_semester, courses=__deduplicate(courses))
async def scrape_specific_semester(course_catalogue_url: str, semester_name: str) -> schemas.Semester:
    """
    Scrape a single, named semester from the course catalogue.

    :param course_catalogue_url: URL of PAUL's course catalogue entry page.
    :param semester_name: Exact display name of the semester to scrape.
    :return: Parsed semester with duplicate courses removed.
    :raises ValueError: If no semester with the given name exists.
    """
    semesters = parse_semesters(course_catalogue_url)
    course_entry = semesters.get(semester_name)
    if course_entry is None:
        # Fail fast with a clear message instead of crashing opaquely
        # later inside __find_courses when handed a None entry.
        raise ValueError(
            f"Semester {semester_name!r} not found. "
            f"Available semesters: {', '.join(semesters)}"
        )
    course_links = await __find_courses([course_entry], 0)
    courses = await parse_courses(course_links)
    return schemas.Semester(name=semester_name, courses=__deduplicate(courses))
def parse_courses_on_site(site_path: str) -> List[Tuple[str, str]]:
r = requests.get("https://paul.uni-paderborn.de" + site_path)
soup = BeautifulSoup(r.content.decode("utf-8"), 'html.parser')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment.