Ready for 1 release
This commit is contained in:
parent
03ddeba846
commit
55989e19e5
@ -1,32 +0,0 @@
|
||||
using System.Diagnostics;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using UWLecturePlan.Models;
|
||||
|
||||
namespace UWLecturePlan.Controllers
|
||||
{
|
||||
public class HomeController : Controller
|
||||
{
|
||||
private readonly ILogger<HomeController> _logger;
|
||||
|
||||
public HomeController(ILogger<HomeController> logger)
|
||||
{
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public IActionResult Index()
|
||||
{
|
||||
return View();
|
||||
}
|
||||
|
||||
public IActionResult Privacy()
|
||||
{
|
||||
return View();
|
||||
}
|
||||
|
||||
[ResponseCache(Duration = 0, Location = ResponseCacheLocation.None, NoStore = true)]
|
||||
public IActionResult Error()
|
||||
{
|
||||
return View(new ErrorViewModel { RequestId = Activity.Current?.Id ?? HttpContext.TraceIdentifier });
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3,6 +3,7 @@ using Microsoft.AspNetCore.Mvc;
|
||||
using UWLecturePlan.Models;
|
||||
using UWLib;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using System;
|
||||
|
||||
namespace UWLecturePlan.Controllers
|
||||
{
|
||||
@ -12,29 +13,43 @@ namespace UWLecturePlan.Controllers
|
||||
|
||||
public IActionResult Index(LecturesViewModel model)
|
||||
{
|
||||
LectureContext db = new LectureContext(@"D:\Projects\C#\UWScraper\UWScraper\bin\Debug\net8.0\lecture.db");
|
||||
LectureContext db = new(@"lecture.db");
|
||||
|
||||
if (model.CurrentSemester == null)
|
||||
{
|
||||
model.CurrentSemester = GetSemester(DateTime.Now);
|
||||
}
|
||||
model.Semester ??= GetSemester(DateTime.Now);
|
||||
|
||||
var from = DateTime.Now;
|
||||
|
||||
var date = DateTime.ParseExact(model.Date, "dd.MM.yyyy", CultureInfo.InvariantCulture);
|
||||
|
||||
model.LectureEvents = db.LectureEvents.Include(x=>x.Lecture)
|
||||
.Where(x => x.From >= from)
|
||||
.Where(x => x.Lecture.Semester == model.CurrentSemester)
|
||||
.Where(x => x.Lecture.Branch == model.BranchFilter || model.BranchFilter == null)
|
||||
.ToList();
|
||||
|
||||
if (model.LocationFilter != null)
|
||||
model.LectureEvents = [.. db.LectureEvents.Include(x=>x.Lecture)
|
||||
.Where(x => x.Lecture.Semester == model.Semester)
|
||||
.Where(x => x.Lecture.Branch == model.Branch || model.Branch == null)
|
||||
|
||||
.Where(x => x.From.Date == date)];
|
||||
|
||||
if (model.TypeFilter != null)
|
||||
{
|
||||
model.LectureEvents = model.LectureEvents
|
||||
.Where(x => x.Location.Contains(model.LocationFilter)).ToList();
|
||||
.Where(x => x.Lecture.Type == model.TypeFilter).ToList();
|
||||
}
|
||||
|
||||
model.Branches = db.LectureEvents.Select(x => x.Lecture.Branch).Distinct().OrderBy(x=>x).ToList();
|
||||
if (model.Location != null)
|
||||
{
|
||||
model.LectureEvents = model.LectureEvents
|
||||
.Where(x => x.Location?.Contains(model.Location) ?? false).ToList();
|
||||
}
|
||||
|
||||
model.Branches = [.. db.LectureEvents.Select(x => x.Lecture.Branch).Distinct().OrderBy(x => x)];
|
||||
|
||||
model.Types = [.. db.LectureEvents.Select(x => x.Lecture.Type).Distinct().OrderBy(x => x)];
|
||||
|
||||
model.Semesters = [.. db.LectureEvents.Select(x => x.Lecture.Semester).Distinct().OrderBy(x => x)];
|
||||
|
||||
model.Days = db.LectureEvents
|
||||
.Where(x => x.Lecture.Semester == model.Semester)
|
||||
.Where(x => x.Lecture.Branch == model.Branch || model.Branch == null)
|
||||
.GroupBy(x => x.From.Date).ToDictionary(x => x.Key, x => x.Count());
|
||||
|
||||
|
||||
return View(model);
|
||||
@ -59,7 +74,7 @@ namespace UWLecturePlan.Controllers
|
||||
|
||||
public static DateTime FirstDateOfWeekISO8601(int year, int weekOfYear)
|
||||
{
|
||||
DateTime jan1 = new DateTime(year, 1, 1);
|
||||
DateTime jan1 = new(year, 1, 1);
|
||||
int daysOffset = DayOfWeek.Thursday - jan1.DayOfWeek;
|
||||
|
||||
// Use first Thursday in January to get first week of the year as
|
||||
|
||||
@ -1,18 +1,30 @@
|
||||
using UWLib;
|
||||
using System.ComponentModel;
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
using UWLib;
|
||||
|
||||
namespace UWLecturePlan.Models
|
||||
{
|
||||
public class LecturesViewModel
|
||||
{
|
||||
|
||||
public string CurrentSemester { get; set; }
|
||||
public string? Semester { get; set; }
|
||||
|
||||
public string? LocationFilter { get; set; }
|
||||
public string? Location { get; set; }
|
||||
|
||||
public string? BranchFilter { get; set; }
|
||||
public string? Branch { get; set; }
|
||||
|
||||
public List<LectureEvent> LectureEvents { get; set; }
|
||||
public string? TypeFilter { get; set; }
|
||||
|
||||
public List<string> Branches { get; set; }
|
||||
public string Date { get; set; } = DateTime.Today.ToString("dd.MM.yyyy");
|
||||
|
||||
public List<LectureEvent> LectureEvents { get; set; } = [];
|
||||
|
||||
public List<string> Branches { get; set; } = [];
|
||||
|
||||
public List<string> Types { get; set; } = [];
|
||||
|
||||
public List<string> Semesters { get; set; } = [];
|
||||
|
||||
public Dictionary<DateTime, int> Days { get; set; } = [];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,7 +1,11 @@
|
||||
using System.ComponentModel;
|
||||
using Microsoft.AspNetCore.Mvc.ModelBinding.Binders;
|
||||
using UWLecturePlan;
|
||||
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
|
||||
// Add services to the container.
|
||||
builder.Services.AddControllersWithViews();
|
||||
builder.Services.AddControllersWithViews(x => x.ModelBinderProviders.Insert(0, new DateTimeModelBinderProvider())).AddRazorRuntimeCompilation();
|
||||
|
||||
var app = builder.Build();
|
||||
|
||||
@ -13,6 +17,7 @@ if (!app.Environment.IsDevelopment())
|
||||
app.UseHsts();
|
||||
}
|
||||
|
||||
|
||||
app.UseHttpsRedirection();
|
||||
app.UseStaticFiles();
|
||||
|
||||
@ -20,6 +25,8 @@ app.UseRouting();
|
||||
|
||||
app.UseAuthorization();
|
||||
|
||||
|
||||
|
||||
app.MapControllerRoute(
|
||||
name: "default",
|
||||
pattern: "{controller=Lecture}/{action=Index}/{id?}");
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation" Version="8.0.10" />
|
||||
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.20.1" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@ -1,8 +0,0 @@
|
||||
@{
|
||||
ViewData["Title"] = "Home Page";
|
||||
}
|
||||
|
||||
<div class="text-center">
|
||||
<h1 class="display-4">Welcome</h1>
|
||||
<p>Learn about <a href="https://learn.microsoft.com/aspnet/core">building Web apps with ASP.NET Core</a>.</p>
|
||||
</div>
|
||||
@ -1,6 +0,0 @@
|
||||
@{
|
||||
ViewData["Title"] = "Privacy Policy";
|
||||
}
|
||||
<h1>@ViewData["Title"]</h1>
|
||||
|
||||
<p>Use this page to detail your site's privacy policy.</p>
|
||||
@ -1,6 +1,8 @@
|
||||
@using System.Text.RegularExpressions
|
||||
@model LecturesViewModel
|
||||
@{
|
||||
ViewData["Title"] = "Vorlesungsplan";
|
||||
|
||||
string GetBranchName(string branch)
|
||||
{
|
||||
Regex regex = new Regex(@".* - (.*)");
|
||||
@ -17,38 +19,51 @@
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
<style>
|
||||
.time {
|
||||
display: inline;
|
||||
@section Header {
|
||||
<search class="container-fluid">
|
||||
<form method="get" class=" mb-2">
|
||||
<div class="row g-1">
|
||||
<div class="order-1 col-6 col-sm-3 col-md-2 col-lg-2 col-xl-2 col-xxl-1">
|
||||
<label for="Semester" class="form-label">Semester:</label>
|
||||
<select asp-for="Semester" class="form-select">
|
||||
@foreach (var semester in Model.Semesters)
|
||||
{
|
||||
<option value="@semester">
|
||||
@semester
|
||||
</option>
|
||||
}
|
||||
|
||||
.time[open] {
|
||||
display: block;
|
||||
</select>
|
||||
</div>
|
||||
<div class="order-1 col-6 col-sm-3 col-md-3 col-lg-2 col-xl-2 col-xxl-2">
|
||||
<label for="Date" class="form-label">Tag:</label>
|
||||
<select asp-for="Date" class="form-select">
|
||||
@foreach (var day in Model.Days)
|
||||
{
|
||||
<option value="@day.Key.ToShortDateString()">
|
||||
@day.Key.ToString("dd.MM.yyyy") - @day.Value
|
||||
</option>
|
||||
}
|
||||
|
||||
details[open] > summary {
|
||||
color:blue;
|
||||
</select>
|
||||
</div>
|
||||
<div class="order-1 col-12 col-sm-6 col-md-7 col-lg-3 col-xxl-2">
|
||||
<label class="form-label" for="Location">Ort:</label> <input type="text" asp-for="Location" class="form-control" />
|
||||
</div>
|
||||
<div class="order-1 col-12 col-sm-10 col-lg-4 col-xxl">
|
||||
<label for="TypeFilter" class="form-label">Typ:</label>
|
||||
<select type="text" asp-for="TypeFilter" class="form-select">
|
||||
<option value="">Alle</option>
|
||||
@foreach (var branch in Model.Types.OrderBy(x => GetBranchName(x)))
|
||||
{
|
||||
<option value="@branch">
|
||||
@GetBranchName(branch)
|
||||
</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-12 col-xxl order-0 order-xxl-1 col-xxl-3">
|
||||
<label for="Branch" class="form-label">Studiengang:</label>
|
||||
|
||||
details > * {
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
details > summary {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
form {
|
||||
width: 100%;
|
||||
}
|
||||
</style>
|
||||
|
||||
<form method="get" >
|
||||
Semester: <input type="text" asp-for="CurrentSemester" style="width:3.5em;text-align:center;" maxlength="5" />
|
||||
Ort: <input type="text" asp-for="LocationFilter" />
|
||||
Studiengang:
|
||||
<select type="text" asp-for="BranchFilter" >
|
||||
<select type="text" asp-for="Branch" class="form-select">
|
||||
<option value="">Alle</option>
|
||||
@foreach (var branch in Model.Branches.OrderBy(x => GetBranchName(x)))
|
||||
{
|
||||
@ -57,19 +72,87 @@
|
||||
</option>
|
||||
}
|
||||
</select>
|
||||
<input type="submit" value="Filter" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="order-1 col-12 col-sm-2 col-lg-1 align-self-end text-end">
|
||||
<input type="submit" value="Filter" class="btn btn-primary w-100" />
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</search>
|
||||
}
|
||||
|
||||
@foreach (var day in Model.LectureEvents.GroupBy(x => x.From.Date).OrderBy(x => x.Key))
|
||||
{
|
||||
<details class="day">
|
||||
<summary>@day.Key.ToString("dd.MM.yyyy") - (@day.Count())</summary>
|
||||
@foreach (var time in day.GroupBy(x => x.From).OrderBy(x => x.Key))
|
||||
<div class="container-fluid" id="lectures">
|
||||
<div class="accordion">
|
||||
@foreach (var time in Model.LectureEvents.GroupBy(x => x.From).OrderBy(x => x.Key))
|
||||
{
|
||||
<details class="time">
|
||||
string timeId = time.Key.ToString("HHmm");
|
||||
string timeHeader = $"header-{timeId}";
|
||||
string timeCollapse = $"collapse-{timeId}";
|
||||
string timeAccordion = $"accordion-{timeId}";
|
||||
<div class="accordion-item">
|
||||
<h2 id="@timeHeader" class="accordion-header">
|
||||
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#@timeCollapse" aria-expanded="false" aria-controls="@timeCollapse">
|
||||
@time.Key.ToString("HH:mm") - (@time.Count())
|
||||
</button>
|
||||
</h2>
|
||||
<div id="@timeCollapse" class="accordion-collapse collapse" aria-labelledby="@timeHeader">
|
||||
<div class="accordion-body">
|
||||
<div class="accordion accordion-flush" id="@timeAccordion">
|
||||
|
||||
@foreach (var eventItem in time.Distinct().OrderBy(x => x.Lecture.Title))
|
||||
{
|
||||
string eventId = $"{timeId}-{eventItem.Id}";
|
||||
string eventHeader = $"header-{eventId}";
|
||||
string eventCollapse = $"collapse-{eventId}";
|
||||
<div class="accordion-item">
|
||||
<h3 id="@eventHeader" class="accordion-header">
|
||||
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#@eventCollapse" aria-expanded="false" aria-controls="@eventCollapse">
|
||||
@Html.Raw(eventItem.Lecture.Title)
|
||||
</button>
|
||||
</h3>
|
||||
<div id="@eventCollapse" class="accordion-collapse collapse" aria-labelledby="@eventHeader" data-bs-parent="#@timeAccordion">
|
||||
<div class="accordion-body">
|
||||
<div class="time">
|
||||
<span>Zeitraum:</span>
|
||||
<time datetime="@eventItem.From.ToString("HH:mm")" class="fw-bolder">@eventItem.From.ToString("HH:mm")</time> - <time datetime="@eventItem.To.ToString("HH:mm")" class="fw-bolder">@eventItem.To.ToString("HH:mm")</time>
|
||||
</div>
|
||||
<div class="room">
|
||||
<span>Ort:</span>
|
||||
<address class="d-inline fw-bolder">@eventItem.Location</address>
|
||||
</div>
|
||||
<a href="@eventItem.Lecture.Url">@eventItem.Lecture.Url</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<noscript>
|
||||
<style>
|
||||
#lectures {
|
||||
display: none;
|
||||
}
|
||||
|
||||
details {
|
||||
}
|
||||
|
||||
details > :not(summary) {
|
||||
margin-left: 15px;
|
||||
}
|
||||
</style>
|
||||
@foreach (var time in Model.LectureEvents.GroupBy(x => x.From).OrderBy(x => x.Key))
|
||||
{
|
||||
<details>
|
||||
<summary>@time.Key.ToString("HH:mm") - (@time.Count())</summary>
|
||||
@foreach (var eventItem in time.OrderBy(x => x.Lecture.Title))
|
||||
@foreach (var eventItem in time.Distinct().OrderBy(x => x.Lecture.Title))
|
||||
{
|
||||
<details class="lecture">
|
||||
<summary class="title">@eventItem.Lecture.Title</summary>
|
||||
@ -78,15 +161,10 @@
|
||||
<div class="room">Ort: @eventItem.Location</div>
|
||||
|
||||
<a href="@eventItem.Lecture.Url">@eventItem.Lecture.Url</a>
|
||||
<details>
|
||||
<summary>Infos</summary>
|
||||
@Html.Raw(eventItem.Lecture.Description);
|
||||
</details>
|
||||
|
||||
</details>
|
||||
}
|
||||
|
||||
</details>
|
||||
}
|
||||
</details>
|
||||
}
|
||||
</noscript>
|
||||
@ -9,41 +9,23 @@
|
||||
<link rel="stylesheet" href="~/UWLecturePlan.styles.css" asp-append-version="true" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow mb-3">
|
||||
<div class="container-fluid">
|
||||
<a class="navbar-brand" asp-area="" asp-controller="Home" asp-action="Index">UWLecturePlan</a>
|
||||
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent"
|
||||
aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="navbar-collapse collapse d-sm-inline-flex justify-content-between">
|
||||
<ul class="navbar-nav flex-grow-1">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-dark" asp-area="" asp-controller="Home" asp-action="Index">Home</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-dark" asp-area="" asp-controller="Home" asp-action="Privacy">Privacy</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<header class="p-3 pb-0">
|
||||
@await RenderSectionAsync("Header", required: false)
|
||||
</header>
|
||||
<div class="container">
|
||||
<main role="main" class="pb-3">
|
||||
<main role="main" class="p-3 pt-0 pb-0">
|
||||
@RenderBody()
|
||||
</main>
|
||||
</div>
|
||||
|
||||
<footer class="border-top footer text-muted">
|
||||
<div class="container">
|
||||
© 2024 - UWLecturePlan - <a asp-area="" asp-controller="Home" asp-action="Privacy">Privacy</a>
|
||||
</div>
|
||||
@await RenderSectionAsync("Scripts", required: false)
|
||||
<footer class="p-3">
|
||||
<p class="container-fluid text-center">
|
||||
© 2024 Uni for All
|
||||
<address>
|
||||
<a href="mailto:uniforall@nihil.foo">uniforall@nihil.foo</a>
|
||||
</address>
|
||||
</p>
|
||||
</footer>
|
||||
<script src="~/lib/jquery/dist/jquery.min.js"></script>
|
||||
<script src="~/lib/bootstrap/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script src="~/js/site.js" asp-append-version="true"></script>
|
||||
@await RenderSectionAsync("Scripts", required: false)
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
{
|
||||
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
|
||||
BIN
UWLecturePlan/lecture.db
Normal file
BIN
UWLecturePlan/lecture.db
Normal file
Binary file not shown.
BIN
UWLecturePlan/lecture.db-shm
Normal file
BIN
UWLecturePlan/lecture.db-shm
Normal file
Binary file not shown.
0
UWLecturePlan/lecture.db-wal
Normal file
0
UWLecturePlan/lecture.db-wal
Normal file
@ -16,7 +16,3 @@ html {
|
||||
position: relative;
|
||||
min-height: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
margin-bottom: 60px;
|
||||
}
|
||||
@ -12,11 +12,13 @@ namespace UWLib
|
||||
public class Lecture
|
||||
{
|
||||
public int Id { get; set; }
|
||||
public string Semester { get; set; }
|
||||
public string Title { get; set; }
|
||||
public string Url { get; set; }
|
||||
public required string Semester { get; set; }
|
||||
public string? Title { get; set; }
|
||||
public string? Url { get; set; }
|
||||
public string? Description { get; set; }
|
||||
public List<LectureEvent> Events { get; set; } = new List<LectureEvent>();
|
||||
public string Branch { get; set; }
|
||||
public List<LectureEvent> Events { get; set; } = [];
|
||||
public string? Branch { get; set; }
|
||||
|
||||
public string? Type{ get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,8 +5,8 @@
|
||||
public int Id { get; set; }
|
||||
public DateTime From { get; set; }
|
||||
public DateTime To { get; set; }
|
||||
public string Location { get; set; }
|
||||
public string? Location { get; set; }
|
||||
|
||||
public Lecture Lecture { get; set; }
|
||||
public required Lecture Lecture { get; set; }
|
||||
}
|
||||
}
|
||||
@ -10,7 +10,7 @@ namespace UWLib
|
||||
public class LinkToScrape
|
||||
{
|
||||
[Key]
|
||||
public string Url { get; set; }
|
||||
public required string Url { get; set; }
|
||||
public int Sort { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,14 +45,14 @@ namespace UWLib.Migrations
|
||||
name: "FK_LectureEvents_Lectures_LectureId_LectureSemester",
|
||||
columns: x => new { x.LectureId, x.LectureSemester },
|
||||
principalTable: "Lectures",
|
||||
principalColumns: new[] { "Id", "Semester" },
|
||||
principalColumns: ["Id", "Semester"],
|
||||
onDelete: ReferentialAction.Cascade);
|
||||
});
|
||||
|
||||
migrationBuilder.CreateIndex(
|
||||
name: "IX_LectureEvents_LectureId_LectureSemester",
|
||||
table: "LectureEvents",
|
||||
columns: new[] { "LectureId", "LectureSemester" });
|
||||
columns: ["LectureId", "LectureSemester"]);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
|
||||
129
UWLib/Migrations/20241013131123_Type.Designer.cs
generated
Normal file
129
UWLib/Migrations/20241013131123_Type.Designer.cs
generated
Normal file
@ -0,0 +1,129 @@
|
||||
// <auto-generated />
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.EntityFrameworkCore.Infrastructure;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
|
||||
using UWLib;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace UWLib.Migrations
|
||||
{
|
||||
[DbContext(typeof(LectureContext))]
|
||||
[Migration("20241013131123_Type")]
|
||||
partial class Type
|
||||
{
|
||||
/// <inheritdoc />
|
||||
protected override void BuildTargetModel(ModelBuilder modelBuilder)
|
||||
{
|
||||
#pragma warning disable 612, 618
|
||||
modelBuilder.HasAnnotation("ProductVersion", "8.0.10");
|
||||
|
||||
modelBuilder.Entity("UWLib.Lecture", b =>
|
||||
{
|
||||
b.Property<int>("Id")
|
||||
.HasColumnType("INTEGER");
|
||||
|
||||
b.Property<string>("Semester")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Branch")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Description")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Title")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Type")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Url")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.HasKey("Id", "Semester");
|
||||
|
||||
b.ToTable("Lectures");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("UWLib.LectureEvent", b =>
|
||||
{
|
||||
b.Property<int>("Id")
|
||||
.ValueGeneratedOnAdd()
|
||||
.HasColumnType("INTEGER");
|
||||
|
||||
b.Property<DateTime>("From")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<int>("LectureId")
|
||||
.HasColumnType("INTEGER");
|
||||
|
||||
b.Property<string>("LectureSemester")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Location")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<DateTime>("To")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.HasKey("Id");
|
||||
|
||||
b.HasIndex("LectureId", "LectureSemester");
|
||||
|
||||
b.ToTable("LectureEvents");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("UWLib.LinkToScrape", b =>
|
||||
{
|
||||
b.Property<string>("Url")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<int>("Sort")
|
||||
.HasColumnType("INTEGER");
|
||||
|
||||
b.HasKey("Url");
|
||||
|
||||
b.ToTable("LinksToScrape");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("UWLib.ScrapedLink", b =>
|
||||
{
|
||||
b.Property<string>("Url")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<DateTime>("LastScrape")
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.HasKey("Url");
|
||||
|
||||
b.ToTable("ScrapedLinks");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("UWLib.LectureEvent", b =>
|
||||
{
|
||||
b.HasOne("UWLib.Lecture", "Lecture")
|
||||
.WithMany("Events")
|
||||
.HasForeignKey("LectureId", "LectureSemester")
|
||||
.OnDelete(DeleteBehavior.Cascade)
|
||||
.IsRequired();
|
||||
|
||||
b.Navigation("Lecture");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("UWLib.Lecture", b =>
|
||||
{
|
||||
b.Navigation("Events");
|
||||
});
|
||||
#pragma warning restore 612, 618
|
||||
}
|
||||
}
|
||||
}
|
||||
29
UWLib/Migrations/20241013131123_Type.cs
Normal file
29
UWLib/Migrations/20241013131123_Type.cs
Normal file
@ -0,0 +1,29 @@
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace UWLib.Migrations
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public partial class Type : Migration
|
||||
{
|
||||
/// <inheritdoc />
|
||||
protected override void Up(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.AddColumn<string>(
|
||||
name: "Type",
|
||||
table: "Lectures",
|
||||
type: "TEXT",
|
||||
nullable: false,
|
||||
defaultValue: "");
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override void Down(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.DropColumn(
|
||||
name: "Type",
|
||||
table: "Lectures");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -36,6 +36,10 @@ namespace UWLib.Migrations
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Type")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
b.Property<string>("Url")
|
||||
.IsRequired()
|
||||
.HasColumnType("TEXT");
|
||||
|
||||
@ -10,7 +10,7 @@ namespace UWLib
|
||||
public class ScrapedLink
|
||||
{
|
||||
[Key]
|
||||
public string Url{ get; set; }
|
||||
public required string Url{ get; set; }
|
||||
|
||||
public DateTime LastScrape { get; set; }
|
||||
}
|
||||
|
||||
@ -1,319 +1,29 @@
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Net.NetworkInformation;
|
||||
using System.Text.Encodings.Web;
|
||||
using System.Web;
|
||||
using System.Xml.Linq;
|
||||
using HtmlAgilityPack;
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.BiDi.Modules.Script;
|
||||
using OpenQA.Selenium.Chrome;
|
||||
using OpenQA.Selenium.DevTools.V127.Target;
|
||||
using OpenQA.Selenium.Support.UI;
|
||||
using UWLib;
|
||||
using static Microsoft.EntityFrameworkCore.DbLoggerCategory;
|
||||
|
||||
using Spectre.Console.Cli;
|
||||
using UWScraper;
|
||||
|
||||
internal class Program
|
||||
{
|
||||
readonly static List<string> checkedUrls = [];
|
||||
readonly static List<string> urlsToCheck = [];
|
||||
|
||||
static bool refreshLectures = false;
|
||||
|
||||
static readonly LectureContext db = new();
|
||||
static ChromeDriver? driver = null;
|
||||
private static void Main(string[] args)
|
||||
{
|
||||
AppDomain.CurrentDomain.ProcessExit += (s, e) =>
|
||||
{
|
||||
Done();
|
||||
Scraper.QuitDriver();
|
||||
};
|
||||
|
||||
var service = ChromeDriverService.CreateDefaultService();
|
||||
service.HideCommandPromptWindow = true;
|
||||
|
||||
var options = new ChromeOptions();
|
||||
//options.AddArgument("--headless");
|
||||
driver = new ChromeDriver(service, options);
|
||||
|
||||
if (args.Any(x => x == "-r" || x == "--refresh-lectures"))
|
||||
AppDomain.CurrentDomain.UnhandledException += (s, e) =>
|
||||
{
|
||||
refreshLectures = true;
|
||||
Scraper.QuitDriver();
|
||||
};
|
||||
|
||||
Console.WriteLine("Refreshing lectures");
|
||||
}
|
||||
|
||||
if (args.Length == 0)
|
||||
var app = new CommandApp<ScrapeCommand>();
|
||||
app.Configure(config =>
|
||||
{
|
||||
urlsToCheck.AddRange([.. db.LinksToScrape.Select(x => x.Url)]);
|
||||
|
||||
//add all urls that were checked in the last 24 hours
|
||||
checkedUrls.AddRange([.. db.ScrapedLinks.Where(x => x.LastScrape > DateTime.Now.AddDays(-1)).Select(x => x.Url)]);
|
||||
|
||||
//remove all checked urls from the urls to check
|
||||
foreach (var url in checkedUrls)
|
||||
{
|
||||
urlsToCheck.Remove(url);
|
||||
}
|
||||
|
||||
if (urlsToCheck.Count == 0)
|
||||
{
|
||||
// add the first url to check / Vorlesungsverzeichnis
|
||||
urlsToCheck.Add("https://ufind.univie.ac.at/de/vvz.html");
|
||||
}
|
||||
}
|
||||
|
||||
if (refreshLectures)
|
||||
{
|
||||
urlsToCheck.AddRange([.. db.Lectures.Select(x => x.Url)]);
|
||||
}
|
||||
|
||||
Console.CursorVisible = false;
|
||||
|
||||
int top = Console.CursorTop;
|
||||
|
||||
while (urlsToCheck.Count > 0)
|
||||
{
|
||||
var url = urlsToCheck.First();
|
||||
|
||||
try
|
||||
{
|
||||
FindUrls(url);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.Clear();
|
||||
Console.WriteLine($"Url: {url}");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine(e);
|
||||
return;
|
||||
}
|
||||
|
||||
Console.CursorLeft = 0;
|
||||
Console.CursorTop = top;
|
||||
Console.WriteLine($"Urls checked: {checkedUrls.Count}");
|
||||
Console.WriteLine($"Urls to check: {urlsToCheck.Count}");
|
||||
}
|
||||
|
||||
Console.WriteLine("Done");
|
||||
}
|
||||
|
||||
private static void Done()
|
||||
{
|
||||
driver?.Quit();
|
||||
|
||||
Console.CursorVisible = true;
|
||||
|
||||
Console.ReadLine();
|
||||
}
|
||||
|
||||
static string GetAbsoluteUrl(string relativeUrl, string baseUrl)
|
||||
{
|
||||
var uri = new Uri(baseUrl);
|
||||
var baseUri = new Uri(uri, relativeUrl);
|
||||
return HttpUtility.HtmlDecode(baseUri.AbsoluteUri);
|
||||
}
|
||||
|
||||
private static void FindUrls(string source)
|
||||
{
|
||||
driver?.Navigate().GoToUrl(source);
|
||||
|
||||
// wait for the page to load
|
||||
WebDriverWait wait = new(driver, TimeSpan.FromSeconds(2));
|
||||
wait.Until(d => d.FindElements(By.XPath("//a[starts-with(@href, 'vvz_sub.html')]")).Count > 0 ||
|
||||
d.FindElements(By.XPath("//a[starts-with(@href, 'course.html')]")).Count > 0 ||
|
||||
d.FindElements(By.XPath("//h1/*/*[@class='what']")).Count > 0
|
||||
);
|
||||
|
||||
HtmlDocument doc = new();
|
||||
doc.LoadHtml(driver?.PageSource);
|
||||
var root = doc.DocumentNode;
|
||||
|
||||
if (source.Contains("course.html"))
|
||||
{
|
||||
CreateLecture(source, root);
|
||||
}
|
||||
|
||||
if (!refreshLectures)
|
||||
{
|
||||
FindPathLinks(source, root);
|
||||
FindCourseLinks(source, root);
|
||||
}
|
||||
|
||||
RefreshScrapedLink(source);
|
||||
RemoveLinkToScrape(source);
|
||||
db.SaveChanges();
|
||||
|
||||
checkedUrls.Add(source);
|
||||
urlsToCheck.Remove(source);
|
||||
}
|
||||
|
||||
private static void RefreshScrapedLink(string source)
|
||||
{
|
||||
var scrapedLink = db.ScrapedLinks.Find(source);
|
||||
if (scrapedLink == null)
|
||||
{
|
||||
scrapedLink = new ScrapedLink { Url = source };
|
||||
db.ScrapedLinks.Add(scrapedLink);
|
||||
}
|
||||
scrapedLink.LastScrape = DateTime.Now;
|
||||
}
|
||||
|
||||
private static void FindCourseLinks(string source, HtmlNode root)
|
||||
{
|
||||
var courseLinkParents = root.SelectNodes("//a[starts-with(@href, 'course.html')]/..");
|
||||
if (courseLinkParents != null)
|
||||
{
|
||||
foreach (var parent in courseLinkParents)
|
||||
{
|
||||
if (parent.SelectSingleNode("abbr[contains(@title,'Vorlesung')]") != null)
|
||||
{
|
||||
var link = parent.SelectSingleNode("a[contains(@href, 'course.html')]");
|
||||
|
||||
var url = GetAbsoluteUrl(link.GetAttributeValue("href", ""), source);
|
||||
|
||||
if (!checkedUrls.Contains(url))
|
||||
{
|
||||
db.LinksToScrape.Add(new LinkToScrape { Url = url, Sort = 0 });
|
||||
urlsToCheck.Insert(0, url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void FindPathLinks(string source, HtmlNode root)
|
||||
{
|
||||
var pathLinks = root.SelectNodes("//a[starts-with(@href, 'vvz_sub.html')]");
|
||||
|
||||
if (pathLinks != null)
|
||||
{
|
||||
foreach (var link in pathLinks)
|
||||
{
|
||||
var url = GetAbsoluteUrl(link.GetAttributeValue("href", ""), source);
|
||||
|
||||
if (!checkedUrls.Contains(url))
|
||||
{
|
||||
int sort = 1;
|
||||
AddLinkToScrape(url, sort);
|
||||
urlsToCheck.Add(url);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void RemoveLinkToScrape(string url)
|
||||
{
|
||||
var link = db.LinksToScrape.Find(url);
|
||||
if (link != null)
|
||||
{
|
||||
db.LinksToScrape.Remove(link);
|
||||
}
|
||||
}
|
||||
|
||||
private static void AddLinkToScrape(string url, int sort)
|
||||
{
|
||||
var link = db.LinksToScrape.Find(url);
|
||||
if (link == null)
|
||||
{
|
||||
db.LinksToScrape.Add(new LinkToScrape { Url = url, Sort = sort });
|
||||
}
|
||||
}
|
||||
|
||||
private static void CreateLecture(string source, HtmlNode root)
|
||||
{
|
||||
var branch = root.SelectSingleNode("/html/body/main/div[1]/div[1]/a");
|
||||
var what = root.SelectSingleNode("//h1/*/*[@class='what']");
|
||||
var when = root.SelectSingleNode("//h1/*/*[@class='when']");
|
||||
var info = root.SelectSingleNode("//*[@class='info list']");
|
||||
var events = root.SelectNodes("//ul[@class='classes events list']/li");
|
||||
|
||||
var uri = new Uri(source);
|
||||
|
||||
var query = HttpUtility.ParseQueryString(uri.Query);
|
||||
|
||||
Lecture? lecture = db.Lectures.FirstOrDefault(db => db.Url == source);
|
||||
|
||||
if (lecture == null)
|
||||
{
|
||||
lecture = new Lecture();
|
||||
|
||||
if (query.AllKeys.Contains("lv"))
|
||||
{
|
||||
lecture.Id = int.Parse(query["lv"] ?? "0");
|
||||
}
|
||||
|
||||
if (when != null)
|
||||
{
|
||||
lecture.Semester = when.InnerText;
|
||||
}
|
||||
|
||||
lecture.Url = source;
|
||||
|
||||
db.Lectures.Add(lecture);
|
||||
}
|
||||
|
||||
if (branch != null)
|
||||
{
|
||||
lecture.Branch = branch.InnerText;
|
||||
}
|
||||
|
||||
if (what != null)
|
||||
{
|
||||
lecture.Title = what.InnerText;
|
||||
}
|
||||
|
||||
if (info != null)
|
||||
{
|
||||
lecture.Description = info.InnerHtml;
|
||||
}
|
||||
|
||||
if (events != null)
|
||||
{
|
||||
CreateLectureEvents(events, lecture);
|
||||
}
|
||||
}
|
||||
|
||||
private static void CreateLectureEvents(HtmlNodeCollection events, Lecture lecture)
|
||||
{
|
||||
int year = int.Parse(lecture.Semester[..4]);
|
||||
|
||||
db.RemoveRange(lecture.Events);
|
||||
lecture.Events.Clear();
|
||||
foreach (var item in events)
|
||||
{
|
||||
LectureEvent lectureEvent = new();
|
||||
|
||||
var day = item.SelectSingleNode("*[@class='date']");
|
||||
var time = item.SelectSingleNode("*[@class='time']");
|
||||
var room = item.SelectSingleNode("*[@class='room']");
|
||||
DateTime date = new();
|
||||
if (day != null)
|
||||
{
|
||||
date = DateTime.ParseExact(day.InnerText + year.ToString(), "dd.MM.yyyy", CultureInfo.InvariantCulture);
|
||||
}
|
||||
|
||||
if (time != null)
|
||||
{
|
||||
var text = time.InnerText;
|
||||
|
||||
var times = text.Split(" - ");
|
||||
|
||||
var from = TimeSpan.ParseExact(times[0], "hh\\:mm", CultureInfo.InvariantCulture);
|
||||
var to = TimeSpan.ParseExact(times[1], "hh\\:mm", CultureInfo.InvariantCulture);
|
||||
|
||||
lectureEvent.From = date.Add(from);
|
||||
lectureEvent.To = date.Add(to);
|
||||
}
|
||||
|
||||
if (room != null)
|
||||
{
|
||||
lectureEvent.Location = room.InnerText;
|
||||
}
|
||||
|
||||
lecture.Events.Add(lectureEvent);
|
||||
}
|
||||
#if DEBUG
|
||||
config.PropagateExceptions();
|
||||
config.ValidateExamples();
|
||||
#endif
|
||||
});
|
||||
app.Run(args);
|
||||
}
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
"profiles": {
|
||||
"UWScraper": {
|
||||
"commandName": "Project",
|
||||
"commandLineArgs": "-r"
|
||||
"commandLineArgs": "-s ABC"
|
||||
}
|
||||
}
|
||||
}
|
||||
109
UWScraper/ScrapeCommand.cs
Normal file
109
UWScraper/ScrapeCommand.cs
Normal file
@ -0,0 +1,109 @@
|
||||
using System.ComponentModel;
|
||||
using System.Text.RegularExpressions;
|
||||
using Spectre.Console;
|
||||
using Spectre.Console.Cli;
|
||||
|
||||
namespace UWScraper
|
||||
{
|
||||
internal sealed partial class ScrapeCommand : Command<ScrapeCommand.Settings>
|
||||
{
|
||||
/// <summary>
/// Command line options of the scrape command.
/// </summary>
public partial class Settings : CommandSettings
{
    [Description("Rescrape all lectures in the data base.")]
    [CommandOption("-r|--refresh-lectures")]
    public bool RefreshLectures { get; set; }

    [Description("The timeout for the scraping one page in seconds. (default = 2 seconds)")]
    [CommandOption("-t|--timeout")]
    [DefaultValue((uint)2)]
    public uint Timeout { get; set; }

    // NOTE(review): "-h" is presumably also Spectre.Console.Cli's built-in help switch;
    // confirm that this short option is actually reachable.
    [Description("The amount of hours after which a url is rescraped. (default = 24 hours)")]
    [CommandOption("-h|--rescrape-hours")]
    [DefaultValue(24d)]
    public double RescrapeHours { get; set; }

    [Description("The start url for the scraping.")]
    [CommandOption("-u|--url")]
    [DefaultValue("https://ufind.univie.ac.at/de/vvz.html")]
    public string StartUrl { get; set; } = "https://ufind.univie.ac.at/de/vvz.html";

    // The documented default now matches the real default value ("lecture.db").
    [Description("The path to the database file. Supports relative paths (default = lecture.db)")]
    [CommandOption("-p|--path")]
    [DefaultValue("lecture.db")]
    public string DatabasePath { get; set; } = "lecture.db";

    [Description("The semester for which the lectures should be scraped. \"all\" for all semester (default = current semester) ")]
    [CommandOption("-s|--semester")]
    [DefaultValue(null)]
    public string Semester { get; set; } = CurrentSemester();

    [Description("Clear all links to scrape.")]
    [CommandOption("-c|--clear-links")]
    [DefaultValue(false)]
    public bool ClearLinksToScrape { get; set; }

    /// <summary>
    /// Checks that the semester has the form "all" or "YYYYS"/"YYYYW" and that the
    /// start url is a well-formed absolute url.
    /// </summary>
    /// <returns>A success result, or an error result describing the first invalid option.</returns>
    public override ValidationResult Validate()
    {
        // [DefaultValue(null)] may leave the property null when the option is omitted
        // (TODO confirm binder behavior); fall back to the documented default instead
        // of letting Regex.IsMatch throw an ArgumentNullException.
        if (Semester is null)
        {
            Semester = CurrentSemester();
        }

        if (!SemesterRegex().IsMatch(Semester))
        {
            return ValidationResult.Error($"Semester {Semester} is not valid. Please use the format \"all\" or \"YYYY[S|W]\".");
        }

        if (!Uri.IsWellFormedUriString(StartUrl, UriKind.Absolute))
        {
            return ValidationResult.Error($"The start url {StartUrl} is not a valid url.");
        }

        return ValidationResult.Success();
    }

    /// <summary>
    /// Builds the identifier of the semester running today: the current year followed
    /// by "W" from October on and by "S" otherwise.
    /// </summary>
    private static string CurrentSemester()
    {
        DateTime now = DateTime.Now;
        return now.Year + (now.Month >= 10 ? "W" : "S");
    }

    // Inside a character class "|" is a literal, so the former "[S|W]" also accepted
    // values such as "2024|". "[SW]" matches exactly one of the two semester letters.
    [GeneratedRegex(@"^(all|\d{4}[SW])$")]
    private static partial Regex SemesterRegex();
}
|
||||
|
||||
/// <summary>
/// Runs the scraper and mirrors its progress events in a console progress bar.
/// </summary>
/// <param name="context">The command context supplied by the command line framework.</param>
/// <param name="settings">The parsed command line options, handed on to the scraper.</param>
/// <returns>Always 0.</returns>
public override int Execute(CommandContext context, Settings settings)
{
    var progress = AnsiConsole.Progress().Columns(
        new TaskDescriptionColumn(),
        new ValuesColumn(),
        new ProgressBarColumn(),
        new PercentageColumn(),
        new RemainingTimeColumn());

    progress.Start(ctx =>
    {
        // The bar is created once the scraper reports its initial counts.
        void OnInit(Scraper.ScrapedEventArgs initial)
        {
            var bar = ctx.AddTask("Scraped urls", true, initial.ToScrape + initial.Scraped);
            bar.Value(initial.Scraped);

            // Every scraped url may add new urls, so the maximum is refreshed as well.
            void OnScraped(Scraper.ScrapedEventArgs update)
            {
                if (!bar.IsStarted)
                {
                    bar.StartTask();
                }

                bar.MaxValue(update.ToScrape + update.Scraped);
                bar.Value(update.Scraped);
            }

            Scraper.Scraped += OnScraped;
        }

        Scraper.Init += OnInit;

        Scraper.Scrape(settings);
    });

    return 0;
}
|
||||
}
|
||||
}
|
||||
449
UWScraper/Scraper.cs
Normal file
449
UWScraper/Scraper.cs
Normal file
@ -0,0 +1,449 @@
|
||||
using System.Globalization;
|
||||
using HtmlAgilityPack;
|
||||
using System.Web;
|
||||
using OpenQA.Selenium.Chrome;
|
||||
using OpenQA.Selenium.Support.UI;
|
||||
using UWLib;
|
||||
using OpenQA.Selenium;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using System.Text;
|
||||
using System.IO;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.EntityFrameworkCore.Design;
|
||||
using System.Reflection;
|
||||
using System.ComponentModel;
|
||||
|
||||
namespace UWScraper
|
||||
{
|
||||
|
||||
internal static class Scraper
|
||||
{
|
||||
/// <summary>
/// Progress snapshot passed to the <c>Init</c> and <c>Scraped</c> events.
/// </summary>
public class ScrapedEventArgs : EventArgs
{
    /// <summary>Number of urls that count as already scraped.</summary>
    public int Scraped { get; set; }

    /// <summary>Number of urls still waiting to be scraped.</summary>
    public int ToScrape { get; set; }
}
|
||||
|
||||
// Urls that are treated as done: recently scraped ones loaded from the database
// plus every url handled during this run.
static readonly List<string> checkedUrls = [];

// Work queue of urls that still have to be visited.
static readonly List<string> urlsToCheck = [];

// When set, only the urls of already stored lectures are rescraped.
static bool refreshLectures;

// Seconds to wait for a page to show one of the expected elements.
static uint timeout = 2;

// Semester filter; "all" disables the filtering.
static string semester = string.Empty;

// Database context; replaced with the configured path when scraping starts.
static LectureContext db = new LectureContext("lecture.db");

// Browser driver, created by the static constructor.
static readonly ChromeDriver? driver;

/// <summary>Handler signature of the progress events.</summary>
public delegate void ScrapedEventHandler(ScrapedEventArgs e);

/// <summary>Parameterless handler signature.</summary>
public delegate void InitEventHandler();

/// <summary>Raised after each url has been processed.</summary>
public static event ScrapedEventHandler? Scraped;

/// <summary>Raised once with the initial counts, before the first url is processed.</summary>
public static event ScrapedEventHandler? Init;
|
||||
|
||||
/// <summary>
/// Shuts the browser driver down if one was created.
/// </summary>
internal static void QuitDriver() => driver?.Quit();
|
||||
|
||||
/// <summary>
/// Starts the Chrome driver that is shared by all scraping calls.
/// </summary>
static Scraper()
{
    var chromeService = ChromeDriverService.CreateDefaultService();
    chromeService.HideCommandPromptWindow = true;

    // Default options only; headless mode ("--headless") is currently not enabled.
    driver = new ChromeDriver(chromeService, new ChromeOptions());
}
|
||||
|
||||
/// <summary>
/// Runs a complete scrape: applies the settings, builds the queue of urls and
/// processes it until it is empty, raising <c>Init</c> once and <c>Scraped</c>
/// after every url.
/// </summary>
/// <param name="settings">The options of the scrape command.</param>
internal static void Scrape(ScrapeCommand.Settings settings)
{
    db = new LectureContext(settings.DatabasePath);

    timeout = settings.Timeout;
    refreshLectures = settings.RefreshLectures;

    if (settings.ClearLinksToScrape)
    {
        db.LinksToScrape.RemoveRange(db.LinksToScrape.ToList());
        db.SaveChanges();
    }

    if (settings.Semester != null)
    {
        semester = settings.Semester;
    }

    if (refreshLectures)
    {
        // Refresh mode: revisit the page of every lecture that is already stored.
        urlsToCheck.AddRange(db.Lectures.Select(x => x.Url).ToList());
    }
    else
    {
        urlsToCheck.AddRange(db.LinksToScrape.OrderBy(x => x.Sort).Select(x => x.Url).ToList());

        // Urls scraped within the last RescrapeHours hours count as done.
        // (The setting is in hours, so AddHours is used; AddDays treated it as days.)
        var cutoff = DateTime.Now.AddHours(-settings.RescrapeHours);
        checkedUrls.AddRange(db.ScrapedLinks.Where(x => x.LastScrape > cutoff).Select(x => x.Url).ToList());

        // Drop everything that is already done from the queue.
        var alreadyChecked = new HashSet<string>(checkedUrls);
        urlsToCheck.RemoveAll(alreadyChecked.Contains);

        if (urlsToCheck.Count == 0)
        {
            // Nothing queued: start from the course directory.
            // The default value is declared on the StartUrl property, so the attribute
            // has to be read from the property, not from the settings type.
            var defaultStartUrl = typeof(ScrapeCommand.Settings)
                .GetProperty(nameof(ScrapeCommand.Settings.StartUrl))?
                .GetCustomAttribute<DefaultValueAttribute>()?
                .Value?.ToString();

            bool isDefaultStart = defaultStartUrl != null && settings.StartUrl == defaultStartUrl;

            // NOTE(review): "all" is not passed on as a query value because it disables
            // the semester filter in this scraper — confirm this matches the site.
            bool hasSpecificSemester = !string.IsNullOrEmpty(semester) && semester != "all";

            urlsToCheck.Add(isDefaultStart && hasSpecificSemester
                ? $"{settings.StartUrl}?semester={semester}"
                : settings.StartUrl);
        }
    }

    Console.CursorVisible = false;

    Init?.Invoke(new ScrapedEventArgs() { Scraped = checkedUrls.Count, ToScrape = urlsToCheck.Count });

    while (urlsToCheck.Count > 0)
    {
        // ScrapeUrl removes the url from the queue and may enqueue newly found ones.
        var url = urlsToCheck.First();

        ScrapeUrl(url);

        Scraped?.Invoke(new ScrapedEventArgs() { Scraped = checkedUrls.Count, ToScrape = urlsToCheck.Count });
    }
}
|
||||
|
||||
/// <summary>
/// Resolves a link found on a page against that page's url and returns it in
/// sanitized form.
/// </summary>
/// <param name="relativeUrl">The href value as found on the page.</param>
/// <param name="baseUrl">Absolute url of the page that contains the link.</param>
/// <returns>The HTML-decoded, sanitized absolute url.</returns>
static string GetAbsoluteUrl(string relativeUrl, string baseUrl)
{
    var resolved = new Uri(new Uri(baseUrl), relativeUrl);

    // Entities such as "&amp;" in the href are decoded before the query is normalized.
    return SanitizeUrl(HttpUtility.HtmlDecode(resolved.AbsoluteUri));
}
|
||||
|
||||
/// <summary>
/// Brings a url into a canonical form: the fragment is dropped, the query
/// parameters "from", "to" and "details" are removed and the remaining
/// parameters are sorted by name.
/// </summary>
/// <param name="url">An absolute url.</param>
/// <returns>The url without query if no parameter remains, otherwise with the sorted query.</returns>
public static string SanitizeUrl(string url)
{
    string[] ignoredKeys = ["from", "to", "details"];

    Uri parsed = new(url);
    string withoutQuery = parsed.GetLeftPart(UriPartial.Path);

    if (string.IsNullOrWhiteSpace(parsed.Query))
    {
        return withoutQuery;
    }

    var parameters = HttpUtility.ParseQueryString(parsed.Query);

    var pairs = parameters.AllKeys
        .Where(key => !ignoredKeys.Contains(key))
        .OrderBy(key => key)
        .Select(key => $"{key}={parameters[key]}")
        .ToList();

    // Without any remaining parameter the "?" is left out as well.
    return pairs.Count == 0
        ? withoutQuery
        : withoutQuery + "?" + string.Join("&", pairs);
}
|
||||
|
||||
/// <summary>
/// Processes one url and records it as done, both in the database and in the
/// in-memory lists.
/// </summary>
/// <param name="source">The url to process.</param>
private static void ScrapeUrl(string source)
{
    Navigate(source);

    // Persist the bookkeeping together with whatever Navigate added to the context.
    RefreshScrapedLink(source);
    RemoveLinkToScrape(source);
    db.SaveChanges();

    // Move the url from the queue to the done list.
    urlsToCheck.Remove(source);
    checkedUrls.Add(source);
}
|
||||
|
||||
/// <summary>
/// Loads a page in the browser and extracts lecture data and further links from it.
/// Pages that belong to a semester other than the selected one are skipped.
/// Failures are reported as trace warnings and do not stop the scrape.
/// </summary>
/// <param name="source">Absolute url of the page to load.</param>
private static void Navigate(string source)
{
    // True when a semester filter is active and the given semester is a different one.
    bool IsOtherSemester(string? candidate) =>
        !string.IsNullOrEmpty(candidate) && semester != "all" && semester != candidate;

    try
    {
        // Cheap check first: the semester named in the url itself.
        if (IsOtherSemester(GetSemesterOfUrl(source)))
        {
            return;
        }

        if (driver is null)
        {
            return;
        }

        driver.Navigate().GoToUrl(source);

        // Wait until the page shows directory links, course links or a course title.
        WebDriverWait wait = new(driver, TimeSpan.FromSeconds(timeout));
        wait.Until(d => d.FindElements(By.XPath("//a[starts-with(@href, 'vvz_sub.html')]")).Count > 0 ||
                        d.FindElements(By.XPath("//a[starts-with(@href, 'course.html')]")).Count > 0 ||
                        d.FindElements(By.XPath("//h1/*/*[@class='what']")).Count > 0);

        HtmlDocument doc = new();
        doc.LoadHtml(driver.PageSource);
        var root = doc.DocumentNode;

        // Second check: the semester the loaded page marks as current.
        var semesterNode = root.SelectSingleNode("/html/body/main/nav/span[@class='current']");

        if (IsOtherSemester(semesterNode?.InnerText))
        {
            return;
        }

        if (source.Contains("course.html"))
        {
            CreateLecture(source, root);
        }

        if (!refreshLectures)
        {
            FindPathLinks(source, root);
            FindCourseLinks(source, root);
        }
    }
    catch (Exception ex)
    {
        // Scraping stays best-effort (timeouts and unexpected markup are skipped), but
        // the failure is no longer dropped silently. Trace is used instead of the
        // console so the progress output is not disturbed.
        System.Diagnostics.Trace.TraceWarning($"Scraping '{source}' failed: {ex.GetType().Name}: {ex.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Reads the value of the "semester" query parameter of a url.
/// </summary>
/// <param name="source">An absolute url.</param>
/// <returns>The parameter value, or an empty string when the url has no such parameter.</returns>
private static string GetSemesterOfUrl(string source)
{
    string queryText = new Uri(source).Query;

    // Quick exit before parsing: the raw query does not mention the parameter at all.
    if (!queryText.Contains("semester"))
    {
        return string.Empty;
    }

    var parameters = HttpUtility.ParseQueryString(queryText);

    return parameters.AllKeys.Contains("semester")
        ? parameters["semester"] ?? string.Empty
        : string.Empty;
}
|
||||
|
||||
/// <summary>
/// Stamps the url with the current time in the scraped-links table, creating the
/// entry when the url is not stored yet. The change is not saved here.
/// </summary>
/// <param name="source">The url that has just been processed.</param>
private static void RefreshScrapedLink(string source)
{
    DateTime scrapedAt = DateTime.Now;

    if (db.ScrapedLinks.Find(source) is { } known)
    {
        known.LastScrape = scrapedAt;
        return;
    }

    db.ScrapedLinks.Add(new ScrapedLink { Url = source, LastScrape = scrapedAt });
}
|
||||
|
||||
/// <summary>
/// Queues the course pages linked from a page. New course urls are placed at the
/// front of the queue and stored with sort value 0.
/// </summary>
/// <param name="source">Url of the page the links were found on; used to resolve relative links.</param>
/// <param name="root">Document node of that page.</param>
private static void FindCourseLinks(string source, HtmlNode root)
{
    var containers = root.SelectNodes("//a[starts-with(@href, 'course.html')]/..");

    if (containers == null)
    {
        return;
    }

    foreach (var container in containers)
    {
        // Only the first course link of each parent element is followed.
        var anchor = container.SelectSingleNode("a[contains(@href, 'course.html')]");

        var target = GetAbsoluteUrl(anchor.GetAttributeValue("href", ""), source);

        if (checkedUrls.Contains(target) || urlsToCheck.Contains(target))
        {
            continue;
        }

        AddLinkToScrape(target, 0);
        urlsToCheck.Insert(0, target);
    }
}
|
||||
|
||||
/// <summary>
/// Queues the directory sub-pages ("vvz_sub.html") linked from a page. New urls are
/// appended to the end of the queue and stored with sort value 1.
/// </summary>
/// <param name="source">Url of the page the links were found on; used to resolve relative links.</param>
/// <param name="root">Document node of that page.</param>
private static void FindPathLinks(string source, HtmlNode root)
{
    var anchors = root.SelectNodes("//a[starts-with(@href, 'vvz_sub.html')]");

    if (anchors == null)
    {
        return;
    }

    foreach (var anchor in anchors)
    {
        var target = GetAbsoluteUrl(anchor.GetAttributeValue("href", ""), source);

        if (checkedUrls.Contains(target) || urlsToCheck.Contains(target))
        {
            continue;
        }

        AddLinkToScrape(target, 1);
        urlsToCheck.Add(target);
    }
}
|
||||
|
||||
/// <summary>
/// Removes the url from the stored links-to-scrape, if it is stored. The change is
/// not saved here.
/// </summary>
/// <param name="url">The url to remove.</param>
private static void RemoveLinkToScrape(string url)
{
    if (db.LinksToScrape.Find(url) is { } pending)
    {
        db.LinksToScrape.Remove(pending);
    }
}
|
||||
|
||||
/// <summary>
/// Stores the url as a link that still has to be scraped, unless it is stored
/// already. The change is not saved here.
/// </summary>
/// <param name="url">The url to store.</param>
/// <param name="sort">Sort value stored with the link; used to order the queue when it is reloaded.</param>
private static void AddLinkToScrape(string url, int sort)
{
    if (db.LinksToScrape.Find(url) is not null)
    {
        return;
    }

    db.LinksToScrape.Add(new LinkToScrape { Url = url, Sort = sort });
}
|
||||
|
||||
/// <summary>
/// Creates or updates the <see cref="Lecture"/> described by a course page. A lecture
/// is identified by its number and its semester.
/// </summary>
/// <param name="source">Absolute url of the course page.</param>
/// <param name="root">Document node of the parsed course page.</param>
private static void CreateLecture(string source, HtmlNode root)
{
    var parameters = HttpUtility.ParseQueryString(new Uri(source).Query);

    // Lecture number: taken from the page, but the "lv" url parameter wins when present.
    int lectureId = 0;

    if (root.SelectSingleNode("//*[@class='title']//*[@class='number']") is { } numberNode)
    {
        lectureId = int.Parse(numberNode.InnerText);
    }

    if (parameters.AllKeys.Contains("lv"))
    {
        lectureId = int.Parse(parameters["lv"] ?? "0");
    }

    // Semester: taken from the page, with the url parameter as fallback.
    string term = string.Empty;

    if (root.SelectSingleNode("//*[@class='title']//*[@class='when']") is { } termNode)
    {
        term = termNode.InnerText;
    }
    else if (parameters.AllKeys.Contains("semester"))
    {
        term = parameters["semester"] ?? "";
    }

    // The events are loaded as well because they are replaced further down.
    Lecture? lecture = db.Lectures
        .Include(entry => entry.Events)
        .FirstOrDefault(entry => entry.Id == lectureId && entry.Semester == term);

    if (lecture is null)
    {
        lecture = new Lecture { Id = lectureId, Semester = term };
        db.Lectures.Add(lecture);
    }

    lecture.Url = source;

    // Descriptive fields are only overwritten when the page provides them.
    if (root.SelectSingleNode("//*[@class='spl']") is { } branchNode)
    {
        lecture.Branch = branchNode.InnerText;
    }

    if (root.SelectSingleNode("//*[@class='title']//*[@class='what']") is { } titleNode)
    {
        lecture.Title = titleNode.InnerText;
    }

    if (root.SelectSingleNode("//*[@class='info list']") is { } infoNode)
    {
        lecture.Description = infoNode.InnerHtml;
    }

    if (root.SelectSingleNode("//*[@class='title']//*[@class='type']") is { } typeNode)
    {
        lecture.Type = typeNode.GetAttributeValue("title", "");
    }

    if (root.SelectNodes("//ul[@class='classes events list']/li") is { } eventNodes)
    {
        CreateLectureEvents(eventNodes, lecture);
    }
}
|
||||
|
||||
/// <summary>
/// Replaces the events of a lecture with the events listed on its course page.
/// </summary>
/// <remarks>
/// The page only shows day and month ("dd.MM."), so the year is derived from the
/// semester of the lecture, whose first four characters are the year. Throws when
/// the semester, a date or a time range does not have the expected format.
/// </remarks>
/// <param name="events">The list items of the event list of the course page.</param>
/// <param name="lecture">The lecture the events belong to; its semester must be set.</param>
private static void CreateLectureEvents(HtmlNodeCollection events, Lecture lecture)
{
    // Month in which a winter semester starts (the same boundary is used when the
    // current semester is derived from today's date).
    const int winterSemesterStartMonth = 10;

    int year = int.Parse(lecture.Semester[..4], CultureInfo.InvariantCulture);

    // Semesters are written as "YYYYS" or "YYYYW".
    bool isWinterSemester = lecture.Semester.Length > 4 && lecture.Semester[4] == 'W';

    db.RemoveRange(lecture.Events);
    lecture.Events.Clear();

    foreach (var item in events)
    {
        LectureEvent lectureEvent = new() { Lecture = lecture };

        var day = item.SelectSingleNode("*[@class='date']");
        var time = item.SelectSingleNode("*[@class='time']");
        var room = item.SelectSingleNode("*[@class='room']");

        DateTime date = new();

        if (day != null)
        {
            string dayText = day.InnerText;

            // Read the month first, using a leap year so that "29.02." can be parsed.
            int month = DateTime.ParseExact(dayText + "2000", "dd.MM.yyyy", CultureInfo.InvariantCulture).Month;

            // A winter semester runs over the turn of the year: its dates before
            // October belong to the following calendar year.
            // NOTE(review): assumes no winter-semester event lies before October of
            // the start year — confirm against real data.
            int eventYear = isWinterSemester && month < winterSemesterStartMonth ? year + 1 : year;

            date = DateTime.ParseExact(
                dayText + eventYear.ToString(CultureInfo.InvariantCulture),
                "dd.MM.yyyy",
                CultureInfo.InvariantCulture);
        }

        if (time != null)
        {
            // The time range is written as "hh:mm - hh:mm".
            var times = time.InnerText.Split(" - ");

            var from = TimeSpan.ParseExact(times[0], "hh\\:mm", CultureInfo.InvariantCulture);
            var to = TimeSpan.ParseExact(times[1], "hh\\:mm", CultureInfo.InvariantCulture);

            lectureEvent.From = date.Add(from);
            lectureEvent.To = date.Add(to);
        }

        if (room != null)
        {
            lectureEvent.Location = room.InnerText;
        }

        lecture.Events.Add(lectureEvent);
    }
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -14,6 +14,8 @@
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="Selenium.WebDriver" Version="4.25.0" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.49.1" />
|
||||
<PackageReference Include="Spectre.Console.Cli" Version="0.49.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
13
UWScraper/ValuesColumn.cs
Normal file
13
UWScraper/ValuesColumn.cs
Normal file
@ -0,0 +1,13 @@
|
||||
using Spectre.Console;
|
||||
using Spectre.Console.Rendering;
|
||||
|
||||
namespace UWScraper
|
||||
{
|
||||
/// <summary>
/// Progress column that shows the current and the maximum value of a task as
/// "value / max", right-justified.
/// </summary>
internal class ValuesColumn : ProgressColumn
{
    /// <summary>Renders the "value / max" text for the given task.</summary>
    public override IRenderable Render(RenderOptions options, ProgressTask task, TimeSpan deltaTime)
    {
        var label = $"{task.Value} / {task.MaxValue}";

        return new Text(label, Style.Plain).RightJustified();
    }
}
|
||||
}
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user