Performance update with HTMLAgilityPack
Added Branches, LinksToScrape, and ScrapedLinks.
This commit is contained in:
parent
824df98750
commit
03ddeba846
@ -25,6 +25,7 @@ namespace UWLecturePlan.Controllers
|
|||||||
model.LectureEvents = db.LectureEvents.Include(x=>x.Lecture)
|
model.LectureEvents = db.LectureEvents.Include(x=>x.Lecture)
|
||||||
.Where(x => x.From >= from)
|
.Where(x => x.From >= from)
|
||||||
.Where(x => x.Lecture.Semester == model.CurrentSemester)
|
.Where(x => x.Lecture.Semester == model.CurrentSemester)
|
||||||
|
.Where(x => x.Lecture.Branch == model.BranchFilter || model.BranchFilter == null)
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
if (model.LocationFilter != null)
|
if (model.LocationFilter != null)
|
||||||
@ -33,6 +34,8 @@ namespace UWLecturePlan.Controllers
|
|||||||
.Where(x => x.Location.Contains(model.LocationFilter)).ToList();
|
.Where(x => x.Location.Contains(model.LocationFilter)).ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
model.Branches = db.LectureEvents.Select(x => x.Lecture.Branch).Distinct().OrderBy(x=>x).ToList();
|
||||||
|
|
||||||
|
|
||||||
return View(model);
|
return View(model);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,8 +7,12 @@ namespace UWLecturePlan.Models
|
|||||||
|
|
||||||
public string CurrentSemester { get; set; }
|
public string CurrentSemester { get; set; }
|
||||||
|
|
||||||
public string LocationFilter { get; set; }
|
public string? LocationFilter { get; set; }
|
||||||
|
|
||||||
public List<LectureEvent> LectureEvents { get; set; }
|
public string? BranchFilter { get; set; }
|
||||||
|
|
||||||
|
public List<LectureEvent> LectureEvents { get; set; }
|
||||||
|
|
||||||
|
public List<string> Branches { get; set; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,21 @@
|
|||||||
@model LecturesViewModel
|
@using System.Text.RegularExpressions
|
||||||
|
@model LecturesViewModel
|
||||||
|
@{
|
||||||
|
// Returns the display name of a branch: for single-line input this is the
// text after the last " - " separator; when the value contains no " - "
// the input is returned unchanged.
string GetBranchName(string branch)
{
    // The static Regex.Match overload reuses the framework's cached pattern,
    // so the expression is not re-parsed on every call (this helper runs
    // twice per branch while the <select> is rendered).
    var match = Regex.Match(branch, @".* - (.*)");

    return match.Success ? match.Groups[1].Value : branch;
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
@ -30,38 +47,46 @@
|
|||||||
<form method="get" >
|
<form method="get" >
|
||||||
Semester: <input type="text" asp-for="CurrentSemester" style="width:3.5em;text-align:center;" maxlength="5" />
|
Semester: <input type="text" asp-for="CurrentSemester" style="width:3.5em;text-align:center;" maxlength="5" />
|
||||||
Ort: <input type="text" asp-for="LocationFilter" />
|
Ort: <input type="text" asp-for="LocationFilter" />
|
||||||
|
Studiengang:
|
||||||
|
<select type="text" asp-for="BranchFilter" >
|
||||||
|
<option value="">Alle</option>
|
||||||
|
@foreach (var branch in Model.Branches.OrderBy(x => GetBranchName(x)))
|
||||||
|
{
|
||||||
|
<option value="@branch">
|
||||||
|
@GetBranchName(branch)
|
||||||
|
</option>
|
||||||
|
}
|
||||||
|
</select>
|
||||||
<input type="submit" value="Filter" />
|
<input type="submit" value="Filter" />
|
||||||
|
|
||||||
|
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
|
||||||
@foreach (var day in Model.LectureEvents.GroupBy(x => x.From.Date).OrderBy(x => x.Key))
|
@foreach (var day in Model.LectureEvents.GroupBy(x => x.From.Date).OrderBy(x => x.Key))
|
||||||
{
|
{
|
||||||
<details class="day">
|
<details class="day">
|
||||||
<summary>@day.Key.ToString("dd.MM.yyyy") - (@day.Count())</summary>
|
<summary>@day.Key.ToString("dd.MM.yyyy") - (@day.Count())</summary>
|
||||||
@foreach (var time in day.GroupBy(x => x.From).OrderBy(x => x.Key))
|
@foreach (var time in day.GroupBy(x => x.From).OrderBy(x => x.Key))
|
||||||
{
|
{
|
||||||
<details class="time">
|
<details class="time">
|
||||||
<summary>@time.Key.ToString("HH:mm") - (@time.Count())</summary>
|
<summary>@time.Key.ToString("HH:mm") - (@time.Count())</summary>
|
||||||
@foreach (var eventItem in time.OrderBy(x=>x.Lecture.Title))
|
@foreach (var eventItem in time.OrderBy(x => x.Lecture.Title))
|
||||||
{
|
{
|
||||||
<details class="lecture">
|
<details class="lecture">
|
||||||
<summary class="title">@eventItem.Lecture.Title</summary>
|
<summary class="title">@eventItem.Lecture.Title</summary>
|
||||||
|
|
||||||
<div class="time">Zeitraum: @eventItem.From.ToString("HH:mm") - @eventItem.To.ToString("HH:mm")</div>
|
<div class="time">Zeitraum: @eventItem.From.ToString("HH:mm") - @eventItem.To.ToString("HH:mm")</div>
|
||||||
<div class="room">Ort: @eventItem.Location</div>
|
<div class="room">Ort: @eventItem.Location</div>
|
||||||
|
|
||||||
<a href="@eventItem.Lecture.Url">@eventItem.Lecture.Url</a>
|
<a href="@eventItem.Lecture.Url">@eventItem.Lecture.Url</a>
|
||||||
<details>
|
<details>
|
||||||
<summary>Infos</summary>
|
<summary>Infos</summary>
|
||||||
@Html.Raw(eventItem.Lecture.Description);
|
@Html.Raw(eventItem.Lecture.Description);
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
}
|
}
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
}
|
}
|
||||||
</details>
|
</details>
|
||||||
}
|
}
|
||||||
@ -12,11 +12,11 @@ namespace UWLib
|
|||||||
/// <summary>
/// A lecture (course) record stored in the <c>Lectures</c> table together
/// with its scheduled events.
/// </summary>
public class Lecture
{
    /// <summary>Numeric lecture id; part of the composite key (Id, Semester) shown in the migrations.</summary>
    public int Id { get; set; }

    /// <summary>Semester label; second part of the composite key.</summary>
    public string Semester { get; set; }

    /// <summary>Title of the lecture.</summary>
    public string Title { get; set; }

    /// <summary>URL of the page the lecture belongs to.</summary>
    public string Url { get; set; }

    /// <summary>Optional description; the view renders it as raw HTML.</summary>
    public string? Description { get; set; }

    /// <summary>Events that belong to this lecture; starts out as an empty list.</summary>
    public List<LectureEvent> Events { get; set; } = new();

    /// <summary>
    /// Branch the lecture is listed under; the lecture view filters on this value.
    /// NOTE(review): the exact text format is not visible here - confirm against the scraper.
    /// </summary>
    public string Branch { get; set; }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -12,6 +12,10 @@ namespace UWLib
|
|||||||
public DbSet<Lecture> Lectures { get; set; }
|
public DbSet<Lecture> Lectures { get; set; }
|
||||||
public DbSet<LectureEvent> LectureEvents { get; set; }
|
public DbSet<LectureEvent> LectureEvents { get; set; }
|
||||||
|
|
||||||
|
public DbSet<ScrapedLink> ScrapedLinks { get; set; }
|
||||||
|
|
||||||
|
public DbSet<LinkToScrape> LinksToScrape { get; set; }
|
||||||
|
|
||||||
|
|
||||||
public string DbPath { get; }
|
public string DbPath { get; }
|
||||||
|
|
||||||
@ -19,7 +23,7 @@ namespace UWLib
|
|||||||
public LectureContext(string path)
|
public LectureContext(string path)
|
||||||
{
|
{
|
||||||
DbPath = path;
|
DbPath = path;
|
||||||
this.Database.Migrate();
|
Database.Migrate();
|
||||||
}
|
}
|
||||||
|
|
||||||
public LectureContext()
|
public LectureContext()
|
||||||
@ -29,7 +33,7 @@ namespace UWLib
|
|||||||
DbPath = System.IO.Path.Join(path, "lecture.db");
|
DbPath = System.IO.Path.Join(path, "lecture.db");
|
||||||
DbPath = "lecture.db";
|
DbPath = "lecture.db";
|
||||||
|
|
||||||
this.Database.Migrate();
|
Database.Migrate();
|
||||||
}
|
}
|
||||||
|
|
||||||
// The following configures EF to create a Sqlite database file in the
|
// The following configures EF to create a Sqlite database file in the
|
||||||
|
|||||||
16
UWLib/LinkToScrape.cs
Normal file
16
UWLib/LinkToScrape.cs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.ComponentModel.DataAnnotations;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace UWLib
{
    /// <summary>
    /// Entity behind the <c>LinksToScrape</c> table. The scraper loads these
    /// URLs into its work queue at start-up.
    /// </summary>
    public class LinkToScrape
    {
        /// <summary>URL of the page; also the primary key of the table.</summary>
        [Key]
        public string Url { get; set; }

        /// <summary>
        /// Integer sort value stored with the link.
        /// NOTE(review): presumably orders the queue - its use is not visible here, confirm.
        /// </summary>
        public int Sort { get; set; }
    }
}
|
||||||
108
UWLib/Migrations/20241012131142_ScrapedLinks.Designer.cs
generated
Normal file
108
UWLib/Migrations/20241012131142_ScrapedLinks.Designer.cs
generated
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
// <auto-generated />
|
||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.EntityFrameworkCore.Infrastructure;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
|
||||||
|
using UWLib;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace UWLib.Migrations
{
    [DbContext(typeof(LectureContext))]
    [Migration("20241012131142_ScrapedLinks")]
    partial class ScrapedLinks
    {
        /// <summary>
        /// Describes the model as it looks after the ScrapedLinks migration:
        /// Lectures, LectureEvents and the new ScrapedLinks table.
        /// </summary>
        protected override void BuildTargetModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder.HasAnnotation("ProductVersion", "8.0.10");

            // Lectures: composite key (Id, Semester).
            modelBuilder.Entity("UWLib.Lecture", lecture =>
            {
                lecture.Property<int>("Id").HasColumnType("INTEGER");
                lecture.Property<string>("Semester").HasColumnType("TEXT");
                lecture.Property<string>("Description").HasColumnType("TEXT");
                lecture.Property<string>("Title").IsRequired().HasColumnType("TEXT");
                lecture.Property<string>("Url").IsRequired().HasColumnType("TEXT");

                lecture.HasKey("Id", "Semester");

                lecture.ToTable("Lectures");
            });

            // LectureEvents: generated integer key, indexed by the owning lecture's key.
            modelBuilder.Entity("UWLib.LectureEvent", lectureEvent =>
            {
                lectureEvent.Property<int>("Id").ValueGeneratedOnAdd().HasColumnType("INTEGER");
                lectureEvent.Property<DateTime>("From").HasColumnType("TEXT");
                lectureEvent.Property<int>("LectureId").HasColumnType("INTEGER");
                lectureEvent.Property<string>("LectureSemester").IsRequired().HasColumnType("TEXT");
                lectureEvent.Property<string>("Location").IsRequired().HasColumnType("TEXT");
                lectureEvent.Property<DateTime>("To").HasColumnType("TEXT");

                lectureEvent.HasKey("Id");

                lectureEvent.HasIndex("LectureId", "LectureSemester");

                lectureEvent.ToTable("LectureEvents");
            });

            // ScrapedLinks: keyed by URL.
            modelBuilder.Entity("UWLib.ScrapedLink", scrapedLink =>
            {
                scrapedLink.Property<string>("Url").HasColumnType("TEXT");
                scrapedLink.Property<DateTime>("LastScrape").HasColumnType("TEXT");

                scrapedLink.HasKey("Url");

                scrapedLink.ToTable("ScrapedLinks");
            });

            // Required relationship LectureEvent -> Lecture with cascade delete.
            modelBuilder.Entity("UWLib.LectureEvent", lectureEvent =>
            {
                lectureEvent.HasOne("UWLib.Lecture", "Lecture")
                    .WithMany("Events")
                    .HasForeignKey("LectureId", "LectureSemester")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();

                lectureEvent.Navigation("Lecture");
            });

            modelBuilder.Entity("UWLib.Lecture", lecture =>
            {
                lecture.Navigation("Events");
            });
#pragma warning restore 612, 618
        }
    }
}
|
||||||
34
UWLib/Migrations/20241012131142_ScrapedLinks.cs
Normal file
34
UWLib/Migrations/20241012131142_ScrapedLinks.cs
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace UWLib.Migrations
{
    /// <summary>
    /// Migration that creates the <c>ScrapedLinks</c> table and drops it again on rollback.
    /// </summary>
    public partial class ScrapedLinks : Migration
    {
        /// <summary>
        /// Creates <c>ScrapedLinks</c> with two non-nullable TEXT columns,
        /// <c>Url</c> (primary key) and <c>LastScrape</c>.
        /// </summary>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.CreateTable(
                name: "ScrapedLinks",
                columns: cols => new
                {
                    Url = cols.Column<string>(type: "TEXT", nullable: false),
                    LastScrape = cols.Column<DateTime>(type: "TEXT", nullable: false)
                },
                constraints: keys => keys.PrimaryKey("PK_ScrapedLinks", row => row.Url));
        }

        /// <summary>Reverts the migration by dropping <c>ScrapedLinks</c>.</summary>
        protected override void Down(MigrationBuilder migrationBuilder)
            => migrationBuilder.DropTable(name: "ScrapedLinks");
    }
}
|
||||||
112
UWLib/Migrations/20241012131329_Branch.Designer.cs
generated
Normal file
112
UWLib/Migrations/20241012131329_Branch.Designer.cs
generated
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
// <auto-generated />
|
||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.EntityFrameworkCore.Infrastructure;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
|
||||||
|
using UWLib;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace UWLib.Migrations
{
    [DbContext(typeof(LectureContext))]
    [Migration("20241012131329_Branch")]
    partial class Branch
    {
        /// <summary>
        /// Describes the model as it looks after the Branch migration:
        /// same tables as before, with a required Branch column on Lectures.
        /// </summary>
        protected override void BuildTargetModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder.HasAnnotation("ProductVersion", "8.0.10");

            // Lectures: composite key (Id, Semester), now including Branch.
            modelBuilder.Entity("UWLib.Lecture", lecture =>
            {
                lecture.Property<int>("Id").HasColumnType("INTEGER");
                lecture.Property<string>("Semester").HasColumnType("TEXT");
                lecture.Property<string>("Branch").IsRequired().HasColumnType("TEXT");
                lecture.Property<string>("Description").HasColumnType("TEXT");
                lecture.Property<string>("Title").IsRequired().HasColumnType("TEXT");
                lecture.Property<string>("Url").IsRequired().HasColumnType("TEXT");

                lecture.HasKey("Id", "Semester");

                lecture.ToTable("Lectures");
            });

            // LectureEvents: generated integer key, indexed by the owning lecture's key.
            modelBuilder.Entity("UWLib.LectureEvent", lectureEvent =>
            {
                lectureEvent.Property<int>("Id").ValueGeneratedOnAdd().HasColumnType("INTEGER");
                lectureEvent.Property<DateTime>("From").HasColumnType("TEXT");
                lectureEvent.Property<int>("LectureId").HasColumnType("INTEGER");
                lectureEvent.Property<string>("LectureSemester").IsRequired().HasColumnType("TEXT");
                lectureEvent.Property<string>("Location").IsRequired().HasColumnType("TEXT");
                lectureEvent.Property<DateTime>("To").HasColumnType("TEXT");

                lectureEvent.HasKey("Id");

                lectureEvent.HasIndex("LectureId", "LectureSemester");

                lectureEvent.ToTable("LectureEvents");
            });

            // ScrapedLinks: keyed by URL.
            modelBuilder.Entity("UWLib.ScrapedLink", scrapedLink =>
            {
                scrapedLink.Property<string>("Url").HasColumnType("TEXT");
                scrapedLink.Property<DateTime>("LastScrape").HasColumnType("TEXT");

                scrapedLink.HasKey("Url");

                scrapedLink.ToTable("ScrapedLinks");
            });

            // Required relationship LectureEvent -> Lecture with cascade delete.
            modelBuilder.Entity("UWLib.LectureEvent", lectureEvent =>
            {
                lectureEvent.HasOne("UWLib.Lecture", "Lecture")
                    .WithMany("Events")
                    .HasForeignKey("LectureId", "LectureSemester")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();

                lectureEvent.Navigation("Lecture");
            });

            modelBuilder.Entity("UWLib.Lecture", lecture =>
            {
                lecture.Navigation("Events");
            });
#pragma warning restore 612, 618
        }
    }
}
|
||||||
29
UWLib/Migrations/20241012131329_Branch.cs
Normal file
29
UWLib/Migrations/20241012131329_Branch.cs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace UWLib.Migrations
{
    /// <summary>
    /// Migration that adds the <c>Branch</c> column to <c>Lectures</c> and removes it on rollback.
    /// </summary>
    public partial class Branch : Migration
    {
        /// <summary>
        /// Adds a non-nullable TEXT column <c>Branch</c> to <c>Lectures</c>,
        /// using the empty string as the default value.
        /// </summary>
        protected override void Up(MigrationBuilder migrationBuilder)
            => migrationBuilder.AddColumn<string>(
                name: "Branch",
                table: "Lectures",
                type: "TEXT",
                nullable: false,
                defaultValue: "");

        /// <summary>Reverts the migration by dropping <c>Branch</c> from <c>Lectures</c>.</summary>
        protected override void Down(MigrationBuilder migrationBuilder)
            => migrationBuilder.DropColumn(name: "Branch", table: "Lectures");
    }
}
|
||||||
125
UWLib/Migrations/20241012140426_LinksToScrape.Designer.cs
generated
Normal file
125
UWLib/Migrations/20241012140426_LinksToScrape.Designer.cs
generated
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
// <auto-generated />
|
||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.EntityFrameworkCore.Infrastructure;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
|
||||||
|
using UWLib;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace UWLib.Migrations
{
    [DbContext(typeof(LectureContext))]
    [Migration("20241012140426_LinksToScrape")]
    partial class LinksToScrape
    {
        /// <summary>
        /// Describes the model as it looks after the LinksToScrape migration:
        /// Lectures, LectureEvents, ScrapedLinks and the new LinksToScrape table.
        /// </summary>
        protected override void BuildTargetModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder.HasAnnotation("ProductVersion", "8.0.10");

            // Lectures: composite key (Id, Semester).
            modelBuilder.Entity("UWLib.Lecture", lecture =>
            {
                lecture.Property<int>("Id").HasColumnType("INTEGER");
                lecture.Property<string>("Semester").HasColumnType("TEXT");
                lecture.Property<string>("Branch").IsRequired().HasColumnType("TEXT");
                lecture.Property<string>("Description").HasColumnType("TEXT");
                lecture.Property<string>("Title").IsRequired().HasColumnType("TEXT");
                lecture.Property<string>("Url").IsRequired().HasColumnType("TEXT");

                lecture.HasKey("Id", "Semester");

                lecture.ToTable("Lectures");
            });

            // LectureEvents: generated integer key, indexed by the owning lecture's key.
            modelBuilder.Entity("UWLib.LectureEvent", lectureEvent =>
            {
                lectureEvent.Property<int>("Id").ValueGeneratedOnAdd().HasColumnType("INTEGER");
                lectureEvent.Property<DateTime>("From").HasColumnType("TEXT");
                lectureEvent.Property<int>("LectureId").HasColumnType("INTEGER");
                lectureEvent.Property<string>("LectureSemester").IsRequired().HasColumnType("TEXT");
                lectureEvent.Property<string>("Location").IsRequired().HasColumnType("TEXT");
                lectureEvent.Property<DateTime>("To").HasColumnType("TEXT");

                lectureEvent.HasKey("Id");

                lectureEvent.HasIndex("LectureId", "LectureSemester");

                lectureEvent.ToTable("LectureEvents");
            });

            // LinksToScrape: keyed by URL.
            modelBuilder.Entity("UWLib.LinkToScrape", linkToScrape =>
            {
                linkToScrape.Property<string>("Url").HasColumnType("TEXT");
                linkToScrape.Property<int>("Sort").HasColumnType("INTEGER");

                linkToScrape.HasKey("Url");

                linkToScrape.ToTable("LinksToScrape");
            });

            // ScrapedLinks: keyed by URL.
            modelBuilder.Entity("UWLib.ScrapedLink", scrapedLink =>
            {
                scrapedLink.Property<string>("Url").HasColumnType("TEXT");
                scrapedLink.Property<DateTime>("LastScrape").HasColumnType("TEXT");

                scrapedLink.HasKey("Url");

                scrapedLink.ToTable("ScrapedLinks");
            });

            // Required relationship LectureEvent -> Lecture with cascade delete.
            modelBuilder.Entity("UWLib.LectureEvent", lectureEvent =>
            {
                lectureEvent.HasOne("UWLib.Lecture", "Lecture")
                    .WithMany("Events")
                    .HasForeignKey("LectureId", "LectureSemester")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();

                lectureEvent.Navigation("Lecture");
            });

            modelBuilder.Entity("UWLib.Lecture", lecture =>
            {
                lecture.Navigation("Events");
            });
#pragma warning restore 612, 618
        }
    }
}
|
||||||
33
UWLib/Migrations/20241012140426_LinksToScrape.cs
Normal file
33
UWLib/Migrations/20241012140426_LinksToScrape.cs
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace UWLib.Migrations
{
    /// <summary>
    /// Migration that creates the <c>LinksToScrape</c> table and drops it again on rollback.
    /// </summary>
    public partial class LinksToScrape : Migration
    {
        /// <summary>
        /// Creates <c>LinksToScrape</c> with a non-nullable TEXT <c>Url</c>
        /// primary key and a non-nullable INTEGER <c>Sort</c> column.
        /// </summary>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.CreateTable(
                name: "LinksToScrape",
                columns: cols => new
                {
                    Url = cols.Column<string>(type: "TEXT", nullable: false),
                    Sort = cols.Column<int>(type: "INTEGER", nullable: false)
                },
                constraints: keys => keys.PrimaryKey("PK_LinksToScrape", row => row.Url));
        }

        /// <summary>Reverts the migration by dropping <c>LinksToScrape</c>.</summary>
        protected override void Down(MigrationBuilder migrationBuilder)
            => migrationBuilder.DropTable(name: "LinksToScrape");
    }
}
|
||||||
@ -25,6 +25,10 @@ namespace UWLib.Migrations
|
|||||||
b.Property<string>("Semester")
|
b.Property<string>("Semester")
|
||||||
.HasColumnType("TEXT");
|
.HasColumnType("TEXT");
|
||||||
|
|
||||||
|
b.Property<string>("Branch")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("TEXT");
|
||||||
|
|
||||||
b.Property<string>("Description")
|
b.Property<string>("Description")
|
||||||
.HasColumnType("TEXT");
|
.HasColumnType("TEXT");
|
||||||
|
|
||||||
@ -71,6 +75,32 @@ namespace UWLib.Migrations
|
|||||||
b.ToTable("LectureEvents");
|
b.ToTable("LectureEvents");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("UWLib.LinkToScrape", b =>
|
||||||
|
{
|
||||||
|
b.Property<string>("Url")
|
||||||
|
.HasColumnType("TEXT");
|
||||||
|
|
||||||
|
b.Property<int>("Sort")
|
||||||
|
.HasColumnType("INTEGER");
|
||||||
|
|
||||||
|
b.HasKey("Url");
|
||||||
|
|
||||||
|
b.ToTable("LinksToScrape");
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("UWLib.ScrapedLink", b =>
|
||||||
|
{
|
||||||
|
b.Property<string>("Url")
|
||||||
|
.HasColumnType("TEXT");
|
||||||
|
|
||||||
|
b.Property<DateTime>("LastScrape")
|
||||||
|
.HasColumnType("TEXT");
|
||||||
|
|
||||||
|
b.HasKey("Url");
|
||||||
|
|
||||||
|
b.ToTable("ScrapedLinks");
|
||||||
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("UWLib.LectureEvent", b =>
|
modelBuilder.Entity("UWLib.LectureEvent", b =>
|
||||||
{
|
{
|
||||||
b.HasOne("UWLib.Lecture", "Lecture")
|
b.HasOne("UWLib.Lecture", "Lecture")
|
||||||
|
|||||||
17
UWLib/ScrapedLink.cs
Normal file
17
UWLib/ScrapedLink.cs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.ComponentModel.DataAnnotations;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace UWLib
{
    /// <summary>
    /// Entity behind the <c>ScrapedLinks</c> table: records when a URL was last scraped.
    /// </summary>
    public class ScrapedLink
    {
        /// <summary>URL of the scraped page; also the primary key of the table.</summary>
        [Key]
        public string Url { get; set; }

        /// <summary>
        /// Timestamp of the last scrape. The scraper compares it against
        /// <c>DateTime.Now</c> minus one day to decide whether a URL counts as checked.
        /// </summary>
        public DateTime LastScrape { get; set; }
    }
}
|
||||||
@ -1,11 +1,15 @@
|
|||||||
|
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using System.Net.NetworkInformation;
|
using System.Net.NetworkInformation;
|
||||||
|
using System.Text.Encodings.Web;
|
||||||
using System.Web;
|
using System.Web;
|
||||||
using System.Xml.Linq;
|
using System.Xml.Linq;
|
||||||
|
using HtmlAgilityPack;
|
||||||
using OpenQA.Selenium;
|
using OpenQA.Selenium;
|
||||||
using OpenQA.Selenium.BiDi.Modules.Script;
|
using OpenQA.Selenium.BiDi.Modules.Script;
|
||||||
using OpenQA.Selenium.Chrome;
|
using OpenQA.Selenium.Chrome;
|
||||||
|
using OpenQA.Selenium.DevTools.V127.Target;
|
||||||
using OpenQA.Selenium.Support.UI;
|
using OpenQA.Selenium.Support.UI;
|
||||||
using UWLib;
|
using UWLib;
|
||||||
using static Microsoft.EntityFrameworkCore.DbLoggerCategory;
|
using static Microsoft.EntityFrameworkCore.DbLoggerCategory;
|
||||||
@ -13,170 +17,303 @@ using static Microsoft.EntityFrameworkCore.DbLoggerCategory;
|
|||||||
|
|
||||||
internal class Program
|
internal class Program
|
||||||
{
|
{
|
||||||
|
// URLs that count as already handled; Main seeds it with links scraped in the last 24 hours.
static readonly List<string> checkedUrls = [];

// Work queue of URLs that still have to be visited.
static readonly List<string> urlsToCheck = [];

// Set to true by Main when -r / --refresh-lectures is passed.
static bool refreshLectures;

// Database context shared by the whole run.
static readonly LectureContext db = new();

// Browser driver; stays null until Main has created it.
static ChromeDriver? driver;
|
||||||
/// <summary>
/// Entry point of the scraper: starts Chrome, fills the URL queue and then
/// processes the queue with <c>FindUrls</c> until it is empty.
/// </summary>
/// <param name="args">
/// No arguments: continue with the links stored in <c>LinksToScrape</c>,
/// falling back to the course directory root when nothing is pending.
/// <c>-r</c> / <c>--refresh-lectures</c>: re-scrape the URLs of all stored lectures.
/// </param>
private static void Main(string[] args)
{
    // Done() quits the browser and restores the console when the process exits.
    AppDomain.CurrentDomain.ProcessExit += (s, e) =>
    {
        Done();
    };

    var service = ChromeDriverService.CreateDefaultService();
    service.HideCommandPromptWindow = true;

    var options = new ChromeOptions();
    //options.AddArgument("--headless");
    driver = new ChromeDriver(service, options);

    if (args.Any(arg => arg is "-r" or "--refresh-lectures"))
    {
        refreshLectures = true;

        Console.WriteLine("Refreshing lectures");
    }

    if (args.Length == 0)
    {
        urlsToCheck.AddRange([.. db.LinksToScrape.Select(x => x.Url)]);

        // add all urls that were checked in the last 24 hours
        checkedUrls.AddRange([.. db.ScrapedLinks.Where(x => x.LastScrape > DateTime.Now.AddDays(-1)).Select(x => x.Url)]);

        // Remove all checked urls from the urls to check in a single pass.
        // The queue holds only primary-key values at this point (no duplicates),
        // so this drops exactly the entries the per-url Remove calls dropped,
        // without rescanning the list once per checked url.
        var recentlyChecked = new HashSet<string>(checkedUrls);
        urlsToCheck.RemoveAll(recentlyChecked.Contains);

        if (urlsToCheck.Count == 0)
        {
            // add the first url to check / Vorlesungsverzeichnis (course directory)
            urlsToCheck.Add("https://ufind.univie.ac.at/de/vvz.html");
        }
    }

    if (refreshLectures)
    {
        urlsToCheck.AddRange([.. db.Lectures.Select(x => x.Url)]);
    }

    Console.CursorVisible = false;

    // Remember where the progress lines start so they can be overwritten in place.
    int top = Console.CursorTop;

    // NOTE(review): the loop only terminates if FindUrls takes the processed
    // url out of urlsToCheck - that part of FindUrls is not visible here, confirm.
    while (urlsToCheck.Count > 0)
    {
        var url = urlsToCheck[0];

        try
        {
            FindUrls(url);
        }
        catch (Exception e)
        {
            // Show the failing url with the full exception and stop the run.
            Console.Clear();
            Console.WriteLine($"Url: {url}");
            Console.WriteLine();
            Console.WriteLine(e);
            return;
        }

        Console.CursorLeft = 0;
        Console.CursorTop = top;
        Console.WriteLine($"Urls checked: {checkedUrls.Count}");
        Console.WriteLine($"Urls to check: {urlsToCheck.Count}");
    }

    Console.WriteLine("Done");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Shutdown handler (registered for ProcessExit in Main): quits the browser,
/// makes the console cursor visible again and waits for a line of input.
/// </summary>
private static void Done()
{
    try
    {
        driver?.Quit();
    }
    finally
    {
        // Restore the cursor even when quitting the browser throws;
        // otherwise the terminal is left with a hidden cursor.
        Console.CursorVisible = true;
    }

    Console.ReadLine();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Resolves a link taken from HTML markup against the URL of the page it was found on.
/// </summary>
/// <param name="relativeUrl">Link value as written in the markup; may contain HTML entities such as <c>&amp;amp;</c>.</param>
/// <param name="baseUrl">Absolute URL of the page that contains the link.</param>
/// <returns>The absolute, escaped URL of the link target.</returns>
/// <exception cref="UriFormatException">When <paramref name="baseUrl"/> is not a valid absolute URI or the link cannot be resolved.</exception>
static string GetAbsoluteUrl(string relativeUrl, string baseUrl)
{
    // Decode entities BEFORE resolving: decoding the finished URI could
    // re-introduce raw characters (spaces, non-ASCII) that AbsoluteUri had
    // no chance to escape.
    var decodedHref = HttpUtility.HtmlDecode(relativeUrl);

    var pageUri = new Uri(baseUrl);
    return new Uri(pageUri, decodedHref).AbsoluteUri;
}
|
||||||
|
|
||||||
private static void FindUrls(string source)
|
private static void FindUrls(string source)
|
||||||
{
|
{
|
||||||
if (checkedUrls.Contains(source))
|
driver?.Navigate().GoToUrl(source);
|
||||||
{
|
|
||||||
urlsToCheck.Remove(source);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (db.Lectures.Any(x=>x.Url == source))
|
// wait for the page to load
|
||||||
{
|
WebDriverWait wait = new(driver, TimeSpan.FromSeconds(2));
|
||||||
checkedUrls.Add(source);
|
|
||||||
urlsToCheck.Remove(source);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
driver.Navigate().GoToUrl(source);
|
|
||||||
WebDriverWait wait = new WebDriverWait(driver, TimeSpan.FromSeconds(2));
|
|
||||||
wait.Until(d => d.FindElements(By.XPath("//a[starts-with(@href, 'vvz_sub.html')]")).Count > 0 ||
|
wait.Until(d => d.FindElements(By.XPath("//a[starts-with(@href, 'vvz_sub.html')]")).Count > 0 ||
|
||||||
d.FindElements(By.XPath("//a[starts-with(@href, 'course.html')]")).Count > 0 ||
|
d.FindElements(By.XPath("//a[starts-with(@href, 'course.html')]")).Count > 0 ||
|
||||||
d.FindElements(By.XPath("//h1/*/*[@class='what']")).Count > 0
|
d.FindElements(By.XPath("//h1/*/*[@class='what']")).Count > 0
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HtmlDocument doc = new();
|
||||||
|
doc.LoadHtml(driver?.PageSource);
|
||||||
|
var root = doc.DocumentNode;
|
||||||
|
|
||||||
if (source.Contains("course.html"))
|
if (source.Contains("course.html"))
|
||||||
{
|
{
|
||||||
Lecture lecture = new Lecture();
|
CreateLecture(source, root);
|
||||||
|
|
||||||
lecture.Url = source;
|
|
||||||
|
|
||||||
var uri = new Uri(source);
|
|
||||||
|
|
||||||
var query = HttpUtility.ParseQueryString(uri.Query);
|
|
||||||
|
|
||||||
if (query.AllKeys.Contains("lv"))
|
|
||||||
{
|
|
||||||
lecture.Id = int.Parse(query["lv"]);
|
|
||||||
}
|
|
||||||
|
|
||||||
int year = DateTime.Now.Year;
|
|
||||||
|
|
||||||
|
|
||||||
var what = driver.FindElements(By.XPath("//h1/*/*[@class='what']"));
|
|
||||||
var when = driver.FindElements(By.XPath("//h1/*/*[@class='when']"));
|
|
||||||
var info = driver.FindElements(By.XPath("//*[@class='info list']"));
|
|
||||||
var events = driver.FindElements(By.XPath("//ul[@class='classes events list']/li"));
|
|
||||||
|
|
||||||
if (what.Count > 0)
|
|
||||||
{
|
|
||||||
lecture.Title = what.First().Text;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (when.Count > 0)
|
|
||||||
{
|
|
||||||
lecture.Semester = when.First().Text;
|
|
||||||
year = int.Parse(when.First().Text.Substring(0, 4));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.Count > 0)
|
|
||||||
{
|
|
||||||
lecture.Description = info.First().GetAttribute("innerHTML");
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (var item in events)
|
|
||||||
{
|
|
||||||
LectureEvent lectureEvent = new LectureEvent();
|
|
||||||
|
|
||||||
var day = item.FindElements(By.XPath("*[@class='date']"));
|
|
||||||
var time = item.FindElements(By.XPath("*[@class='time']"));
|
|
||||||
var room = item.FindElements(By.XPath("*[@class='room']"));
|
|
||||||
DateTime date = new DateTime();
|
|
||||||
if (day.Count > 0)
|
|
||||||
{
|
|
||||||
date = DateTime.ParseExact(day.First().Text + year.ToString(), "dd.MM.yyyy", CultureInfo.InvariantCulture);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (time.Count > 0)
|
|
||||||
{
|
|
||||||
var text = time.First().Text;
|
|
||||||
|
|
||||||
var times = text.Split(" - ");
|
|
||||||
|
|
||||||
var from = TimeSpan.ParseExact(times[0], "hh\\:mm", CultureInfo.InvariantCulture);
|
|
||||||
var to = TimeSpan.ParseExact(times[1], "hh\\:mm", CultureInfo.InvariantCulture);
|
|
||||||
|
|
||||||
lectureEvent.From = date.Add(from);
|
|
||||||
lectureEvent.To = date.Add(to);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (room.Count > 0)
|
|
||||||
{
|
|
||||||
lectureEvent.Location = room.First().Text;
|
|
||||||
}
|
|
||||||
|
|
||||||
lecture.Events.Add(lectureEvent);
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
db.Lectures.Add(lecture);
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
db.SaveChanges();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var pathLinks = driver.FindElements(By.XPath("//a[starts-with(@href, 'vvz_sub.html')]"));
|
if (!refreshLectures)
|
||||||
foreach (var link in pathLinks)
|
|
||||||
{
|
{
|
||||||
var url = link.GetAttribute("href");
|
FindPathLinks(source, root);
|
||||||
|
FindCourseLinks(source, root);
|
||||||
if (!checkedUrls.Contains(url))
|
|
||||||
{
|
|
||||||
urlsToCheck.Add(link.GetAttribute("href"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var courseLinkParents = driver.FindElements(By.XPath("//a[starts-with(@href, 'course.html')]/.."));
|
RefreshScrapedLink(source);
|
||||||
|
RemoveLinkToScrape(source);
|
||||||
foreach (var parent in courseLinkParents)
|
db.SaveChanges();
|
||||||
{
|
|
||||||
if (parent.FindElements(By.XPath("abbr[@title='Vorlesung']")).Count > 0)
|
|
||||||
{
|
|
||||||
var link = parent.FindElement(By.XPath("a[starts-with(@href, 'course.html')]"));
|
|
||||||
|
|
||||||
var url = link.GetAttribute("href");
|
|
||||||
|
|
||||||
if (!checkedUrls.Contains(url))
|
|
||||||
{
|
|
||||||
urlsToCheck.Insert(0, link.GetAttribute("href"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// driver.Close();
|
|
||||||
|
|
||||||
checkedUrls.Add(source);
|
checkedUrls.Add(source);
|
||||||
urlsToCheck.Remove(source);
|
urlsToCheck.Remove(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stamps the <c>ScrapedLinks</c> entry for <paramref name="source"/> with the current local time,
/// registering a new entry first when <c>Find</c> returns none for that URL.
/// </summary>
/// <param name="source">URL used as the lookup key and as the <c>Url</c> of a newly created entry.</param>
/// <remarks>Only the change tracker is touched here; persisting is left to the caller.</remarks>
private static void RefreshScrapedLink(string source)
{
    if (db.ScrapedLinks.Find(source) is not { } entry)
    {
        // First visit of this URL: create the row that will carry the timestamp.
        entry = new ScrapedLink { Url = source };
        db.ScrapedLinks.Add(entry);
    }

    entry.LastScrape = DateTime.Now;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Collects links to lecture detail pages (<c>course.html</c>) from the parsed page and queues
/// every one that has not been checked yet at the front of the work list.
/// </summary>
/// <param name="source">URL of the page being parsed; relative hrefs are resolved against it.</param>
/// <param name="root">Root node of the parsed document.</param>
/// <remarks>
/// Only parents that also contain an <c>abbr</c> whose title contains "Vorlesung" are considered.
/// Queue rows are created through <c>AddLinkToScrape</c> so that a link that is already queued
/// (e.g. the same course listed on several pages) is not added a second time.
/// </remarks>
private static void FindCourseLinks(string source, HtmlNode root)
{
    var courseLinkParents = root.SelectNodes("//a[starts-with(@href, 'course.html')]/..");

    // A null result is treated as "no course links on this page".
    if (courseLinkParents == null)
    {
        return;
    }

    foreach (var parent in courseLinkParents)
    {
        if (parent.SelectSingleNode("abbr[contains(@title,'Vorlesung')]") == null)
        {
            continue;
        }

        var link = parent.SelectSingleNode("a[contains(@href, 'course.html')]");
        if (link == null)
        {
            continue;
        }

        var url = GetAbsoluteUrl(link.GetAttributeValue("href", ""), source);
        if (checkedUrls.Contains(url))
        {
            continue;
        }

        // Sort 0 = course pages; AddLinkToScrape skips URLs that already have a queue row.
        AddLinkToScrape(url, 0);

        // Course pages go to the front of the in-memory list so they are visited first.
        urlsToCheck.Insert(0, url);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Collects links to directory sub-pages (<c>vvz_sub.html</c>) from the parsed page and appends
/// every one that has not been checked yet to the end of the work list.
/// </summary>
/// <param name="source">URL of the page being parsed; relative hrefs are resolved against it.</param>
/// <param name="root">Root node of the parsed document.</param>
private static void FindPathLinks(string source, HtmlNode root)
{
    var anchors = root.SelectNodes("//a[starts-with(@href, 'vvz_sub.html')]");

    // A null result is treated as "no sub-page links on this page".
    if (anchors == null)
    {
        return;
    }

    foreach (var anchor in anchors)
    {
        var target = GetAbsoluteUrl(anchor.GetAttributeValue("href", ""), source);
        if (checkedUrls.Contains(target))
        {
            continue;
        }

        // Sub-pages are queued with Sort = 1.
        AddLinkToScrape(target, 1);
        urlsToCheck.Add(target);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Removes the <c>LinksToScrape</c> entry for <paramref name="url"/>, if <c>Find</c> returns one.
/// </summary>
/// <param name="url">Key of the queue entry to remove.</param>
/// <remarks>Does nothing when no entry is found; persisting is left to the caller.</remarks>
private static void RemoveLinkToScrape(string url)
{
    if (db.LinksToScrape.Find(url) is { } pending)
    {
        db.LinksToScrape.Remove(pending);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Adds a <c>LinksToScrape</c> entry for <paramref name="url"/> unless <c>Find</c> already
/// returns one for that key.
/// </summary>
/// <param name="url">URL to queue; used as the lookup key.</param>
/// <param name="sort">Value stored in the new entry's <c>Sort</c> property.</param>
/// <remarks>An existing entry is left untouched, including its <c>Sort</c> value.</remarks>
private static void AddLinkToScrape(string url, int sort)
{
    if (db.LinksToScrape.Find(url) != null)
    {
        return;
    }

    db.LinksToScrape.Add(new LinkToScrape { Url = url, Sort = sort });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates or refreshes the <c>Lecture</c> that belongs to a course page.
/// </summary>
/// <param name="source">URL of the course page; used to find an existing lecture and to read the <c>lv</c> query parameter.</param>
/// <param name="root">Root node of the parsed course page.</param>
/// <remarks>
/// <para>
/// A lecture is looked up by <c>Url</c>. When none exists, a new one is created and its
/// <c>Id</c> (from the <c>lv</c> query parameter), <c>Semester</c> and <c>Url</c> are set once.
/// <c>Branch</c>, <c>Title</c>, <c>Description</c> and the events are (re)written on every call
/// for which the corresponding node is present.
/// </para>
/// <para>
/// For an existing lecture the stored events are loaded explicitly before they are rebuilt, so
/// that <c>CreateLectureEvents</c> removes the old rows instead of appending duplicates.
/// </para>
/// </remarks>
private static void CreateLecture(string source, HtmlNode root)
{
    var branch = root.SelectSingleNode("/html/body/main/div[1]/div[1]/a");
    var what = root.SelectSingleNode("//h1/*/*[@class='what']");
    var when = root.SelectSingleNode("//h1/*/*[@class='when']");
    var info = root.SelectSingleNode("//*[@class='info list']");
    var events = root.SelectNodes("//ul[@class='classes events list']/li");

    var uri = new Uri(source);
    var query = HttpUtility.ParseQueryString(uri.Query);

    // The lambda parameter is deliberately not called "db" to avoid shadowing the context field.
    Lecture? lecture = db.Lectures.FirstOrDefault(l => l.Url == source);
    bool isNew = lecture == null;

    if (lecture == null)
    {
        lecture = new Lecture();

        if (query.AllKeys.Contains("lv"))
        {
            lecture.Id = int.Parse(query["lv"] ?? "0");
        }

        if (when != null)
        {
            lecture.Semester = when.InnerText;
        }

        lecture.Url = source;

        db.Lectures.Add(lecture);
    }

    if (branch != null)
    {
        lecture.Branch = branch.InnerText;
    }

    if (what != null)
    {
        lecture.Title = what.InnerText;
    }

    if (info != null)
    {
        lecture.Description = info.InnerHtml;
    }

    if (events != null)
    {
        if (!isNew)
        {
            // The lookup above does not include the navigation; without loading it the
            // collection would look empty and the previously stored events would survive.
            db.Entry(lecture).Collection(l => l.Events).Load();
        }

        CreateLectureEvents(events, lecture);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Replaces the events of <paramref name="lecture"/> with the ones listed on its course page.
/// </summary>
/// <param name="events">List items of the page's event list; each may contain <c>date</c>, <c>time</c> and <c>room</c> children.</param>
/// <param name="lecture">Lecture whose <c>Events</c> collection is cleared and refilled.</param>
/// <remarks>
/// <para>
/// The year is taken from the first four characters of <c>lecture.Semester</c>. When the
/// semester is missing or does not start with a number, the current year is used instead of
/// failing.
/// </para>
/// <para>
/// Date and time texts are trimmed before parsing. A malformed date or time still throws
/// <see cref="FormatException"/>; a time text without the " - " separator leaves
/// <c>From</c>/<c>To</c> at their defaults.
/// </para>
/// <para>
/// NOTE(review): every event date is combined with the same single year. If a semester can
/// span a calendar-year boundary, events after the turn of the year would be dated one year
/// too early — confirm against the source site.
/// </para>
/// </remarks>
private static void CreateLectureEvents(HtmlNodeCollection events, Lecture lecture)
{
    int year = DateTime.Now.Year;
    if (lecture.Semester is { Length: >= 4 } semester
        && int.TryParse(semester[..4], NumberStyles.Integer, CultureInfo.InvariantCulture, out int semesterYear))
    {
        year = semesterYear;
    }

    // Drop the previously tracked events before rebuilding the list from the page.
    db.RemoveRange(lecture.Events);
    lecture.Events.Clear();

    foreach (var item in events)
    {
        LectureEvent lectureEvent = new();

        var day = item.SelectSingleNode("*[@class='date']");
        var time = item.SelectSingleNode("*[@class='time']");
        var room = item.SelectSingleNode("*[@class='room']");

        DateTime date = new();
        if (day != null)
        {
            // The page text carries day and month only ("dd.MM."); the year is appended here.
            date = DateTime.ParseExact(
                day.InnerText.Trim() + year.ToString(CultureInfo.InvariantCulture),
                "dd.MM.yyyy",
                CultureInfo.InvariantCulture);
        }

        if (time != null)
        {
            var times = time.InnerText.Split(" - ");

            if (times.Length >= 2)
            {
                var from = TimeSpan.ParseExact(times[0].Trim(), "hh\\:mm", CultureInfo.InvariantCulture);
                var to = TimeSpan.ParseExact(times[1].Trim(), "hh\\:mm", CultureInfo.InvariantCulture);

                lectureEvent.From = date.Add(from);
                lectureEvent.To = date.Add(to);
            }
        }

        if (room != null)
        {
            lectureEvent.Location = room.InnerText;
        }

        lecture.Events.Add(lectureEvent);
    }
}
|
||||||
}
|
}
|
||||||
8
UWScraper/Properties/launchSettings.json
Normal file
8
UWScraper/Properties/launchSettings.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"profiles": {
|
||||||
|
"UWScraper": {
|
||||||
|
"commandName": "Project",
|
||||||
|
"commandLineArgs": "-r"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -9,6 +9,10 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.67" />
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.67" />
|
||||||
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="8.0.10">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
<PackageReference Include="Selenium.WebDriver" Version="4.25.0" />
|
<PackageReference Include="Selenium.WebDriver" Version="4.25.0" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|||||||
BIN
UWScraper/lecture.db
Normal file
BIN
UWScraper/lecture.db
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user