// 182 lines, 5.3 KiB, C#
using System.Globalization;
using System.Net.NetworkInformation;
using System.Web;
using System.Xml.Linq;
using Microsoft.EntityFrameworkCore;
using OpenQA.Selenium;
using OpenQA.Selenium.BiDi.Modules.Script;
using OpenQA.Selenium.Chrome;
using OpenQA.Selenium.Support.UI;
using UWLib;
using static Microsoft.EntityFrameworkCore.DbLoggerCategory;

/// <summary>
/// Console crawler: starting at the course-directory entry page it follows
/// <c>vvz_sub.html</c> directory links and <c>course.html</c> links marked as
/// "Vorlesung" with a Selenium-driven Chrome instance, and stores every course
/// page it visits (title, semester, description, events) through
/// <see cref="LectureContext"/>.
/// </summary>
internal class Program
{
    /// <summary>Entry page of the course directory; the crawl starts here.</summary>
    private const string StartUrl = "https://ufind.univie.ac.at/de/vvz.html";

    private const string DirectoryLinkXPath = "//a[starts-with(@href, 'vvz_sub.html')]";
    private const string CourseLinkXPath = "//a[starts-with(@href, 'course.html')]";
    private const string CourseTitleXPath = "//h1/*/*[@class='what']";

    /// <summary>How long to wait for a page to show any element the crawler recognises.</summary>
    private static readonly TimeSpan PageTimeout = TimeSpan.FromSeconds(2);

    // URLs that are already stored or were already visited in this run.
    static readonly HashSet<string> checkedUrls = new HashSet<string>();

    // Mirror of urlsToCheck for O(1) "already queued?" checks.
    static readonly HashSet<string> queuedUrls = new HashSet<string>();

    // Work list; course pages are inserted at the front, directory pages appended.
    static readonly List<string> urlsToCheck = new List<string>();

    static LectureContext db = new LectureContext();

    static IWebDriver driver = null;

    /// <summary>
    /// Starts Chrome, seeds the visited set with the URLs already in the database
    /// and processes the work list until it is empty. The browser session and the
    /// database context are released even when the crawl aborts with an exception.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        var options = new ChromeOptions();
        // Enable to run without a visible browser window:
        //options.AddArgument("--headless");
        driver = new ChromeDriver(options);

        try
        {
            checkedUrls.UnionWith(db.Lectures.Select(x => x.Url).ToList());

            Enqueue(StartUrl, prioritize: false);

            while (urlsToCheck.Count > 0)
            {
                // FindUrls always removes the URL it was given, so the loop terminates.
                FindUrls(urlsToCheck[0]);
            }
        }
        finally
        {
            driver.Quit();
            db.Dispose();
        }
    }

    /// <summary>
    /// Visits one URL: stores it as a lecture when it is a course page, queues the
    /// directory and lecture links found on it, and finally marks it as checked.
    /// URLs that are already known, or whose page does not load in time, are
    /// skipped.
    /// </summary>
    /// <param name="source">Absolute URL taken from the work list.</param>
    private static void FindUrls(string source)
    {
        if (string.IsNullOrEmpty(source) || checkedUrls.Contains(source))
        {
            Dequeue(source);
            return;
        }

        if (db.Lectures.Any(x => x.Url == source))
        {
            MarkChecked(source);
            return;
        }

        if (!TryLoadPage(source))
        {
            // Do not retry within this run; a page without the expected
            // elements would otherwise block the queue forever.
            MarkChecked(source);
            return;
        }

        if (source.Contains("course.html"))
        {
            SaveLecture(ReadLecture(source), source);
        }

        QueueLinksOnPage();
        MarkChecked(source);
    }

    /// <summary>
    /// Navigates to <paramref name="source"/> and waits until a directory link, a
    /// course link or a course title is present.
    /// </summary>
    /// <returns><c>false</c> when the wait timed out; <c>true</c> otherwise.</returns>
    private static bool TryLoadPage(string source)
    {
        try
        {
            driver.Navigate().GoToUrl(source);

            var wait = new WebDriverWait(driver, PageTimeout);
            wait.Until(d => d.FindElements(By.XPath(DirectoryLinkXPath)).Count > 0 ||
                            d.FindElements(By.XPath(CourseLinkXPath)).Count > 0 ||
                            d.FindElements(By.XPath(CourseTitleXPath)).Count > 0);
            return true;
        }
        catch (WebDriverTimeoutException e)
        {
            Console.Error.WriteLine($"Skipping {source}: page did not load in time ({e.Message})");
            return false;
        }
    }

    /// <summary>
    /// Builds a <see cref="Lecture"/> from the course page currently shown in the
    /// browser. Missing or unparsable parts are left at their default values.
    /// </summary>
    /// <param name="source">URL of the loaded course page; its <c>lv</c> query value becomes the id.</param>
    private static Lecture ReadLecture(string source)
    {
        var lecture = new Lecture();
        lecture.Url = source;

        if (Uri.TryCreate(source, UriKind.Absolute, out var uri))
        {
            var query = HttpUtility.ParseQueryString(uri.Query);
            if (int.TryParse(query["lv"], NumberStyles.Integer, CultureInfo.InvariantCulture, out var id))
            {
                lecture.Id = id;
            }
        }

        // Event dates on the page carry no year; fall back to the current one
        // when the semester label does not provide it.
        int year = DateTime.Now.Year;

        var what = driver.FindElements(By.XPath(CourseTitleXPath)).FirstOrDefault();
        var when = driver.FindElements(By.XPath("//h1/*/*[@class='when']")).FirstOrDefault();
        var info = driver.FindElements(By.XPath("//*[@class='info list']")).FirstOrDefault();
        var events = driver.FindElements(By.XPath("//ul[@class='classes events list']/li"));

        if (what != null)
        {
            lecture.Title = what.Text;
        }

        if (when != null)
        {
            var semester = when.Text;
            lecture.Semester = semester;

            // The first four characters of the semester label are read as the year.
            // NOTE(review): if a semester spans a year boundary, events after
            // New Year would still get the starting year - confirm against real data.
            if (semester.Length >= 4 &&
                int.TryParse(semester.Substring(0, 4), NumberStyles.Integer, CultureInfo.InvariantCulture, out var semesterYear))
            {
                year = semesterYear;
            }
        }

        if (info != null)
        {
            lecture.Description = info.GetAttribute("innerHTML");
        }

        foreach (var item in events)
        {
            lecture.Events.Add(ReadEvent(item, year));
        }

        return lecture;
    }

    /// <summary>
    /// Builds one <see cref="LectureEvent"/> from an event list item. From/To are
    /// only set when the time range parses; an unparsable date leaves the default
    /// <see cref="DateTime"/> as the base, as does a missing date.
    /// </summary>
    /// <param name="item">The <c>li</c> element describing the event.</param>
    /// <param name="year">Year appended to the year-less date text.</param>
    private static LectureEvent ReadEvent(IWebElement item, int year)
    {
        var lectureEvent = new LectureEvent();

        var day = item.FindElements(By.XPath("*[@class='date']")).FirstOrDefault();
        var time = item.FindElements(By.XPath("*[@class='time']")).FirstOrDefault();
        var room = item.FindElements(By.XPath("*[@class='room']")).FirstOrDefault();

        DateTime date = new DateTime();
        if (day != null)
        {
            var dateText = day.Text.Trim() + year.ToString(CultureInfo.InvariantCulture);
            if (!DateTime.TryParseExact(dateText, "dd.MM.yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None, out date))
            {
                // TryParseExact leaves 'date' at its default value on failure.
                Console.Error.WriteLine($"Unrecognised event date '{dateText}'");
            }
        }

        if (time != null)
        {
            var text = time.Text;
            var times = text.Split(" - ");

            if (times.Length >= 2 &&
                TimeSpan.TryParseExact(times[0], "hh\\:mm", CultureInfo.InvariantCulture, out var start) &&
                TimeSpan.TryParseExact(times[1], "hh\\:mm", CultureInfo.InvariantCulture, out var end))
            {
                lectureEvent.From = date.Add(start);
                lectureEvent.To = date.Add(end);
            }
            else
            {
                Console.Error.WriteLine($"Unrecognised event time '{text}'");
            }
        }

        if (room != null)
        {
            lectureEvent.Location = room.Text;
        }

        return lectureEvent;
    }

    /// <summary>
    /// Persists a lecture. A failure (for example a key that is already tracked or
    /// already stored) is logged and the change tracker is reset so the rejected
    /// entity is not retried by the next save; the crawl then continues.
    /// </summary>
    private static void SaveLecture(Lecture lecture, string source)
    {
        try
        {
            db.Lectures.Add(lecture);
            db.SaveChanges();
        }
        catch (Exception e) when (e is InvalidOperationException || e is DbUpdateException)
        {
            Console.Error.WriteLine($"Could not save lecture {source}: {e.GetBaseException().Message}");

            // Everything saved earlier is already in the database, so dropping
            // all tracked entities only discards the failed insert.
            db.ChangeTracker.Clear();
        }
    }

    /// <summary>
    /// Queues the links of the page currently shown in the browser: directory
    /// links go to the end of the work list, lecture ("Vorlesung") course links to
    /// the front so they are handled before further directory pages.
    /// </summary>
    private static void QueueLinksOnPage()
    {
        foreach (var link in driver.FindElements(By.XPath(DirectoryLinkXPath)))
        {
            Enqueue(link.GetAttribute("href"), prioritize: false);
        }

        var courseLinkParents = driver.FindElements(By.XPath(CourseLinkXPath + "/.."));

        foreach (var parent in courseLinkParents)
        {
            // Only courses whose type marker is "Vorlesung" are followed.
            if (parent.FindElements(By.XPath("abbr[@title='Vorlesung']")).Count > 0)
            {
                var link = parent.FindElement(By.XPath("a[starts-with(@href, 'course.html')]"));
                Enqueue(link.GetAttribute("href"), prioritize: true);
            }
        }
    }

    /// <summary>
    /// Adds a URL to the work list unless it is empty, already checked or already
    /// queued.
    /// </summary>
    /// <param name="url">Candidate URL; may be null when a link has no href.</param>
    /// <param name="prioritize">Insert at the front instead of appending.</param>
    private static void Enqueue(string url, bool prioritize)
    {
        if (string.IsNullOrEmpty(url) || checkedUrls.Contains(url) || !queuedUrls.Add(url))
        {
            return;
        }

        if (prioritize)
        {
            urlsToCheck.Insert(0, url);
        }
        else
        {
            urlsToCheck.Add(url);
        }
    }

    /// <summary>Records a URL as handled and removes it from the work list.</summary>
    private static void MarkChecked(string source)
    {
        checkedUrls.Add(source);
        Dequeue(source);
    }

    /// <summary>Removes a URL from the work list and from its lookup set.</summary>
    private static void Dequeue(string source)
    {
        urlsToCheck.Remove(source);
        queuedUrls.Remove(source);
    }
}