Lectureplan/UWScraper/ScrapeCommand.cs
Robin Weichselbraun 55989e19e5 Ready for 1 release
2024-10-20 15:45:16 +02:00

110 lines
3.9 KiB
C#

using System.ComponentModel;
using System.Text.RegularExpressions;
using Spectre.Console;
using Spectre.Console.Cli;
namespace UWScraper
{
internal sealed partial class ScrapeCommand : Command<ScrapeCommand.Settings>
{
/// <summary>
/// Command-line settings for the scrape command. Each property is bound to the
/// option named in its <see cref="CommandOptionAttribute"/>.
/// </summary>
public partial class Settings : CommandSettings
{
    /// <summary>Set by <c>-r|--refresh-lectures</c>; per its description, requests a rescrape of all stored lectures.</summary>
    [Description("Rescrape all lectures in the data base.")]
    [CommandOption("-r|--refresh-lectures")]
    public bool RefreshLectures { get; set; }

    /// <summary>Per-page scraping timeout in seconds (declared default: 2).</summary>
    [Description("The timeout for the scraping one page in seconds. (default = 2 seconds)")]
    [CommandOption("-t|--timeout")]
    [DefaultValue((uint)2)]
    public uint Timeout { get; set; }

    /// <summary>Number of hours after which a url is rescraped (declared default: 24).</summary>
    // NOTE(review): "-h" is conventionally the short flag for help; confirm that the
    // CLI framework's built-in help option does not take precedence over this option.
    [Description("The amount of hours after which a url is rescraped. (default = 24 hours)")]
    [CommandOption("-h|--rescrape-hours")]
    [DefaultValue(24d)]
    public double RescrapeHours { get; set; }

    /// <summary>Absolute url at which scraping starts; checked in <see cref="Validate"/>.</summary>
    [Description("The start url for the scraping.")]
    [CommandOption("-u|--url")]
    [DefaultValue("https://ufind.univie.ac.at/de/vvz.html")]
    public string StartUrl { get; set; } = "https://ufind.univie.ac.at/de/vvz.html";

    /// <summary>Path to the database file (declared default: <c>lecture.db</c>).</summary>
    // The help text previously advertised "lectures.db" while the real default is "lecture.db".
    [Description("The path to the database file. Supports relative paths (default = lecture.db)")]
    [CommandOption("-p|--path")]
    [DefaultValue("lecture.db")]
    public string DatabasePath { get; set; } = "lecture.db";

    /// <summary>
    /// Semester selector: either <c>all</c> or a four-digit year followed by <c>S</c> or <c>W</c>.
    /// Initialised from the current local date by <see cref="CurrentSemester"/>.
    /// </summary>
    [Description("The semester for which the lectures should be scraped. \"all\" for all semester (default = current semester) ")]
    [CommandOption("-s|--semester")]
    [DefaultValue(null)]
    public string Semester { get; set; } = CurrentSemester();

    /// <summary>Set by <c>-c|--clear-links</c>; per its description, clears all links queued for scraping.</summary>
    [Description("Clear all links to scrape.")]
    [CommandOption("-c|--clear-links")]
    [DefaultValue(false)]
    public bool ClearLinksToScrape { get; set; }

    /// <summary>
    /// Checks that <see cref="Semester"/> has the expected format and that
    /// <see cref="StartUrl"/> is a well-formed absolute URI.
    /// </summary>
    /// <returns>
    /// <see cref="ValidationResult.Success"/> when both checks pass; otherwise an error
    /// result describing the first failing check.
    /// </returns>
    public override ValidationResult Validate()
    {
        // Guard against null/empty first: Regex.IsMatch throws on a null input,
        // which would surface as a crash instead of a validation message.
        if (string.IsNullOrEmpty(Semester) || !SemesterRegex().IsMatch(Semester))
        {
            return ValidationResult.Error($"Semester {Semester} is not valid. Please use the format \"all\" or \"YYYY[S|W]\".");
        }

        if (!Uri.IsWellFormedUriString(StartUrl, UriKind.Absolute))
        {
            return ValidationResult.Error($"The start url {StartUrl} is not a valid url.");
        }

        return ValidationResult.Success();
    }

    /// <summary>
    /// Builds the default semester string from the local clock: the year followed by
    /// <c>W</c> when the month is October or later, otherwise <c>S</c>.
    /// </summary>
    // NOTE(review): months before October (including January and February) map to "S" of
    // the current year - confirm this matches the intended semester boundaries.
    private static string CurrentSemester()
    {
        // Read the clock once so year and month always come from the same instant.
        DateTime now = DateTime.Now;
        return now.Year + (now.Month >= 10 ? "W" : "S");
    }

    // "[SW]" rather than "[S|W]": inside a character class '|' is a literal character,
    // so the old pattern also accepted inputs such as "2024|".
    [GeneratedRegex(@"^(all|\d{4}[SW])$")]
    private static partial Regex SemesterRegex();
}
/// <summary>
/// Runs the scraper while rendering a console progress display.
/// </summary>
/// <param name="context">The command context supplied by the CLI framework (not used here).</param>
/// <param name="settings">The parsed settings, passed on to <c>Scraper.Scrape</c>.</param>
/// <returns>Always <c>0</c>.</returns>
public override int Execute(CommandContext context, Settings settings)
{
    var progress = AnsiConsole.Progress().Columns(
        new TaskDescriptionColumn(),
        new ValuesColumn(),
        new ProgressBarColumn(),
        new PercentageColumn(),
        new RemainingTimeColumn());

    progress.Start(progressContext =>
    {
        // NOTE(review): neither handler is ever detached, and a new Scraped handler is
        // attached every time Init is raised - verify Init fires only once per run.
        Scraper.Init += initial =>
        {
            // The task's maximum is the sum of pending and already-scraped urls.
            var urlTask = progressContext.AddTask("Scraped urls", true, initial.ToScrape + initial.Scraped);
            urlTask.Value(initial.Scraped);

            Scraper.Scraped += update =>
            {
                if (!urlTask.IsStarted)
                {
                    urlTask.StartTask();
                }

                // Refresh the maximum on every event, then move the bar.
                urlTask.MaxValue(update.ToScrape + update.Scraped);
                urlTask.Value(update.Scraped);
            };
        };

        Scraper.Scrape(settings);
    });

    return 0;
}
}
}