using System.ComponentModel;
using System.Text.RegularExpressions;
using Spectre.Console;
using Spectre.Console.Cli;

namespace UWScraper
{
    /// <summary>
    /// CLI command that runs the scraper and renders its progress as a
    /// Spectre.Console progress bar.
    /// </summary>
    internal sealed partial class ScrapeCommand : Command<ScrapeCommand.Settings>
    {
        /// <summary>
        /// Command-line options of the scrape command. The instance is handed
        /// unchanged to <c>Scraper.Scrape</c>; how each value is consumed is
        /// decided there.
        /// </summary>
        public partial class Settings : CommandSettings
        {
            /// <summary>Flag described to the user as "rescrape all lectures".</summary>
            [Description("Rescrape all lectures in the data base.")]
            [CommandOption("-r|--refresh-lectures")]
            public bool RefreshLectures { get; set; }

            /// <summary>Per-page scraping timeout in seconds (per the option description).</summary>
            [Description("The timeout for the scraping one page in seconds. (default = 2 seconds)")]
            [CommandOption("-t|--timeout")]
            [DefaultValue((uint)2)]
            public uint Timeout { get; set; }

            /// <summary>Age in hours after which a url is scraped again (per the option description).</summary>
            // NOTE(review): "-h" is commonly the short flag for help in Spectre.Console.Cli;
            // confirm this option is actually reachable through its short name.
            [Description("The amount of hours after which a url is rescraped. (default = 24 hours)")]
            [CommandOption("-h|--rescrape-hours")]
            [DefaultValue(24d)]
            public double RescrapeHours { get; set; }

            /// <summary>Url at which scraping starts; must be a well-formed absolute url.</summary>
            [Description("The start url for the scraping.")]
            [CommandOption("-u|--url")]
            [DefaultValue("https://ufind.univie.ac.at/de/vvz.html")]
            public string StartUrl { get; set; } = "https://ufind.univie.ac.at/de/vvz.html";

            /// <summary>Path of the database file.</summary>
            // The help text previously advertised "lectures.db" although the effective
            // default is "lecture.db"; the text now matches the real default.
            [Description("The path to the database file. Supports relative paths (default = lecture.db)")]
            [CommandOption("-p|--path")]
            [DefaultValue("lecture.db")]
            public string DatabasePath { get; set; } = "lecture.db";

            /// <summary>
            /// Semester selector: either <c>all</c> or a four-digit year followed by
            /// <c>S</c> or <c>W</c>. Initialized from the current local date.
            /// </summary>
            [Description("The semester for which the lectures should be scraped. \"all\" for all semester (default = current semester) ")]
            [CommandOption("-s|--semester")]
            [DefaultValue(null)]
            public string Semester { get; set; } = GetDefaultSemester();

            /// <summary>Flag described to the user as "clear all links to scrape".</summary>
            [Description("Clear all links to scrape.")]
            [CommandOption("-c|--clear-links")]
            [DefaultValue(false)]
            public bool ClearLinksToScrape { get; set; }

            /// <summary>
            /// Checks that <see cref="Semester"/> has the expected format and that
            /// <see cref="StartUrl"/> is a well-formed absolute url.
            /// </summary>
            /// <returns>
            /// <see cref="ValidationResult.Success"/> when both checks pass, otherwise an
            /// error result describing the first failing option.
            /// </returns>
            public override ValidationResult Validate()
            {
                // Guard against a missing value first: Regex.IsMatch throws on null,
                // which would surface as a crash instead of a validation message.
                if (string.IsNullOrWhiteSpace(Semester) || !SemesterRegex().IsMatch(Semester))
                {
                    return ValidationResult.Error($"Semester {Semester} is not valid. \nPlease use the format \"all\" or \"YYYY[S|W]\".");
                }

                if (!Uri.IsWellFormedUriString(StartUrl, UriKind.Absolute))
                {
                    return ValidationResult.Error($"The start url {StartUrl} is not a valid url.");
                }

                return ValidationResult.Success();
            }

            /// <summary>
            /// Builds the default semester string from the local clock: the year
            /// followed by <c>W</c> for October through December and <c>S</c> otherwise.
            /// </summary>
            // NOTE(review): January-September all map to "S"; confirm this matches the
            // intended semester calendar (a winter term may still be running in January).
            private static string GetDefaultSemester()
            {
                // Read the clock once so year and month come from the same instant.
                DateTime now = DateTime.Now;
                return now.Year + (now.Month >= 10 ? "W" : "S");
            }

            /// <summary>
            /// Matches <c>all</c> or four digits followed by exactly one of <c>S</c>/<c>W</c>.
            /// </summary>
            // The character class is [SW]; the former [S|W] also accepted a literal '|'.
            [GeneratedRegex(@"^(all|\d{4}[SW])$")]
            private static partial Regex SemesterRegex();
        }

        /// <summary>
        /// Starts the scraper inside a progress display and keeps the progress task
        /// in sync with the counts reported through the scraper's events.
        /// </summary>
        /// <param name="context">Command context supplied by Spectre.Console.Cli (unused).</param>
        /// <param name="settings">Parsed options, forwarded to <c>Scraper.Scrape</c>.</param>
        /// <returns>Always <c>0</c>.</returns>
        public override int Execute(CommandContext context, Settings settings)
        {
            AnsiConsole.Progress()
                .Columns(
                    new TaskDescriptionColumn(),
                    new ValuesColumn(),
                    new ProgressBarColumn(),
                    new PercentageColumn(),
                    new RemainingTimeColumn()
                )
                .Start(ctx =>
                {
                    // NOTE(review): neither handler is ever detached, and a second Init
                    // event would add another task plus another Scraped handler.
                    // Confirm Init fires once per run / the events do not outlive it.
                    Scraper.Init += (init) =>
                    {
                        // Total work = urls still queued + urls already done.
                        var task = ctx.AddTask("Scraped urls", true, init.ToScrape + init.Scraped);
                        task.Value(init.Scraped);

                        Scraper.Scraped += (progress) =>
                        {
                            if (!task.IsStarted)
                            {
                                task.StartTask();
                            }

                            // The queue can grow while scraping, so refresh the maximum too.
                            task.MaxValue(progress.ToScrape + progress.Scraped);
                            task.Value(progress.Scraped);
                        };
                    };

                    Scraper.Scrape(settings);
                });

            return 0;
        }
    }
}