110 lines
3.9 KiB
C#
110 lines
3.9 KiB
C#
using System.ComponentModel;
|
|
using System.Text.RegularExpressions;
|
|
using Spectre.Console;
|
|
using Spectre.Console.Cli;
|
|
|
|
namespace UWScraper
{
    /// <summary>
    /// CLI command that runs the scraper and renders its progress on the console.
    /// </summary>
    internal sealed partial class ScrapeCommand : Command<ScrapeCommand.Settings>
    {
        /// <summary>
        /// Command-line options accepted by <see cref="ScrapeCommand"/>.
        /// </summary>
        public partial class Settings : CommandSettings
        {
            /// <summary>
            /// Flag behind <c>-r|--refresh-lectures</c>; per its description it requests
            /// a rescrape of all lectures already stored.
            /// </summary>
            [Description("Rescrape all lectures in the data base.")]
            [CommandOption("-r|--refresh-lectures")]
            public bool RefreshLectures { get; set; }

            /// <summary>
            /// Per-page scraping timeout in seconds (declared default: 2).
            /// </summary>
            [Description("The timeout for the scraping one page in seconds. (default = 2 seconds)")]
            [CommandOption("-t|--timeout")]
            [DefaultValue((uint)2)]
            public uint Timeout { get; set; }

            /// <summary>
            /// Number of hours after which a url is scraped again (declared default: 24).
            /// </summary>
            // NOTE(review): the short name "-h" may collide with the CLI framework's
            // built-in help switch - confirm that "-h <hours>" is actually reachable.
            [Description("The amount of hours after which a url is rescraped. (default = 24 hours)")]
            [CommandOption("-h|--rescrape-hours")]
            [DefaultValue(24d)]
            public double RescrapeHours { get; set; }

            /// <summary>
            /// Url at which scraping starts. Must be a well-formed absolute url
            /// (checked in <see cref="Validate"/>).
            /// </summary>
            [Description("The start url for the scraping.")]
            [CommandOption("-u|--url")]
            [DefaultValue("https://ufind.univie.ac.at/de/vvz.html")]
            public string StartUrl { get; set; } = "https://ufind.univie.ac.at/de/vvz.html";

            /// <summary>
            /// Path to the database file (default: <c>lecture.db</c>).
            /// </summary>
            // The help text previously advertised "lectures.db" although the effective
            // default has always been "lecture.db"; only the text was corrected.
            [Description("The path to the database file. Supports relative paths (default = lecture.db)")]
            [CommandOption("-p|--path")]
            [DefaultValue("lecture.db")]
            public string DatabasePath { get; set; } = "lecture.db";

            /// <summary>
            /// Semester to scrape: either <c>all</c> or four digits followed by
            /// <c>S</c> or <c>W</c> (for example <c>2024W</c>).
            /// </summary>
            // NOTE(review): DefaultValue(null) presumably leaves the initializer below
            // in effect when the option is omitted - confirm against the CLI framework.
            [Description("The semester for which the lectures should be scraped. \"all\" for all semester (default = current semester) ")]
            [CommandOption("-s|--semester")]
            [DefaultValue(null)]
            public string Semester { get; set; } = GetDefaultSemester();

            /// <summary>
            /// Flag behind <c>-c|--clear-links</c>; per its description it requests
            /// clearing all links that are queued for scraping.
            /// </summary>
            [Description("Clear all links to scrape.")]
            [CommandOption("-c|--clear-links")]
            [DefaultValue(false)]
            public bool ClearLinksToScrape { get; set; }

            /// <summary>
            /// Checks that <see cref="Semester"/> has the expected format and that
            /// <see cref="StartUrl"/> is a well-formed absolute url.
            /// </summary>
            /// <returns>
            /// A successful result when both checks pass; otherwise an error result
            /// describing the first failing option.
            /// </returns>
            public override ValidationResult Validate()
            {
                // Regex.IsMatch throws on null, so treat a missing value as invalid
                // instead of letting the exception escape validation.
                if (string.IsNullOrEmpty(Semester) || !SemesterRegex().IsMatch(Semester))
                {
                    return ValidationResult.Error($"Semester {Semester} is not valid. Please use the format \"all\" or \"YYYY[S|W]\".");
                }

                if (!Uri.IsWellFormedUriString(StartUrl, UriKind.Absolute))
                {
                    return ValidationResult.Error($"The start url {StartUrl} is not a valid url.");
                }

                return ValidationResult.Success();
            }

            /// <summary>
            /// Builds the default semester identifier from the local clock:
            /// the current year followed by <c>W</c> from October on, otherwise <c>S</c>.
            /// </summary>
            // NOTE(review): January-September all map to "S" of the current year;
            // verify this matches what "current semester" should mean early in the year.
            private static string GetDefaultSemester()
            {
                // Read the clock once so year and month come from the same instant.
                DateTime now = DateTime.Now;
                return now.Year + (now.Month >= 10 ? "W" : "S");
            }

            /// <summary>
            /// Matches <c>all</c> or four ASCII digits followed by <c>S</c> or <c>W</c>.
            /// </summary>
            // The class is [SW], not [S|W]: inside a character class '|' is a literal,
            // which made inputs such as "2024|" pass validation.
            [GeneratedRegex(@"^(all|[0-9]{4}[SW])$")]
            private static partial Regex SemesterRegex();
        }

        /// <summary>
        /// Runs the scraper with the given settings while showing a progress display.
        /// </summary>
        /// <param name="context">The command context supplied by the CLI framework (not used here).</param>
        /// <param name="settings">The parsed command-line options, passed on to the scraper.</param>
        /// <returns>Always <c>0</c>.</returns>
        public override int Execute(CommandContext context, Settings settings)
        {
            AnsiConsole.Progress()
                .Columns(
                    new TaskDescriptionColumn(),
                    new ValuesColumn(),
                    new ProgressBarColumn(),
                    new PercentageColumn(),
                    new RemainingTimeColumn())
                .Start(ctx =>
                {
                    // NOTE(review): the handlers attached to Scraper.Init and
                    // Scraper.Scraped are never detached in this method, and every
                    // Init notification attaches one more Scraped handler.
                    Scraper.Init += (initArgs) =>
                    {
                        // The task total is the already scraped plus the pending urls.
                        var progressTask = ctx.AddTask("Scraped urls", true, initArgs.ToScrape + initArgs.Scraped);
                        progressTask.Value(initArgs.Scraped);

                        Scraper.Scraped += (scrapedArgs) =>
                        {
                            if (!progressTask.IsStarted)
                            {
                                progressTask.StartTask();
                            }

                            // The total is refreshed on every notification before the
                            // current value is set.
                            progressTask.MaxValue(scrapedArgs.ToScrape + scrapedArgs.Scraped);
                            progressTask.Value(scrapedArgs.Scraped);
                        };
                    };

                    Scraper.Scrape(settings);
                });

            return 0;
        }
    }
}
|