#!/usr/bin/perl

############################################################################
# slg 0.3-cf4c082 (https://www.fabiankeil.de/gehacktes/slick-little-girl/) #
#                                                                          #
# Lists books loaned from the Stadtbibliothek Köln and optionally          #
# extends their due dates.                                                 #
#                                                                          #
# Copyright (c) 2013-2024 Fabian Keil <fk@fabiankeil.de>                   #
#                                                                          #
# Permission to use, copy, modify, and distribute this software for any    #
# purpose with or without fee is hereby granted, provided that the above   #
# copyright notice and this permission notice appear in all copies.        #
#                                                                          #
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES #
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         #
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  #
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   #
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    #
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  #
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           #
############################################################################

use strict;
use warnings;
use Getopt::Long;

use constant {
    START_URL => 'https://katalog.stbib-koeln.de/alswww2.dll/APS_OPAC?Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&BrowseAsHloc=-1',
    ACCOUNT_URL => 'https://katalog.stbib-koeln.de/alswww2.dll/APS_ACCOUNT?Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&BrowseAsHloc=-1',
    BASE_URL => 'https://katalog.stbib-koeln.de/alswww2.dll/',
    PATH_GARBAGE => 'Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&BrowseAsHloc=-1',

    CURL_BINARY => 'curl',
    COOKIE_FILE => $ENV{'HOME'} . '/.cache/slg/cookies',
    USER_AGENT => 'Slick Little Girl',
    CA_CERT_BUNDLE => '',

    # The loaned media is stored on disk so following calls
    # don't have to request it again
    TEMPORARY_HTML_FILE => $ENV{'HOME'} . '/.cache/slg/loans.html',

    MAX_EXTENSIONS => '2',
    LOGIN_DATA_FILE => $ENV{'HOME'} . '/.config/slg/login-data',
    ENCRYPTED_LOGIN_DATA_FILE => $ENV{'HOME'} . '/.config/slg/login-data.gpg',
    SHOW_DEBUG_MESSAGES => 0,
};

sub get_login_parameters($$) {
    my ($object, $interlock) = @_;
    my $login_parameters = "";
    my %params = (
        BrowseAsHloc => '-1',
        Style => 'WAI',
        SubStyle => '',
        Lang => 'GER',
        ResponseEncoding => 'utf-8',
        Method => 'CheckID',
        ZonesLogin => '1',
        Interlock => $interlock,
    );

    foreach my $name (keys %params) {
        $login_parameters .= sprintf(" -d '%s=%s'", $name, $params{$name});
    }

    $login_parameters .= sprintf(" -d @%s '%s%s'", LOGIN_DATA_FILE, BASE_URL, $object);

    return $login_parameters;
}

sub bad_news($) {
    my $message = shift;
    die "\n$message\n";
}

sub die_unless_page_contains_pattern($$$) {
    my ($page_ref, $page_name, $pattern) = @_;

    foreach my $line (@{$page_ref}) {
        return if ($line =~ m@$pattern@);
    }

    open(my $fd, '>', TEMPORARY_HTML_FILE)
        or bad_news("Failed to open '" . TEMPORARY_HTML_FILE . "': $!");
    foreach my $line (@{$page_ref}) {
        print $fd $line;
    }
    close $fd;

    bad_news("'$page_name' does not contain '$pattern'. Content dumped to: " . TEMPORARY_HTML_FILE);
}

sub get_curl_command($) {
    my $parameters = shift;
    my $curl_cmd = CURL_BINARY;

    # $curl_cmd .= " -v"; # This can confuse the parser
    $curl_cmd .= " --silent --show-error";
    $curl_cmd .= sprintf(" --user-agent '%s'", USER_AGENT);
    $curl_cmd .= sprintf(" --cacert '%s'", CA_CERT_BUNDLE) if (CA_CERT_BUNDLE ne "");
    $curl_cmd .= sprintf(" --cookie-jar '%s' --cookie '%s'", COOKIE_FILE, COOKIE_FILE);
    $curl_cmd .= " $parameters 2>&1";

    return $curl_cmd;
}

sub execute_curl_command_expecting_pattern($$$) {
    my ($parameters, $page_name, $pattern) = @_;
    my @page;

    my $curl_command = get_curl_command($parameters);

    printf("Executing: %s\n", $curl_command) if (SHOW_DEBUG_MESSAGES);
    @page = `$curl_command`;
    if ($?) {
        bad_news("Failed to execute $curl_command: @page");
    }
    die_unless_page_contains_pattern(\@page, $page_name, $pattern);
    return \@page;
}

sub request_start_page() {
    my $start_page_parameters = sprintf("'%s'", START_URL);
    my $page_ref;

    $page_ref = execute_curl_command_expecting_pattern($start_page_parameters, 'start-page',
                                                       'Barrierefreier WWW-Katalog');
    return get_object_in_page($page_ref);
}

sub request_login_page($) {
    my $object = shift;
    my $page_ref;
    my $start_page_parameters =
        sprintf("'%s&Finished=%s'", ACCOUNT_URL, $object);

    $page_ref = execute_curl_command_expecting_pattern($start_page_parameters,
                                                       'start-page-2', 'Login ins Kundenkonto');
    return get_object_in_page($page_ref);
}

sub request_first_loans_page($) {
    my $object = shift;
    my $page_ref;
    # https://katalog.stbib-koeln.de/alswww4.dll/Obj_385601361385344?Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&Method=ShowLoans&BrowseAsHloc=-1
    my $get_loans_parameters = sprintf("'%s%s?%s&Method=ShowLoans'",
                                       BASE_URL, $object, PATH_GARBAGE);

    $page_ref = execute_curl_command_expecting_pattern($get_loans_parameters,
                                                       'loans-page',
                                                       'Dies ist eine Liste der Medien');

    open(my $fd, '>', TEMPORARY_HTML_FILE) or bad_news("Failed to open '" . TEMPORARY_HTML_FILE . "': $!");
    print $fd "@{$page_ref}";
    close $fd;

    return get_object_in_page($page_ref);
}

sub request_next_loans_page($) {
    my $object = shift;
    my $page_ref;
    # https://katalog.stbib-koeln.de/alswww4.dll/Obj_108721363176086?Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&Method=PageBrowseDown&Finished=Obj_108721363176086
    my $get_loans_parameters = sprintf("'%s%s?%s&Method=PageBrowseDown'",
                                       BASE_URL, $object, PATH_GARBAGE);

    $page_ref = execute_curl_command_expecting_pattern($get_loans_parameters,
                                                       'loans-page',
                                                       'Dies ist eine Liste der Medien');

    open(my $fd, '>>', TEMPORARY_HTML_FILE) or bad_news("Failed to open '" . TEMPORARY_HTML_FILE . "': $!");
    print $fd "@{$page_ref}";
    close $fd;

    return get_object_in_page($page_ref);
}

sub get_new_due_date($) {
    my $page_ref = shift;

    foreach my $line (@{$page_ref}) {
        if ($line =~ m@Bitte geben Sie dieses Medium spätestens am <B> (\d{2}/\d{2}/\d{4}) </B> zurück@) {
            return fix_date_format($1);
        }
    }
    bad_news("Can't find new due date. Probably the renewal failed");
}

sub request_loan_extension($) {
    my $medium = shift;
    my $page_ref;
    my $extend_loan_parameters = sprintf("'%s'", $medium->{'renew-url'});

    printf("\nRequesting a loan extension for: %s\n", $medium->{Titel});

    $page_ref = execute_curl_command_expecting_pattern($extend_loan_parameters,
                                                       'loand-extended-page',
                                                       'Verlängerung wurde abgeschlossen');

    printf("Success. New due date is: %s\n", get_new_due_date($page_ref));

    return get_object_in_page($page_ref);
}

sub get_object_in_page($) {
    my $page_ref = shift;
    my $object;

    foreach my $line (@{$page_ref}) {
        if ($line =~ m@(APS_(?:ACCOUNT|MEDIALINK|SEARCH)\?Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&BrowseAsHloc=-1&Finished=(Obj_\d+))@) {
            $object = $2;
        }
    }

    bad_news("Didn't find object in: @{$page_ref}") unless (defined $object);

    return $object;
}

sub submit_login_information($) {
    my $object = shift;
    my $page_ref;
    my $login_parameters = get_login_parameters($object, $object);

    if (-f ENCRYPTED_LOGIN_DATA_FILE) {
        system("gpg", "-q", "-o", LOGIN_DATA_FILE, "--decrypt", ENCRYPTED_LOGIN_DATA_FILE) == 0
            or die "Failed to decrypt $!";
        sanity_check_login_data(LOGIN_DATA_FILE);
    }

    $page_ref = execute_curl_command_expecting_pattern($login_parameters,
                                                       'Login page 2',
                                                       'Entliehene Medien');
    if (-f ENCRYPTED_LOGIN_DATA_FILE) {
        unlink(LOGIN_DATA_FILE)
            or die "Failed to remove temporary login file: $!";
    }
    return get_object_in_page($page_ref);
}

sub show_life_sign($) {
    my $sign = shift;
    print $sign unless SHOW_DEBUG_MESSAGES;
}

sub do_login_dance() {
    my $object;

    printf("Login dance in progress.") unless SHOW_DEBUG_MESSAGES;

    $object = request_start_page();
    show_life_sign(".");

    $object = request_login_page($object);
    show_life_sign(".");

    $object = submit_login_information($object);
    show_life_sign(".\n");

    return $object;
}

sub fix_date_format($) {
    my $obsolete_date = shift;
    if ($obsolete_date =~ m@^(\d{2})/(\d{2})/(\d{4})$@) {
        my ($day, $month, $year) = ($1, $2, $3);
        return sprintf("%s-%s-%s", $year, $month, $day);
    }
    bad_news("Failed to parse date: $obsolete_date");
}

sub el_cheapo_html_decode($) {
    my $html = shift;

    $html =~ s@&Auml;@Ä@g;
    $html =~ s@&auml;@ä@g;
    $html =~ s@&Ouml;@Ö@g;
    $html =~ s@&ouml;@ö@g;
    $html =~ s@&Uuml;@Ü@g;
    $html =~ s@&uuml;@ü@g;
    $html =~ s@&szlig;@ß@g;
    $html =~ s@&amp;@&@g;
    $html =~ s@&eacute;@é@;
    $html =~ s@&lt;@<@g;
    $html =~ s@&gt;@>@g;
    $html =~ s@&quot;@"@g;
    $html =~ s@&oslash;@ø@g;

    return $html;
}

sub parse_media() {
    my $page = TEMPORARY_HTML_FILE;
    my $attribute;
    my %media;
    my $i = -1;
    my $loan_extention_prohibitted = 0;

    open (my $LOANS_FD, '<', $page) or bad_news("Can't open $page");
    while (<$LOANS_FD>) {
        if (m@Sie können derzeit keine Verlängerung durchführen.@) {
            printf("Deadlines can't be extended at this time.\n") unless ($loan_extention_prohibitted == 1);
            $loan_extention_prohibitted = 1;
        }
        if (m@<A href="(APS_CAT[^"]*)">([^<]*)</A>@){
            # <A href="APS_CAT?DB=Catalogue&BACNO=T005242086&Method=ShowCatalogueRecord&Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&BrowseAsHloc=-1&Finished=Obj_573731361558297%3FStyle%3DWAI%26SubStyle%3D%26Lang%3DGER%26ResponseEncoding%3Dutf-8">Polizei im Zwielicht</A>

            # New title detected
            if ($i >= 0) {
                unless (defined $media{$i}{'renew-url'} or
                        defined $media{$i}{'no-extension-today'} or
                        defined $media{$i}{'no-extension-ever'} or
                        $media{$i}{'Vorgemerkt'} == 1 or
                        $media{$i}{'Anzahl Verlängerungen'} == MAX_EXTENSIONS or
                        defined $media{$i}{'Fernleihe'} or
                        $loan_extention_prohibitted == 1) {
                    bad_news("Missing renewal URL for medium $i ($media{$i}{'Titel'}). Renewals: $media{$i}{'Anzahl Verlängerungen'}");
                }
            }
            $i++;

            # $media{$i}{'url'} = $1;
            $media{$i}{'Titel'} = el_cheapo_html_decode($2);
            $media{$i}{'Anzahl Verlängerungen'} = 0;
            $media{$i}{'Vorgemerkt'} = 0;
            if (m@NK-Ausw&auml;rtiger Leihverkehr@i){
                # <A href="APS_CAT?DB=Catalogue&BACNO=T00449613X&Method=ShowCatalogueRecord&Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&BrowseAsHloc=-1&Finished=Obj_78441434192341%3FStyle%3DWAI%26SubStyle%3D%26Lang%3DGER%26ResponseEncoding%3Dutf-8">NK-Ausw&auml;rtiger Leihverkehr</A>
                $media{$i}{'Fernleihe'} = 1;
            }
            next;
        }
        if (m@Medium vorgemerkt@) {
            # This medium's due date can't be extended even if
            # MAX_EXTENSIONS would otherwise allow it.
            $media{$i}{'Vorgemerkt'} = 1;
        }
        if (m@Heute (verlängert|entliehen)@) {
            # This medium's due date can't be extended today.
            $media{$i}{'no-extension-today'} = 1;
        }
        if (m@Kann nicht verlängert werden@) {
            # Some loans can't be extended for example the "Energie sparen / Testgerät"
            $media{$i}{'no-extension-ever'} = 1;
        }

        if (m@<A href="(Obj_\d*[a-z\d=%&_?-]*&Method=Renew[a-z\d=%&_?-]*)">@i){
            # <A href="Obj_573731361558297?Style=WAI&SubStyle=&Lang=GER&ResponseEncoding=utf-8&Method=Renew&Item=108618&BrowseAsHloc=-1&Finished=Obj_573731361558297%3FStyle%3DWAI%26SubStyle%3D%26Lang%3DGER%26ResponseEncoding%3Dutf-8">
            my $renewal_url = BASE_URL . $1;
            bad_news("Offending characters in renewal url: $renewal_url") unless ($renewal_url =~ m@^https://[./a-z\d=%&_?-]+$@i);
            $media{$i}{'renew-url'} = $renewal_url;
            next;
        }

        if (defined $attribute) {
            $_ =~ s@(\s*<br>)?\s*$@@;
            $_ =~ s@^\s*@@;
            $_ = el_cheapo_html_decode($_);
            $media{$i}{$attribute} = ($attribute =~ /datum$/) ? fix_date_format($_) : $_;
            $attribute = undef;
            next;
        }
        if (/<nobr>(Autor|Katalognummer|Mediennummer|Ausleihdatum|Rückgabedatum|Anzahl Verlängerungen)/) {
            $attribute = $1;
        }
    }

    return \%media;
}

sub print_spacer() {
    printf("%s\n", "-" x 50);
}

sub get_media_attributes_to_show() {
    my @attributes = (
        "Rückgabedatum",
        "Ausleihdatum",
        "Titel",
        "Autor",
        "Anzahl Verlängerungen",
        "Vorgemerkt",
#        "Katalognummer",
#        "Mediennummer",
    );
    return @attributes;
}

sub show_media($) {
    my $media = shift;
    my $k = 0;
    my %shown_media;

    unless (defined $media->{$k}) {
        print("No loans detected.\n");
        return;
    }

    print_spacer();
    while (defined $media->{$k}) {
        my $id = $media->{$k}{'Mediennummer'};
        my $media_info = "";
        foreach my $a (get_media_attributes_to_show()) {
            next if ($a eq 'renew-url');
            $media_info .= sprintf("%s='%s' ", $a, $media->{$k}{$a});
        }
        chop $media_info;
        unless (defined $shown_media{$id}) {
            $shown_media{$id} = 1;
            printf("%2d: %s\n", $k, $media_info);
        }
        $k++;
    }
    print_spacer();
}

sub sanity_check_login_data($) {
    my $file = shift;
    local($/);
    my $login_data;

    open(my $fd, '<', $file) or bad_news("Can't read login data from file $file: $!");
    $login_data = <$fd>;
    close $fd;

    unless ($login_data =~ /^BRWR=[A\d ]+&PIN=\d{4}$/) {
        printf("Unexpected content in %s. Login might not work", $file);
    }
}

sub retrieve_media_list($$) {
    my $object = shift;
    my $max_pages = shift;
    my $i = 0;

    printf("Retrieving loaned media") unless SHOW_DEBUG_MESSAGES;
    $object = request_first_loans_page($object);

    while (++$i < $max_pages) {
        show_life_sign(".");
        $object = request_next_loans_page($object);
    }
    show_life_sign(".\n");

    return $object;
}

sub expand_extendable_items(@) {
    my @extendable_items = @_;
    my @expanded_items = ();

    foreach my $format (@extendable_items) {
        if ($format =~ /^\d+$/) {
            push(@expanded_items, $format);
            next;
        } elsif ($format =~ /^(\d+)-(\d+)$/) {
            my ($begin, $end) = ($1, $2);
            if ($begin < $end) {
                foreach my $number ($begin .. $end) {
                    push(@expanded_items, $number);
                }
                next;
            }
        }
        bad_news("Invalid item to extend: $format");
    }

    return @expanded_items;
}

sub main() {
    my $object;
    my $media;
    my $resume_session = 0;
    my $max_media_pages = 1;
    my @extendable_items = ();

    GetOptions('resume-session' => \$resume_session,
               'max-media-pages=i' => \$max_media_pages,
               'extend-loan=s@' => \@extendable_items) or exit(1);

    sanity_check_login_data(LOGIN_DATA_FILE) if (-f LOGIN_DATA_FILE);

    @extendable_items = expand_extendable_items(@extendable_items);

    unless ($resume_session) {
        $object = do_login_dance();
        $object = retrieve_media_list($object, $max_media_pages);
    }

    $media = parse_media();
    show_media($media);

    foreach my $extendable_item (@extendable_items) {
        # While renewal URLs are predictable there's
        # an additional server-side check ...
        unless (defined $media->{$extendable_item}) {
            bad_news("Invalid medium number");
        }
        unless (defined $media->{$extendable_item}{'renew-url'}) {
            bad_news("Loan extension for '$media->{$extendable_item}{Titel}' isn't allowed.");
        }
        request_loan_extension($media->{$extendable_item})
    }
}

main();
