#!/usr/bin/perl

use strict;
use warnings;
use Term::Screen;
use Switch;
use List::Util qw[min max];
use POSIX;

# Initialise the 12x5 map: each character tells what the cell contains.
# d -> dangerous cell
# ■ -> wall
# r -> reward
# . -> empty cell
my @map = (
         [ ".", ".", ".", ".", "d", ".", ".", ".", ".", ".", ".", "." ],
         [ "d", ".", ".", ".", "■", ".", ".", "d", ".", ".", ".", "." ],
         [ ".", ".", "d", ".", ".", "■", ".", ".", ".", "■", ".", "r" ],
         [ ".", ".", ".", ".", ".", ".", ".", ".", ".", ".", "■", "■" ],
         [ ".", ".", ".", ".", "d", ".", ".", ".", ".", "d", ".", "." ],
);

# CSV output file (one line is appended each time the reward is reached).
my $csvout = "qlearn.csv";

# Q-value grid: same shape as the map, every cell starting at 0.
# It is derived from @map so the two grids cannot get out of sync.
my @qmap = map { [ (0) x scalar(@$_) ] } @map;

# Initial position, used as [y][x] when indexing the grids.
my $xbot = 0;
my $ybot = 4;
my $loop = 1;         # step counter shown on screen
my $wins = 0;         # number of times the reward was reached
my $winsminus1 = 0;   # value of $wins at the previous step
my $moves = 0;        # steps taken since the last CSV line

# Coefficients λ and ϵ (learning and exploration), both starting at 1.
my $apprentissage = 1;
my $exploration = 1;

my $scr = Term::Screen->new();
unless ($scr) { die " Erreur à l'initialisation de l'affichage.\n"; }

$scr->clrscr();

# Append the CSV header. Three-argument open with a lexical handle keeps the
# file name from being parsed as a mode and avoids a global bareword handle.
open(my $csv_fh, '>>', $csvout) or die "Erreur à l'ouverture du CSV.\n";
print {$csv_fh} "Deplacements;Apprentissage;Exploration\n";
close($csv_fh);

# Main loop: one learning step per second, forever (stop with Ctrl-C).
while (1)
{
    # The scalars are passed by alias: calculatePosition writes the new
    # position, coefficients and win counter back through @_.
    calculatePosition(\@map, \@qmap, $xbot, $ybot, $apprentissage, $exploration, $wins);
    printMap($scr, \@map, \@qmap, $xbot, $ybot);

    $scr->at(14,0)->puts("Apprentissage: ".$apprentissage);
    $scr->at(15,0)->puts("Exploration: ".$exploration);
    $scr->at(16,0)->puts("Step ".$loop);
    $scr->at(16,20)->puts("Arrivée atteinte ".$wins." fois");
    $scr->at(18,0);

    $moves++;

    # The win counter changed during this step: log how many steps the
    # episode took together with the current coefficients, then start over.
    if ($wins != $winsminus1)
    {
        open(my $csv_fh, '>>', $csvout) or die "Erreur à l'ouverture du CSV.\n";
        print {$csv_fh} "$moves;$apprentissage;$exploration\n";
        close($csv_fh);
        $moves = 0;
    }
    $winsminus1 = $wins;

    $loop++;
    sleep 1;
}



#calculatePosition($map, $qvalues, $botx, $boty, $appr, $explo, $wins)
#
# Performs one step of the robot and updates the Q-value of the cell it left.
#
#   $map     - array ref of rows of cell characters ("d", "■", "r", ".")
#   $qvalues - array ref of rows of Q-values, same shape as $map (updated in place)
#   $botx    - robot column; overwritten through @_ with the new column
#   $boty    - robot row; overwritten through @_ with the new row
#   $appr    - learning coefficient; overwritten with 0.999 * $appr
#   $explo   - exploration coefficient; overwritten with 0.999 * $explo
#   $wins    - success counter; incremented when the robot leaves the "r" cell
#
# The last five arguments are modified through the @_ aliases, so the caller
# must pass variables, not constants. When the chosen move runs into a wall
# nothing at all is modified. Returns nothing.
sub calculatePosition
{
    my ($map, $qmap, $xbot, $ybot, $appr, $explo) = @_;

    my $maxQ = 0;
    my $nb = rand(1); # policy draw, compared with the exploration coefficient
    my $xold = $xbot;
    my $yold = $ybot;

    # Grid limits come from the map that was passed in, not from constants.
    my $ymax = $#{$map};
    my $xmax = $#{ $map->[$yold] };

    my $direction = 0; # 0 = N, 1 = E, 2 = S, 3 = W

    if ($nb < $explo)
    {
        # Random exploration
        $direction = int(rand(4));
    }
    else
    {
        # Greedy move towards the best neighbouring Q-value
        ($direction, $maxQ) = bestQValue($qmap, $xbot, $ybot);
    }

    # Move the robot in the chosen direction, staying inside the grid.
    # A plain if/elsif chain is used so the block does not depend on a
    # source filter.
    if ($direction == 0)
    {
        if ($ybot != 0) { $ybot--; }
    }
    elsif ($direction == 1)
    {
        if ($xbot != $xmax) { $xbot++; }
    }
    elsif ($direction == 2)
    {
        if ($ybot != $ymax) { $ybot++; }
    }
    elsif ($direction == 3)
    {
        if ($xbot != 0) { $xbot--; }
    }

    # Wall check: the move is cancelled. The caller's variables have not been
    # touched yet, so returning here leaves position, Q-values and
    # coefficients exactly as they were.
    if ($map->[$ybot][$xbot] eq "■")
    {
        return;
    }

    # Reward is determined by the cell the robot was standing on.
    my $rec = 0;
    my $left = $map->[$yold][$xold];

    if ($left eq "d")
    {
        $rec = -5;
    }

    if ($left eq "r")
    {
        $rec = 1000;

        # Goal reached: send the robot back to the start cell and count the success.
        $xbot = 0;
        $ybot = 4;
        $_[6]++;
    }

    # Q(state, action) = R(state, action) + Gamma * Max[Q(next state, all actions)]
    # NOTE(review): the exploration coefficient doubles as Gamma here, and
    # $maxQ stays 0 after a random move. Both are kept as in the original
    # formula -- confirm this is intended.
    $qmap->[$yold][$xold] = $appr * ($rec + ($explo * $maxQ))
                          + (1 - $appr) * $qmap->[$yold][$xold];

    # Write the new position back to the caller's variables.
    $_[2] = $xbot;
    $_[3] = $ybot;

    # Decay both coefficients.
    $_[4] = 0.999 * $appr;
    $_[5] = 0.999 * $explo;

    return;
}

#bestQValue($qmap, $xbot, $ybot)
#
# Looks at the Q-values of the cells adjacent to ($xbot, $ybot) and picks the
# direction with the highest one.
#
#   $qmap - array ref of rows of Q-values
#   $xbot - robot column
#   $ybot - robot row
#
# Returns the list ($direction, $value) where $direction is 0 = north,
# 1 = east, 2 = south, 3 = west and $value is the Q-value of that neighbour.
# Ties are broken at random. Neighbours outside the grid are ignored; walls
# are not taken into account here. On a grid with a single cell there is no
# neighbour and both returned values are undef.
sub bestQValue
{
    my ($qmap, $xbot, $ybot) = @_;

    # Grid limits come from the Q-value grid that was passed in.
    my $ymax = $#{$qmap};
    my $xmax = $#{ $qmap->[$ybot] };

    # Reachable directions and the Q-value found there.
    my %dir;

    if ($ybot != 0)
        { $dir{0} = $qmap->[$ybot-1][$xbot]; } #north

    if ($xbot != $xmax)
        { $dir{1} = $qmap->[$ybot][$xbot+1]; } #east

    if ($ybot != $ymax)
        { $dir{2} = $qmap->[$ybot+1][$xbot]; } #south

    if ($xbot != 0)
        { $dir{3} = $qmap->[$ybot][$xbot-1]; } #west

    # Keep only the directions that share the maximal value.
    my $maxvalue = max values %dir;
    my @best = grep { $dir{$_} == $maxvalue } keys %dir;

    # Pick one of the remaining directions at random.
    my $rand = $best[rand @best];
    return ($rand, $maxvalue);
}


#printMap($term, $map, $qvaluemap, $botx, $boty)
#
# Draws the environment, the sign of every Q-value and the robot.
#
#   $term      - screen object; at(), puts(), bold() and normal() are called
#                on it and chained
#   $map       - array ref of rows of cell characters, drawn from row 5, column 5
#   $qvaluemap - array ref of rows of Q-values, drawn from row 5, column 25,
#                one cell every two columns (read only)
#   $botx      - robot column in the map
#   $boty      - robot row in the map
#
# Neither grid is modified.
sub printMap
{
    my($scr, $map, $qmap, $xbot, $ybot) = @_;

    ##### Map drawing
    my $y = 5;
    foreach my $line (@$map)
    {
        my $x = 5;
        foreach my $cell (@$line)
        {
            $scr->at($y,$x)->puts($cell);
            $x++;
        }
        $y++;
    }

    $scr->at(12,5);
    $scr->bold()->puts("Environnement")->normal();


    ##### Q-value drawing
    $y = 5;
    foreach my $line (@$qmap)
    {
        my $x = 25;
        foreach my $cell (@$line)
        {
            # Round up into a separate variable: $cell is an alias of the
            # stored Q-value, so assigning to it would overwrite the table.
            my $rounded = ceil($cell);

            if ($rounded == 0)
             { $scr->at($y,$x)->puts("0"); }
            elsif ($rounded > 0)
             { $scr->at($y,$x)->puts("+"); }
            elsif ($rounded < 0)
             { $scr->at($y,$x)->puts("-"); }

            $x = $x + 2;
        }
        $y++;
    }

    $scr->at(12,25);
    $scr->bold()->puts("Q-Values")->normal();

    ##### Robot position, drawn over the map
    $scr->at(5+$ybot,5+$xbot)->puts("☻");

    return;
}