package search.minimax;

import com.itextpdf.text.pdf.ColumnText;
import game.Game;
import gnu.trove.list.array.TLongArrayList;
import java.util.concurrent.ThreadLocalRandom;
import main.collections.FVector;
import main.collections.FastArrayList;
import metadata.ai.heuristics.Heuristics;
import org.apache.batik.svggen.SVGSyntax;
import other.context.Context;
import other.move.Move;
import other.state.State;
import policies.softmax.SoftmaxFromMetadataSelection;
import policies.softmax.SoftmaxPolicy;

/* loaded from: input_file:search/minimax/LazyUBFM.class */
/**
 * UBFM variant that scores the moves of a position without evaluating each
 * successor individually: every move receives the value of the current
 * context plus an offset derived from the logit that a learned softmax
 * selection policy assigns to that move.
 *
 * <p>During the first turn after {@link #initAI(Game, int)} the move values
 * returned come from the parent implementation, while this class only
 * collects logit statistics (min, max, sum, count). From the second turn on,
 * those statistics are used to centre and rescale the logits.
 *
 * <p>All divisions by collected statistics are guarded: when a statistic is
 * degenerate (no samples, zero or non-finite spread) the corresponding term
 * is neutralised instead of letting NaN or infinite values leak into the
 * returned evaluations.
 */
public class LazyUBFM extends UBFM {
    /** Probability with which a single move evaluation is printed when debug display is on. */
    private static final double DEBUG_SAMPLE_PROBABILITY = 0.05d;

    /** Margin used by {@link #scoreToValueEst(float, float, float)} when comparing a score to its two bounds. */
    private static final float BOUND_MARGIN = 10.0f;

    /** Weight of the logit-based offset relative to the context value; shared by all instances. */
    private static float actionEvaluationWeight = 0.5f;

    /** Policy whose logits are used to differentiate the moves of a position. */
    protected SoftmaxPolicy learnedSelectionPolicy;

    /** True from {@link #initAI(Game, int)} until the first {@code selectAction} call has completed. */
    boolean firstTurn;

    /** Spread of the inherited heuristic evaluation bounds, refreshed after every {@code selectAction}. */
    float estimatedHeuristicScoresRange;

    /** Largest logit observed since the last reset. */
    float maxActionLogit;

    /** Smallest logit observed since the last reset. */
    float minActionLogit;

    /** Spread of the observed logits, refreshed after every {@code selectAction}. */
    float estimatedActionLogitRange;

    /** Sum of all logits observed since the last {@code initAI}. */
    float actionLogitSum;

    /** Number of logits observed since the last {@code initAI} (kept as a float counter). */
    float actionLogitComputations;

    /** Mean of the observed logits, refreshed after every {@code selectAction}. */
    float estimatedActionLogitMean;

    /** Largest move value produced or registered so far. */
    float maxRegisteredValue;

    /** Smallest move value produced or registered so far. */
    float minRegisteredValue;

    /**
     * Factory method.
     *
     * @return a new instance built with the no-argument constructor
     */
    public static LazyUBFM createLazyUBFM() {
        return new LazyUBFM();
    }

    /** Creates an agent using the parent's default construction. */
    public LazyUBFM() {
        applyDefaults();
    }

    /**
     * Creates an agent, handing the given heuristics to the parent constructor.
     *
     * @param heuristics heuristics passed on to {@code UBFM}
     */
    public LazyUBFM(Heuristics heuristics) {
        super(heuristics);
        applyDefaults();
    }

    /** Shared constructor tail: resets the logit extremes, installs the default policy and sets the display name. */
    private void applyDefaults() {
        this.learnedSelectionPolicy = null;
        this.maxActionLogit = Float.NEGATIVE_INFINITY;
        this.minActionLogit = Float.POSITIVE_INFINITY;
        setLearnedSelectionPolicy(new SoftmaxFromMetadataSelection(0.0d));
        this.friendlyName = "Lazy UBFM";
    }

    /**
     * Delegates move selection to the parent, then refreshes the statistics
     * used to rescale logits on subsequent turns and leaves first-turn mode.
     *
     * <p>All five arguments are forwarded unchanged to the parent.
     *
     * @return the move chosen by the parent implementation
     */
    @Override // search.minimax.UBFM, other.AI
    public Move selectAction(Game game2, Context context, double d, int i, int i2) {
        Move chosenMove = super.selectAction(game2, context, d, i, i2);
        this.firstTurn = false;
        this.estimatedHeuristicScoresRange = this.maxHeuristicEval - this.minHeuristicEval;
        if (this.actionLogitComputations > 0.0f) {
            this.estimatedActionLogitRange = this.maxActionLogit - this.minActionLogit;
            this.estimatedActionLogitMean = this.actionLogitSum / this.actionLogitComputations;
        } else {
            // No logit was scored yet: the extremes are still +/-infinity and the
            // mean would be 0/0, so fall back to neutral statistics.
            this.estimatedActionLogitRange = 0.0f;
            this.estimatedActionLogitMean = 0.0f;
        }
        return chosenMove;
    }

    /**
     * Estimates a value for every move in {@code fastArrayList}.
     *
     * <p>The logit of each move is always computed and folded into the running
     * statistics. On the first turn the result of the parent implementation is
     * returned; afterwards each move gets the context value plus a logit-based
     * offset whose sign depends on whether the agent of the player to move
     * equals {@code i}.
     *
     * @param fastArrayList  moves to score
     * @param context        position the moves are scored in
     * @param i              agent index compared with the agent of the player to move
     * @param tLongArrayList hashes passed to {@code getContextValue} and to the search-tree description
     * @param i2             only forwarded to the parent implementation
     * @param j              only forwarded to the parent implementation
     * @return one value per move, in the order of {@code fastArrayList}
     */
    @Override // search.minimax.UBFM
    protected FVector estimateMovesValues(FastArrayList<Move> fastArrayList, Context context, int i, TLongArrayList tLongArrayList, int i2, long j) {
        State state = context.state();
        int moverAgent = state.playerToAgent(state.mover());
        float contextValue = getContextValue(context, i, tLongArrayList, moverAgent);
        if (this.savingSearchTreeDescription) {
            this.searchTreeOutput.append("(" + stringOfNodeHashes(tLongArrayList) + "," + Float.toString(contextValue) + "," + (moverAgent == i ? 1 : 2) + "),\n");
        }

        int moveCount = fastArrayList.size();
        FVector values = new FVector(moveCount);
        for (int moveIndex = 0; moveIndex < moveCount; moveIndex++) {
            float logit = this.learnedSelectionPolicy.computeLogit(context, fastArrayList.get(moveIndex));
            this.actionLogitSum += logit;
            this.actionLogitComputations += 1.0f;
            this.maxActionLogit = Math.max(logit, this.maxActionLogit);
            this.minActionLogit = Math.min(logit, this.minActionLogit);
            values.set(moveIndex, logit);
        }

        if (this.firstTurn) {
            // Statistics are not usable yet; the logits above were gathered only to build them.
            return super.estimateMovesValues(fastArrayList, context, i, tLongArrayList, i2, j);
        }

        int sign = (i == moverAgent) ? 1 : -1;
        for (int moveIndex = 0; moveIndex < moveCount; moveIndex++) {
            // The random draw only happens when debug display is enabled.
            boolean traceThisMove = this.debugDisplay
                    && ThreadLocalRandom.current().nextDouble() < DEBUG_SAMPLE_PROBABILITY;
            if (traceThisMove) {
                System.out.printf("action score is %.6g and heuristicScore is %.6g ", Float.valueOf(values.get(moveIndex)), Float.valueOf(contextValue));
            }
            float moveValue = contextValue + logitOffset(values.get(moveIndex), sign);
            values.set(moveIndex, moveValue);
            this.maxRegisteredValue = Math.max(moveValue, this.maxRegisteredValue);
            this.minRegisteredValue = Math.min(moveValue, this.minRegisteredValue);
            if (traceThisMove) {
                System.out.printf("-> eval is %.6g\n", Float.valueOf(values.get(moveIndex)));
            }
        }
        return values;
    }

    /**
     * Converts a raw logit into the offset added to the context value:
     * the logit is centred on the estimated mean, signed, scaled by the
     * heuristic range and divided by the logit range.
     *
     * @param logit raw logit of the move
     * @param sign  +1 or -1, applied to the centred logit
     * @return the offset, or 0 when the statistics are degenerate
     */
    private float logitOffset(float logit, int sign) {
        float logitRange = this.estimatedActionLogitRange;
        // A zero range (all logits identical) would give 0/0; a NaN or negative
        // range means no meaningful spread was ever measured.
        if (!(logitRange > 0.0f)) {
            return 0.0f;
        }
        float offset = ((((actionEvaluationWeight * (logit - this.estimatedActionLogitMean)) * sign) * this.estimatedHeuristicScoresRange) * 2.0f) / logitRange;
        if (Float.isNaN(offset) || Float.isInfinite(offset)) {
            // Any remaining non-finite input must not poison the move values.
            return 0.0f;
        }
        return offset;
    }

    /**
     * Initialises the parent and, when present, the selection policy, then
     * resets every statistic and re-enters first-turn mode.
     *
     * @param game2 forwarded to the parent and to the policy
     * @param i     forwarded to the parent and to the policy
     */
    @Override // search.minimax.UBFM, other.AI
    public void initAI(Game game2, int i) {
        super.initAI(game2, i);
        if (this.learnedSelectionPolicy != null) {
            this.learnedSelectionPolicy.initAI(game2, i);
        }
        this.firstTurn = true;
        this.actionLogitComputations = 0.0f;
        this.actionLogitSum = 0.0f;
        this.maxActionLogit = Float.NEGATIVE_INFINITY;
        this.minActionLogit = Float.POSITIVE_INFINITY;
        this.maxRegisteredValue = Float.NEGATIVE_INFINITY;
        this.minRegisteredValue = Float.POSITIVE_INFINITY;
    }

    /**
     * Reports whether this agent can play the given game: the game must be
     * deterministic, without hidden information or subgames, alternating-move,
     * and its AI metadata must provide features or trained feature trees.
     *
     * @param game2 game to check
     * @return {@code true} when all requirements hold
     */
    @Override // search.minimax.UBFM, other.AI
    public boolean supportsGame(Game game2) {
        if (game2.isStochasticGame() || game2.hiddenInformation() || game2.hasSubgames() || !game2.isAlternatingMoveGame()) {
            return false;
        }
        return game2.metadata().ai().features() != null
                || game2.metadata().ai().trainedFeatureTrees() != null;
    }

    /**
     * Maps a score to a value estimate in [-1, 1].
     *
     * <p>Scores within {@code 10} of {@code f2} (or below) map to -1, scores
     * within {@code 10} of {@code f3} (or above) map to +1. Any other score is
     * mapped linearly so that the smallest registered value gives -0.8 and the
     * largest gives 0.8; the registered bounds are first widened to include
     * the inherited heuristic evaluation bounds.
     *
     * @param f  score to convert
     * @param f2 lower bound
     * @param f3 upper bound
     * @return the value estimate; 0 when the registered bounds have no usable spread
     */
    @Override // search.minimax.UBFM
    public double scoreToValueEst(float f, float f2, float f3) {
        if (f <= f2 + BOUND_MARGIN) {
            return -1.0d;
        }
        if (f >= f3 - BOUND_MARGIN) {
            return 1.0d;
        }
        this.minRegisteredValue = Math.min(this.minRegisteredValue, this.minHeuristicEval);
        this.maxRegisteredValue = Math.max(this.maxRegisteredValue, this.maxHeuristicEval);
        float span = this.maxRegisteredValue - this.minRegisteredValue;
        if (!(span > 0.0f) || Float.isInfinite(span)) {
            // Zero, NaN or infinite spread: the linear map is undefined, so
            // answer with the midpoint of the [-0.8, 0.8] interval.
            return 0.0d;
        }
        return (-0.8d) + (1.6d * ((f - this.minRegisteredValue) / span));
    }

    /**
     * Replaces the policy used to compute move logits.
     *
     * @param softmaxPolicy new policy
     */
    public void setLearnedSelectionPolicy(SoftmaxPolicy softmaxPolicy) {
        this.learnedSelectionPolicy = softmaxPolicy;
    }

    /**
     * Sets the weight of the logit-based offset for all instances.
     *
     * @param f new weight
     */
    public static void setActionEvaluationWeight(float f) {
        actionEvaluationWeight = f;
    }
}
