Python port of Java dice game algorithm

I am in the process of learning Python (background in C++ and R). So after the obligatory "Hello World", I decided that my first non-trivial program would be a port of a Java implementation of the counter-factual regret minimization algorithm for a simple dice game called Liar Die [original source, Joodle online compiler].

The program runs a million simulations of the dice game and computes the optimal bluffing/calling frequencies. It does this by creating Node class instances for all decision points in the game, and keeping track of the various actions the player to move can make, as well as the expected values of those actions.

I then tried to translate this into Python as faithfully as possible:

import numpy as np



class LiarDieTrainer:

    DOUBT, ACCEPT = 0, 1



    class Node:

        u, pPlayer, pOpponent = 0.0, 0.0, 0.0



        def __init__(self, numActions):

            self.regretSum = np.zeros(numActions)

            self.strategy = np.zeros(numActions)

            self.strategySum = np.zeros(numActions)



        def getStrategy(self):            

            self.strategy = np.maximum(self.regretSum, 0)

            normalizingSum = np.sum(self.strategy)

            if normalizingSum > 0:

                self.strategy /= normalizingSum

            else:

                self.strategy.fill(1.0/len(self.strategy))

            self.strategySum += self.pPlayer * self.strategy

            return self.strategy



        def getAverageStrategy(self):

            normalizingSum = np.sum(self.strategySum)

            if normalizingSum > 0:

                self.strategySum /= normalizingSum

            else:

                self.strategySum.fill(1.0/len(self.strategySum))

            return self.strategySum



    def __init__(self, sides):

        self.sides = sides

        self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)

        for myClaim in range(sides):

            for oppClaim in range(myClaim+1, sides+1):                

                self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)

        self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)

        for oppClaim  in range(sides):

            for roll in range(1, sides+1):

                self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)



    def train(self, iterations):

        regret = np.zeros(self.sides)

        rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)

        for it in range(iterations):

            for i in range(len(rollAfterAcceptingClaim)):

                rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1



            for oppClaim in range(self.sides+1):

                if oppClaim > 0:

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.getStrategy()

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            nextNode.pPlayer += actionProb[1] * node.pPlayer

                            nextNode.pOpponent += node.pOpponent



                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.getStrategy()

                    for myClaim in range(oppClaim+1, self.sides+1):

                        nextClaimProb = actionProb[myClaim - oppClaim - 1]

                        if nextClaimProb > 0:

                            nextNode = self.responseNodes[oppClaim, myClaim]

                            nextNode.pPlayer += node.pOpponent

                            nextNode.pOpponent += nextClaimProb * node.pPlayer



            for oppClaim in reversed(range(self.sides+1)):

                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.strategy

                    node.u = 0.0

                    for myClaim in range(oppClaim+1, self.sides+1):

                        actionIndex = myClaim - oppClaim - 1

                        nextNode = self.responseNodes[oppClaim, myClaim]

                        childUtil = - nextNode.u

                        regret[actionIndex] = childUtil

                        node.u += actionProb[actionIndex] * childUtil

                    for a in range(len(actionProb)):

                        regret[a] -= node.u

                        node.regretSum[a] += node.pOpponent * regret[a]

                    node.pPlayer = node.pOpponent = 0              



                if oppClaim > 0:                    

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.strategy

                        node.u = 0.0

                        doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1

                        regret[self.DOUBT] = doubtUtil

                        node.u += actionProb[self.DOUBT] * doubtUtil

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            regret[self.ACCEPT] += nextNode.u

                            node.u += actionProb[self.ACCEPT] * nextNode.u

                        for a in range(len(actionProb)):

                            regret[a] -= node.u

                            node.regretSum[a] += node.pOpponent * regret[a]

                        node.pPlayer = node.pOpponent = 0



            if it == iterations // 2:

                for nodes in self.responseNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)

                for nodes in self.claimNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)                  



        for initialRoll in range(1, self.sides+1):

            print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))       

        print("nOld ClaimtNew ClaimtAction Probabilities")            

        for myClaim in range(self.sides):

            for oppClaim in range(myClaim+1, self.sides+1):

                print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

        print("nOld ClaimtRolltAction Probabilities")

        for oppClaim in range(self.sides):

            for roll in range(1, self.sides+1):

                print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))



trainer = LiarDieTrainer(6)

trainer.train(1000)

Working example on the Ideone online compiler (factor of 1000 less iterations, apparently Python is way slower than even Java). Unfortunately, the algorithm works by randomly throwing dice, and the Java/Python random number generators give different sequences, and the dice game may not have a unique equilibrium anyway. This means I can't directly compare the outcomes.

Questions:

how can I make my code more Pythonic?

which other idioms / coding style should I apply?

which other useful libraries (besides NumPy) could I have used for this exercise?

edited Jan 26 at 14:42

Imus

3,353223

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

2

Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20

@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35

add a comment |

I then tried to translate this into Python as faithfully as possible:

import numpy as np



class LiarDieTrainer:

    DOUBT, ACCEPT = 0, 1



    class Node:

        u, pPlayer, pOpponent = 0.0, 0.0, 0.0



        def __init__(self, numActions):

            self.regretSum = np.zeros(numActions)

            self.strategy = np.zeros(numActions)

            self.strategySum = np.zeros(numActions)



        def getStrategy(self):            

            self.strategy = np.maximum(self.regretSum, 0)

            normalizingSum = np.sum(self.strategy)

            if normalizingSum > 0:

                self.strategy /= normalizingSum

            else:

                self.strategy.fill(1.0/len(self.strategy))

            self.strategySum += self.pPlayer * self.strategy

            return self.strategy



        def getAverageStrategy(self):

            normalizingSum = np.sum(self.strategySum)

            if normalizingSum > 0:

                self.strategySum /= normalizingSum

            else:

                self.strategySum.fill(1.0/len(self.strategySum))

            return self.strategySum



    def __init__(self, sides):

        self.sides = sides

        self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)

        for myClaim in range(sides):

            for oppClaim in range(myClaim+1, sides+1):                

                self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)

        self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)

        for oppClaim  in range(sides):

            for roll in range(1, sides+1):

                self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)



    def train(self, iterations):

        regret = np.zeros(self.sides)

        rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)

        for it in range(iterations):

            for i in range(len(rollAfterAcceptingClaim)):

                rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1



            for oppClaim in range(self.sides+1):

                if oppClaim > 0:

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.getStrategy()

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            nextNode.pPlayer += actionProb[1] * node.pPlayer

                            nextNode.pOpponent += node.pOpponent



                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.getStrategy()

                    for myClaim in range(oppClaim+1, self.sides+1):

                        nextClaimProb = actionProb[myClaim - oppClaim - 1]

                        if nextClaimProb > 0:

                            nextNode = self.responseNodes[oppClaim, myClaim]

                            nextNode.pPlayer += node.pOpponent

                            nextNode.pOpponent += nextClaimProb * node.pPlayer



            for oppClaim in reversed(range(self.sides+1)):

                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.strategy

                    node.u = 0.0

                    for myClaim in range(oppClaim+1, self.sides+1):

                        actionIndex = myClaim - oppClaim - 1

                        nextNode = self.responseNodes[oppClaim, myClaim]

                        childUtil = - nextNode.u

                        regret[actionIndex] = childUtil

                        node.u += actionProb[actionIndex] * childUtil

                    for a in range(len(actionProb)):

                        regret[a] -= node.u

                        node.regretSum[a] += node.pOpponent * regret[a]

                    node.pPlayer = node.pOpponent = 0              



                if oppClaim > 0:                    

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.strategy

                        node.u = 0.0

                        doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1

                        regret[self.DOUBT] = doubtUtil

                        node.u += actionProb[self.DOUBT] * doubtUtil

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            regret[self.ACCEPT] += nextNode.u

                            node.u += actionProb[self.ACCEPT] * nextNode.u

                        for a in range(len(actionProb)):

                            regret[a] -= node.u

                            node.regretSum[a] += node.pOpponent * regret[a]

                        node.pPlayer = node.pOpponent = 0



            if it == iterations // 2:

                for nodes in self.responseNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)

                for nodes in self.claimNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)                  



        for initialRoll in range(1, self.sides+1):

            print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))       

        print("nOld ClaimtNew ClaimtAction Probabilities")            

        for myClaim in range(self.sides):

            for oppClaim in range(myClaim+1, self.sides+1):

                print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

        print("nOld ClaimtRolltAction Probabilities")

        for oppClaim in range(self.sides):

            for roll in range(1, self.sides+1):

                print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))



trainer = LiarDieTrainer(6)

trainer.train(1000)

Questions:

how can I make my code more Pythonic?

which other idioms / coding style should I apply?

which other useful libraries (besides NumPy) could I have used for this exercise?

edited Jan 26 at 14:42

Imus

3,353223

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

2

Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20

@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35

add a comment |

I then tried to translate this into Python as faithfully as possible:

import numpy as np



class LiarDieTrainer:

    DOUBT, ACCEPT = 0, 1



    class Node:

        u, pPlayer, pOpponent = 0.0, 0.0, 0.0



        def __init__(self, numActions):

            self.regretSum = np.zeros(numActions)

            self.strategy = np.zeros(numActions)

            self.strategySum = np.zeros(numActions)



        def getStrategy(self):            

            self.strategy = np.maximum(self.regretSum, 0)

            normalizingSum = np.sum(self.strategy)

            if normalizingSum > 0:

                self.strategy /= normalizingSum

            else:

                self.strategy.fill(1.0/len(self.strategy))

            self.strategySum += self.pPlayer * self.strategy

            return self.strategy



        def getAverageStrategy(self):

            normalizingSum = np.sum(self.strategySum)

            if normalizingSum > 0:

                self.strategySum /= normalizingSum

            else:

                self.strategySum.fill(1.0/len(self.strategySum))

            return self.strategySum



    def __init__(self, sides):

        self.sides = sides

        self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)

        for myClaim in range(sides):

            for oppClaim in range(myClaim+1, sides+1):                

                self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)

        self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)

        for oppClaim  in range(sides):

            for roll in range(1, sides+1):

                self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)



    def train(self, iterations):

        regret = np.zeros(self.sides)

        rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)

        for it in range(iterations):

            for i in range(len(rollAfterAcceptingClaim)):

                rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1



            for oppClaim in range(self.sides+1):

                if oppClaim > 0:

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.getStrategy()

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            nextNode.pPlayer += actionProb[1] * node.pPlayer

                            nextNode.pOpponent += node.pOpponent



                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.getStrategy()

                    for myClaim in range(oppClaim+1, self.sides+1):

                        nextClaimProb = actionProb[myClaim - oppClaim - 1]

                        if nextClaimProb > 0:

                            nextNode = self.responseNodes[oppClaim, myClaim]

                            nextNode.pPlayer += node.pOpponent

                            nextNode.pOpponent += nextClaimProb * node.pPlayer



            for oppClaim in reversed(range(self.sides+1)):

                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.strategy

                    node.u = 0.0

                    for myClaim in range(oppClaim+1, self.sides+1):

                        actionIndex = myClaim - oppClaim - 1

                        nextNode = self.responseNodes[oppClaim, myClaim]

                        childUtil = - nextNode.u

                        regret[actionIndex] = childUtil

                        node.u += actionProb[actionIndex] * childUtil

                    for a in range(len(actionProb)):

                        regret[a] -= node.u

                        node.regretSum[a] += node.pOpponent * regret[a]

                    node.pPlayer = node.pOpponent = 0              



                if oppClaim > 0:                    

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.strategy

                        node.u = 0.0

                        doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1

                        regret[self.DOUBT] = doubtUtil

                        node.u += actionProb[self.DOUBT] * doubtUtil

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            regret[self.ACCEPT] += nextNode.u

                            node.u += actionProb[self.ACCEPT] * nextNode.u

                        for a in range(len(actionProb)):

                            regret[a] -= node.u

                            node.regretSum[a] += node.pOpponent * regret[a]

                        node.pPlayer = node.pOpponent = 0



            if it == iterations // 2:

                for nodes in self.responseNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)

                for nodes in self.claimNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)                  



        for initialRoll in range(1, self.sides+1):

            print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))       

        print("nOld ClaimtNew ClaimtAction Probabilities")            

        for myClaim in range(self.sides):

            for oppClaim in range(myClaim+1, self.sides+1):

                print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

        print("nOld ClaimtRolltAction Probabilities")

        for oppClaim in range(self.sides):

            for roll in range(1, self.sides+1):

                print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))



trainer = LiarDieTrainer(6)

trainer.train(1000)

Questions:

how can I make my code more Pythonic?

which other idioms / coding style should I apply?

which other useful libraries (besides NumPy) could I have used for this exercise?

edited Jan 26 at 14:42

Imus

3,353223

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

I then tried to translate this into Python as faithfully as possible:

import numpy as np



class LiarDieTrainer:

    DOUBT, ACCEPT = 0, 1



    class Node:

        u, pPlayer, pOpponent = 0.0, 0.0, 0.0



        def __init__(self, numActions):

            self.regretSum = np.zeros(numActions)

            self.strategy = np.zeros(numActions)

            self.strategySum = np.zeros(numActions)



        def getStrategy(self):            

            self.strategy = np.maximum(self.regretSum, 0)

            normalizingSum = np.sum(self.strategy)

            if normalizingSum > 0:

                self.strategy /= normalizingSum

            else:

                self.strategy.fill(1.0/len(self.strategy))

            self.strategySum += self.pPlayer * self.strategy

            return self.strategy



        def getAverageStrategy(self):

            normalizingSum = np.sum(self.strategySum)

            if normalizingSum > 0:

                self.strategySum /= normalizingSum

            else:

                self.strategySum.fill(1.0/len(self.strategySum))

            return self.strategySum



    def __init__(self, sides):

        self.sides = sides

        self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)

        for myClaim in range(sides):

            for oppClaim in range(myClaim+1, sides+1):                

                self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)

        self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)

        for oppClaim  in range(sides):

            for roll in range(1, sides+1):

                self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)



    def train(self, iterations):

        regret = np.zeros(self.sides)

        rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)

        for it in range(iterations):

            for i in range(len(rollAfterAcceptingClaim)):

                rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

            self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1



            for oppClaim in range(self.sides+1):

                if oppClaim > 0:

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.getStrategy()

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            nextNode.pPlayer += actionProb[1] * node.pPlayer

                            nextNode.pOpponent += node.pOpponent



                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.getStrategy()

                    for myClaim in range(oppClaim+1, self.sides+1):

                        nextClaimProb = actionProb[myClaim - oppClaim - 1]

                        if nextClaimProb > 0:

                            nextNode = self.responseNodes[oppClaim, myClaim]

                            nextNode.pPlayer += node.pOpponent

                            nextNode.pOpponent += nextClaimProb * node.pPlayer



            for oppClaim in reversed(range(self.sides+1)):

                if oppClaim < self.sides:

                    node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                    actionProb = node.strategy

                    node.u = 0.0

                    for myClaim in range(oppClaim+1, self.sides+1):

                        actionIndex = myClaim - oppClaim - 1

                        nextNode = self.responseNodes[oppClaim, myClaim]

                        childUtil = - nextNode.u

                        regret[actionIndex] = childUtil

                        node.u += actionProb[actionIndex] * childUtil

                    for a in range(len(actionProb)):

                        regret[a] -= node.u

                        node.regretSum[a] += node.pOpponent * regret[a]

                    node.pPlayer = node.pOpponent = 0              



                if oppClaim > 0:                    

                    for myClaim in range(oppClaim):

                        node = self.responseNodes[myClaim, oppClaim]

                        actionProb = node.strategy

                        node.u = 0.0

                        doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1

                        regret[self.DOUBT] = doubtUtil

                        node.u += actionProb[self.DOUBT] * doubtUtil

                        if oppClaim < self.sides:

                            nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]

                            regret[self.ACCEPT] += nextNode.u

                            node.u += actionProb[self.ACCEPT] * nextNode.u

                        for a in range(len(actionProb)):

                            regret[a] -= node.u

                            node.regretSum[a] += node.pOpponent * regret[a]

                        node.pPlayer = node.pOpponent = 0



            if it == iterations // 2:

                for nodes in self.responseNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)

                for nodes in self.claimNodes:

                    for node in nodes:

                        if node:

                            node.strategySum.fill(0)                  



        for initialRoll in range(1, self.sides+1):

            print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))       

        print("nOld ClaimtNew ClaimtAction Probabilities")            

        for myClaim in range(self.sides):

            for oppClaim in range(myClaim+1, self.sides+1):

                print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

        print("nOld ClaimtRolltAction Probabilities")

        for oppClaim in range(self.sides):

            for roll in range(1, self.sides+1):

                print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))



trainer = LiarDieTrainer(6)

trainer.train(1000)

Questions:

how can I make my code more Pythonic?

which other idioms / coding style should I apply?

which other useful libraries (besides NumPy) could I have used for this exercise?

python algorithm game dice

edited Jan 26 at 14:42

Imus

3,353223

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

edited Jan 26 at 14:42

Imus

3,353223

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

edited Jan 26 at 14:42

Imus

3,353223

edited Jan 26 at 14:42

Imus

3,353223

edited Jan 26 at 14:42

Imus

3,353223

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

asked Dec 27 '17 at 20:45

TemplateRex

1,588922

2

Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20

@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35

add a comment |

2

Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20

@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35

Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20

@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35

add a comment |

1 Answer
1

active

oldest

votes

Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.

You have some inconsistent spacing here:

for oppClaim  in range(sides):

A linter would catch both of these issues.

This:

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1

should use a temporary variable:

node = self.claim_nodes[0, roll_after_accepting_claim[0]]

node.p_player = 1

node.p_opponent = 1

These two loops:

            for nodes in self.responseNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

            for nodes in self.claimNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

can be refactored into one set of nested loops:

for node_source in (self.response_node, self.claim_nodes):

    for nodes in node_source:

        for node in nodes:

            if node:

                node.strategy_sum.fill(0)

Strings such as this:

print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

are good candidates for being converted to f-strings:

ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()

print(f't{my_claim}t{opp_claim}t{ave_strategy}')

Also, since you're printing tabular data, you should apply fixed field widths to both your heading string and your row strings. To learn more about field widths, read about the Format Specification Mini-Language.

You should consider adding a main function instead of calling train from global code.

answered 29 mins ago

Reinderien

2,436619

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f183735%2fpython-port-of-java-dice-game-algorithm%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

1 Answer
1

active

oldest

votes

1 Answer
1

active

oldest

votes

Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.

You have some inconsistent spacing here:

for oppClaim  in range(sides):

A linter would catch both of these issues.

This:

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1

should use a temporary variable:

node = self.claim_nodes[0, roll_after_accepting_claim[0]]

node.p_player = 1

node.p_opponent = 1

These two loops:

            for nodes in self.responseNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

            for nodes in self.claimNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

can be refactored into one set of nested loops:

for node_source in (self.response_node, self.claim_nodes):

    for nodes in node_source:

        for node in nodes:

            if node:

                node.strategy_sum.fill(0)

Strings such as this:

print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

are good candidates for being converted to f-strings:

ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()

print(f't{my_claim}t{opp_claim}t{ave_strategy}')

You should consider adding a main function instead of calling train from global code.

answered 29 mins ago

Reinderien

2,436619

add a comment |

Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.

You have some inconsistent spacing here:

for oppClaim  in range(sides):

A linter would catch both of these issues.

This:

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1

should use a temporary variable:

node = self.claim_nodes[0, roll_after_accepting_claim[0]]

node.p_player = 1

node.p_opponent = 1

These two loops:

            for nodes in self.responseNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

            for nodes in self.claimNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

can be refactored into one set of nested loops:

for node_source in (self.response_node, self.claim_nodes):

    for nodes in node_source:

        for node in nodes:

            if node:

                node.strategy_sum.fill(0)

Strings such as this:

print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

are good candidates for being converted to f-strings:

ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()

print(f't{my_claim}t{opp_claim}t{ave_strategy}')

You should consider adding a main function instead of calling train from global code.

answered 29 mins ago

Reinderien

2,436619

add a comment |

Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.

You have some inconsistent spacing here:

for oppClaim  in range(sides):

A linter would catch both of these issues.

This:

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1

should use a temporary variable:

node = self.claim_nodes[0, roll_after_accepting_claim[0]]

node.p_player = 1

node.p_opponent = 1

These two loops:

            for nodes in self.responseNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

            for nodes in self.claimNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

can be refactored into one set of nested loops:

for node_source in (self.response_node, self.claim_nodes):

    for nodes in node_source:

        for node in nodes:

            if node:

                node.strategy_sum.fill(0)

Strings such as this:

print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

are good candidates for being converted to f-strings:

ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()

print(f't{my_claim}t{opp_claim}t{ave_strategy}')

You should consider adding a main function instead of calling train from global code.

answered 29 mins ago

Reinderien

2,436619

Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.

You have some inconsistent spacing here:

for oppClaim  in range(sides):

A linter would catch both of these issues.

This:

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1

        self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1

should use a temporary variable:

node = self.claim_nodes[0, roll_after_accepting_claim[0]]

node.p_player = 1

node.p_opponent = 1

These two loops:

            for nodes in self.responseNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

            for nodes in self.claimNodes:

                for node in nodes:

                    if node:

                        node.strategySum.fill(0)

can be refactored into one set of nested loops:

for node_source in (self.response_node, self.claim_nodes):

    for nodes in node_source:

        for node in nodes:

            if node:

                node.strategy_sum.fill(0)

Strings such as this:

print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))

are good candidates for being converted to f-strings:

ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()

print(f't{my_claim}t{opp_claim}t{ave_strategy}')

You should consider adding a main function instead of calling train from global code.

answered 29 mins ago

Reinderien

2,436619

answered 29 mins ago

Reinderien

2,436619

answered 29 mins ago

Reinderien

2,436619

answered 29 mins ago

Reinderien

2,436619

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Code Review Stack Exchange!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

Use MathJax to format equations. MathJax reference.

To learn more, see our tips on writing great answers.

Some of your past answers have not been well-received, and you're in danger of being blocked from answering.

Please pay close attention to the following guidance:

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

搜尋此網誌

Cfrtjryk