Python port of Java dice game algorithm
I am in the process of learning Python (background in C++ and R). So after the obligatory "Hello World", I decided that my first non-trivial program would be a port of a Java implementation of the counter-factual regret minimization algorithm for a simple dice game called Liar Die [original source, Joodle online compiler].
The program runs a million simulations of the dice game and computes the optimal bluffing/calling frequencies. It does this by creating Node
class instances for all decision points in the game, and keeping track of the various actions the player to move can make, as well as the expected values of those actions.
I then tried to translate this into Python as faithfully as possible:
import numpy as np
class LiarDieTrainer:
DOUBT, ACCEPT = 0, 1
class Node:
u, pPlayer, pOpponent = 0.0, 0.0, 0.0
def __init__(self, numActions):
self.regretSum = np.zeros(numActions)
self.strategy = np.zeros(numActions)
self.strategySum = np.zeros(numActions)
def getStrategy(self):
self.strategy = np.maximum(self.regretSum, 0)
normalizingSum = np.sum(self.strategy)
if normalizingSum > 0:
self.strategy /= normalizingSum
else:
self.strategy.fill(1.0/len(self.strategy))
self.strategySum += self.pPlayer * self.strategy
return self.strategy
def getAverageStrategy(self):
normalizingSum = np.sum(self.strategySum)
if normalizingSum > 0:
self.strategySum /= normalizingSum
else:
self.strategySum.fill(1.0/len(self.strategySum))
return self.strategySum
def __init__(self, sides):
self.sides = sides
self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)
for myClaim in range(sides):
for oppClaim in range(myClaim+1, sides+1):
self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)
self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)
for oppClaim in range(sides):
for roll in range(1, sides+1):
self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)
def train(self, iterations):
regret = np.zeros(self.sides)
rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)
for it in range(iterations):
for i in range(len(rollAfterAcceptingClaim)):
rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
for oppClaim in range(self.sides+1):
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.getStrategy()
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
nextNode.pPlayer += actionProb[1] * node.pPlayer
nextNode.pOpponent += node.pOpponent
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.getStrategy()
for myClaim in range(oppClaim+1, self.sides+1):
nextClaimProb = actionProb[myClaim - oppClaim - 1]
if nextClaimProb > 0:
nextNode = self.responseNodes[oppClaim, myClaim]
nextNode.pPlayer += node.pOpponent
nextNode.pOpponent += nextClaimProb * node.pPlayer
for oppClaim in reversed(range(self.sides+1)):
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.strategy
node.u = 0.0
for myClaim in range(oppClaim+1, self.sides+1):
actionIndex = myClaim - oppClaim - 1
nextNode = self.responseNodes[oppClaim, myClaim]
childUtil = - nextNode.u
regret[actionIndex] = childUtil
node.u += actionProb[actionIndex] * childUtil
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.strategy
node.u = 0.0
doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1
regret[self.DOUBT] = doubtUtil
node.u += actionProb[self.DOUBT] * doubtUtil
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
regret[self.ACCEPT] += nextNode.u
node.u += actionProb[self.ACCEPT] * nextNode.u
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if it == iterations // 2:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for initialRoll in range(1, self.sides+1):
print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))
print("nOld ClaimtNew ClaimtAction Probabilities")
for myClaim in range(self.sides):
for oppClaim in range(myClaim+1, self.sides+1):
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
print("nOld ClaimtRolltAction Probabilities")
for oppClaim in range(self.sides):
for roll in range(1, self.sides+1):
print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))
trainer = LiarDieTrainer(6)
trainer.train(1000)
Working example on the Ideone online compiler (factor of 1000 less iterations, apparently Python is way slower than even Java). Unfortunately, the algorithm works by randomly throwing dice, and the Java/Python random number generators give different sequences, and the dice game may not have a unique equilibrium anyway. This means I can't directly compare the outcomes.
Questions:
- how can I make my code more Pythonic?
- which other idioms / coding style should I apply?
- which other useful libraries (besides NumPy) could I have used for this exercise?
python algorithm game dice
add a comment |
I am in the process of learning Python (background in C++ and R). So after the obligatory "Hello World", I decided that my first non-trivial program would be a port of a Java implementation of the counter-factual regret minimization algorithm for a simple dice game called Liar Die [original source, Joodle online compiler].
The program runs a million simulations of the dice game and computes the optimal bluffing/calling frequencies. It does this by creating Node
class instances for all decision points in the game, and keeping track of the various actions the player to move can make, as well as the expected values of those actions.
I then tried to translate this into Python as faithfully as possible:
import numpy as np
class LiarDieTrainer:
DOUBT, ACCEPT = 0, 1
class Node:
u, pPlayer, pOpponent = 0.0, 0.0, 0.0
def __init__(self, numActions):
self.regretSum = np.zeros(numActions)
self.strategy = np.zeros(numActions)
self.strategySum = np.zeros(numActions)
def getStrategy(self):
self.strategy = np.maximum(self.regretSum, 0)
normalizingSum = np.sum(self.strategy)
if normalizingSum > 0:
self.strategy /= normalizingSum
else:
self.strategy.fill(1.0/len(self.strategy))
self.strategySum += self.pPlayer * self.strategy
return self.strategy
def getAverageStrategy(self):
normalizingSum = np.sum(self.strategySum)
if normalizingSum > 0:
self.strategySum /= normalizingSum
else:
self.strategySum.fill(1.0/len(self.strategySum))
return self.strategySum
def __init__(self, sides):
self.sides = sides
self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)
for myClaim in range(sides):
for oppClaim in range(myClaim+1, sides+1):
self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)
self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)
for oppClaim in range(sides):
for roll in range(1, sides+1):
self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)
def train(self, iterations):
regret = np.zeros(self.sides)
rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)
for it in range(iterations):
for i in range(len(rollAfterAcceptingClaim)):
rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
for oppClaim in range(self.sides+1):
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.getStrategy()
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
nextNode.pPlayer += actionProb[1] * node.pPlayer
nextNode.pOpponent += node.pOpponent
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.getStrategy()
for myClaim in range(oppClaim+1, self.sides+1):
nextClaimProb = actionProb[myClaim - oppClaim - 1]
if nextClaimProb > 0:
nextNode = self.responseNodes[oppClaim, myClaim]
nextNode.pPlayer += node.pOpponent
nextNode.pOpponent += nextClaimProb * node.pPlayer
for oppClaim in reversed(range(self.sides+1)):
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.strategy
node.u = 0.0
for myClaim in range(oppClaim+1, self.sides+1):
actionIndex = myClaim - oppClaim - 1
nextNode = self.responseNodes[oppClaim, myClaim]
childUtil = - nextNode.u
regret[actionIndex] = childUtil
node.u += actionProb[actionIndex] * childUtil
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.strategy
node.u = 0.0
doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1
regret[self.DOUBT] = doubtUtil
node.u += actionProb[self.DOUBT] * doubtUtil
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
regret[self.ACCEPT] += nextNode.u
node.u += actionProb[self.ACCEPT] * nextNode.u
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if it == iterations // 2:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for initialRoll in range(1, self.sides+1):
print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))
print("nOld ClaimtNew ClaimtAction Probabilities")
for myClaim in range(self.sides):
for oppClaim in range(myClaim+1, self.sides+1):
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
print("nOld ClaimtRolltAction Probabilities")
for oppClaim in range(self.sides):
for roll in range(1, self.sides+1):
print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))
trainer = LiarDieTrainer(6)
trainer.train(1000)
Working example on the Ideone online compiler (factor of 1000 less iterations, apparently Python is way slower than even Java). Unfortunately, the algorithm works by randomly throwing dice, and the Java/Python random number generators give different sequences, and the dice game may not have a unique equilibrium anyway. This means I can't directly compare the outcomes.
Questions:
- how can I make my code more Pythonic?
- which other idioms / coding style should I apply?
- which other useful libraries (besides NumPy) could I have used for this exercise?
python algorithm game dice
2
Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20
@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35
add a comment |
I am in the process of learning Python (background in C++ and R). So after the obligatory "Hello World", I decided that my first non-trivial program would be a port of a Java implementation of the counter-factual regret minimization algorithm for a simple dice game called Liar Die [original source, Joodle online compiler].
The program runs a million simulations of the dice game and computes the optimal bluffing/calling frequencies. It does this by creating Node
class instances for all decision points in the game, and keeping track of the various actions the player to move can make, as well as the expected values of those actions.
I then tried to translate this into Python as faithfully as possible:
import numpy as np
class LiarDieTrainer:
DOUBT, ACCEPT = 0, 1
class Node:
u, pPlayer, pOpponent = 0.0, 0.0, 0.0
def __init__(self, numActions):
self.regretSum = np.zeros(numActions)
self.strategy = np.zeros(numActions)
self.strategySum = np.zeros(numActions)
def getStrategy(self):
self.strategy = np.maximum(self.regretSum, 0)
normalizingSum = np.sum(self.strategy)
if normalizingSum > 0:
self.strategy /= normalizingSum
else:
self.strategy.fill(1.0/len(self.strategy))
self.strategySum += self.pPlayer * self.strategy
return self.strategy
def getAverageStrategy(self):
normalizingSum = np.sum(self.strategySum)
if normalizingSum > 0:
self.strategySum /= normalizingSum
else:
self.strategySum.fill(1.0/len(self.strategySum))
return self.strategySum
def __init__(self, sides):
self.sides = sides
self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)
for myClaim in range(sides):
for oppClaim in range(myClaim+1, sides+1):
self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)
self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)
for oppClaim in range(sides):
for roll in range(1, sides+1):
self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)
def train(self, iterations):
regret = np.zeros(self.sides)
rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)
for it in range(iterations):
for i in range(len(rollAfterAcceptingClaim)):
rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
for oppClaim in range(self.sides+1):
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.getStrategy()
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
nextNode.pPlayer += actionProb[1] * node.pPlayer
nextNode.pOpponent += node.pOpponent
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.getStrategy()
for myClaim in range(oppClaim+1, self.sides+1):
nextClaimProb = actionProb[myClaim - oppClaim - 1]
if nextClaimProb > 0:
nextNode = self.responseNodes[oppClaim, myClaim]
nextNode.pPlayer += node.pOpponent
nextNode.pOpponent += nextClaimProb * node.pPlayer
for oppClaim in reversed(range(self.sides+1)):
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.strategy
node.u = 0.0
for myClaim in range(oppClaim+1, self.sides+1):
actionIndex = myClaim - oppClaim - 1
nextNode = self.responseNodes[oppClaim, myClaim]
childUtil = - nextNode.u
regret[actionIndex] = childUtil
node.u += actionProb[actionIndex] * childUtil
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.strategy
node.u = 0.0
doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1
regret[self.DOUBT] = doubtUtil
node.u += actionProb[self.DOUBT] * doubtUtil
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
regret[self.ACCEPT] += nextNode.u
node.u += actionProb[self.ACCEPT] * nextNode.u
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if it == iterations // 2:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for initialRoll in range(1, self.sides+1):
print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))
print("nOld ClaimtNew ClaimtAction Probabilities")
for myClaim in range(self.sides):
for oppClaim in range(myClaim+1, self.sides+1):
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
print("nOld ClaimtRolltAction Probabilities")
for oppClaim in range(self.sides):
for roll in range(1, self.sides+1):
print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))
trainer = LiarDieTrainer(6)
trainer.train(1000)
Working example on the Ideone online compiler (factor of 1000 less iterations, apparently Python is way slower than even Java). Unfortunately, the algorithm works by randomly throwing dice, and the Java/Python random number generators give different sequences, and the dice game may not have a unique equilibrium anyway. This means I can't directly compare the outcomes.
Questions:
- how can I make my code more Pythonic?
- which other idioms / coding style should I apply?
- which other useful libraries (besides NumPy) could I have used for this exercise?
python algorithm game dice
I am in the process of learning Python (background in C++ and R). So after the obligatory "Hello World", I decided that my first non-trivial program would be a port of a Java implementation of the counter-factual regret minimization algorithm for a simple dice game called Liar Die [original source, Joodle online compiler].
The program runs a million simulations of the dice game and computes the optimal bluffing/calling frequencies. It does this by creating Node
class instances for all decision points in the game, and keeping track of the various actions the player to move can make, as well as the expected values of those actions.
I then tried to translate this into Python as faithfully as possible:
import numpy as np
class LiarDieTrainer:
DOUBT, ACCEPT = 0, 1
class Node:
u, pPlayer, pOpponent = 0.0, 0.0, 0.0
def __init__(self, numActions):
self.regretSum = np.zeros(numActions)
self.strategy = np.zeros(numActions)
self.strategySum = np.zeros(numActions)
def getStrategy(self):
self.strategy = np.maximum(self.regretSum, 0)
normalizingSum = np.sum(self.strategy)
if normalizingSum > 0:
self.strategy /= normalizingSum
else:
self.strategy.fill(1.0/len(self.strategy))
self.strategySum += self.pPlayer * self.strategy
return self.strategy
def getAverageStrategy(self):
normalizingSum = np.sum(self.strategySum)
if normalizingSum > 0:
self.strategySum /= normalizingSum
else:
self.strategySum.fill(1.0/len(self.strategySum))
return self.strategySum
def __init__(self, sides):
self.sides = sides
self.responseNodes = np.empty((sides, sides+1), dtype=self.Node)
for myClaim in range(sides):
for oppClaim in range(myClaim+1, sides+1):
self.responseNodes[myClaim, oppClaim] = self.Node(1 if oppClaim == sides else 2)
self.claimNodes = np.empty((sides, sides+1), dtype=self.Node)
for oppClaim in range(sides):
for roll in range(1, sides+1):
self.claimNodes[oppClaim , roll] = self.Node(sides - oppClaim)
def train(self, iterations):
regret = np.zeros(self.sides)
rollAfterAcceptingClaim = np.zeros(self.sides, dtype=int)
for it in range(iterations):
for i in range(len(rollAfterAcceptingClaim)):
rollAfterAcceptingClaim[i] = np.random.randint(self.sides) + 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
for oppClaim in range(self.sides+1):
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.getStrategy()
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
nextNode.pPlayer += actionProb[1] * node.pPlayer
nextNode.pOpponent += node.pOpponent
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.getStrategy()
for myClaim in range(oppClaim+1, self.sides+1):
nextClaimProb = actionProb[myClaim - oppClaim - 1]
if nextClaimProb > 0:
nextNode = self.responseNodes[oppClaim, myClaim]
nextNode.pPlayer += node.pOpponent
nextNode.pOpponent += nextClaimProb * node.pPlayer
for oppClaim in reversed(range(self.sides+1)):
if oppClaim < self.sides:
node = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
actionProb = node.strategy
node.u = 0.0
for myClaim in range(oppClaim+1, self.sides+1):
actionIndex = myClaim - oppClaim - 1
nextNode = self.responseNodes[oppClaim, myClaim]
childUtil = - nextNode.u
regret[actionIndex] = childUtil
node.u += actionProb[actionIndex] * childUtil
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if oppClaim > 0:
for myClaim in range(oppClaim):
node = self.responseNodes[myClaim, oppClaim]
actionProb = node.strategy
node.u = 0.0
doubtUtil = 1 if oppClaim > rollAfterAcceptingClaim[myClaim] else -1
regret[self.DOUBT] = doubtUtil
node.u += actionProb[self.DOUBT] * doubtUtil
if oppClaim < self.sides:
nextNode = self.claimNodes[oppClaim, rollAfterAcceptingClaim[oppClaim]]
regret[self.ACCEPT] += nextNode.u
node.u += actionProb[self.ACCEPT] * nextNode.u
for a in range(len(actionProb)):
regret[a] -= node.u
node.regretSum[a] += node.pOpponent * regret[a]
node.pPlayer = node.pOpponent = 0
if it == iterations // 2:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for initialRoll in range(1, self.sides+1):
print("Initial claim policy with roll %d: %s" % (initialRoll, np.round(self.claimNodes[0, initialRoll].getAverageStrategy(), 2)))
print("nOld ClaimtNew ClaimtAction Probabilities")
for myClaim in range(self.sides):
for oppClaim in range(myClaim+1, self.sides+1):
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
print("nOld ClaimtRolltAction Probabilities")
for oppClaim in range(self.sides):
for roll in range(1, self.sides+1):
print("%dt%dt%s" % (oppClaim , roll, self.claimNodes[oppClaim , roll].getAverageStrategy()))
trainer = LiarDieTrainer(6)
trainer.train(1000)
Working example on the Ideone online compiler (factor of 1000 less iterations, apparently Python is way slower than even Java). Unfortunately, the algorithm works by randomly throwing dice, and the Java/Python random number generators give different sequences, and the dice game may not have a unique equilibrium anyway. This means I can't directly compare the outcomes.
Questions:
- how can I make my code more Pythonic?
- which other idioms / coding style should I apply?
- which other useful libraries (besides NumPy) could I have used for this exercise?
python algorithm game dice
python algorithm game dice
edited Jan 26 at 14:42
Imus
3,353223
3,353223
asked Dec 27 '17 at 20:45
TemplateRex
1,588922
1,588922
2
Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20
@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35
add a comment |
2
Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20
@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35
2
2
Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20
Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20
@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35
@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35
add a comment |
1 Answer
1
active
oldest
votes
Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.
You have some inconsistent spacing here:
for oppClaim in range(sides):
A linter would catch both of these issues.
This:
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
should use a temporary variable:
node = self.claim_nodes[0, roll_after_accepting_claim[0]]
node.p_player = 1
node.p_opponent = 1
These two loops:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
can be refactored into one set of nested loops:
for node_source in (self.response_node, self.claim_nodes):
for nodes in node_source:
for node in nodes:
if node:
node.strategy_sum.fill(0)
Strings such as this:
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
are good candidates for being converted to f-strings:
ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()
print(f't{my_claim}t{opp_claim}t{ave_strategy}')
Also, since you're printing tabular data, you should apply fixed field widths to both your heading string and your row strings. To learn more about field widths, read about the Format Specification Mini-Language.
You should consider adding a main
function instead of calling train
from global code.
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f183735%2fpython-port-of-java-dice-game-algorithm%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.
You have some inconsistent spacing here:
for oppClaim in range(sides):
A linter would catch both of these issues.
This:
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
should use a temporary variable:
node = self.claim_nodes[0, roll_after_accepting_claim[0]]
node.p_player = 1
node.p_opponent = 1
These two loops:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
can be refactored into one set of nested loops:
for node_source in (self.response_node, self.claim_nodes):
for nodes in node_source:
for node in nodes:
if node:
node.strategy_sum.fill(0)
Strings such as this:
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
are good candidates for being converted to f-strings:
ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()
print(f't{my_claim}t{opp_claim}t{ave_strategy}')
Also, since you're printing tabular data, you should apply fixed field widths to both your heading string and your row strings. To learn more about field widths, read about the Format Specification Mini-Language.
You should consider adding a main
function instead of calling train
from global code.
add a comment |
Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.
You have some inconsistent spacing here:
for oppClaim in range(sides):
A linter would catch both of these issues.
This:
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
should use a temporary variable:
node = self.claim_nodes[0, roll_after_accepting_claim[0]]
node.p_player = 1
node.p_opponent = 1
These two loops:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
can be refactored into one set of nested loops:
for node_source in (self.response_node, self.claim_nodes):
for nodes in node_source:
for node in nodes:
if node:
node.strategy_sum.fill(0)
Strings such as this:
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
are good candidates for being converted to f-strings:
ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()
print(f't{my_claim}t{opp_claim}t{ave_strategy}')
Also, since you're printing tabular data, you should apply fixed field widths to both your heading string and your row strings. To learn more about field widths, read about the Format Specification Mini-Language.
You should consider adding a main
function instead of calling train
from global code.
add a comment |
Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.
You have some inconsistent spacing here:
for oppClaim in range(sides):
A linter would catch both of these issues.
This:
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
should use a temporary variable:
node = self.claim_nodes[0, roll_after_accepting_claim[0]]
node.p_player = 1
node.p_opponent = 1
These two loops:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
can be refactored into one set of nested loops:
for node_source in (self.response_node, self.claim_nodes):
for nodes in node_source:
for node in nodes:
if node:
node.strategy_sum.fill(0)
Strings such as this:
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
are good candidates for being converted to f-strings:
ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()
print(f't{my_claim}t{opp_claim}t{ave_strategy}')
Also, since you're printing tabular data, you should apply fixed field widths to both your heading string and your row strings. To learn more about field widths, read about the Format Specification Mini-Language.
You should consider adding a main
function instead of calling train
from global code.
Your function names and variable names are lowerCamelCase when the convention for Python is snake_case.
You have some inconsistent spacing here:
for oppClaim in range(sides):
A linter would catch both of these issues.
This:
self.claimNodes[0, rollAfterAcceptingClaim[0]].pPlayer = 1
self.claimNodes[0, rollAfterAcceptingClaim[0]].pOpponent = 1
should use a temporary variable:
node = self.claim_nodes[0, roll_after_accepting_claim[0]]
node.p_player = 1
node.p_opponent = 1
These two loops:
for nodes in self.responseNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
for nodes in self.claimNodes:
for node in nodes:
if node:
node.strategySum.fill(0)
can be refactored into one set of nested loops:
for node_source in (self.response_node, self.claim_nodes):
for nodes in node_source:
for node in nodes:
if node:
node.strategy_sum.fill(0)
Strings such as this:
print("t%dt%dt%s" % (myClaim, oppClaim, self.responseNodes[myClaim, oppClaim].getAverageStrategy()))
are good candidates for being converted to f-strings:
ave_strategy = self.response_nodes[my_claim, opp_claim].get_average_strategy()
print(f't{my_claim}t{opp_claim}t{ave_strategy}')
Also, since you're printing tabular data, you should apply fixed field widths to both your heading string and your row strings. To learn more about field widths, read about the Format Specification Mini-Language.
You should consider adding a main
function instead of calling train
from global code.
answered 29 mins ago
Reinderien
2,436619
2,436619
add a comment |
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f183735%2fpython-port-of-java-dice-game-algorithm%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
2
Was numpy really useful here? Did you try replacing it by normal Python lists? Numpy has benefits when working with a lot of data and using vectorized operations... it will lose to normal lists otherwise. Also, the typical, remarks: naming, use snake_case for variable names, method names etc. Unfortunately, I'm not familiar with the game, and it's too much code to try to figure it out from the source.
– wvxvw
Dec 28 '17 at 15:20
@wvxvw thanks, the naming was literally taken from the Java source. I guess I should change that. Re NumPy: this is because I want to expand this code into something that uses matrix inversion etc. (for Bayesian updating).
– TemplateRex
Dec 28 '17 at 15:35