Value Iteration coding project (reinforcement learning).

Detailed instructions are in the accompanying PDF file.

Code given:

def createRewardTable(transitionTable, normalCost, trapDict, bonusDict):
    """Build the reward table R[s][a][s'] for a grid-world MDP.

    Every reachable successor starts at the uniform step cost, then any
    successor that is a trap or bonus cell has its reward overwritten with
    the terminal value from trapDict / bonusDict.

    Args:
        transitionTable: nested dict {s: {a: {s': prob}}}; only its key
            structure is read, probabilities are ignored.
        normalCost: reward for every non-terminal transition (e.g. -0.04).
        trapDict: {state: negative terminal reward}.
        bonusDict: {state: positive terminal reward}.

    Returns:
        Nested dict {s: {a: {s': reward}}} mirroring transitionTable's keys.
    """
    rewardTable = {s: {action: {sPrime: normalCost
                                for sPrime in transitionTable[s][action]}
                       for action in transitionTable[s]}
                   for s in transitionTable}

    # Merge terminal rewards; a state present in both dicts takes the bonus
    # value, matching the original trap-then-bonus overwrite order.
    terminalReward = {**trapDict, **bonusDict}

    for s in rewardTable:
        for a in rewardTable[s]:
            for sPrime, reward in terminalReward.items():
                # Membership test on the key, not `.get(...) != None` on the
                # value: presence of the successor is what matters, and the
                # chained gets were fragile and unidiomatic.
                if sPrime in rewardTable[s][a]:
                    rewardTable[s][a][sPrime] = reward
    return rewardTable

import numpy as np

import drawHeatMap as hm

import rewardTable as rt

import transitionTable as tt

##################################################

##################################################

def main():
    """Run value iteration on a 4x3 grid world and draw the resulting value map.

    Fixes a latent NameError: the original called hm.drawFinalMap(V, policy, ...)
    without ever computing the converged V or any policy — the value-iteration
    core between the marker lines was missing and is implemented here.
    """
    minX, maxX, minY, maxY = (0, 3, 0, 2)
    # NOTE(review): 10e-7 is 1e-6, not 1e-7 — confirm which tolerance the
    # assignment intended; kept as written.
    convergenceTolerance = 10e-7
    roundingTolerance = 10e-7
    gamma = 0.8  # discount factor

    possibleAction = [(0, 1), (0, -1), (1, 0), (-1, 0)]
    # Range from minX/minY for consistency with createTransitionTable's
    # state enumeration (original hard-coded a 0 lower bound).
    possibleState = [(i, j) for i in range(minX, maxX + 1) for j in range(minY, maxY + 1)]
    V = {s: 0 for s in possibleState}

    normalCost = -0.04  # per-step reward for non-terminal transitions
    trapDict = {(3, 1): -1}
    bonusDict = {(3, 0): 1}
    blockList = [(1, 1)]

    p = 0.8
    transitionProbability = {'forward': p, 'left': (1 - p) / 2, 'right': (1 - p) / 2, 'back': 0}
    # Drop zero-probability moves so downstream tables stay sparse.
    transitionProbability = {move: prob for move, prob in transitionProbability.items() if prob != 0}

    transitionTable = tt.createTransitionTable(minX, minY, maxX, maxY, trapDict, bonusDict, blockList, possibleAction, transitionProbability)
    rewardTable = rt.createRewardTable(transitionTable, normalCost, trapDict, bonusDict)

    """
    levelsReward = ["state", "action", "next state", "reward"]
    levelsTransition = ["state", "action", "next state", "probability"]
    viewDictionaryStructure(transitionTable, levelsTransition)
    viewDictionaryStructure(rewardTable, levelsReward)
    """

    ##################################################
    def qValue(s, a, values):
        # One-step Bellman lookahead: expected reward plus discounted value.
        return sum(prob * (rewardTable[s][a][sPrime] + gamma * values[sPrime])
                   for sPrime, prob in transitionTable[s][a].items())

    # Value iteration. Terminal (trap/bonus) and blocked states are absent
    # from transitionTable, so their values stay 0 and entering them yields
    # the terminal reward recorded in rewardTable.
    while True:
        newV = dict(V)
        delta = 0
        for s in transitionTable:
            newV[s] = max(qValue(s, a, V) for a in transitionTable[s])
            delta = max(delta, abs(newV[s] - V[s]))
        V = newV
        if delta < convergenceTolerance:
            break

    # Greedy policy extraction: spread probability uniformly over every
    # action whose Q-value is within roundingTolerance of the best, matching
    # the {state: {action: prob}} shape drawHeatMap expects.
    policy = {}
    for s in transitionTable:
        qValues = {a: qValue(s, a, V) for a in transitionTable[s]}
        best = max(qValues.values())
        optimalActions = [a for a, q in qValues.items() if abs(q - best) < roundingTolerance]
        policy[s] = {a: 1 / len(optimalActions) for a in optimalActions}
    ##################################################

    hm.drawFinalMap(V, policy, trapDict, bonusDict, blockList, normalCost)

# Script entry point: run the demo only when executed directly, not on import.
if __name__=='__main__':
    main()

def transitionFull(s, move, minX, minY, maxX, maxY, blockList):
    """Return the successor of state s under displacement move.

    The candidate cell is clamped to the grid bounds; if it lands on a
    blocked cell, the agent bounces back and stays at s.
    """
    def clamp(value, lo, hi):
        # Keep a coordinate inside [lo, hi].
        return min(max(value, lo), hi)

    x, y = s
    dx, dy = move
    candidate = (clamp(x + dx, minX, maxX), clamp(y + dy, minY, maxY))
    return s if candidate in blockList else candidate

def createTransitionTable(minX, minY, maxX, maxY, trapDict, bonusDict, blockList, possibleAction, transitionProbability):
    """Build the transition table P[s][a][s'] for a stochastic grid world.

    Decision states exclude terminal cells (traps/bonuses) and blocked
    cells. For each state/action, each stochastic outcome ('forward',
    'left', 'right', 'back') is mapped to its resulting cell via
    transitionFull; probabilities of outcomes that collapse onto the same
    successor (bouncing off walls or blocks) are summed.

    Improvements over the original: set-based state exclusion instead of
    repeated list.remove (which is O(n) each and raises ValueError for a
    trap/bonus outside the grid), and a single-pass build instead of
    constructing the table twice and recomputing the move dictionary for
    every (s, a, s') triple.
    """
    excluded = set(trapDict) | set(bonusDict) | set(blockList)
    possibleState = [(i, j) for i in range(minX, maxX + 1)
                     for j in range(minY, maxY + 1) if (i, j) not in excluded]

    # Displacement actually executed for each stochastic outcome, given the
    # intended action: 'forward' keeps it, 'left'/'right' rotate 90 degrees,
    # 'back' reverses.
    moves = {'forward': {(1, 0): (1, 0), (0, -1): (0, -1), (-1, 0): (-1, 0), (0, 1): (0, 1)},
             'left':    {(1, 0): (0, -1), (0, -1): (-1, 0), (-1, 0): (0, 1), (0, 1): (1, 0)},
             'right':   {(1, 0): (0, 1), (0, -1): (1, 0), (-1, 0): (0, -1), (0, 1): (-1, 0)},
             'back':    {(1, 0): (-1, 0), (0, -1): (0, 1), (-1, 0): (1, 0), (0, 1): (0, -1)}}

    transitionTable = {}
    for s in possibleState:
        transitionTable[s] = {}
        for action in possibleAction:
            distribution = {}
            for move, prob in transitionProbability.items():
                sPrime = transitionFull(s, moves[move][action], minX, minY, maxX, maxY, blockList)
                # Accumulate: several outcomes can land on the same successor.
                distribution[sPrime] = distribution.get(sPrime, 0) + prob
            transitionTable[s][action] = distribution
    return transitionTable

import numpy as np

import seaborn as sb

import matplotlib.pyplot as plt

import matplotlib.animation as animation

def drawHeatMap(V, policy, vmin, vmax, trapDict, bonusDict, blockList):
    # NOTE(review): this function appears TRUNCATED by the source extraction.
    # `heatMap` is never assigned, the trap/bonus loops only unpack
    # coordinates, and the innermost action loop has no drawing code —
    # presumably the seaborn heatmap call and per-cell policy-arrow
    # annotations were lost. Recover the original before using.
    #
    # Overlay terminal rewards onto a copy of the value table so the plot
    # shows them instead of the iterated values.
    VPlot=V.copy()
    for bonus, bonusReward in bonusDict.items():
        VPlot[bonus]=bonusReward
    for trap, trapCost in trapDict.items():
        VPlot[trap]=trapCost
    # Split coordinates and values into parallel lists for grid reshaping.
    x ,y, v=([x for (x, y), v in VPlot.items()], [y for (x, y), v in VPlot.items()], [v for (x, y), v in VPlot.items()])
    maxX, maxY=(max(x)+1, max(y)+1)
    # Cell labels: values rounded to 3 decimals, arranged column-major then
    # transposed so rows correspond to y and columns to x.
    label=[str(round(value,3)) for key,value in VPlot.items()]
    label, v=(np.array(label).reshape(maxX,maxY).transpose(), np.array(v).reshape(maxX,maxY).transpose())
    # Boolean mask marking blocked cells (to be rendered blank).
    mask=np.array([(vi in blockList) for vi in V.keys()]).reshape(maxX,maxY).transpose()
    # NOTE(review): these loops only unpack coordinates; the code that used
    # xTrap/yTrap and xBonus/yBonus (likely marker annotations) is missing.
    for trap in trapDict.keys():
        xTrap, yTrap=trap
    for bonus in bonusDict.keys():
        xBonus, yBonus=bonus
    # Iterate non-terminal, non-blocked states — presumably to draw a policy
    # arrow per optimal action; the loop body is missing.
    for s in [s for s in V.keys() if s not in list(trapDict.keys())+list(bonusDict.keys())+blockList]:
        x, y=s
        actions=policy[s].keys()
        for action in actions:
            # NOTE(review): as reconstructed this returns on the first
            # action and raises NameError (`heatMap` undefined).
            return heatMap

def drawFinalMap(V, policy, trapDict, bonusDict, blockList, normalCost):
    """Render the converged value map as a titled heat map and save it as a JPEG.

    The color scale spans all iterated values plus the terminal
    trap/bonus rewards so terminals are never clipped.
    """
    terminalValues = list(trapDict.values()) + list(bonusDict.values())
    vmin = min([min(V.values())] + terminalValues)
    vmax = max([max(V.values())] + terminalValues)

    fig, ax = plt.subplots(figsize=(12, 7))
    plt.title(f"Value Map: R={normalCost}", fontsize=18)
    # Nudge the title slightly above the axes.
    ax.title.set_position([0.5, 1.05])

    drawHeatMap(V, policy, vmin, vmax, trapDict, bonusDict, blockList)
    plt.savefig(f'valueIterationHeatMap_R={normalCost}.jpg')

def createAnimation(VRecord, policyRecord, trapDict, bonusDict, blockList, normalCost):
    """Animate the per-iteration value maps and save the result as a GIF.

    A fixed color scale across all frames (covering every recorded value
    plus the terminal rewards) keeps colors comparable between rounds.
    """
    terminalValues = list(trapDict.values()) + list(bonusDict.values())
    vmin = min([min(V.values()) for V in VRecord] + terminalValues)
    vmax = max([max(V.values()) for V in VRecord] + terminalValues)

    fig, ax = plt.subplots(figsize=(12, 7))

    def animate(i):
        # Redraw frame i from scratch on the shared figure.
        fig.clear()
        plt.title(f"Value Map: Round {i}, R={normalCost}", fontsize=18)
        ax.title.set_position([0.5, 1.05])
        return drawHeatMap(VRecord[i], policyRecord[i], vmin, vmax, trapDict, bonusDict, blockList)

    ani = animation.FuncAnimation(fig, animate, len(VRecord))
    ani.save(f'valueIteration.gif', writer='pillow')

Related Questions

Similar orders to ValueIteration coding project, reinforcement learning
26
Views
0
Biostatistics Project
MUST know how to use SPSS. this is a project I have added the requirements that my professor has asked for...
27
Views
0
Statistics 1 assignment
21
Views
0
statistics assignment
Please do the assignment in attachment...
41
Views
0