import time
from random import choice

# Legacy star import, kept for backward compatibility; array helpers are now
# taken explicitly from numpy instead of relying on names re-exported here.
from scipy import *
import numpy as np
import pylab
from matplotlib.patches import Rectangle

#############################################################
# QLearning
# implements the Q-Learning algorithm on a maze task.
# by: Thomas Rueckstiess, WS 09/10 - Machine Learning I, TUM
#############################################################
class QLearning:
    """Tabular Q-learning agent on a small grid maze, with an optional live plot.

    States are (row, column) cells of ``maze``; actions are indices into
    ``actions``. Every call to :meth:`iteration` performs one environment
    step, one Q-table update and (if a plot was created) one redraw.
    """

    # start and goal cell in the maze, as (row, column)
    start = np.array([4, 5])
    goal = np.array([6, 6])

    # current state, most recently chosen action index and last reward
    state = start
    action = 0
    reward = 0

    # last completed transition as (state before the move, action, reward);
    # None at the beginning of every episode
    history = None

    # actions as (row, column) offsets: right, left, down, up
    actions = [(0, 1), (0, -1), (1, 0), (-1, 0)]

    # maze (0 = free, 1 = wall)
    maze = np.array([[1, 1, 1, 1, 1, 1, 1, 1],
                     [1, 0, 1, 0, 1, 0, 0, 1],
                     [1, 0, 1, 0, 1, 1, 0, 1],
                     [1, 0, 1, 0, 0, 0, 0, 1],
                     [1, 0, 0, 0, 1, 0, 0, 1],
                     [1, 0, 1, 1, 1, 1, 1, 1],
                     [1, 0, 0, 0, 0, 0, 0, 1],
                     [1, 1, 1, 1, 1, 1, 1, 1]])

    # default (all-zero) action values, indexed [row, column, action];
    # each instance works on its own copy, see __init__
    qtable = np.zeros((8, 8, 4))

    # parameters
    epsilon = 0.5          # probability of taking a random action
    epsilondecay = 0.9999  # factor applied to epsilon after every iteration
    gamma = 1              # discount factor
    alpha = 0.5            # learning rate

    # visualization handles (stay None when no plot is created)
    image = None
    rectangle = None
    manager = None

    def __init__(self, visualize=True):
        """Create a learner.

        visualize -- when true (the default) set up the pylab figure showing
                     the maze with the agent marker and the state values;
                     when false no plotting call is made at all.
        """
        # Own copy of the table: the update in iteration() works in place
        # and would otherwise modify the array shared by all instances.
        self.qtable = self.qtable.copy()

        if not visualize:
            return

        # NOTE: the former pylab.hold(True) call is gone on purpose; hold()
        # was removed in Matplotlib 3.0 and axes always accumulate artists.
        pylab.ion()
        pylab.hot()

        # left panel: the maze with a red square marking the agent
        pylab.subplot(121)
        pylab.imshow(-self.maze, interpolation='nearest')
        self.rectangle = Rectangle(
            xy=(self.state[1] - 0.25, self.state[0] - 0.25),
            width=0.5, height=0.5, facecolor='red')
        pylab.gca().add_artist(self.rectangle)

        # right panel: best action value of every cell
        pylab.subplot(122)
        self.image = pylab.imshow(self.qtable.max(axis=2),
                                  interpolation='nearest')

    def agent(self, state):
        """Choose an action for ``state`` (epsilon-greedy) and store it in
        ``self.action``.

        With probability ``epsilon`` a uniformly random action is taken,
        otherwise one of the actions with the highest Q-value (ties are
        broken at random).
        """
        if np.random.uniform(0, 1) > self.epsilon:
            # greedy
            qvalues = self.qtable[state[0], state[1], :]
            best = np.flatnonzero(qvalues == qvalues.max())
            # a plain list keeps random.choice independent of ndarray truthiness
            action = int(choice(best.tolist()))
        else:
            # random; upper bound of numpy's randint is exclusive
            action = int(np.random.randint(0, len(self.actions)))

        self.action = action

    def environment(self, action):
        """Try to move the agent by ``self.actions[action]``.

        Resets ``self.reward`` to 0, then returns False and leaves the state
        untouched if the target cell is a wall or outside the maze.
        Otherwise the agent moves, ``self.reward`` becomes 1 if the goal was
        reached, and True is returned.
        """
        self.reward = 0

        newstate = self.state + self.actions[action]
        row, col = int(newstate[0]), int(newstate[1])
        rows, cols = self.maze.shape

        # explicit bounds test: a negative index would silently wrap around
        inside = 0 <= row < rows and 0 <= col < cols
        if not inside or self.maze[row, col] != 0:
            return False

        self.state = newstate
        if np.array_equal(self.state, self.goal):
            self.reward = 1

        return True

    def updateplot(self):
        """Move the agent marker, refresh the value image and redraw.

        Does nothing when the instance has no plot.
        """
        if self.rectangle is None or self.image is None:
            return

        # rectangle position
        self.rectangle.set_x(self.state[1] - 0.25)
        self.rectangle.set_y(self.state[0] - 0.25)

        # qvalues
        self.image.set_array(self.qtable.max(axis=2))
        self.image.autoscale()

        # redraw
        pylab.gcf().canvas.draw()

    def iteration(self):
        """Run one step: act, learn from the transition, decay epsilon, plot."""
        previous = self.state

        # keep choosing until the selected action does not run into a wall
        self.agent(previous)
        while not self.environment(self.action):
            self.agent(previous)

        # Q-value update for the transition (previous, action) -> self.state:
        #   Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        # The entry updated must belong to the state the action was taken
        # in, not the state it led to.
        row, col = previous[0], previous[1]
        best_next = self.qtable[self.state[0], self.state[1], :].max()
        target = self.reward + self.gamma * best_next
        self.qtable[row, col, self.action] += self.alpha * (
            target - self.qtable[row, col, self.action])

        self.history = (previous, self.action, self.reward)

        # reset agent
        if np.array_equal(self.goal, self.state):
            self.state = self.start
            self.history = None

        # reduce epsilon
        self.epsilon *= self.epsilondecay
        # single-argument print calls produce the same text on Python 2 and 3
        print("epsilon: %s" % (self.epsilon,))
        print("vtable: %s" % (self.qtable.max(axis=2),))

        # update plot
        self.updateplot()

if __name__ == "__main__":
    # Script entry point: construct the learner and keep running single
    # iterations. The loop has no exit condition, so it only ends when the
    # process is interrupted.
    qlearning = QLearning()
    advance = qlearning.iteration
    while True:
        advance()
				
	
	
	
	
