I'm trying to implement a Q-learning table so that the kart moves by itself, avoiding grass or lava. However, I don't understand why my kart keeps running into the grass and doesn't turn while I reward him if he reaches checkpoints and avoids obstacles such as the grass. I can't see what the problem is.
Here's the code in my ai.py
file where I've implemented Q-Learning:
import mathimport pygameimport numpy as npMAX_ANGLE_VELOCITY = 0.05BLOCK_SIZE = 50class AI(): def __init__(self): self.kart = None self.q_table = {} # Q table initialization self.learning_rate = 0.5 # Learning rate self.discount_factor = 0.95 # Discount factor for future rewards self.epsilon = 0.1 # Exploration rate self.epsilon_decay = 0.01 # Epsilon decay rate self.current_string = None def get_state(self, string): surface_type = self.kart._Kart__get_surface_type(string) state = (self.kart.x, self.kart.y, self.kart._Kart__v, self.kart._Kart__vtheta, self.kart.theta, self.kart.next_checkpoint_id, surface_type) return state def get_possible_actions(self): # The action space corresponds to the possible commands the kart can take actions = ['up', 'down', 'left', 'right'] return actions def execute_action(self, action):""" This method takes an action, updates the game state and returns a reward""" # Execute the action if action == 'up': self.kart.forward() elif action == 'down': self.kart.backward() elif action == 'left': self.kart.turn_left() elif action == 'right': self.kart.turn_right() # Get reward reward = self.get_reward() return reward def get_reward(self): # The reward function is based on the kart's progress through the game. # A positive reward is given when the kart gets closer to the next checkpoint, and a negative reward when it moves further away. # A negative reward is also given when the kart hits an obstacle. if self.kart.reached_checkpoint(): reward = 100 elif self.kart.hit_obstacle(): reward = -100 elif self.kart.on_grass(self.current_string): reward = -100 else: reward = -1 # A small negative reward for each time step to encourage faster execution return reward def update_q_values(self, old_state, action, reward, new_state):""" Implementation of the Q learning formula here to update Q values Q(s, a) := Q(s, a) +α * (r +γ * max(Q(s', a') - Q(s, a)))""" old_q_value = self.q_table.get((old_state, action), 0) max_new_q_value = max([self.q_table.get((new_state, a), 0) for a in self.get_possible_actions()]) # Learning formula Q new_q_value = old_q_value + self.learning_rate * (reward + self.discount_factor * max_new_q_value - old_q_value) self.q_table[(old_state, action)] = new_q_value def choose_action(self, state, string):""" Choose an action based on the relative angle to the next checkpoint and Q-values.""" # First find the checkpoint's position if self.kart.next_checkpoint_id == 0: char = 'C' elif self.kart.next_checkpoint_id == 1: char = 'D' elif self.kart.next_checkpoint_id == 2: char = 'E' elif self.kart.next_checkpoint_id == 3: char = 'F' # We use x and y to describe the coordinates in the string # x indicates column number # y indicates row number x, y = 0, 0 for c in string: # If we find the correct character for the checkpoint, we stop if c == char: break # If we find the return character "\n" we increment y and reset x to 0 # Otherwise increment x if c == "\n": y += 1 x = 0 else: x += 1 next_checkpoint_position = [x * BLOCK_SIZE + .5 * BLOCK_SIZE, y * BLOCK_SIZE + .5 * BLOCK_SIZE] # Next, find the angle to the checkpoint relative_x = next_checkpoint_position[0] - self.kart.x relative_y = next_checkpoint_position[1] - self.kart.y # The arctangent function is used to calculate the angle of the vector [relative_x, relative_y]. next_checkpoint_angle = math.atan2(relative_y, relative_x) # The relative angle corresponds to the rotation the kart must make to face the checkpoint. # Apply the operation (a + pi) % (2*pi) - pi to obtain an angle between -pi and pi relative_angle = (next_checkpoint_angle - self.kart.theta + math.pi) % (2 * math.pi) - math.pi # Choose an action based on the relative angle and Q-values if np.random.rand() < self.epsilon: # Exploration: Choose an action based on the relative angle if relative_angle > MAX_ANGLE_VELOCITY * 2: action = 'right' elif relative_angle < -MAX_ANGLE_VELOCITY * 2: action = 'left' else: action = 'up' else: # Exploitation: Choose the action with the highest Q-value q_values = {action: self.q_table.get((state, action), 0) for action in self.get_possible_actions()} action = max(q_values, key=q_values.get) return action def move(self, string): self.current_string = string # Get current state state = self.get_state(string) # Choose action action = self.choose_action(state, string) # Execute action and obtain reward # We have a method that takes an action, updates the game state and returns a reward reward = self.execute_action(action) # Get a new state new_state = self.get_state(string) # Update Q values self.update_q_values(state, action, reward, new_state) # Epsilon decay self.epsilon *= self.epsilon_decay # Conversion of selected action command = [False, False, False, False] if action == 'up': command[0] = True elif action == 'down': command[1] = True elif action == 'left': command[2] = True elif action == 'right': command[3] = True key_list = [pygame.K_UP, pygame.K_DOWN, pygame.K_LEFT, pygame.K_RIGHT] keys = {key: command[i] for i, key in enumerate(key_list)} return keys
Here is the code of my kart.py
in which I have implement the behaviour of the kart:
import pygameimport mathfrom track import Trackfrom surface import Surfacefrom grass import GrassBLOCK_SIZE = 50 # Block size in pixelsclass Kart: # Global constants unique to Kart MAX_ANGLE_VELOCITY = 0.05 # Max angular speed MAX_ACCELERATION = 0.25 # Max acceleration HEIGHT = 60 # Kart size WIDTH = 80 # Initialize kart with controller def __init__(self, controller): self.controller = controller # Controller for kart self.controller.kart = self # Ensures controller refers to this kart instance self.kart_image = pygame.image.load('kart_v2.png') self.kart_image = pygame.transform.scale(self.kart_image, (Kart.WIDTH, Kart.HEIGHT)) # Reset kart to specific position and orientation def reset(self, initial_position, initial_orientation): self.__x, self.__y = initial_position # Initial kart position self.__theta = initial_orientation # Initial kart orientation (in radians) self.__v = 0 # Initial kart speed = 0 self.__vtheta = 0 # Initial angular speed = 0 self.__has_finished = False self.__ac = 0 # Initial kart acceleration self.__x_check, self.__y_check = initial_position # Position of first checkpoint position self.__theta_check = initial_orientation # Orientation at first checkpoint self.__next_checkpoint_id = 0 # Variable that identifies the finish line, set to self.__next_checkpoint_id first self.__last_checkpoint = self.__next_checkpoint_id # Boolean attributes of check methods to move, if true, kart executes this method self.__foward = False self.__backward = False self.__turn_left = False self.__turn_right = False self.__first_iter = True # getters # All private variables that are read outside the class by Kart objects # As in track (for self.__has_finished) or in AI for positions and orientations @property def x(self): return self.__x @property def y(self): return self.__y @property def theta(self): return self.__theta @property def next_checkpoint_id(self): return self.__next_checkpoint_id @property def has_finished(self): return self.__has_finished # Method for moving the kart # Accelerates kart forward def forward(self): self.__ac = Kart.MAX_ACCELERATION self.__foward = True # Moves kart backward def backward(self): self.__ac = -Kart.MAX_ACCELERATION self.__backward = True # Turns kart left def turn_left(self): self.__vtheta = -Kart.MAX_ANGLE_VELOCITY self.__turn_left = True # Turns the kart to the right def turn_right(self): self.__vtheta = Kart.MAX_ANGLE_VELOCITY self.__turn_right = True # Update kart position according to its current state and the circuit def update_position(self, string, screen): # Determines the number of checkpoints on the first iteration, the aim being to know # the id of the last checkpoint, i.e. the finish line if self.__first_iter: self.__combien_checkpoint(string) self.__first_iter = False # Determines the surface (class and id) the kart is currently on surface_type, surface_id = self.__get_surface_type(string) # If asked to turn left and right, the kart doesn't move if self.__turn_left and self.__turn_right: self.__turn_right = False self.__turn_left = False self.__vtheta = 0 # Calculation of current kart angle with angular velocity self.__theta += self.__vtheta # If you ask the kart to move forward and backward at the same time, it won't move. if self.__backward and self.__foward: self.__backward = False self.__foward = False self.__ac = 0 # Calculation of speed as a function of surface area self.__v = surface_type.vitesse(self.__ac, self.__v, self.__vtheta) # Resets acceleration and angular velocity after use self.__ac = 0 self.__vtheta = 0 # Update kart position according to speed and angle theta self.__x += self.__v * math.cos(self.__theta) self.__y += self.__v * math.sin(self.__theta) # Variable that introduces whether there's a special action to do, like falling into a lava # Or passing a checkpoint action = surface_type.action() # Checks if the kart passes a checkpoint if action == 'Checkpoint': self.__pass_the_right_checkpoint(surface_id) # Checks if kart touches lava and resets if necessary elif action == 'Lava': self.__reset_to_previous_checkpoint() # Checks if kart is still within track limits # If not, returns it to the position of the last checkpoint self.__check_screen_limit(string) # Draws kart on screen to end at new position self.draw(screen) # Method resets kart to last registered checkpoint def __reset_to_previous_checkpoint(self): self.__x, self.__y, self.__theta = self.__x_check, self.__y_check, self.__theta_check self.__v = 0 self.__vtheta = 0 # Method saves current position as last checkpoint def __update_checkpoint_position(self): self.__x_check, self.__y_check, self.__theta_check = self.__x, self.__y, self.__theta # Method that determines how many checkpoints there are and the ID of the last checkpoint def __combien_checkpoint(self, string): checkpoints = {'C': False, 'D': False, 'E': False, 'F': False} # It scans the character string and determines how many different checkpoints there are for char in string: if char in checkpoints: checkpoints[char] = True # Assigns the incoming line ID according to how many checkpoints there are # We assume that if there's only one checkpoint, it must be "C". # If there are two, it must be 'C' and 'D' and so on... if checkpoints['C'] and checkpoints['D'] and checkpoints['E'] and checkpoints['F']: self.__last_checkpoint_id = 3 elif checkpoints['E'] and checkpoints['D'] and checkpoints['C']: self.__last_checkpoint_id = 2 elif checkpoints['D'] and checkpoints['C']: self.__last_checkpoint_id = 1 else: self.__last_checkpoint_id = 0 # Method that checks whether the passed checkpoint corresponds to the next expected checkpoint def __pass_the_right_checkpoint(self, checkpoint_id): # Finish the race if this is the last checkpoint if checkpoint_id == self.__last_checkpoint_id and self.__last_checkpoint_id == self.__next_checkpoint_id: self.__has_finished = True # If not, it saves the next checkpoint id if it's the right one elif checkpoint_id == self.__next_checkpoint_id: # Saves current position and orientation as last checkpoint self.__update_checkpoint_position() # Met à jour l'ID du prochain checkpoint attendu self.__next_checkpoint_id += 1 # Method that retrieves the surface type at the kart's current position def __get_surface_type(self, string): # Split string to determine dimensions terrain = string.split('\n') # Then determine the string index, to find out the character # This calculation may give some approximation, as it rounds to the nearest integer ligne = int(self.__y / BLOCK_SIZE) col = int(self.__x / BLOCK_SIZE) if ligne < len(terrain) and col < len(terrain[ligne]): caractere = terrain[ligne][col] # We use the dictionary available in Track to retrieve the class and id of the character # on which the kart is located track_element = Track.char_to_track_element[caractere] track_class = track_element['class'] if 'params' in track_element and track_element['params']: track_id = track_element['params'][0] else: track_id = None return track_class, track_id # Method that checks whether the kart is still on the track (game screen). If not, it resets to the last checkpoint. def __check_screen_limit(self, string): lines = string.split('\n') width = len(lines[0]) * BLOCK_SIZE # Calcul la largeur du track height = len(lines) * BLOCK_SIZE # Calcul la longueur du track if self.__x < 0 or self.__x > width or self.__y < 0 or self.__y > height: self.__reset_to_previous_checkpoint() # Method that draws with the kart image def draw(self, screen): # Obtenir le rectangle de l'image pour le positionnement kart_rect = self.kart_image.get_rect(center=(int(self.__x), int(self.__y))) # Rotation de l'image en fonction de l'orientation du kart rotated_kart_image = pygame.transform.rotate(self.kart_image, -math.degrees(self.__theta)) # Dessiner l'image sur l'écran screen.blit(rotated_kart_image, kart_rect) # Method for checking whether the kart has reached the checkpoint def reached_checkpoint(self): return self.__has_finished # Method for checking whether the kart has hit an obstacle def hit_obstacle(self): return False # Method for checking whether the kart is currently on grass def on_grass(self, string): surface_type = self.__get_surface_type(string)[0] return isinstance(surface_type, Grass)
Here is the code of my grass.py
file that I import into the kart.py
file:
from surface import Surfaceimport mathclass Grass(Surface): FRICTION = 0.2 # coefficient of friction on grass def __init__(self, x, y): super().__init__(x, y, 'image_herbe2.png') # init in class surface # Method for calculating speed on this given surface # Static method, as it doesn't use its own attribute @staticmethod def vitesse(ac, v, vtheta): ac = ac - (v * Grass.FRICTION * math.cos(vtheta)) v = ac + (v * math.cos(vtheta)) return v # Method that returns a character string # Static method, as it doesn't use its own attribute @staticmethod def action(): return 'Grass'
Here is the code of the track.py
file that is import into the main.py
file:
from grass import Grassfrom checkpoint import Checkpointfrom boost import Boostfrom lava import Lavafrom road import Roadimport pygameBLOCK_SIZE = 50BACKGROUND_COLOR = (0, 0, 0)class Track(object):""" Class that manages the running and display of a game ===================================== WARNING ==================================== THE DEFINITION OF THIS CLASS SHOULD NOT BE MODIFIED (it is however possible to reorganize the imports at the top, but that's all). ====================================================================================""" # This dictionary provides the class and instantiation parameters # corresponding to each letter in the string describing the circuit char_to_track_element = {'G': {'class': Grass,'params': [] },'B': {'class': Boost,'params': [] },'C': {'class': Checkpoint, # le C indique le checkpoint d'id 0'params': [0] },'D': {'class': Checkpoint, # Le D indique le checkpoint d'id 1'params': [1] },'E': {'class': Checkpoint, # etc.'params': [2] },'F': {'class': Checkpoint,'params': [3] },'L': {'class': Lava,'params': [] },'R': {'class': Road,'params': [] } } def __init__(self, string, initial_position, initial_angle): self.string = string # The string describing the circuit # Start position and orientation self.__initial_position = initial_position self.__initial_angle = initial_angle # Instantiation of the objects making up the circuit # In passing, we can calculate the circuit's dimensions self.track_objects, self.width, self.height = self.parse_string(string) # We instantiate the kart controlled by the player self.__karts = [] @property def initial_position(self): return self.__initial_position @property def initial_angle(self): return self.__initial_angle @property def karts(self): return self.__karts def add_kart(self, kart): self.__karts.append(kart) def parse_string(self, string):""" This method instantiates components and calculates circuit dimensions :param string: The string describing the circuit :returns: A tuple (track_objects, width, height) track_objects: array of objects making up the circuit width: circuit width height: circuit height""" track_objects = [] width = 0 height = 0 # We use x and y to describe the coordinates in the string # x indicates column number # y indicates row number x = 0 y = 0 for c in string: # For each character we add an object to track_objects if c in Track.char_to_track_element.keys(): track_element = Track.char_to_track_element[c] track_class = track_element['class'] track_params = [x, y] + track_element['params'] track_objects.append(track_class(*track_params)) x += BLOCK_SIZE width += BLOCK_SIZE elif c == '\n': x = 0 y += BLOCK_SIZE width = 0 height += BLOCK_SIZE height += BLOCK_SIZE return track_objects, width, height def play(self):""" This method launches the game. If the player is an AI, the game will play itself. if it's a human, you have to play with the keyboard. :param string: The string describing the circuit. :returns: A tuple (track_objects, width, height) track_objects: array of objects making up the circuit width: circuit width height: circuit height""" # Pygame initialization pygame.init() # Screen creation screen = pygame.display.set_mode((self.width, self.height)) # A method is called to return the kart to its initial position for kart in self.karts: kart.reset(self.initial_position, self.initial_angle) # While loop for running the game running = True compteur = 0 while running: # Close window for event in pygame.event.get(): if event.type == pygame.QUIT: running = False # Clear everything from the screen screen.fill(BACKGROUND_COLOR) # Draw circuit elements for track_object in self.track_objects: track_object.draw(screen) for kart in self.karts: # Recover player command (human or AI) keys = kart.controller.move(self.string) if keys[pygame.K_UP]: kart.forward() if keys[pygame.K_DOWN]: kart.backward() if keys[pygame.K_LEFT]: kart.turn_left() if keys[pygame.K_RIGHT]: kart.turn_right() # Kart position and orientation are updated # This calculation can be based on the circuit description and/or what is displayed on the screen # Note that the kart is not yet displayed on the screen, so you can retrieve what's beneath it # under the kart easily kart.update_position(self.string, screen) # We draw the karts if not kart.has_finished: kart.draw(screen) # Check if all karts have crossed the finish line if all([k.has_finished for k in self.karts]): running = False # Update pygame display pygame.display.flip() # Increase counter compteur += 1 print("Finished in", compteur, "etapes !") # Close window at end of circuit pygame.quit() return compteur
Here is the boost.py
file that is import into the track.py
file:
from surface import Surfaceclass Boost(Surface): VITESSE = 25 def __init__(self, x, y): super().__init__(x, y, 'image_boost.png') # Method for calculating speed on this given surface @staticmethod def vitesse(ac, v, vtheta) : v = Boost.VITESSE return v # Method that returns a character string @staticmethod def action(): return 'Boost'
Here is the checkpoint.py
file that is import into the track.py
file:
from surface import Surfaceimport mathclass Checkpoint(Surface): FRICTION = 0.02 def __init__(self, x, y, checkpoint_id): super().__init__(x, y, 'ligne_arrivee.png') # Method for calculating speed on this given surface @staticmethod def vitesse(ac, v, vtheta): ac = ac - (v * Checkpoint.FRICTION * math.cos(vtheta)) v = ac + (v * math.cos(vtheta)) return v # Method that returns a character string @staticmethod def action(): return 'Checkpoint'
Here is the human.py
file that is import into the main.py
file:
import timeimport pygameclass Human(): def __init__(self): self.kart = None def move(self, string): time.sleep(0.02) return pygame.key.get_pressed()
Here is the code of the lava.py
file that is import into the track.py
file:
from surface import Surfaceclass Lava(Surface): def __init__(self, x, y): super().__init__(x, y, 'image_lave.png') # Method for calculating speed on this given surface @staticmethod def vitesse(ac, v, vtheta): v = 0 return v # Method that returns a character string @staticmethod def action(): return 'Lava'
Here is the code of the road.py
file that is import into the track.py
file:
import mathfrom surface import Surfaceclass Road(Surface): FRICTION = 0.02 def __init__(self, x, y): super().__init__(x, y, 'image_road.png') # Method for calculating speed on this given surface @staticmethod def vitesse(ac, v, vtheta): ac = ac - (v * Road.FRICTION * math.cos(vtheta)) v = ac + (v * math.cos(vtheta)) return v # Method that returns a character string @staticmethod def action(): return 'Road'
Here is the surface.py
file:
import pygamefrom abc import ABC, abstractmethodBLOCK_SIZE = 50 # block size in pixelsclass Surface(ABC): WIDTH = BLOCK_SIZE # surface width HEIGHT = BLOCK_SIZE # surface height # Create surface with image def __init__(self, x, y, image_circuit): self.__x = x self.__y = y self.__image_circuit = image_circuit self.__rect = pygame.Rect(self.__x, self.__y, Surface.WIDTH, Surface.HEIGHT) self.__image = pygame.image.load(image_circuit) # load image # adapts image to desired scale self.__image = pygame.transform.scale(self.__image, (Surface.WIDTH, Surface.HEIGHT)) # Function to draw surface on screen def draw(self, screen): screen.blit(self.__image, self.__rect) # Calculated speed is specific to each surface @abstractmethod def vitesse(self, ac, v, vtheta): pass # This method just returns a special action to execute if the surface has one # It returns a string defining the action @abstractmethod def action(self): pass
Finally, here's the code for my main.py
file, which is used to launch the game:
from track import Trackfrom ai import AIfrom human import Humanfrom kart import Kartimport pygame# The string describing the terrainstring = """GGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRCRRRRRRRRRBRRRRRRRGGRRRRRRCRRRRRRRRRBRRRRRRRGGRRRRRRCRRRRRRRRRBRRRRRRRGGRRRRRRCRRRRRRRRRBRRRRRRRGGFFFFGGGGGGGGGGGGGGGGRRRRGGRRRRGGGGGGGGGGGGGGGGDDDDGGRRRRGGGGGLLLLLLGGGGGGGRRGGRRRRGGGGGLLLLLLGGGGGRRRRGGBBBBGGGGGLLLLLLGGGGGRRRRGGRRRRGGGGGLLLLLLGGGGGRRRRGGRRRRRERRRRRRRBRRRRRRRRLLGGRRRRRERRRRRRRBRRRRRRRRRRGGLRRRRERRRRRRRBRRRRRRRRRRGGLLRRRERRRRRRRBRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGG"""# If we're using the go-kart image, we use the function below to set up the pygame interface# The kart's initial position and orientation#if __name__ == "__main__": #pygame.display.set_mode((800, 600)) # Example of display mode definition# Initial position and orientation of kartinitial_position = [100, 150]initial_angle = 0controller = AI() # or Human()"""==================== WARNING =====================You must not modify these four lines of code ===================================================="""kart = Kart(controller)track = Track(string, initial_position, initial_angle)track.add_kart(kart)track.play()