Quantcast
Channel: Active questions tagged python - Stack Overflow
Viewing all articles
Browse latest Browse all 13891

Why does my kart keep going into the grass and not turn?

$
0
0

I'm trying to implement a Q-learning table so that the kart moves by itself, avoiding grass or lava. However, I don't understand why my kart keeps running into the grass and doesn't turn while I reward him if he reaches checkpoints and avoids obstacles such as the grass. I can't see what the problem is.

Here's the code in my ai.py file where I've implemented Q-Learning:

import mathimport pygameimport numpy as npMAX_ANGLE_VELOCITY = 0.05BLOCK_SIZE = 50class AI():    def __init__(self):        self.kart = None        self.q_table = {} # Q table initialization        self.learning_rate = 0.5 # Learning rate        self.discount_factor = 0.95 # Discount factor for future rewards        self.epsilon = 0.1 # Exploration rate        self.epsilon_decay = 0.01 # Epsilon decay rate        self.current_string = None    def get_state(self, string):        surface_type = self.kart._Kart__get_surface_type(string)        state = (self.kart.x, self.kart.y, self.kart._Kart__v, self.kart._Kart__vtheta, self.kart.theta, self.kart.next_checkpoint_id, surface_type)        return state    def get_possible_actions(self):        # The action space corresponds to the possible commands the kart can take        actions = ['up', 'down', 'left', 'right']        return actions    def execute_action(self, action):"""        This method takes an action, updates the game state and returns a reward"""        # Execute the action        if action == 'up':            self.kart.forward()        elif action == 'down':            self.kart.backward()        elif action == 'left':            self.kart.turn_left()        elif action == 'right':            self.kart.turn_right()        # Get reward        reward = self.get_reward()        return reward    def get_reward(self):        # The reward function is based on the kart's progress through the game.        # A positive reward is given when the kart gets closer to the next checkpoint, and a negative reward when it moves further away.        # A negative reward is also given when the kart hits an obstacle.        if self.kart.reached_checkpoint():            reward = 100        elif self.kart.hit_obstacle():            reward = -100        elif self.kart.on_grass(self.current_string):            reward = -100        else:            reward = -1  # A small negative reward for each time step to encourage faster execution        return reward    def update_q_values(self, old_state, action, reward, new_state):"""        Implementation of the Q learning formula here to update Q values        Q(s, a) := Q(s, a) +α * (r +γ * max(Q(s', a') - Q(s, a)))"""        old_q_value = self.q_table.get((old_state, action), 0)        max_new_q_value = max([self.q_table.get((new_state, a), 0) for a in self.get_possible_actions()])        # Learning formula Q        new_q_value = old_q_value + self.learning_rate * (reward + self.discount_factor * max_new_q_value - old_q_value)        self.q_table[(old_state, action)] = new_q_value    def choose_action(self, state, string):"""        Choose an action based on the relative angle to the next checkpoint and Q-values."""        # First find the checkpoint's position        if self.kart.next_checkpoint_id == 0:            char = 'C'        elif self.kart.next_checkpoint_id == 1:            char = 'D'        elif self.kart.next_checkpoint_id == 2:            char = 'E'        elif self.kart.next_checkpoint_id == 3:            char = 'F'        # We use x and y to describe the coordinates in the string        # x indicates column number        # y indicates row number        x, y = 0, 0        for c in string:            # If we find the correct character for the checkpoint, we stop            if c == char:                break            # If we find the return character "\n" we increment y and reset x to 0            # Otherwise increment x            if c == "\n":                y += 1                x = 0            else:                x += 1        next_checkpoint_position = [x * BLOCK_SIZE + .5 * BLOCK_SIZE, y * BLOCK_SIZE + .5 * BLOCK_SIZE]        # Next, find the angle to the checkpoint        relative_x = next_checkpoint_position[0] - self.kart.x        relative_y = next_checkpoint_position[1] - self.kart.y        # The arctangent function is used to calculate the angle of the vector [relative_x, relative_y].        next_checkpoint_angle = math.atan2(relative_y, relative_x)        # The relative angle corresponds to the rotation the kart must make to face the checkpoint.        # Apply the operation (a + pi) % (2*pi) - pi to obtain an angle between -pi and pi        relative_angle = (next_checkpoint_angle - self.kart.theta + math.pi) % (2 * math.pi) - math.pi        # Choose an action based on the relative angle and Q-values        if np.random.rand() < self.epsilon:            # Exploration: Choose an action based on the relative angle            if relative_angle > MAX_ANGLE_VELOCITY * 2:                action = 'right'            elif relative_angle < -MAX_ANGLE_VELOCITY * 2:                action = 'left'            else:                action = 'up'        else:            # Exploitation: Choose the action with the highest Q-value            q_values = {action: self.q_table.get((state, action), 0) for action in self.get_possible_actions()}            action = max(q_values, key=q_values.get)        return action    def move(self, string):        self.current_string = string        # Get current state        state = self.get_state(string)        # Choose action        action = self.choose_action(state, string)        # Execute action and obtain reward        # We have a method that takes an action, updates the game state and returns a reward        reward = self.execute_action(action)        # Get a new state        new_state = self.get_state(string)        # Update Q values        self.update_q_values(state, action, reward, new_state)        # Epsilon decay        self.epsilon *= self.epsilon_decay        # Conversion of selected action        command = [False, False, False, False]        if action == 'up':            command[0] = True        elif action == 'down':            command[1] = True        elif action == 'left':            command[2] = True        elif action == 'right':            command[3] = True        key_list = [pygame.K_UP, pygame.K_DOWN, pygame.K_LEFT, pygame.K_RIGHT]        keys = {key: command[i] for i, key in enumerate(key_list)}        return keys

Here is the code of my kart.py in which I have implement the behaviour of the kart:

import pygameimport mathfrom track import Trackfrom surface import Surfacefrom grass import GrassBLOCK_SIZE = 50 # Block size in pixelsclass Kart:    # Global constants unique to Kart    MAX_ANGLE_VELOCITY = 0.05 # Max angular speed    MAX_ACCELERATION = 0.25 # Max acceleration    HEIGHT = 60 # Kart size    WIDTH = 80      # Initialize kart with controller     def __init__(self, controller):        self.controller = controller # Controller for kart        self.controller.kart = self  # Ensures controller refers to this kart instance        self.kart_image = pygame.image.load('kart_v2.png')        self.kart_image = pygame.transform.scale(self.kart_image, (Kart.WIDTH, Kart.HEIGHT))    # Reset kart to specific position and orientation    def reset(self, initial_position, initial_orientation):       self.__x, self.__y = initial_position # Initial kart position       self.__theta = initial_orientation  # Initial kart orientation (in radians)       self.__v = 0 # Initial kart speed = 0       self.__vtheta = 0 # Initial angular speed = 0       self.__has_finished = False       self.__ac = 0  # Initial kart acceleration                                  self.__x_check, self.__y_check = initial_position # Position of first checkpoint position       self.__theta_check = initial_orientation # Orientation at first checkpoint       self.__next_checkpoint_id = 0        # Variable that identifies the finish line, set to self.__next_checkpoint_id first       self.__last_checkpoint = self.__next_checkpoint_id        # Boolean attributes of check methods to move, if true, kart executes this method       self.__foward = False        self.__backward = False       self.__turn_left = False       self.__turn_right = False       self.__first_iter = True     # getters     # All private variables that are read outside the class by Kart objects    # As in track (for self.__has_finished) or in AI for positions and orientations    @property    def x(self):        return self.__x    @property    def y(self):        return self.__y    @property    def theta(self):        return self.__theta    @property    def next_checkpoint_id(self):        return self.__next_checkpoint_id    @property    def has_finished(self):        return self.__has_finished    # Method for moving the kart    # Accelerates kart forward    def forward(self):        self.__ac = Kart.MAX_ACCELERATION        self.__foward = True    # Moves kart backward     def backward(self):        self.__ac = -Kart.MAX_ACCELERATION        self.__backward = True    # Turns kart left    def turn_left(self):         self.__vtheta = -Kart.MAX_ANGLE_VELOCITY         self.__turn_left = True    # Turns the kart to the right      def turn_right(self):        self.__vtheta = Kart.MAX_ANGLE_VELOCITY        self.__turn_right = True     # Update kart position according to its current state and the circuit    def update_position(self, string, screen):        # Determines the number of checkpoints on the first iteration, the aim being to know        # the id of the last checkpoint, i.e. the finish line        if self.__first_iter:            self.__combien_checkpoint(string)            self.__first_iter = False        # Determines the surface (class and id) the kart is currently on               surface_type, surface_id = self.__get_surface_type(string)         # If asked to turn left and right, the kart doesn't move         if self.__turn_left and self.__turn_right:            self.__turn_right = False            self.__turn_left = False            self.__vtheta = 0        # Calculation of current kart angle with angular velocity        self.__theta += self.__vtheta        # If you ask the kart to move forward and backward at the same time, it won't move.         if self.__backward and self.__foward:            self.__backward = False            self.__foward = False            self.__ac = 0        # Calculation of speed as a function of surface area        self.__v = surface_type.vitesse(self.__ac, self.__v, self.__vtheta)        # Resets acceleration and angular velocity after use        self.__ac = 0        self.__vtheta = 0        # Update kart position according to speed and angle theta        self.__x += self.__v * math.cos(self.__theta)        self.__y += self.__v * math.sin(self.__theta)        # Variable that introduces whether there's a special action to do, like falling into a lava        # Or passing a checkpoint        action = surface_type.action()         # Checks if the kart passes a checkpoint        if action == 'Checkpoint':             self.__pass_the_right_checkpoint(surface_id)            # Checks if kart touches lava and resets if necessary        elif action == 'Lava':             self.__reset_to_previous_checkpoint()        # Checks if kart is still within track limits        # If not, returns it to the position of the last checkpoint        self.__check_screen_limit(string)         # Draws kart on screen to end at new position        self.draw(screen)    # Method resets kart to last registered checkpoint    def __reset_to_previous_checkpoint(self):        self.__x, self.__y, self.__theta = self.__x_check, self.__y_check, self.__theta_check        self.__v = 0        self.__vtheta = 0    # Method saves current position as last checkpoint    def __update_checkpoint_position(self):        self.__x_check, self.__y_check, self.__theta_check = self.__x, self.__y, self.__theta    # Method that determines how many checkpoints there are and the ID of the last checkpoint     def __combien_checkpoint(self, string):        checkpoints = {'C': False, 'D': False, 'E': False, 'F': False}        # It scans the character string and determines how many different checkpoints there are        for char in string:            if char in checkpoints:                checkpoints[char] = True          # Assigns the incoming line ID according to how many checkpoints there are        # We assume that if there's only one checkpoint, it must be "C".        # If there are two, it must be 'C' and 'D' and so on...        if checkpoints['C'] and checkpoints['D'] and checkpoints['E'] and checkpoints['F']:            self.__last_checkpoint_id = 3        elif checkpoints['E'] and checkpoints['D'] and checkpoints['C']:            self.__last_checkpoint_id = 2        elif checkpoints['D'] and checkpoints['C']:            self.__last_checkpoint_id = 1        else:            self.__last_checkpoint_id = 0    # Method that checks whether the passed checkpoint corresponds to the next expected checkpoint    def __pass_the_right_checkpoint(self, checkpoint_id):        # Finish the race if this is the last checkpoint        if checkpoint_id == self.__last_checkpoint_id and self.__last_checkpoint_id == self.__next_checkpoint_id:            self.__has_finished = True        # If not, it saves the next checkpoint id if it's the right one        elif checkpoint_id == self.__next_checkpoint_id:        # Saves current position and orientation as last checkpoint            self.__update_checkpoint_position()        # Met à jour l'ID du prochain checkpoint attendu            self.__next_checkpoint_id += 1    # Method that retrieves the surface type at the kart's current position            def __get_surface_type(self, string):        # Split string to determine dimensions        terrain = string.split('\n')        # Then determine the string index, to find out the character        # This calculation may give some approximation, as it rounds to the nearest integer        ligne = int(self.__y / BLOCK_SIZE)        col = int(self.__x / BLOCK_SIZE)        if ligne < len(terrain) and col < len(terrain[ligne]):            caractere = terrain[ligne][col]            # We use the dictionary available in Track to retrieve the class and id of the character             # on which the kart is located            track_element = Track.char_to_track_element[caractere]            track_class = track_element['class']            if 'params' in track_element and track_element['params']:                track_id = track_element['params'][0]            else:                track_id = None            return track_class, track_id    # Method that checks whether the kart is still on the track (game screen). If not, it resets to the last checkpoint.      def __check_screen_limit(self, string):        lines = string.split('\n')        width = len(lines[0]) * BLOCK_SIZE # Calcul la largeur du track        height = len(lines) * BLOCK_SIZE  # Calcul la longueur du track        if self.__x < 0 or self.__x > width or self.__y < 0 or self.__y > height:             self.__reset_to_previous_checkpoint()    # Method that draws with the kart image    def draw(self, screen):        # Obtenir le rectangle de l'image pour le positionnement        kart_rect = self.kart_image.get_rect(center=(int(self.__x), int(self.__y)))        # Rotation de l'image en fonction de l'orientation du kart        rotated_kart_image = pygame.transform.rotate(self.kart_image, -math.degrees(self.__theta))         # Dessiner l'image sur l'écran        screen.blit(rotated_kart_image, kart_rect)    # Method for checking whether the kart has reached the checkpoint    def reached_checkpoint(self):        return self.__has_finished    # Method for checking whether the kart has hit an obstacle    def hit_obstacle(self):        return False    # Method for checking whether the kart is currently on grass    def on_grass(self, string):        surface_type = self.__get_surface_type(string)[0]        return isinstance(surface_type, Grass)

Here is the code of my grass.py file that I import into the kart.py file:

from surface import Surfaceimport mathclass Grass(Surface):    FRICTION = 0.2 # coefficient of friction on grass    def __init__(self, x, y):        super().__init__(x, y, 'image_herbe2.png') # init in class surface    # Method for calculating speed on this given surface    # Static method, as it doesn't use its own attribute    @staticmethod      def vitesse(ac, v, vtheta):        ac = ac - (v  * Grass.FRICTION * math.cos(vtheta))        v = ac + (v * math.cos(vtheta))        return v    # Method that returns a character string    # Static method, as it doesn't use its own attribute    @staticmethod    def action():        return 'Grass'

Here is the code of the track.py file that is import into the main.py file:

from grass import Grassfrom checkpoint import Checkpointfrom boost import Boostfrom lava import Lavafrom road import Roadimport pygameBLOCK_SIZE = 50BACKGROUND_COLOR = (0, 0, 0)class Track(object):"""    Class that manages the running and display of a game    ===================================== WARNING ====================================    THE DEFINITION OF THIS CLASS SHOULD NOT BE MODIFIED    (it is however possible to reorganize the imports at the top, but that's all).    ===================================================================================="""    # This dictionary provides the class and instantiation parameters    # corresponding to each letter in the string describing the circuit    char_to_track_element = {'G': {'class': Grass,'params': []        },'B': {'class': Boost,'params': []        },'C': {'class': Checkpoint,  # le C indique le checkpoint d'id 0'params': [0]        },'D': {'class': Checkpoint,  # Le D indique le checkpoint d'id 1'params': [1]        },'E': {'class': Checkpoint,  # etc.'params': [2]        },'F': {'class': Checkpoint,'params': [3]        },'L': {'class': Lava,'params': []        },'R': {'class': Road,'params': []        }    }    def __init__(self, string, initial_position, initial_angle):        self.string = string  # The string describing the circuit        # Start position and orientation        self.__initial_position = initial_position        self.__initial_angle = initial_angle        # Instantiation of the objects making up the circuit        # In passing, we can calculate the circuit's dimensions        self.track_objects, self.width, self.height = self.parse_string(string)        # We instantiate the kart controlled by the player        self.__karts = []    @property    def initial_position(self):        return self.__initial_position    @property    def initial_angle(self):        return self.__initial_angle    @property    def karts(self):        return self.__karts    def add_kart(self, kart):        self.__karts.append(kart)    def parse_string(self, string):"""        This method instantiates components and calculates circuit dimensions        :param string: The string describing the circuit        :returns: A tuple (track_objects, width, height)            track_objects: array of objects making up the circuit            width: circuit width            height: circuit height"""        track_objects = []        width = 0        height = 0        # We use x and y to describe the coordinates in the string        # x indicates column number        # y indicates row number        x = 0        y = 0        for c in string:            # For each character we add an object to track_objects            if c in Track.char_to_track_element.keys():                track_element = Track.char_to_track_element[c]                track_class = track_element['class']                track_params = [x, y] + track_element['params']                track_objects.append(track_class(*track_params))                x += BLOCK_SIZE                width += BLOCK_SIZE            elif c == '\n':                x = 0                y += BLOCK_SIZE                width = 0                height += BLOCK_SIZE        height += BLOCK_SIZE        return track_objects, width, height    def play(self):"""        This method launches the game. If the player is an AI, the game will play itself.        if it's a human, you have to play with the keyboard.        :param string: The string describing the circuit.        :returns: A tuple (track_objects, width, height)            track_objects: array of objects making up the circuit            width: circuit width            height: circuit height"""        # Pygame initialization        pygame.init()        # Screen creation        screen = pygame.display.set_mode((self.width, self.height))        # A method is called to return the kart to its initial position        for kart in self.karts:            kart.reset(self.initial_position, self.initial_angle)        # While loop for running the game        running = True        compteur = 0        while running:            # Close window            for event in pygame.event.get():                if event.type == pygame.QUIT:                    running = False            # Clear everything from the screen            screen.fill(BACKGROUND_COLOR)            # Draw circuit elements            for track_object in self.track_objects:                track_object.draw(screen)            for kart in self.karts:                # Recover player command (human or AI)                keys = kart.controller.move(self.string)                if keys[pygame.K_UP]:                    kart.forward()                if keys[pygame.K_DOWN]:                    kart.backward()                if keys[pygame.K_LEFT]:                    kart.turn_left()                if keys[pygame.K_RIGHT]:                    kart.turn_right()                # Kart position and orientation are updated                # This calculation can be based on the circuit description and/or what is displayed on the screen                # Note that the kart is not yet displayed on the screen, so you can retrieve what's beneath it                # under the kart easily                kart.update_position(self.string, screen)                # We draw the karts                if not kart.has_finished:                    kart.draw(screen)            # Check if all karts have crossed the finish line            if all([k.has_finished for k in self.karts]):                running = False            # Update pygame display            pygame.display.flip()            # Increase counter            compteur += 1        print("Finished in", compteur, "etapes !")        # Close window at end of circuit        pygame.quit()        return compteur

Here is the boost.py file that is import into the track.py file:

from surface import Surfaceclass Boost(Surface):    VITESSE = 25    def __init__(self, x, y):            super().__init__(x, y, 'image_boost.png')    # Method for calculating speed on this given surface    @staticmethod    def vitesse(ac, v, vtheta) :            v = Boost.VITESSE             return v    # Method that returns a character string    @staticmethod     def action():        return 'Boost'

Here is the checkpoint.py file that is import into the track.py file:

from surface import Surfaceimport mathclass Checkpoint(Surface):    FRICTION = 0.02    def __init__(self, x, y, checkpoint_id):        super().__init__(x, y, 'ligne_arrivee.png')    # Method for calculating speed on this given surface    @staticmethod    def vitesse(ac, v, vtheta):        ac = ac - (v  * Checkpoint.FRICTION * math.cos(vtheta))        v = ac + (v * math.cos(vtheta))        return v    # Method that returns a character string    @staticmethod    def action():        return 'Checkpoint'

Here is the human.py file that is import into the main.py file:

import timeimport pygameclass Human():    def __init__(self):        self.kart = None    def move(self, string):        time.sleep(0.02)        return pygame.key.get_pressed()

Here is the code of the lava.py file that is import into the track.py file:

from surface import Surfaceclass Lava(Surface):    def __init__(self, x, y):         super().__init__(x, y, 'image_lave.png')    # Method for calculating speed on this given surface    @staticmethod     def vitesse(ac, v, vtheta):        v = 0        return v     # Method that returns a character string    @staticmethod    def action():        return 'Lava'

Here is the code of the road.py file that is import into the track.py file:

import mathfrom surface import Surfaceclass Road(Surface):     FRICTION = 0.02    def __init__(self, x, y):         super().__init__(x, y, 'image_road.png')     # Method for calculating speed on this given surface    @staticmethod       def vitesse(ac, v, vtheta):        ac = ac - (v  * Road.FRICTION  * math.cos(vtheta))        v = ac + (v * math.cos(vtheta))        return v    # Method that returns a character string    @staticmethod    def action():        return 'Road'

Here is the surface.py file:

import pygamefrom abc import ABC, abstractmethodBLOCK_SIZE = 50 # block size in pixelsclass Surface(ABC):    WIDTH = BLOCK_SIZE # surface width    HEIGHT = BLOCK_SIZE # surface height    # Create surface with image    def __init__(self, x, y, image_circuit):            self.__x = x        self.__y = y        self.__image_circuit = image_circuit        self.__rect = pygame.Rect(self.__x, self.__y, Surface.WIDTH, Surface.HEIGHT)        self.__image = pygame.image.load(image_circuit) # load image        # adapts image to desired scale        self.__image = pygame.transform.scale(self.__image, (Surface.WIDTH, Surface.HEIGHT))     # Function to draw surface on screen    def draw(self, screen):        screen.blit(self.__image, self.__rect)    # Calculated speed is specific to each surface    @abstractmethod    def vitesse(self, ac, v, vtheta):        pass    # This method just returns a special action to execute if the surface has one    # It returns a string defining the action    @abstractmethod    def action(self):        pass

Finally, here's the code for my main.py file, which is used to launch the game:

from track import Trackfrom ai import AIfrom human import Humanfrom kart import Kartimport pygame# The string describing the terrainstring = """GGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRCRRRRRRRRRBRRRRRRRGGRRRRRRCRRRRRRRRRBRRRRRRRGGRRRRRRCRRRRRRRRRBRRRRRRRGGRRRRRRCRRRRRRRRRBRRRRRRRGGFFFFGGGGGGGGGGGGGGGGRRRRGGRRRRGGGGGGGGGGGGGGGGDDDDGGRRRRGGGGGLLLLLLGGGGGGGRRGGRRRRGGGGGLLLLLLGGGGGRRRRGGBBBBGGGGGLLLLLLGGGGGRRRRGGRRRRGGGGGLLLLLLGGGGGRRRRGGRRRRRERRRRRRRBRRRRRRRRLLGGRRRRRERRRRRRRBRRRRRRRRRRGGLRRRRERRRRRRRBRRRRRRRRRRGGLLRRRERRRRRRRBRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGG"""# If we're using the go-kart image, we use the function below to set up the pygame interface# The kart's initial position and orientation#if __name__ == "__main__":   #pygame.display.set_mode((800, 600))  # Example of display mode definition# Initial position and orientation of kartinitial_position = [100, 150]initial_angle = 0controller = AI() # or   Human()"""==================== WARNING =====================You must not modify these four lines of code ===================================================="""kart = Kart(controller)track = Track(string, initial_position, initial_angle)track.add_kart(kart)track.play()

Viewing all articles
Browse latest Browse all 13891

Trending Articles