import os
import tensorflow as tf
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym import envs

print(envs.registry.all())

# TODO: Load an environment
env = gym.make("MsPacman-ram-v0")

# TODO: Print observation and action spaces
print(env.observation_space)
print(env.action_space)

# TODO Build the policy gradient neural network
class Agent:
    def __init__(self, num_actions, state_size):
        initializer = tf.contrib.layers.xavier_initializer()

        self.input_layer = tf.placeholder(dtype=tf.float32, shape=[None, state_size])

        # Neural net starts here
        hidden_layer = tf.layers.dense(self.input_layer, 8, activation=tf.nn.relu, kernel_initializer=initializer)
        hidden_layer_2 = tf.layers.dense(hidden_layer, 8, activation=tf.nn.relu, kernel_initializer=initializer)

        # Output of neural net
        dropout2 = tf.layers.dropout(hidden_layer_2, rate=0.6, training=True)
        out = tf.layers.dense(dropout2, num_actions, activation=None)

        self.outputs = tf.nn.softmax(out)
        self.choice = tf.argmax(self.outputs, axis=1)

        # Training Procedure
        self.rewards = tf.placeholder(shape=[None, ], dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None, ], dtype=tf.int32)

        one_hot_actions = tf.one_hot(self.actions, num_actions)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=one_hot_actions)

        self.loss = tf.reduce_mean(cross_entropy * self.rewards)

        self.gradients = tf.gradients(self.loss, tf.trainable_variables())

        # Create a placeholder list for gradients
        self.gradients_to_apply = []
        for index, variable in enumerate(tf.trainable_variables()):
            gradient_placeholder = tf.placeholder(tf.float32)
            self.gradients_to_apply.append(gradient_placeholder)

        # Create the operation to update gradients with the gradients placeholder.
        optimizer = tf.train.AdamOptimizer(learning_rate=2e-2)
        self.update_gradients = optimizer.apply_gradients(zip(self.gradients_to_apply, tf.trainable_variables()))

# TODO Create the discounted and normalized rewards function
discount_rate = 0.95

def discount_normalize_rewards(rewards):
    discounted_rewards = np.zeros_like(rewards)
    total_rewards = 0

    for i in reversed(range(len(rewards))):
        total_rewards = total_rewards * discount_rate + rewards[i]
        discounted_rewards[i] = total_rewards

    discounted_rewards -= np.mean(discounted_rewards)
    discounted_rewards /= np.std(discounted_rewards)

    return discounted_rewards

# Loops the entire process, including the testing
for i in range(10):
    # TODO Create the training loop
    tf.reset_default_graph()

    # Modify these to match shape of actions and states in your environment
    num_actions = 7
    state_size = 128

    path = "./cartpole-pg/"

    training_episodes = 50
    max_steps_per_episode = 10000
    episode_batch_size = 10

    agent = Agent(num_actions, state_size)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver(max_to_keep=2)

    if not os.path.exists(path):
        os.makedirs(path)

    with tf.Session() as sess:
        sess.run(init)

        total_episode_rewards = []

        # Create a buffer of 0'd gradients
        gradient_buffer = sess.run(tf.trainable_variables())
        for index, gradient in enumerate(gradient_buffer):
            gradient_buffer[index] = gradient * 0

        for episode in range(training_episodes):
            state = env.reset()

            episode_history = []
            episode_rewards = 0

            for step in range(max_steps_per_episode):
                if episode % 10 == 0:
                    env.render()

                # Get weights for each action
                action_probabilities = sess.run(agent.outputs, feed_dict={agent.input_layer: [state]})
                action_choice = np.random.choice(range(num_actions), p=action_probabilities[0])

                state_next, reward, done, _ = env.step(action_choice)
                episode_history.append([state, action_choice, reward, state_next])
                state = state_next

                episode_rewards += reward

                if done or step + 1 == max_steps_per_episode:
                    total_episode_rewards.append(episode_rewards)
                    episode_history = np.array(episode_history)
                    episode_history[:, 2] = discount_normalize_rewards(episode_history[:, 2])

                    ep_gradients = sess.run(agent.gradients,
                                            feed_dict={agent.input_layer: np.vstack(episode_history[:, 0]),
                                                       agent.actions: episode_history[:, 1],
                                                       agent.rewards: episode_history[:, 2]})

                    # add the gradients to the grad buffer:
                    for index, gradient in enumerate(ep_gradients):
                        gradient_buffer[index] += gradient

                    break

            if episode % episode_batch_size == 0:
                feed_dict_gradients = dict(zip(agent.gradients_to_apply, gradient_buffer))
                sess.run(agent.update_gradients, feed_dict=feed_dict_gradients)

                for index, gradient in enumerate(gradient_buffer):
                    gradient_buffer[index] = gradient * 0

            if episode % 10 == 0:
                saver.save(sess, path + "pg-checkpoint", episode)
                print("Average reward / 10 eps: " + str(np.mean(total_episode_rewards[-100:])))

    # TODO Create the testing loop
    testing_episodes = 1

    with tf.Session() as sess:
        checkpoint = tf.train.get_checkpoint_state(path)
        saver.restore(sess, checkpoint.model_checkpoint_path)

        for episode in range(testing_episodes):
            state = env.reset()

            episode_rewards = 0

            for step in range(max_steps_per_episode):
                env.render()

                # Get Action
                action_argmax = sess.run(agent.choice, feed_dict={agent.input_layer: [state]})
                action_choice = action_argmax[0]

                state_next, reward, done, _ = env.step(action_choice)
                state = state_next

                episode_rewards += reward

                if done or step + 1 == max_steps_per_episode:
                    print("Rewards for episode " + str(episode) + ": " + str(episode_rewards))
                    break
This is my code, and the error I get when I run it is:
AttributeError: 'dict' object has no attribute 'all'
I am trying to train an AI that plays Ms. Pac-Man, hence the MsPacman-ram-v0 environment.
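
From what I can tell, the error comes from a gym API change rather than from the network itself: in gym 0.26 and later, envs.registry is a plain dict mapping environment ids to their specs, so the old registry.all() method no longer exists. Here is a sketch of what I believe a version-tolerant listing looks like, assuming registry is either the old registry object or the new dict:

# envs.registry.all() existed in gym < 0.26; in newer versions the registry
# is a plain dict keyed by environment id, which is exactly what triggers
# AttributeError: 'dict' object has no attribute 'all'.
from gym import envs

if hasattr(envs.registry, "all"):       # older gym (< 0.26)
    print(sorted(spec.id for spec in envs.registry.all()))
else:                                   # newer gym: registry is a dict
    print(sorted(envs.registry.keys()))

If a gym version >= 0.26 really is installed, the rest of the script will hit the same kind of problem, because the reset/step/render API changed as well. A sketch of the newer calling convention, reusing the variable names from the script above; the environment id may also need to be the ALE-namespaced one (e.g. "ALE/MsPacman-ram-v5") depending on the installed ale-py:

# gym >= 0.26 calling convention (a sketch, not verified against this setup):
env = gym.make("MsPacman-ram-v0", render_mode="human")  # render mode is chosen at make(), not via env.render()

state, info = env.reset()               # reset() now returns (obs, info)
state_next, reward, terminated, truncated, info = env.step(action_choice)
done = terminated or truncated          # the old single done flag is split in two

Separately, the tf.contrib, tf.placeholder, and tf.Session calls assume TensorFlow 1.x; tf.contrib was removed entirely in TensorFlow 2.x, so pinning a matching TF version is probably part of the same fix.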