# blotto_2players.py

import numpy as np
from utils import *
import os

'''In this blotto game, '''
# Restrict CUDA to devices 0 and 1; set at import time, before main() opens a session.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# ------------------hypers------------------- #
batch_size = 100  # rows of noise drawn per forward pass of a player network
game_dimension = 5  # width of each player network's softmax output
epoch_num = 2000  # number of optimizer steps run by Blotto.optimize_model
inner_loop = 1  # copied onto Blotto instances; not read by the visible code
player_num = 2  # copied onto Blotto instances; not read by the visible code
network_input_dimension = 20  # width of the uniform-noise input fed to each network
# ------------------------------------------- #

class Blotto(object):
    """Two-player game whose strategies are produced by generator networks.

    Each player owns a seven-layer fully connected network that maps uniform
    noise to a batch of softmax vectors of width ``game_dimension``.
    ``build_model`` scores the two batches against each other and against a
    copy of each player's network whose weights were shifted by one gradient
    step on that player's utility; ``optimize_model`` trains every weight to
    shrink the resulting gap.

    Required call order:
    ``player_network_setting`` -> ``build_model`` -> ``optimize_model``.

    NOTE(review): ``tf`` is not imported explicitly in this file; presumably
    it is provided by ``from utils import *`` -- confirm.
    """

    def __init__(self, sess):
        """Store the session and copy the module-level hyperparameters.

        Args:
            sess: the ``tf.Session`` used to initialise and train the graph.
        """
        self.model_name = 'Blotto'
        self.player_num = player_num
        self.epoch_num = epoch_num
        self.sess = sess
        self.inner_loop = inner_loop
        # scope name -> list of (weight, bias) variable pairs, one per layer.
        self.player_network_dict = {}
        self.sdim = network_input_dimension
        self.batch_size = batch_size
        self.game_dimension = game_dimension

    def _layer_specs(self):
        """Return ``(n_in, n_out, init_range, activation)`` for each layer.

        ``activation`` names a function in ``tf.nn``; ``None`` marks the
        linear output layer whose values feed the softmax.
        """
        s = self.sdim
        return [
            (s, 2 * s, 0.2, 'tanh'),
            (2 * s, 4 * s, 0.2, 'tanh'),
            (4 * s, 16 * s, 0.2, 'tanh'),
            (16 * s, 16 * s, 0.1, 'relu'),
            (16 * s, 4 * s, 0.2, 'tanh'),
            (4 * s, 2 * s, 0.2, 'tanh'),
            (2 * s, self.game_dimension, 0.2, None),
        ]

    def _forward(self, params):
        """Push a fresh batch of uniform noise through the given layers.

        Args:
            params: list of ``(weight, bias)`` tensors, one pair per layer,
                in the order given by ``_layer_specs``.

        Returns:
            Tensor of shape ``[batch_size, game_dimension]``; every row is a
            softmax output.
        """
        activations = tf.random_uniform(
            shape=[self.batch_size, self.sdim], minval=-1.0, maxval=1.0)
        for (weight, bias), spec in zip(params, self._layer_specs()):
            # The bias broadcasts over the batch axis, so no tiling is needed.
            pre_activation = tf.matmul(activations, weight) + bias
            activation = spec[3]
            if activation is None:
                activations = pre_activation
            else:
                activations = getattr(tf.nn, activation)(pre_activation)
        return tf.nn.softmax(activations, axis=1)

    def player_network(self, scope):
        """Create one player's variables and return its action batch.

        The variables are named ``<scope>_W1`` .. ``<scope>_b7`` and are
        recorded in ``self.player_network_dict[scope]`` so that each player
        keeps its own parameters.

        Args:
            scope: name prefix identifying the player.

        Returns:
            Tensor of shape ``[batch_size, game_dimension]``.
        """
        params = []
        for index, (n_in, n_out, init_range, _) in enumerate(self._layer_specs(), start=1):
            weight = tf.Variable(
                initial_value=tf.random_uniform(
                    shape=[n_in, n_out], minval=-init_range, maxval=init_range),
                name='%s_W%d' % (scope, index))
            bias = tf.Variable(
                initial_value=tf.random_uniform(
                    shape=[n_out], minval=-init_range, maxval=init_range),
                name='%s_b%d' % (scope, index))
            params.append((weight, bias))
            # Legacy aliases (self.W1 .. self.b7): they always point at the
            # most recently built player, so nothing in this class reads them.
            setattr(self, 'W%d' % index, weight)
            setattr(self, 'b%d' % index, bias)
        self.player_network_dict[scope] = params
        return self._forward(params)

    def player_network_setting(self):
        """Build both players' networks and keep their action batches."""
        self.player_network_dict0 = self.player_network(scope='player_number_' + str(1))
        self.player_network_dict1 = self.player_network(scope='player_number_' + str(2))

    def player_network_local(self, scope):
        """Return the action batch of ``scope``'s network after one gradient step.

        Every weight and bias of the player is replaced by
        ``value - Lambda * d(utility)/d(value)``, where ``utility`` is
        ``expected_utility1`` for ``'player_number_1'`` and
        ``expected_utility2`` otherwise. The shifted network has the same
        architecture as ``player_network``. Requires ``build_model`` to have
        defined ``self.Lambda`` and the expected utilities.

        NOTE(review): the step subtracts the utility gradient; if a locally
        improving response was intended the sign should be ``+`` -- confirm.

        Args:
            scope: name used when the player was built by ``player_network``.

        Returns:
            Tensor of shape ``[batch_size, game_dimension]``.

        Raises:
            KeyError: if no network was built for ``scope``.
        """
        if scope == 'player_number_1':
            utility = self.expected_utility1
        else:
            utility = self.expected_utility2
        # Use this player's own variables, not whichever player was built last.
        params = self.player_network_dict[scope]
        with tf.name_scope(scope):
            flat_vars = [var for pair in params for var in pair]
            grads = tf.gradients(utility, flat_vars)
            stepped = [var - self.Lambda * grad for var, grad in zip(flat_vars, grads)]
            shifted_params = list(zip(stepped[0::2], stepped[1::2]))
            output_layer = self._forward(shifted_params)

        return output_layer

    @staticmethod
    def _expected_score(first_actions, second_actions):
        """Batch mean of the row-wise sum of ``tanh(first - second)``."""
        return tf.reduce_mean(
            tf.reduce_sum(tf.nn.tanh(first_actions - second_actions), axis=1))

    def build_model(self):
        """Define utilities, the gradient-shifted utilities and the loss.

        Creates the ``self.Lambda`` placeholder (the step size used by
        ``player_network_local``) and ``self.loss_function``, the absolute
        difference between the summed utilities and the summed utilities
        obtained when each player in turn plays its shifted network.
        """
        self.Lambda = tf.placeholder(dtype=tf.float32)
        player1_action_batch = self.player_network_dict0
        player2_action_batch = self.player_network_dict1

        score = self._expected_score(player1_action_batch, player2_action_batch)
        self.expected_utility1 = score
        self.expected_utility2 = -score

        self.player1_action_local = self.player_network_local(scope='player_number_1')
        self.player2_action_local = self.player_network_local(scope='player_number_2')

        local_score1 = self._expected_score(self.player1_action_local, player2_action_batch)
        local_score2 = self._expected_score(player1_action_batch, self.player2_action_local)
        self.local_expected_utility1 = local_score1
        self.local_expected_utility2 = -local_score2

        self.loss_function = tf.abs(
            self.expected_utility1 + self.expected_utility2
            - self.local_expected_utility1 - self.local_expected_utility2)

    def optimize_model(self):
        """Train all variables for ``epoch_num`` steps and log the loss.

        After each optimizer step the loss is re-evaluated (on fresh noise),
        printed, and appended as one line to ``output_blotto_3_3.txt``.
        """
        self.optimizer = tf.train.MomentumOptimizer(1e-3, 0.9)
        self.minimize = self.optimizer.minimize(
            self.loss_function, var_list=tf.trainable_variables())
        # Initialise through the stored session rather than the implicit default.
        self.sess.run(tf.global_variables_initializer())

        feed = {self.Lambda: 1e-4}
        # The context manager closes the log even if a training step raises.
        with open('output_blotto_3_3.txt', 'w') as file_out_min:
            for epoch in range(self.epoch_num):
                self.sess.run(self.minimize, feed_dict=feed)
                loss = self.sess.run(self.loss_function, feed_dict=feed)
                file_out_min.write(str(loss) + '\n')
                print('------------------ epoch '+str(epoch)+' has finished ---------------')
                print(loss)

def main():
    """Open a session, then build and train the Blotto model.

    The three stages must run in this order: create both player networks,
    assemble the utilities and loss, and finally run the optimisation loop.
    """
    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        game = Blotto(sess)
        for stage in (game.player_network_setting,
                      game.build_model,
                      game.optimize_model):
            stage()

# Start training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()