Commit

changes before updating likelihood function
kylajones committed May 31, 2024
1 parent a59b27a commit 19e9a35
Showing 10 changed files with 1,266 additions and 49 deletions.
2 changes: 1 addition & 1 deletion linfa/models/discrepancy_models.py
@@ -91,7 +91,7 @@ def genDataFile(self, dataFileNamePrefix='observations', use_true_model=True, st
      coverage = def_out.repeat(1,num_observations)

      for loopA in range(num_observations):
-       noise = torch.randn((len(coverage),1))*stdDev
+       noise = torch.randn((len(coverage),1)) * stdDev
        coverage[:,loopA] = coverage[:,loopA] + noise.flatten()

      if store:
76 changes: 32 additions & 44 deletions linfa/plot_res.py
@@ -3,61 +3,49 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

-# Plot format
-fs=8
-plt.rc('font', family='serif')
-plt.rc('xtick', labelsize='x-small')
-plt.rc('ytick', labelsize='x-small')
-plt.rc('text', usetex = False)
-
-def plot_log(log_file,out_dir,fig_format='png',use_dark_mode=False):
+from matplotlib.ticker import ScalarFormatter, MaxNLocator
+
+# Set global plot settings
+plt.rcParams['figure.figsize'] = (8, 6)
+plt.rcParams['figure.dpi'] = 300
+plt.rcParams['axes.labelsize'] = 16
+plt.rcParams['xtick.labelsize'] = 15
+plt.rcParams['ytick.labelsize'] = 15
+plt.rcParams['legend.fontsize'] = 12
+plt.rcParams['lines.linewidth'] = 3
+plt.rcParams['lines.markersize'] = 16
+plt.rcParams['axes.labelweight'] = 'bold'
+plt.rcParams['xtick.direction'] = 'in'
+plt.rcParams['ytick.direction'] = 'in'
+plt.rcParams['xtick.top'] = True
+plt.rcParams['ytick.right'] = True
+plt.rcParams['savefig.bbox'] = 'tight'
+
+def plot_log(log_file,out_dir,fig_format='png', use_dark_mode=False):

    log_data = np.loadtxt(log_file)

    # Set dark mode
    if(use_dark_mode):
        plt.style.use('dark_background')

-    # loss profile
-    plt.figure(figsize=(2,2))
-    # plt.semilogy(log_data[:,1],log_data[:,2],'b-')
-    plt.plot(log_data[:,1],log_data[:,2],'b-')
-    plt.xlabel('Iterations',fontsize=fs)
-    plt.ylabel('log loss',fontsize=fs)
-    plt.gca().tick_params(axis='both', labelsize=fs)
-    plt.tight_layout()
-    plt.savefig(os.path.join(out_dir,'log_plot.'+fig_format),bbox_inches='tight',dpi=200)
+    plt.figure()
+    plt.plot(log_data[:, 1],log_data[:, 2],'b-')
+    plt.xlabel('Iterations')
+    plt.ylabel('Log Loss')
+    plt.savefig(os.path.join(out_dir,'log_plot.'+fig_format))
    plt.close()

-def plot_params(param_data,LL_data,idx1,idx2,out_dir,out_info,fig_format='png',use_dark_mode=False):
+def plot_params(param_data,LL_data,idx1,idx2,out_dir,out_info,fig_format='png', use_dark_mode=False):

    # Read data
    param_data = np.loadtxt(param_data)
    dent_data = np.loadtxt(LL_data)

    # Set dark mode
    if(use_dark_mode):
        plt.style.use('dark_background')

-    # Plot figure
-    plt.figure(figsize=(3,2))
-    plt.scatter(param_data[:,idx1],param_data[:,idx2],s=1.5,lw=0,marker='o',c=np.exp(dent_data))
-    # plt.scatter(param_data[:,idx1],param_data[:,idx2],s=1.5,lw=0,marker='o')
+    plt.figure()
+    plt.scatter(param_data[:,idx1], param_data[:,idx2]/1000, lw = 0, s =7, marker = 'o', c = np.exp(dent_data))
+    plt.plot(1000, -21.0, 'r*')
    plt.colorbar()
-    plt.gca().xaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))
-    plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1e'))
-    plt.gca().tick_params(axis='both', labelsize=fs)
-    plt.xlabel('$z_{K,'+str(idx1+1)+'}$',fontsize=fs)
-    plt.ylabel('$z_{K,'+str(idx2+1)+'}$',fontsize=fs)
-    # Set limits based on avg and std
-    avg_1 = np.mean(param_data[:,idx1])
-    std_1 = np.std(param_data[:,idx1])
-    avg_2 = np.mean(param_data[:,idx2])
-    std_2 = np.std(param_data[:,idx2])
-    plt.xlim([avg_1-3*std_1,avg_1+3*std_1])
-    plt.ylim([avg_2-3*std_2,avg_2+3*std_2])
-    plt.tight_layout()
-    plt.savefig(os.path.join(out_dir,'params_plot_' + out_info + '_'+str(idx1)+'_'+str(idx2)+'.'+fig_format),bbox_inches='tight',dpi=200)
+    plt.xlabel('Std. Pressure, '+r'$P_0$'+' [Pa]')
+    plt.ylabel('Ads. Energy, '+r'$E$'+' [kJ'+r'$\cdot$'+'mol'+r'$^{-1}$'+']')
+    plt.savefig(os.path.join(out_dir,'params_plot_' + out_info + '_'+str(idx1)+'_'+str(idx2)+'.'+fig_format))
    plt.close()

def plot_outputs(sample_file,obs_file,idx1,idx2,out_dir,out_info,fig_format='png',use_dark_mode=False):
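
A minimal sketch of how these plotting helpers might be invoked after a run. The sample and log-density file names below are hypothetical placeholders, not linfa's actual output naming, and the log path assumes the log is written to output_dir/log_file as configured in the test added below:

from linfa.plot_res import plot_log, plot_params

out_dir = './results/TP15_no_disc_gaussian'
plot_log(out_dir + '/log.txt', out_dir)
plot_params(out_dir + '/params.txt',      # posterior samples (hypothetical file name)
            out_dir + '/logdensity.txt',  # log-density at those samples (hypothetical file name)
            idx1 = 0, idx2 = 1, out_dir = out_dir, out_info = 'final')
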
209 changes: 209 additions & 0 deletions linfa/tests/test_no_disc_TP15_gaussian.py
@@ -0,0 +1,209 @@
from linfa.run_experiment import experiment
from linfa.transform import Transformation
from linfa.discrepancy import Discrepancy
import torch
import random
import numpy as np

# Import the physics-based model
from linfa.models.discrepancy_models import PhysChem

def run_test():

    exp = experiment()
    exp.name = "TP15_no_disc_gaussian"
    exp.flow_type = 'realnvp'           # str: Type of flow (default 'realnvp') # TODO: generalize to work for TP1
    exp.n_blocks = 15                   # int: Number of normalizing flow layers
    exp.hidden_size = 100               # int: Hidden layer size for MADE in each layer (default 100)
    exp.n_hidden = 1                    # int: Number of hidden layers in each MADE
    exp.activation_fn = 'relu'          # str: Activation function used (default 'relu')
    exp.input_order = 'sequential'      # str: Input order for create_mask (default 'sequential')
    exp.batch_norm_order = True         # bool: Whether batch normalization is used (default True)
    exp.save_interval = 1000            # int: How often to sample from the normalizing flow

    exp.input_size = 2                  # int: Dimensionality of input (default 2)
    exp.batch_size = 200                # int: Number of samples generated (default 100)
    exp.true_data_num = 2               # double: Number of true model evaluations (default 2)
    exp.n_iter = 10000                  # int: Number of iterations (default 25001)
    exp.lr = 0.001                      # float: Learning rate (default 0.003)
    exp.lr_decay = 0.9999               # float: Learning rate decay (default 0.9999)
    exp.log_interal = 10                # int: How often to show loss stats (default 10)

    exp.run_nofas = False               # Normalizing flow with adaptive surrogate
    exp.surrogate_type = 'discrepancy'  # Type of surrogate being used
    exp.surr_pre_it = 1000              # int: Number of pre-training iterations for the surrogate model
    exp.surr_upd_it = 2000              # int: Number of iterations for the surrogate model update
    exp.calibrate_interval = 1000       # int: How often the surrogate model is updated

    exp.annealing = False
    exp.budget = 216                    # int: Total number of true model evaluations
    exp.surr_folder = "./"
    exp.use_new_surr = True

    exp.output_dir = './results/' + exp.name
    exp.log_file = 'log.txt'

    exp.seed = 35435                    # int: Random seed used
    exp.n_sample = 10000                # int: Batch size to generate final results/plots
    exp.no_cuda = True                  # Run on CPU by default, but tested on CUDA

    exp.optimizer = 'RMSprop'
    exp.lr_scheduler = 'ExponentialLR'

    exp.device = torch.device('cuda:0' if torch.cuda.is_available() and not exp.no_cuda else 'cpu')

    # Define transformation
    trsf_info = [['tanh', -30.0, 30.0, 500.0, 1500.0],
                 ['tanh', -30.0, 30.0, -30000.0, -15000.0]]
    trsf = Transformation(trsf_info)

    # Apply the transformation
    exp.transform = trsf
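
    # Each trsf_info row appears to encode ['tanh', in_lo, in_hi, out_lo, out_hi]:
    # the unbounded flow variable is squashed from roughly [in_lo, in_hi] into the
    # physical range [out_lo, out_hi] (here P0 in [500, 1500] and E in
    # [-30000, -15000], consistent with the prior moments and plot labels).
    # Schematically, a bounded tanh map of this kind looks like
    #   x_phys = 0.5*(out_lo + out_hi) + 0.5*(out_hi - out_lo)*tanh(c*z)
    # for some scaling c; the exact formula is the one implemented in
    # linfa.transform, so this is an illustration only.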

# Add temperatures and pressures for each evaluation
variable_inputs = [[350.0, 400.0, 450.0],
[1.0, 2.0, 3.0, 4.0, 5.0]]

# Define model
langmuir_model = PhysChem(variable_inputs)

# Assign as experiment model
exp.model = langmuir_model

# Read data
exp.model.data = np.loadtxt('observations.csv', delimiter = ',', skiprows = 1)

if(len(exp.model.data.shape) < 2):
exp.model.data = np.expand_dims(exp.model.data, axis=0)

# Form tensors for variables and results in observations
var_grid_in = torch.tensor(exp.model.data[:,:2])
var_grid_out = torch.tensor(exp.model.data[:,2:])

# Define surrogate
if(exp.run_nofas):

# Create new discrepancy
exp.surrogate = Discrepancy(model_name = exp.name,
model_folder = exp.output_dir,
lf_model = exp.model.solve_t,
input_size = exp.model.var_in.size(1),
output_size = 1,
var_grid_in = var_grid_in,
var_grid_out = var_grid_out)
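        # The discrepancy surrogate takes the grid of variable inputs (T, P) and
        # returns an additive correction to the low-fidelity model output; in
        # log_density below it is evaluated as surrogate.forward(model.var_in)
        # and added to modelOut.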
        # Initially tune on the default values of the calibration variables
        # exp.surrogate.update(langmuir_model.defParams, exp.surr_pre_it, 0.03, 0.9999, 100, store=True)
        # exp.surrogate.update(langmuir_model.defParams, 1, 0.03, 0.9999, 100, store=True)
        # Load the surrogate
        # exp.surrogate.surrogate_load()
    else:
        exp.surrogate = None

    # Define log density
    def log_density(calib_inputs, model, surrogate, transform):

        # Compute transformation log Jacobian
        adjust = transform.compute_log_jacob_func(calib_inputs)

        # Initialize negative log likelihood
        total_nll = torch.zeros((calib_inputs.size(0), 1))

        # Initialize total number of variable inputs
        total_var_inputs = len(model.var_in)

        # Evaluate model response - (num_var x num_batch)
        modelOut = langmuir_model.solve_t(transform.forward(calib_inputs)).t()

        # Evaluate discrepancy
        if (surrogate is None):
            discrepancy = torch.zeros(modelOut.shape).t()
        else:
            # (num_var)
            discrepancy = surrogate.forward(model.var_in)

        # Get the absolute values of the standard deviation (num_var)
        stds = langmuir_model.defOut * langmuir_model.stdRatio

        # Get data - (num_var x num_obs)
        Data = torch.tensor(langmuir_model.data[:,2:]).to(exp.device)
        num_obs = Data.size(1)

        # Evaluate log-likelihood:
        # Loop on the available observations
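        # Each observation column y_j is scored with an independent-Gaussian
        # log-likelihood, schematically
        #   log p(y_j | z) = -n/2 * log(2*pi) - sum_i log(sigma_i)
        #                    - 1/2 * sum_i ((eta_i + delta_i - y_ij) / sigma_i)**2,
        # where eta is the model output, delta the discrepancy and sigma the
        # per-point noise std; l1, l2 and l3 below are the corresponding terms,
        # with the normalization constants as written in the expressions below.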
        for loopA in range(num_obs):
            # -1 / 2 * n * log ( 2 pi )
            l1 = -0.5 * np.prod(langmuir_model.data.shape[1]) * np.log(2.0 * np.pi)

            # - 1 / 2 * sum_{i=1} ^ N log (sigma_i)
            l2 = (-0.5 * langmuir_model.data.shape[1] * torch.log(torch.prod(stds))).item()

            # - 1 / 2 * sum_{i = 1} ^ N {(eta_i + disc_i - y_i)^2 / sigma_i^2)}
            l3 = -0.5 * torch.sum(((modelOut + discrepancy.t() - Data[:,loopA].unsqueeze(0)) / stds.t())**2, dim = 1)

            if(False):
                print('Compare')
                print('%15s %15s %15s %15s' % ('lf out','discrep','lf+discr','obs'))
                for loopB in range(discrepancy.size(0)):
                    test1 = modelOut[0,:]
                    test2 = discrepancy[:,0]
                    test3 = Data[:,loopA]
                    print('%15.3f %15.3f %15.3f %15.3f' % (modelOut[0,loopB],discrepancy[loopB,0],modelOut[0,loopB]+discrepancy[loopB,0],Data[loopB,loopA]))
                print('')

            # Compute negative ll (num_batch x 1)
            negLL = -(l1 + l2 + l3) # sum contributions
            res = -negLL.reshape(calib_inputs.size(0), 1) # reshape

            # Accumulate
            total_nll += res

        # Return log-likelihood
        return total_nll/num_obs + adjust

    # Assign log density model
    exp.model_logdensity = lambda x: log_density(x, exp.model, exp.surrogate, exp.transform)

    # Define log prior
    # TODO: can we use a half-normal or truncated normal prior? How to enforce bounds?
    def log_prior(calib_inputs, transform):
        # Compute transformation log Jacobian
        adjust = transform.compute_log_jacob_func(calib_inputs)
        # Compute the calibration inputs in the physical domain
        phys_inputs = transform.forward(calib_inputs)
        # Define prior moments
        pr_avg = torch.tensor([[1E3, -21E3]])
        pr_std = torch.tensor([[1E2, 500]])
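        # i.e. independent Gaussian priors centered at P0 = 1e3 and E = -21e3
        # in the physical space, with standard deviations 1e2 and 500 respectively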
        # Eval log prior
        l1 = -0.5 * calib_inputs.size(1) * np.log(2.0 * np.pi)
        l2 = (-0.5 * torch.log(torch.prod(pr_std))).item()
        l3 = -0.5 * torch.sum(((phys_inputs - pr_avg)/pr_std)**2, dim = 1).unsqueeze(1)
        # Return
        res = l1 + l2 + l3 + adjust
        return res

    exp.model_logprior = lambda x: log_prior(x, exp.transform)

    # Run VI
    exp.run()

def generate_data(use_true_model=False,num_observations=50):

    # Set variable grid
    var_grid = [[350.0, 400.0, 450.0],
                [1.0, 2.0, 3.0, 4.0, 5.0]]

    # Create model
    model = PhysChem(var_grid)

    # Generate data
    model.genDataFile(use_true_model = use_true_model, num_observations = num_observations)

# Main code
if __name__ == "__main__":

    generate_data(use_true_model = False, num_observations = 1)

    run_test()
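    # With these defaults the script first writes a single-observation
    # 'observations.csv' next to the script and then launches the variational
    # inference run; results and the optimization log are expected under
    # ./results/TP15_no_disc_gaussian/ (see exp.output_dir and exp.log_file above).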


