Compare revisions

Projects: nmansard/sobolev-experiments, aparag/sobolev-experiments
Commits on Source (3)
This is a minimal reproduction of Sobolev learning, highlighting that the Hessian of the approximated function does not get better.
For instance, see the loss curves (in the images folder) obtained while training on different functions.
To run experiments, change function_name in sobolev_training.py and run it with python3.
\ No newline at end of file
To run experiments, change function_name in sobolev_training.py and run it with python3.
The conclusions are:
1. To use Sobolev training so that the derivatives are also guaranteed to get better, set create_graph = True when computing the Hessian and Jacobian, and compute the Jacobian and Hessian in the network's forward pass (see the sketch below).
2. To use the Sobolev loss only as a regularizer of the function, set create_graph = True; this will just guarantee that the function approximation itself is better.
\ No newline at end of file
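For concreteness, here is a minimal sketch of one training step following conclusion 1, assuming a scalar-output network and batches x, y, dy, d2y as produced by datagen.py (the names mirror sobolev_training.py; the shapes and the scalar output are assumptions):

import torch
import torch.autograd.functional as F

def sobolev_step(network, optimizer, x, y, dy, d2y):
    y_hat = network(x)
    # create_graph=True keeps the autograd graph of the derivatives, so the
    # derivative losses below backpropagate into the network parameters.
    dy_hat = torch.vstack([F.jacobian(network, xi, create_graph=True).squeeze() for xi in x])
    d2y_hat = torch.stack([F.hessian(network, xi, create_graph=True).squeeze() for xi in x])
    loss = (torch.nn.functional.mse_loss(y_hat, y)
            + torch.nn.functional.mse_loss(dy_hat, dy)
            + torch.nn.functional.mse_loss(d2y_hat, d2y))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()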
(3 binary files changed: no preview for this file type)
@@ -2,7 +2,8 @@
import numpy as np
import torch
from neural_network import Model
#from neural_network import Model
from derivative_network import TanhDerivNet
from datagen import dataGenerator
import torch.autograd.functional as F
import matplotlib.pyplot as plt
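TanhDerivNet from derivative_network.py is not shown in this diff; as a rough sketch (the layer width and structure are assumptions), a one-hidden-layer tanh network whose forward returns both the value and its analytic input-gradient could look like:

import torch

class TanhDerivNetSketch(torch.nn.Module):
    def __init__(self, ninput, nhidden=64):
        super().__init__()
        self.fc1 = torch.nn.Linear(ninput, nhidden)
        self.fc2 = torch.nn.Linear(nhidden, 1)

    def forward(self, x):
        h = torch.tanh(self.fc1(x))                            # (batch, nhidden)
        y = self.fc2(h)                                        # (batch, 1)
        # d tanh(u)/du = 1 - tanh(u)^2, chained through both linear maps:
        dh = (1.0 - h**2).unsqueeze(2) * self.fc1.weight       # (batch, nhidden, ninput)
        dy = torch.einsum('oh,bhi->bi', self.fc2.weight, dh)   # (batch, ninput)
        return y, dy

Computing the gradient in the forward pass avoids the per-sample F.jacobian loop and keeps dy differentiable with respect to the parameters by construction.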
@@ -10,7 +11,8 @@ import matplotlib.pyplot as plt
# ..............................................................................
torch.manual_seed(0)
np.random.seed(0)
EPOCHS = 50000 # Number of Epochs
@@ -36,7 +38,8 @@ dataloader = torch.utils.data.DataLoader(dataset, batch_size = number
shuffle=True, num_workers=4)
network = Model(ninput=X.shape[1])
#network = Model(ninput=X.shape[1])
network = TanhDerivNet(ninput=X.shape[1])
optimizer = torch.optim.Adam(params = network.parameters(), lr = lr)
@@ -47,62 +50,58 @@ epoch_loss_in_der1 = []
epoch_loss_in_der2 = []
floss = torch.nn.functional.mse_loss
for epoch in range(EPOCHS):
network.train()
batch_loss_in_value = 0
batch_loss_in_der1 = 0
batch_loss_in_der2 = 0
for idx,(data) in enumerate(dataloader):
x,y,dy,d2y = data
y_hat = network(x)
dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net
y_hat,dy_hat = network(x)
dy_hat = dy_hat.squeeze()
#dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net
#d2y_hat = torch.stack( [ F.hessian(network, state).squeeze() for state in x ] ) # Hessian of net
loss1 = torch.nn.functional.mse_loss(y_hat,y)
loss2 = torch.nn.functional.mse_loss(dy_hat, dy)
loss3 = 0#torch.nn.functional.mse_loss(d2y_hat, d2y)
loss1 = floss(y_hat,y)
loss2 = floss(dy_hat, dy)
loss = loss1 + 10*loss2 + loss3 # Can add a Sobolev factor to give weight to each loss term.
# But it does not really change anything
loss = loss1 + loss2 # Can add a Sobolev factor to give weight to each loss term.
#loss = loss2
optimizer.zero_grad()
loss.backward()
optimizer.step()
batch_loss_in_value += loss1.item()
batch_loss_in_der1 += loss2.item()
#batch_loss_in_der2 += loss3.item()
epoch_loss_in_value.append( batch_loss_in_value / number_of_batches )
epoch_loss_in_der1.append( batch_loss_in_der1 / number_of_batches )
#epoch_loss_in_der2.append( batch_loss_in_der2 / number_of_batches )
if epoch % 10 == 0:
print(f"EPOCH : {epoch}")
print(f"Loss Values: {loss1.item()}, Loss Grad : {loss2.item()}") #, Loss Hessian : {loss3.item()}")
#print(dy_hat-dy)
plt.ion()
fig, (ax1, ax2, ax3) = plt.subplots(1,3)
fig, (ax1, ax2) = plt.subplots(1,2) # unpack two axes to match the 1x2 grid; unpacking three would fail
fig.suptitle(function_name.upper())
ax1.semilogy(range(len(epoch_loss_in_value)), epoch_loss_in_value, c = "red")
#ax2.semilogy(range(len(epoch_loss_in_der1)), epoch_loss_in_der1, c = "green")
#ax3.semilogy(range(len(epoch_loss_in_der2)), epoch_loss_in_der2, c = "orange")
ax1.set(title='Loss in Value')
ax2.set(title='Loss in Gradient')
ax3.set(title='Loss in Hessian')
ax1.set_ylabel('Loss')
ax1.set_xlabel('Epochs')
ax2.set_xlabel('Epochs')
ax3.set_xlabel('Epochs')
@@ -118,13 +117,16 @@ xplt = torch.tensor(LOAD['x'])
yplt = torch.tensor(LOAD['y'])
dyplt = torch.tensor(LOAD['dy'])
ypred = network(xplt)
ypred,dypred = network(xplt)
plt.figure()
plt.subplot(131)
plt.scatter(xplt[:,0],xplt[:,1],c=yplt[:,0])
plt.scatter(x[:,0],x[:,1],c=y[:,0].detach(),lw=1,s=200,edgecolor='k')
plt.subplot(132)
plt.scatter(xplt[:,0],xplt[:,1],c=ypred[:,0].detach())
plt.scatter(x[:,0],x[:,1],c=y[:,0].detach(),lw=1,s=200,edgecolor='k')
plt.subplot(133)
plt.scatter(xplt[:,0],xplt[:,1],c=(ypred-yplt)[:,0].detach())
plt.colorbar()
plt.scatter(xplt[:,0],xplt[:,1],c=abs(ypred-yplt)[:,0].detach())
plt.scatter(x[:,0],x[:,1],color= 'none',lw=2,s=200,edgecolor='w')
#plt.colorbar()
@@ -13,14 +13,15 @@ import matplotlib.pyplot as plt
EPOCHS = 50000 # Number of Epochs
EPOCHS = 1000 # Number of Epochs
lr = 1e-3 # Learning rate
number_of_batches = 1 # Number of batches per epoch
function_name = 'simple_bumps' # See datagen.py or function_definitions.py for other functions to use
number_of_data_points = 5
#function_name = 'simple_bumps' # See datagen.py or function_definitions.py for other functions to use
function_name = 'perm'
number_of_data_points = 20
@@ -59,13 +60,13 @@ for epoch in range(EPOCHS):
y_hat = network(x)
dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net
#d2y_hat = torch.stack( [ F.hessian(network, state).squeeze() for state in x ] ) # Hessian of net
dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net, set create_graph = True
d2y_hat = torch.stack( [ F.hessian(network, state).squeeze() for state in x ] ) # Hessian of net, set create_graph = True
loss1 = torch.nn.functional.mse_loss(y_hat,y)
loss2 = torch.nn.functional.mse_loss(dy_hat, dy)
loss3 = 0#torch.nn.functional.mse_loss(d2y_hat, d2y)
loss3 = torch.nn.functional.mse_loss(d2y_hat, d2y)
loss = loss1 + 10*loss2 + loss3 # Can add a Sobolev factor to give weight to each loss term.
# But it does not really change anything
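As the new in-line comments note, these calls still use the default create_graph=False, under which F.jacobian and F.hessian return detached tensors, so loss2 and loss3 contribute no gradient to the parameters; presumably the intended calls are:

dy_hat = torch.vstack( [ F.jacobian(network, state, create_graph=True).squeeze() for state in x ] )
d2y_hat = torch.stack( [ F.hessian(network, state, create_graph=True).squeeze() for state in x ] )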
@@ -75,16 +76,16 @@ for epoch in range(EPOCHS):
batch_loss_in_value += loss1.item()
batch_loss_in_der1 += loss2.item()
#batch_loss_in_der2 += loss3.item()
batch_loss_in_der2 += loss3.item()
epoch_loss_in_value.append( batch_loss_in_value / number_of_batches )
epoch_loss_in_der1.append( batch_loss_in_der1 / number_of_batches )
#epoch_loss_in_der2.append( batch_loss_in_der2 / number_of_batches )
epoch_loss_in_der2.append( batch_loss_in_der2 / number_of_batches )
if epoch % 10 == 0:
print(f"EPOCH : {epoch}")
print(f"Loss Values: {loss1.item()}, Loss Grad : {loss2.item()}") #, Loss Hessian : {loss3.item()}")
print(f"Loss Values: {loss1.item()}, Loss Grad : {loss2.item()} , Loss Hessian : {loss3.item()}")
plt.ion()
@@ -92,8 +93,8 @@ fig, (ax1, ax2, ax3) = plt.subplots(1,3)
fig.suptitle(function_name.upper())
ax1.semilogy(range(len(epoch_loss_in_value)), epoch_loss_in_value, c = "red")
#ax2.semilogy(range(len(epoch_loss_in_der1)), epoch_loss_in_der1, c = "green")
#ax3.semilogy(range(len(epoch_loss_in_der2)), epoch_loss_in_der2, c = "orange")
ax2.semilogy(range(len(epoch_loss_in_der1)), epoch_loss_in_der1, c = "green")
ax3.semilogy(range(len(epoch_loss_in_der2)), epoch_loss_in_der2, c = "orange")
ax1.set(title='Loss in Value')
ax2.set(title='Loss in Gradient')
@@ -113,18 +114,18 @@ fig.tight_layout()
#xplt,yplt,dyplt,_ = dataGenerator(function_name, 10000)
#np.save('plt2.npy',{ "x": xplt.numpy(),"y": yplt.numpy(),"dy": dyplt.numpy()})
LOAD = np.load( 'plt2.npy',allow_pickle=True).flat[0]
xplt = torch.tensor(LOAD['x'])
yplt = torch.tensor(LOAD['y'])
dyplt = torch.tensor(LOAD['dy'])
ypred = network(xplt)
plt.figure()
plt.subplot(131)
plt.scatter(xplt[:,0],xplt[:,1],c=yplt[:,0])
plt.subplot(132)
plt.scatter(xplt[:,0],xplt[:,1],c=ypred[:,0].detach())
plt.subplot(133)
plt.scatter(xplt[:,0],xplt[:,1],c=(ypred-yplt)[:,0].detach())
plt.colorbar()
#LOAD = np.load( 'plt2.npy',allow_pickle=True).flat[0]
#xplt = torch.tensor(LOAD['x'])
#yplt = torch.tensor(LOAD['y'])
#dyplt = torch.tensor(LOAD['dy'])
#ypred = network(xplt)
#plt.figure()
#plt.subplot(131)
#plt.scatter(xplt[:,0],xplt[:,1],c=yplt[:,0])
#plt.subplot(132)
#plt.scatter(xplt[:,0],xplt[:,1],c=ypred[:,0].detach())
#plt.subplot(133)
#plt.scatter(xplt[:,0],xplt[:,1],c=(ypred-yplt)[:,0].detach())
#plt.colorbar()