Compare revisions

Projects: nmansard/sobolev-experiments, aparag/sobolev-experiments
Commits on Source (3)
This is a minimal reproduction of Sobolev learning, highlighting that the Hessian of the approximated function does not get better.
For instance, see the loss curves (in the images folder) obtained while training on different functions.
To run experiments, change function_name in sobolev_training.py and run it with python3.
\ No newline at end of file
To run experiments, change function_name in sobolev_training.py and run it with python3.
The conclusions are:
1. To use Sobolev training so that the derivatives are also guaranteed to get better, set create_graph = True when computing the Hessian and Jacobian, and compute the Jacobian and Hessian in the network's forward pass (see the sketch below).
2. To use the Sobolev loss only as a regularizer of the function, set create_graph = True; this will just guarantee that the function approximation itself is better.
\ No newline at end of file
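For concreteness, here is a minimal sketch of one training step following conclusion 1, assuming a scalar-output network and batches x, y, dy, d2y as produced by datagen.py (the names mirror sobolev_training.py; the shapes and the scalar output are assumptions):

import torch
import torch.autograd.functional as F

def sobolev_step(network, optimizer, x, y, dy, d2y):
    y_hat = network(x)
    # create_graph=True keeps the autograd graph of the derivatives, so the
    # derivative losses below backpropagate into the network parameters.
    dy_hat = torch.vstack([F.jacobian(network, xi, create_graph=True).squeeze() for xi in x])
    d2y_hat = torch.stack([F.hessian(network, xi, create_graph=True).squeeze() for xi in x])
    loss = (torch.nn.functional.mse_loss(y_hat, y)
            + torch.nn.functional.mse_loss(dy_hat, dy)
            + torch.nn.functional.mse_loss(d2y_hat, d2y))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()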
(3 binary files changed: no preview for this file type)
@@ -2,7 +2,8 @@
import numpy as np
import torch
from neural_network import Model
#from neural_network import Model
from derivative_network import TanhDerivNet
from datagen import dataGenerator
import torch.autograd.functional as F
import matplotlib.pyplot as plt
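TanhDerivNet from derivative_network.py is not shown in this diff; as a rough sketch (the layer width and structure are assumptions), a one-hidden-layer tanh network whose forward returns both the value and its analytic input-gradient could look like:

import torch

class TanhDerivNetSketch(torch.nn.Module):
    def __init__(self, ninput, nhidden=64):
        super().__init__()
        self.fc1 = torch.nn.Linear(ninput, nhidden)
        self.fc2 = torch.nn.Linear(nhidden, 1)

    def forward(self, x):
        h = torch.tanh(self.fc1(x))                            # (batch, nhidden)
        y = self.fc2(h)                                        # (batch, 1)
        # d tanh(u)/du = 1 - tanh(u)^2, chained through both linear maps:
        dh = (1.0 - h**2).unsqueeze(2) * self.fc1.weight       # (batch, nhidden, ninput)
        dy = torch.einsum('oh,bhi->bi', self.fc2.weight, dh)   # (batch, ninput)
        return y, dy

Computing the gradient in the forward pass avoids the per-sample F.jacobian loop and keeps dy differentiable with respect to the parameters by construction.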
@@ -10,7 +11,8 @@ import matplotlib.pyplot as plt
# ..............................................................................
torch.manual_seed(0)
np.random.seed(0)
EPOCHS = 50000 # Number of Epochs
@@ -36,7 +38,8 @@ dataloader = torch.utils.data.DataLoader(dataset, batch_size = number
shuffle=True, num_workers=4)
network = Model(ninput=X.shape[1])
#network = Model(ninput=X.shape[1])
network = TanhDerivNet(ninput=X.shape[1])
optimizer = torch.optim.Adam(params = network.parameters(), lr = lr)
@@ -47,62 +50,58 @@ epoch_loss_in_der1 = []
epoch_loss_in_der2 = []
floss = torch.nn.functional.mse_loss
for epoch in range(EPOCHS):
network.train()
batch_loss_in_value = 0
batch_loss_in_der1 = 0
batch_loss_in_der2 = 0
for idx,(data) in enumerate(dataloader):
x,y,dy,d2y = data
y_hat = network(x)
dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net
y_hat,dy_hat = network(x)
dy_hat = dy_hat.squeeze()
#dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net
#d2y_hat = torch.stack( [ F.hessian(network, state).squeeze() for state in x ] ) # Hessian of net
loss1 = torch.nn.functional.mse_loss(y_hat,y)
loss2 = torch.nn.functional.mse_loss(dy_hat, dy)
loss3 = 0#torch.nn.functional.mse_loss(d2y_hat, d2y)
loss1 = floss(y_hat,y)
loss2 = floss(dy_hat, dy)
loss = loss1 + 10*loss2 + loss3 # Can add a Sobolev factor to give weight to each loss term.
# But it does not really change anything
loss = loss1 + loss2 # Can add a Sobolev factor to give weight to each loss term.
#loss = loss2
optimizer.zero_grad()
loss.backward()
optimizer.step()
batch_loss_in_value += loss1.item()
batch_loss_in_der1 += loss2.item()
#batch_loss_in_der2 += loss3.item()
epoch_loss_in_value.append( batch_loss_in_value / number_of_batches )
epoch_loss_in_der1.append( batch_loss_in_der1 / number_of_batches )
#epoch_loss_in_der2.append( batch_loss_in_der2 / number_of_batches )
if epoch % 10 == 0:
print(f"EPOCH : {epoch}")
print(f"Loss Values: {loss1.item()}, Loss Grad : {loss2.item()}") #, Loss Hessian : {loss3.item()}")
#print(dy_hat-dy)
plt.ion()
fig, (ax1, ax2, ax3) = plt.subplots(1,3)
fig, (ax1, ax2) = plt.subplots(1,2) # unpack two axes to match the 1x2 grid; unpacking three would fail
fig.suptitle(function_name.upper())
ax1.semilogy(range(len(epoch_loss_in_value)), epoch_loss_in_value, c = "red")
#ax2.semilogy(range(len(epoch_loss_in_der1)), epoch_loss_in_der1, c = "green")
#ax3.semilogy(range(len(epoch_loss_in_der2)), epoch_loss_in_der2, c = "orange")
ax1.set(title='Loss in Value')
ax2.set(title='Loss in Gradient')
ax3.set(title='Loss in Hessian')
ax1.set_ylabel('Loss')
ax1.set_xlabel('Epochs')
ax2.set_xlabel('Epochs')
ax3.set_xlabel('Epochs')
@@ -118,13 +117,16 @@ xplt = torch.tensor(LOAD['x'])
yplt = torch.tensor(LOAD['y'])
dyplt = torch.tensor(LOAD['dy'])
ypred = network(xplt)
ypred,dypred = network(xplt)
plt.figure()
plt.subplot(131)
plt.scatter(xplt[:,0],xplt[:,1],c=yplt[:,0])
plt.scatter(x[:,0],x[:,1],c=y[:,0].detach(),lw=1,s=200,edgecolor='k')
plt.subplot(132)
plt.scatter(xplt[:,0],xplt[:,1],c=ypred[:,0].detach())
plt.scatter(x[:,0],x[:,1],c=y[:,0].detach(),lw=1,s=200,edgecolor='k')
plt.subplot(133)
plt.scatter(xplt[:,0],xplt[:,1],c=(ypred-yplt)[:,0].detach())
plt.colorbar()
plt.scatter(xplt[:,0],xplt[:,1],c=abs(ypred-yplt)[:,0].detach())
plt.scatter(x[:,0],x[:,1],color= 'none',lw=2,s=200,edgecolor='w')
#plt.colorbar()
@@ -13,14 +13,15 @@ import matplotlib.pyplot as plt
EPOCHS = 50000 # Number of Epochs
EPOCHS = 1000 # Number of Epochs
lr = 1e-3 # Learning rate
number_of_batches = 1 # Number of batches per epoch
function_name = 'simple_bumps' # See datagen.py or function_definitions.py for other functions to use
number_of_data_points = 5
#function_name = 'simple_bumps' # See datagen.py or function_definitions.py for other functions to use
function_name = 'perm'
number_of_data_points = 20
@@ -59,13 +60,13 @@ for epoch in range(EPOCHS):
y_hat = network(x)
dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net
#d2y_hat = torch.stack( [ F.hessian(network, state).squeeze() for state in x ] ) # Hessian of net
dy_hat = torch.vstack( [ F.jacobian(network, state).squeeze() for state in x ] ) # Gradient of net, set create_graph = True
d2y_hat = torch.stack( [ F.hessian(network, state).squeeze() for state in x ] ) # Hessian of net, set create_graph = True
loss1 = torch.nn.functional.mse_loss(y_hat,y)
loss2 = torch.nn.functional.mse_loss(dy_hat, dy)
loss3 = 0#torch.nn.functional.mse_loss(d2y_hat, d2y)
loss3 = torch.nn.functional.mse_loss(d2y_hat, d2y)
loss = loss1 + 10*loss2 + loss3 # Can add a Sobolev factor to give weight to each loss term.
# But it does not really change anything
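As the new in-line comments note, these calls still use the default create_graph=False, under which F.jacobian and F.hessian return detached tensors, so loss2 and loss3 contribute no gradient to the parameters; presumably the intended calls are:

dy_hat = torch.vstack( [ F.jacobian(network, state, create_graph=True).squeeze() for state in x ] )
d2y_hat = torch.stack( [ F.hessian(network, state, create_graph=True).squeeze() for state in x ] )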
@@ -75,16 +76,16 @@ for epoch in range(EPOCHS):
batch_loss_in_value += loss1.item()
batch_loss_in_der1 += loss2.item()
#batch_loss_in_der2 += loss3.item()
batch_loss_in_der2 += loss3.item()
epoch_loss_in_value.append( batch_loss_in_value / number_of_batches )
epoch_loss_in_der1.append( batch_loss_in_der1 / number_of_batches )
#epoch_loss_in_der2.append( batch_loss_in_der2 / number_of_batches )
epoch_loss_in_der2.append( batch_loss_in_der2 / number_of_batches )
if epoch % 10 == 0:
print(f"EPOCH : {epoch}")
print(f"Loss Values: {loss1.item()}, Loss Grad : {loss2.item()}") #, Loss Hessian : {loss3.item()}")
print(f"Loss Values: {loss1.item()}, Loss Grad : {loss2.item()} , Loss Hessian : {loss3.item()}")
plt.ion()
@@ -92,8 +93,8 @@ fig, (ax1, ax2, ax3) = plt.subplots(1,3)
fig.suptitle(function_name.upper())
ax1.semilogy(range(len(epoch_loss_in_value)), epoch_loss_in_value, c = "red")
#ax2.semilogy(range(len(epoch_loss_in_der1)), epoch_loss_in_der1, c = "green")
#ax3.semilogy(range(len(epoch_loss_in_der2)), epoch_loss_in_der2, c = "orange")
ax2.semilogy(range(len(epoch_loss_in_der1)), epoch_loss_in_der1, c = "green")
ax3.semilogy(range(len(epoch_loss_in_der2)), epoch_loss_in_der2, c = "orange")
ax1.set(title='Loss in Value')
ax2.set(title='Loss in Gradient')
@@ -113,18 +114,18 @@ fig.tight_layout()
#xplt,yplt,dyplt,_ = dataGenerator(function_name, 10000)
#np.save('plt2.npy',{ "x": xplt.numpy(),"y": yplt.numpy(),"dy": dyplt.numpy()})
LOAD = np.load( 'plt2.npy',allow_pickle=True).flat[0]
xplt = torch.tensor(LOAD['x'])
yplt = torch.tensor(LOAD['y'])
dyplt = torch.tensor(LOAD['dy'])
ypred = network(xplt)
plt.figure()
plt.subplot(131)
plt.scatter(xplt[:,0],xplt[:,1],c=yplt[:,0])
plt.subplot(132)
plt.scatter(xplt[:,0],xplt[:,1],c=ypred[:,0].detach())
plt.subplot(133)
plt.scatter(xplt[:,0],xplt[:,1],c=(ypred-yplt)[:,0].detach())
plt.colorbar()
#LOAD = np.load( 'plt2.npy',allow_pickle=True).flat[0]
#xplt = torch.tensor(LOAD['x'])
#yplt = torch.tensor(LOAD['y'])
#dyplt = torch.tensor(LOAD['dy'])
#ypred = network(xplt)
#plt.figure()
#plt.subplot(131)
#plt.scatter(xplt[:,0],xplt[:,1],c=yplt[:,0])
#plt.subplot(132)
#plt.scatter(xplt[:,0],xplt[:,1],c=ypred[:,0].detach())
#plt.subplot(133)
#plt.scatter(xplt[:,0],xplt[:,1],c=(ypred-yplt)[:,0].detach())
#plt.colorbar()