From 07fc8b286c5fcafe70e50cc1fa64b780d323604c Mon Sep 17 00:00:00 2001
From: Gernot Bauer <gernotbauer85@googlemail.com>
Date: Fri, 25 Feb 2022 17:05:06 +0100
Subject: [PATCH 1/4] Add loss function to estimator, and Antoine equation for
 vapor pressure estimation

---
 src/python/utils.rs    | 51 ++++++++++++++++++++++++++++++++++++++----
 src/utils/dataset.rs   | 32 +++++++++++++++-----------
 src/utils/estimator.rs | 15 ++++++++-----
 src/utils/loss.rs      | 30 +++++++++++++++++++++++++
 src/utils/mod.rs       |  2 ++
 5 files changed, 108 insertions(+), 22 deletions(-)
 create mode 100644 src/utils/loss.rs
diff --git a/src/python/utils.rs b/src/python/utils.rs
index ea42cd4..bcc6220 100644
--- a/src/python/utils.rs
+++ b/src/python/utils.rs
@@ -11,6 +11,46 @@ impl From<FitError> for PyErr {
 #[macro_export]
 macro_rules! impl_estimator {
     ($eos:ty, $py_eos:ty) => {
+        #[pyclass(name = "Loss", unsendable)]
+        #[derive(Clone)]
+        pub struct PyLoss(Loss);
+
+        #[pymethods]
+        impl PyLoss {
+            /// Create a linear loss function.
+            /// 
+            /// `loss = s**2 * rho(f**2 / s**2)`
+            /// where `rho(z) = z` and `s = 1`.
+            ///
+            /// Returns
+            /// -------
+            /// Loss
+            #[staticmethod]
+            pub fn linear() -> Self {
+                Self(Loss::Linear)
+            }
+
+            /// Create a loss function according to Huber's method.
+            /// 
+            /// `loss = s**2 * rho(f**2 / s**2)`
+            /// where `rho(z) = z if z <= 1 else 2*z**0.5 - 1`.
+            /// `s` is the scaling factor.
+            ///
+            /// Parameters
+            /// ----------
+            /// scaling_factor : f64
+            ///     Scaling factor for Huber loss function.
+            ///
+            /// Returns
+            /// -------
+            /// Loss
+            #[staticmethod]
+            #[pyo3(text_signature = "(scaling_factor)")]
+            pub fn huber(scaling_factor: f64) -> Self {
+                Self(Loss::Huber(scaling_factor))
+            }
+        }
+
         /// A collection of experimental data that can be used to compute
         /// cost functions and make predictions using an equation of state.
         #[pyclass(name = "DataSet", unsendable)]
@@ -268,18 +308,21 @@ macro_rules! impl_estimator {
         #[pymethods]
         impl PyEstimator {
             #[new]
-            fn new(data: Vec<PyDataSet>, weights: Vec<f64>) -> Self {
+            fn new(data: Vec<PyDataSet>, loss: Vec<PyLoss>, weights: Vec<f64>) -> Self {
                 Self(Estimator::new(
                     data.iter().map(|d| d.0.clone()).collect(),
+                    loss.iter().map(|l| l.0.clone()).collect(),
                     weights,
                 ))
             }
 
             /// Compute the cost function for each ``DataSet``.
             ///
-            /// The cost function that is used depends on the
-            /// property. See the class constructors for the ``DataSet``
-            /// to learn about the cost functions of the properties.
+            /// The cost function is:
+            /// - The relative difference between prediction and target value,
+            /// - to which a loss function is applied,
+            /// - and which is weighted according to the number of datapoints,
+            /// - and the relative weights as defined in the Estimator object.
             ///
             /// Parameters
             /// ----------
diff --git a/src/utils/dataset.rs b/src/utils/dataset.rs
index 22ffcf6..2cfd17b 100644
--- a/src/utils/dataset.rs
+++ b/src/utils/dataset.rs
@@ -6,7 +6,7 @@ use crate::equation_of_state::{EquationOfState, MolarWeight};
 use crate::phase_equilibria::{PhaseEquilibrium, VLEOptions};
 use crate::state::{DensityInitialization, State};
 use crate::utils::estimator::FitError;
-use crate::EosUnit;
+use crate::{Contributions, EosUnit};
 use ndarray::{arr1, Array1};
 use quantity::{QuantityArray1, QuantityScalar};
 use std::collections::HashMap;
@@ -147,25 +147,31 @@ impl<U: EosUnit, E: EquationOfState> DataSet<U, E> for VaporPressure<U> {
     where
         QuantityScalar<U>: std::fmt::Display + std::fmt::LowerExp,
     {
-        let tc =
+        let critical_point =
             State::critical_point(eos, None, Some(self.max_temperature), VLEOptions::default())
-                .unwrap()
-                .temperature;
+                .unwrap();
+        let tc = critical_point.temperature;
+        let pc = critical_point.pressure(Contributions::Total);
+
+        let t0 = 0.9 * tc;
+        let p0 = PhaseEquilibrium::vapor_pressure(eos, t0)[0].unwrap();
+
+        let b = pc.to_reduced(p0).unwrap().ln() / (1.0 / tc - 1.0 / t0);
+        let a = pc.to_reduced(U::reference_pressure()).unwrap() - b.to_reduced(tc).unwrap();
 
         let unit = self.target.get(0);
         let mut prediction = Array1::zeros(self.datapoints) * unit;
         for i in 0..self.datapoints {
             let t = self.temperature.get(i);
-            if t < tc {
-                if let Some(pvap) =
-                    PhaseEquilibrium::vapor_pressure(eos, self.temperature.get(i))[0]
-                {
-                    prediction.try_set(i, pvap).unwrap();
-                } else {
-                    prediction.try_set(i, f64::NAN * unit).unwrap();
-                }
+            if let Some(pvap) = PhaseEquilibrium::vapor_pressure(eos, t)[0] {
+                prediction.try_set(i, pvap).unwrap();
             } else {
-                prediction.try_set(i, f64::NAN * unit).unwrap();
+                prediction
+                    .try_set(
+                        i,
+                        pc * (a + (b * (1.0 / t - 1.0 / tc)).into_value().unwrap()).exp(),
+                    )
+                    .unwrap();
             }
         }
         Ok(prediction)
diff --git a/src/utils/estimator.rs b/src/utils/estimator.rs
index b8d678b..09c8828 100644
--- a/src/utils/estimator.rs
+++ b/src/utils/estimator.rs
@@ -1,6 +1,7 @@
 //! The [`Estimator`] struct can be used to store multiple [`DataSet`]s for convenient parameter
 //! optimization.
 use super::dataset::*;
+use super::Loss;
 use crate::equation_of_state::EquationOfState;
 use crate::EosUnit;
 use ndarray::{arr1, concatenate, Array1, ArrayView1, Axis};
@@ -31,6 +32,7 @@ pub enum FitError {
 /// evaluate an equation of state versus experimental data.
 pub struct Estimator<U: EosUnit, E: EquationOfState> {
     data: Vec<Rc<dyn DataSet<U, E>>>,
+    loss: Vec<Loss>,
     weights: Vec<f64>,
 }
 
@@ -42,8 +44,8 @@ where
     ///
     /// The weights are normalized and used as multiplicator when the
     /// cost function across all `DataSet`s is evaluated.
-    pub fn new(data: Vec<Rc<dyn DataSet<U, E>>>, weights: Vec<f64>) -> Self {
-        Self { data, weights }
+    pub fn new(data: Vec<Rc<dyn DataSet<U, E>>>, loss: Vec<Loss>, weights: Vec<f64>) -> Self {
+        Self { data, loss, weights }
     }
 
     /// Add a `DataSet` and its weight.
@@ -56,14 +58,17 @@ where
     ///
     /// Each cost contains the inverse weight.
     pub fn cost(&self, eos: &Rc<E>) -> Result<Array1<f64>, FitError> {
+        let w_sum = self.weights.iter().sum::<f64>();
+        let w = arr1(&self.weights) / w_sum;
+
         let predictions: Result<Vec<Array1<f64>>, FitError> = self
             .data
             .iter()
             .enumerate()
             .map(|(i, d)| {
-                let w_sum = self.weights.iter().sum::<f64>();
-                let w = arr1(&self.weights) / w_sum;
-                Ok(d.cost(eos)? * w[i])
+                let mut res = d.relative_difference(eos).unwrap();
+                self.loss[i].apply(&mut res.view_mut());
+                Ok(res * w[i] / d.datapoints() as f64)
             })
             .collect();
         if let Ok(p) = predictions {
diff --git a/src/utils/loss.rs b/src/utils/loss.rs
new file mode 100644
index 0000000..0392906
--- /dev/null
+++ b/src/utils/loss.rs
@@ -0,0 +1,30 @@
+use ndarray::ArrayViewMut1;
+
+#[derive(Clone, Debug)]
+pub enum Loss {
+    Linear,
+    Huber(f64),
+}
+
+impl Loss {
+    pub fn huber(scaling_factor: f64) -> Self {
+        Self::Huber(scaling_factor)
+    }
+
+    pub fn apply(&self, res: &mut ArrayViewMut1<f64>) {
+        match self {
+            Self::Linear => (),
+            Self::Huber(s) => {
+                let s2 = s * s;
+                let s2_inv = 1.0 / s2;
+                res.mapv_inplace(|ri| {
+                    if ri * ri * s2_inv <= 1.0 {
+                        ri
+                    } else {
+                        2.0 * (ri * ri * s2_inv).sqrt() - 1.0
+                    }
+                })
+            }
+        }
+    }
+}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index e6ba3e1..54c54eb 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -5,5 +5,7 @@
 //! - [`Estimator`]: stores multiple `DataSet`
 mod dataset;
 mod estimator;
+mod loss;
 pub use dataset::*;
 pub use estimator::{Estimator, FitError};
+pub use loss::*;

From baae0ebe9fa6eacb4ec2896bbf1ee186e34d6238 Mon Sep 17 00:00:00 2001
From: Gernot Bauer <gernotbauer85@googlemail.com>
Date: Mon, 14 Mar 2022 17:34:33 +0100
Subject: [PATCH 2/4] Move python equation of states and modules to build_wheel

---
 example/user_defined_eos.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/example/user_defined_eos.ipynb b/example/user_defined_eos.ipynb
index 71467ae..6feecc7 100644
--- a/example/user_defined_eos.ipynb
+++ b/example/user_defined_eos.ipynb
@@ -1240,9 +1240,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "feos",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "feos"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

From aa423960876faecbdbaf6cee40fc261f4834673a Mon Sep 17 00:00:00 2001
From: Gernot Bauer <gernotbauer85@googlemail.com>
Date: Fri, 18 Mar 2022 13:49:54 +0100
Subject: [PATCH 3/4] Add activity coefficient

---
 src/state/properties.rs | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/state/properties.rs b/src/state/properties.rs
index b56cf73..e7d2c01 100644
--- a/src/state/properties.rs
+++ b/src/state/properties.rs
@@ -2,11 +2,12 @@ use super::{Derivative::*, PartialDerivative, State};
 use crate::equation_of_state::{EntropyScaling, EquationOfState, MolarWeight};
 use crate::errors::EosResult;
 use crate::EosUnit;
-use ndarray::{Array1, Array2};
+use ndarray::{arr1, Array1, Array2};
 use num_dual::DualNum;
 use quantity::{QuantityArray, QuantityArray1, QuantityArray2, QuantityScalar};
 use std::iter::FromIterator;
 use std::ops::{Add, Sub};
+use std::rc::Rc;
 
 #[derive(Clone, Copy)]
 pub(crate) enum Evaluate {
@@ -300,6 +301,24 @@ impl<U: EosUnit, E: EquationOfState> State<U, E> {
             .unwrap()
     }
 
+    /// Logarithm of the fugacity coefficient of all components treated as pure substance at mixture temperature and pressure.
+    pub fn ln_phi_pure(&self) -> EosResult<Array1<f64>> {
+        let pressure = self.pressure(Contributions::Total);
+        (0..self.eos.components())
+            .map(|i| {
+                let eos = Rc::new(self.eos.subset(&[i]));
+                let state = Self::new_npt(
+                    &eos,
+                    self.temperature,
+                    pressure,
+                    &(arr1(&[1.0]) * U::reference_moles()),
+                    crate::DensityInitialization::Liquid,
+                )?;
+                state.ln_phi()[0]
+            })
+            .collect()
+    }
+
     /// Partial derivative of the logarithm of the fugacity coefficient w.r.t. temperature: $\left(\frac{\partial\ln\varphi_i}{\partial T}\right)_{p,N_i}$
     pub fn dln_phi_dt(&self) -> QuantityArray1<U> {
         let func = |s: &Self, evaluate: Evaluate| {
@@ -471,6 +490,11 @@ impl<U: EosUnit, E: EquationOfState> State<U, E> {
             .unwrap()
     }
 
+    /// Activity coefficient $\ln \gamma_i = \ln \varphi_i(T, p, \mathbf{N}) - \ln \varphi_i(T, p)$
+    pub fn ln_symmetric_activity_coefficient(&self) -> Array1<f64> {
+        self.ln_phi() - self.ln_phi_pure()
+    }
+
     /// Helmholtz energy $A$ evaluated for each contribution of the equation of state.
     pub fn helmholtz_energy_contributions(&self) -> Vec<(String, QuantityScalar<U>)> {
         let new_state = self.derive0();

From 0f8ae0216797377d23dba2b8e68387ba2a61ffd0 Mon Sep 17 00:00:00 2001
From: Gernot Bauer <gernotbauer85@googlemail.com>
Date: Fri, 18 Mar 2022 16:44:00 +0100
Subject: [PATCH 4/4] Added functions to python interface

---
 src/python/state.rs     | 23 +++++++++++++++++++++++
 src/state/properties.rs | 15 +++++++++------
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/python/state.rs b/src/python/state.rs
index 8eed471..0db45cf 100644
--- a/src/python/state.rs
+++ b/src/python/state.rs
@@ -525,6 +525,29 @@ macro_rules! impl_state {
                 self.0.ln_phi().view().to_pyarray(py)
             }
 
+            /// Return logarithmic pure substance fugacity coefficient.
+            /// 
+            /// For each component, the hypothetical liquid fugacity coefficient
+            /// at mixture temperature and pressure is computed.
+            /// 
+            /// Returns
+            /// -------
+            /// numpy.ndarray
+            #[pyo3(text_signature = "($self)")]
+            fn ln_phi_pure<'py>(&self, py: Python<'py>) -> PyResult<&'py PyArray1<f64>> {
+                Ok(self.0.ln_phi_pure()?.view().to_pyarray(py))
+            }
+
+            /// Return logarithmic symmetric activity coefficient.
+            /// 
+            /// Returns
+            /// -------
+            /// numpy.ndarray
+            #[pyo3(text_signature = "($self)")]
+            fn ln_symmetric_activity_coefficient<'py>(&self, py: Python<'py>) -> PyResult<&'py PyArray1<f64>> {
+                Ok(self.0.ln_symmetric_activity_coefficient()?.view().to_pyarray(py))
+            }
+
             /// Return derivative of logarithmic fugacity coefficient w.r.t. temperature.
             ///
             /// Returns
diff --git a/src/state/properties.rs b/src/state/properties.rs
index e7d2c01..5adffa2 100644
--- a/src/state/properties.rs
+++ b/src/state/properties.rs
@@ -314,11 +314,19 @@ impl<U: EosUnit, E: EquationOfState> State<U, E> {
                     &(arr1(&[1.0]) * U::reference_moles()),
                     crate::DensityInitialization::Liquid,
                 )?;
-                state.ln_phi()[0]
+                Ok(state.ln_phi()[0])
             })
             .collect()
     }
 
+    /// Activity coefficient $\ln \gamma_i = \ln \varphi_i(T, p, \mathbf{N}) - \ln \varphi_i(T, p)$
+    pub fn ln_symmetric_activity_coefficient(&self) -> EosResult<Array1<f64>> {
+        match self.eos.components() {
+            1 => Ok(arr1(&[0.0])),
+            _ => Ok(self.ln_phi() - &self.ln_phi_pure()?),
+        }
+    }
+
     /// Partial derivative of the logarithm of the fugacity coefficient w.r.t. temperature: $\left(\frac{\partial\ln\varphi_i}{\partial T}\right)_{p,N_i}$
     pub fn dln_phi_dt(&self) -> QuantityArray1<U> {
         let func = |s: &Self, evaluate: Evaluate| {
@@ -490,11 +498,6 @@ impl<U: EosUnit, E: EquationOfState> State<U, E> {
             .unwrap()
     }
 
-    /// Activity coefficient $\ln \gamma_i = \ln \varphi_i(T, p, \mathbf{N}) - \ln \varphi_i(T, p)$
-    pub fn ln_symmetric_activity_coefficient(&self) -> Array1<f64> {
-        self.ln_phi() - self.ln_phi_pure()
-    }
-
     /// Helmholtz energy $A$ evaluated for each contribution of the equation of state.
     pub fn helmholtz_energy_contributions(&self) -> Vec<(String, QuantityScalar<U>)> {
         let new_state = self.derive0();