import openturns as ot
import numpy as np
import openturns.viewer as otv
import pylab as pl
import openturns.viewer as viewer
from matplotlib import pylab as plt


# Load the data: a semicolon-separated text file read into an OpenTURNS Sample.
# The class-qualified static method is used (same form as ot.FittingTest.Kolmogorov
# further down) instead of the flattened alias `ot.Sample_ImportFromTextFile`.
sample_HR = ot.Sample.ImportFromTextFile('Humidite-relative-Bordeaux-2018.csv', ';')


# Taille de l'échantillon
sample_HR.getSize()

2779


# Histogramme de la distribution
histo_HR = ot.HistogramFactory().build(sample_HR)
histo_HR.setDescription(["Histogramme HR"])
histo_HR.drawPDF()


# Other summary statistics of the sample.
# Each compute* call returns a vector-like result; [0] keeps its first component.
m = sample_HR.computeMean()[0]
v = sample_HR.computeVariance()[0]
q25 = sample_HR.computeQuantile(0.25)[0]
q75 = sample_HR.computeQuantile(0.75)[0]
med = sample_HR.computeMedian()[0]

# Print the empirical mean, variance, quartiles and median.
print("Moyenne HR de l'échantillon : ", m)
print("Variance HR de l'échantillon : ", v)
print("Quantile à 25% de l'échantillon : ", q25)
print("Quantile à 75% de l'échantillon : ", q75)
print("Médiane de l'échantillon : ", med)

Moyenne HR de l'échantillon :  77.22058294350485
Variance HR de l'échantillon :  351.3001377450075
Quantile à 25% de l'échantillon :  64.0
Quantile à 75% de l'échantillon :  94.0
Médiane de l'échantillon :  83.0


HRBeta = ot.BetaFactory().build(sample_HR)
HRBeta


# Compute the mean and the variance of the fitted HRBeta distribution from its
# estimated parameters.

# Parameter vector unpacked as (alpha, beta, a, b).
[alpha, beta, a, b] = HRBeta.getParameter()

# Mean: a + (b - a) * alpha / (alpha + beta)
m_chap = a + (b-a)*(alpha/(alpha+beta))
# Variance: (b - a)^2 * alpha * beta / ((alpha + beta)^2 * (alpha + beta + 1))
v_chap = (b-a)**2 * (alpha*beta/((alpha+beta)**2 * (alpha+beta+1)))

print("Moyenne de la loi HRBeta : ", m_chap)
print("Variance de la loi HRBeta : ", v_chap)

Moyenne de la loi HRBeta :  77.22058294350487
Variance de la loi HRBeta :  351.3001377450076


# Fonction d'affichage pour la superposition de PDF 

def affichagePDF_HR(histo, loi, loi_ks = None, loi_ML = None, loi_ML2 = None):
    """
    Overlay a histogram and up to four continuous densities on one graph.

    Parameters
    ----------
    histo : histogram distribution, drawn first.
    loi : distribution fitted from a sample, always drawn.
    loi_ks : optional kernel-smoothing estimate of the distribution.
    loi_ML : optional first maximum-likelihood fitted distribution.
    loi_ML2 : optional second maximum-likelihood fitted distribution.

    Returns
    -------
    view : the viewer.View built from the assembled graph.

    Notes
    -----
    The description of every object passed in is overwritten, because it is
    what labels the corresponding curve on the graph.
    NOTE(review): the label of `loi` is hard-coded to 'Loi ajustée HRBeta'
    even when another fitted distribution is passed.
    """
    histo.setDescription(['Histogramme'])
    graph = histo.drawPDF()
    loi.setDescription(['Loi ajustée HRBeta'])
    graph.add(loi.drawPDF())
    graph.setXTitle("x")
    graph.setYTitle("PDF")
    graph.setTitle(
        "Ajustement de la loi à partir d'un échantillon donné")

    # Optional curves are added in a fixed order; colours below are assigned
    # by position, so a skipped curve shifts the colours of the following ones.
    optional_curves = (
        (loi_ks, 'Loi KernelSmoothing'),
        (loi_ML, 'Loi ajustée ML'),
        (loi_ML2, 'Loi ajustée ML2'),
    )
    for curve, label in optional_curves:
        # Identity test: `!= None` would go through the object's own
        # comparison operator instead of simply checking for absence.
        if curve is not None:
            curve.setDescription([label])
            graph.add(curve.drawPDF())

    graph.setColors(["blue", "red", "green", "yellow", "brown"])
    graph.setLegendPosition('topleft')
    view = viewer.View(graph)

    return view


affichagePDF_HR(histo_HR, HRBeta)

<openturns.viewer.View at 0x7faed536f880>


# QQ-plot of the sample against the fitted HRBeta distribution
# (class-qualified static method instead of the flattened alias).
ot.VisualTest.DrawQQplot(sample_HR, HRBeta)


# Lilliefors test: takes the sample and a Beta factory, returns the fitted
# distribution together with the test result.
beta = ot.BetaFactory()
dist, testLilliefors_result = ot.FittingTest.Lilliefors(sample_HR, beta)
testLilliefors_result


dist == HRBeta

True


testLilliefors_result.getPValue()

0.0


testLilliefors_result.getBinaryQualityMeasure()

False


fittedRes = ot.BetaFactory().buildEstimator(sample_HR)
fittedRes


dist_param = fittedRes.getParameterDistribution()


# PDF du premier paramètre de forme, alpha
dist_param.drawMarginal1DPDF(0, 1.0, 3.0, 1000)


# PDF du second paramètre de forme, beta
dist_param.drawMarginal1DPDF(1, 0.25, 1.25, 1000)


# PDF du minimum
dist_param.drawMarginal1DPDF(2, 15.0, 30.0, 1000)


# PDF du maximum
dist_param.drawMarginal1DPDF(3, 99.0, 101.0, 1000)


confInt = dist_param.computeBilateralConfidenceInterval(0.95)
confInt


longueur_int = confInt.getUpperBound() - confInt.getLowerBound()


longueur_int[0]

0.4394646966407785


longueur_int[1]

0.12502081563115963


longueur_int[2]

6.699161646883947


longueur_int[3]

0.00240803797524336


# Build a collection of continuous univariate distribution factories
# (class-qualified static method instead of the flattened alias).
distcoll = ot.DistributionFactory.GetContinuousUniVariateFactories()


# Taille de la collection
distcoll.getSize()

31


# Model selection with the BIC criterion over the whole factory collection;
# returns the selected model and its BIC value.
best_model, best_BIC = ot.FittingTest.BestModelBIC(sample_HR, distcoll)

WRN - Warning! Impossible to use factory BurrFactory. Reason=InvalidArgumentException : Error: cannot estimate the k parameter
WRN - Warning! Impossible to use factory DirichletFactory. Reason=InvalidArgumentException : Error: the sample contains points not in the unit simplex: x=class=Point name=Unnamed dimension=1 values=[82]
WRN - Warning! Impossible to use factory FisherSnedecorFactory. Reason=InvalidArgumentException : Error d1 of a FisherSnedecor distribution must be positive
WRN - Warning! Impossible to use factory MeixnerDistributionFactory. Reason=InvalidArgumentException : Error: cannot estimate a MeixnerDistribution distribution if the sample kurtosis=2.50025 is not greater than 2*skewness^2+3=4.14977
WRN - Warning! Impossible to use factory TriangularFactory. Reason=InvalidArgumentException : M MUST be enclosed between a and b
WRN - TNC went to an abnormal point=[nan,nan,nan]
WRN - Switch to finite difference to compute the gradient at point=[-713796,980845,713992]
WRN - Switch to finite difference to compute the gradient at point=[922891,-3254.26,-922810]
WRN - Switch to finite difference to compute the gradient at point=[1.48329e+06,-333413,-1.48324e+06]
WRN - TNC went to an abnormal point=[nan,nan,nan]


# Meilleur modèle (selon critère BIC)
best_model


best_model == HRBeta

True


ks_distHR = ot.KernelSmoothing().build(sample_HR)
ks_distHR.setDescription(["HR par KS"])


ks_distHR.drawPDF()


# Second kernel-smoothing estimate, this time with a bounding option set
# before building the distribution from the sample.
ks = ot.KernelSmoothing()
# NOTE(review): 2 is a magic number; presumably it maps to one of the
# KernelSmoothing bounding-option constants — confirm and use the named constant.
ks.setBoundingOption(2)
ks_distHR2 = ks.build(sample_HR)
ks_distHR2.setDescription(['HR par KS2'])
ks_distHR2.drawPDF()


# Réalisation du test de Kolmogorov-Smirnov
testKolmogorov_result = ot.FittingTest.Kolmogorov(sample_HR, ks_distHR2)


# P-Valeur
testKolmogorov_result.getPValue()

0.0077953227664465195


# Résultat
testKolmogorov_result.getBinaryQualityMeasure()

False


affichagePDF_HR(histo_HR, HRBeta, ks_distHR2)

<openturns.viewer.View at 0x7faed90768e0>


# Estimation des paramètres

def Beta_MaxLikelihood(Sample, epsilon):
    """
    Fit a Beta distribution to a sample by maximum likelihood, with the
    support bounds a and b fixed from the sample extremes.

    Parameters :
        - Sample : the available sample (its first component is used to
          derive the bounds).
        - epsilon : relative padding applied to the sample minimum and
          maximum to obtain a and b. It has no effect on an extreme that is
          exactly 0.

    Returns :
        - the Beta distribution built by the maximum-likelihood factory,
          where only alpha and beta are estimated.
    """

    # Minimum and maximum of the sample.
    sample_min = Sample.getMin()[0]
    sample_max = Sample.getMax()[0]

    # Pad the observed range outward by a relative amount epsilon.
    # The factor depends on the sign of the extreme: multiplying a negative
    # minimum by (1 - epsilon) would move the bound *inside* the sample range.
    # For non-negative extremes this is exactly min*(1-eps) and max*(1+eps).
    if sample_min >= 0.0:
        a = sample_min * (1 - epsilon)
    else:
        a = sample_min * (1 + epsilon)
    if sample_max >= 0.0:
        b = sample_max * (1 + epsilon)
    else:
        b = sample_max * (1 - epsilon)

    # Alpha and beta: a and b are declared as known parameters at indices 2
    # and 3 of the Beta parameter vector, so only the first two are optimized.
    factory = ot.MaximumLikelihoodFactory(ot.Beta())
    factory.setKnownParameter([a, b], [2, 3])
    bounds_lower = [ot.SpecFunc.MinScalar] * 2
    bounds_upper = [1.0] * 2  # Ignored: the upper bounds are flagged as not finite below
    interval = ot.Interval(bounds_lower, bounds_upper, [True] * 2, [False] * 2)
    factory.setOptimizationBounds(interval)

    fitted_distribution = factory.build(Sample)

    return fitted_distribution


HRBeta_ML14 = Beta_MaxLikelihood(sample_HR, 1e-14)
HRBeta_ML14


HRBeta_ML15 = Beta_MaxLikelihood(sample_HR, 1e-15)
HRBeta_ML15


HRBeta_ML14.drawPDF()


# QQ-plot of the sample against the maximum-likelihood fit built with epsilon = 1e-14
# (class-qualified static method instead of the flattened alias).
ot.VisualTest.DrawQQplot(sample_HR, HRBeta_ML14)


HRBeta_ML15.drawPDF()


# QQ-plot of the sample against the maximum-likelihood fit built with epsilon = 1e-15
# (class-qualified static method instead of the flattened alias).
ot.VisualTest.DrawQQplot(sample_HR, HRBeta_ML15)


affichagePDF_HR(histo_HR, HRBeta_ML15)

<openturns.viewer.View at 0x7faed9a9a8b0>


def log_likelihood(theta):
    """
    Log-likelihood of a four-parameter Beta model on the global `sample_HR`.

    Parameters :
        - theta : a pair (alpha, beta) of shape parameters.

    Returns :
        - a one-element list holding
          (alpha-1)*sum(log(x-a)) + (beta-1)*sum(log(b-x))
          - n*(alpha+beta-1)*log(b-a) - n*log(B(alpha, beta)),
          where a and b are the sample minimum and maximum padded by a
          relative epsilon of 1e-15, and n is the sample size.
    """
    alpha, beta = theta
    epsilon = 1e-15
    n = sample_HR.getSize()
    # Bounds of the support, padded slightly beyond the sample extremes.
    a_chap = sample_HR.getMin() * (1-epsilon)
    b_chap = sample_HR.getMax() * (1+epsilon)

    # Data-dependent sums and the normalisation terms of the Beta density.
    sum_log_lower = np.sum(np.log(sample_HR-a_chap))
    sum_log_upper = np.sum(np.log((-1)*sample_HR+b_chap))
    log_range = np.log(b_chap-a_chap)[0]
    # Class-qualified static method instead of the flattened `ot.SpecFunc_Beta`.
    log_beta_function = np.log(ot.SpecFunc.Beta(alpha, beta))

    return [(alpha-1)*sum_log_lower + (beta-1)*sum_log_upper - n*(alpha+beta-1)*log_range - n*log_beta_function]
    
log_likelihood_Py = ot.PythonFunction(2, 1, log_likelihood)
#log_likelihood_Py = ot.MemoizeFunction(log_likelihood_Py)


X = [2.0, 2.0]
Y = log_likelihood_Py(X)
print(Y)

[-14346.1]


HRBeta_ML15.getParameter()[0]

1.4514076991512714


log_likelihood_Py([HRBeta_ML15.getParameter()[0], HRBeta_ML15.getParameter()[1]])


# Draw the log-likelihood over alpha in [1.0, 2.0] and beta in [0.3, 0.8],
# with 100 points along each axis.
graph_likelihood = log_likelihood_Py.draw([1.0, 0.3], [2.0, 0.8], [100]*2)
graph_likelihood.setXTitle(r"$\alpha$")
graph_likelihood.setYTitle(r"$\beta$")
# Overlay a single point: the (alpha, beta) pair estimated for HRBeta_ML15.
cloud = ot.Cloud(ot.Sample(1,[HRBeta_ML15.getParameter()[0], HRBeta_ML15.getParameter()[1]]))
graph_likelihood.add(cloud)
view = viewer.View(graph_likelihood)


# Estimation theta_chap
FactML = ot.MaximumLikelihoodFactory(ot.Beta())
bestFittML = FactML.buildEstimator(sample_HR)
bestFittML


def Beta_MaxLikelihoodEstimParam(Sample, epsilon):
    """
    Maximum-likelihood estimation of a Beta distribution from a sample,
    returning the full estimation result rather than only the distribution.

    Parameters :
        - Sample : the available sample (its first component is used to
          derive the bounds).
        - epsilon : relative padding applied to the sample minimum and
          maximum to obtain the fixed bounds a and b. It has no effect on an
          extreme that is exactly 0.

    Returns :
        - the result object of `buildEstimator`; the fitted distribution is
          obtained with `getDistribution()` and the distribution of the
          parameter estimator with `getParameterDistribution()`.
    """

    # Minimum and maximum of the sample.
    sample_min = Sample.getMin()[0]
    sample_max = Sample.getMax()[0]

    # Pad the observed range outward by a relative amount epsilon.
    # The factor depends on the sign of the extreme: multiplying a negative
    # minimum by (1 - epsilon) would move the bound *inside* the sample range.
    # For non-negative extremes this is exactly min*(1-eps) and max*(1+eps).
    if sample_min >= 0.0:
        a = sample_min * (1 - epsilon)
    else:
        a = sample_min * (1 + epsilon)
    if sample_max >= 0.0:
        b = sample_max * (1 + epsilon)
    else:
        b = sample_max * (1 - epsilon)

    # Alpha and beta: a and b are declared as known parameters at indices 2
    # and 3 of the Beta parameter vector, so only the first two are optimized.
    factory = ot.MaximumLikelihoodFactory(ot.Beta())
    factory.setKnownParameter([a, b], [2, 3])
    bounds_lower = [ot.SpecFunc.MinScalar] * 2
    bounds_upper = [1.0] * 2  # Ignored: the upper bounds are flagged as not finite below
    interval = ot.Interval(bounds_lower, bounds_upper, [True] * 2, [False] * 2)
    factory.setOptimizationBounds(interval)

    dist_theta = factory.buildEstimator(Sample)

    return dist_theta


# Distribution des paramètres
bestFittMLE = Beta_MaxLikelihoodEstimParam(sample_HR, 1e-14)
dist_theta = bestFittMLE.getParameterDistribution()


alpha_beta_dist = dist_theta.getMarginal([0,1])


# Iso-values of the PDF of the (alpha, beta) estimator distribution.
# The drawing window is centred on the estimator mean and spans a few standard
# deviations on each side. A fixed wide window such as [-3, 4]^2 leaves the
# density negligible on almost the whole grid, and matplotlib then reports
# "No contour levels were found within the data range".
centre_ab = alpha_beta_dist.getMean()
spread_ab = alpha_beta_dist.getStandardDeviation()
n_sigma = 5.0
window_lower = [centre_ab[i] - n_sigma * spread_ab[i] for i in range(2)]
window_upper = [centre_ab[i] + n_sigma * spread_ab[i] for i in range(2)]
graph_likelihood2 = alpha_beta_dist.drawMarginal2DPDF(0, 1, window_lower, window_upper, [700] * 2)
graph_likelihood2.setXTitle(r"$\alpha$")
graph_likelihood2.setYTitle(r"$\beta$")
# Overlay a single point: the (alpha, beta) pair estimated for HRBeta_ML15.
cloud = ot.Cloud(ot.Sample(1,[HRBeta_ML15.getParameter()[0], HRBeta_ML15.getParameter()[1]]))
graph_likelihood2.add(cloud)
graph_likelihood2.setLegendPosition('topleft')
view = viewer.View(graph_likelihood2)

/Users/martinduguey/opt/anaconda3/lib/python3.9/site-packages/openturns/viewer.py:440: UserWarning: No contour levels were found within the data range.
  contourset = self._ax[0].contour(X, Y, Z, **contour_kw)


ot.ResourceMap.SetAsUnsignedInteger(
    "Distribution-MinimumVolumeLevelSetSamplingSize", 1000)


# 95 % confidence region: minimum-volume level set of the (alpha, beta)
# estimator distribution, returned together with its threshold value.
# Note: `alpha` is rebound here to the probability level 0.95.
alpha = 0.95
lvlset, thresh = alpha_beta_dist.computeMinimumVolumeLevelSetWithThreshold(alpha)
lvlset


def drawLevelSetContour2D(distribution, numberOfPointsInXAxis, alpha, threshold, sampleSize=500):
    '''
    Draw a sample of a 2-d distribution together with one iso-PDF contour.

    A sample of size `sampleSize` is generated from `distribution`; its
    component-wise minimum and maximum define the drawing window. The PDF is
    evaluated on a regular grid over that window and the contour line where
    the PDF equals `threshold` is drawn in black, labelled with `alpha`
    expressed as a percentage. `alpha` is used only for the label and the
    title; the threshold itself must be supplied by the caller.

    Returns the resulting graph (contour plus sample cloud).
    '''
    points = distribution.getSample(sampleSize)

    # Bounding box of the generated sample, one entry per component.
    box_lower = points.getMin()
    box_upper = points.getMax()
    x_low, y_low = box_lower[0], box_lower[1]
    x_high, y_high = box_upper[0], box_upper[1]

    # 1-d grids along each axis, and the matching 2-d grid for the PDF values.
    levels_per_axis = [numberOfPointsInXAxis]
    x_axis = ot.Box(levels_per_axis, ot.Interval([x_low], [x_high])).generate()
    y_axis = ot.Box(levels_per_axis, ot.Interval([y_low], [y_high])).generate()
    grid = ot.Box(levels_per_axis * 2,
                  ot.Interval([x_low, y_low], [x_high, y_high])).generate()
    pdf_on_grid = distribution.computePDF(grid)

    percent = 100*alpha
    iso_line = ot.Contour(x_axis, y_axis, pdf_on_grid,
                          [threshold], ["%.2f%%" % (percent)])
    iso_line.setColor('black')

    graph = ot.Graph('', 'X1', 'X2', True, 'topright')
    graph.setTitle("%.2f%% of the distribution, sample size = %d" %
                   (percent, sampleSize))
    graph.add(iso_line)
    graph.add(ot.Cloud(points))
    return graph


numberOfPointsInXAxis = 50
graph = drawLevelSetContour2D(alpha_beta_dist, numberOfPointsInXAxis, alpha, thresh)
view = viewer.View(graph)
plt.show()


HRBeta_MLE = bestFittMLE.getDistribution()
affichagePDF_HR(histo_HR, HRBeta, None, HRBeta_ML15, HRBeta_MLE)

<openturns.viewer.View at 0x7faed9acb9a0>

Paramètre	$\alpha$	$\beta$	$a$	$b$
méthode des moments (BetaFactory)	1.88684	0.765074	20.9716	100.028
modèle BIC	1.88684	0.765074	20.9716	100.028
maximum de vraisemblance 1	1.45141	0.552986	21	100
maximum de vraisemblance 2	2.0	2.0	21	100

Exercice HR Bordeaux¶

1. Description des données¶

2. Ajustement de la loi Beta¶

3. Distribution de l'estimation des paramètres¶

4. Recherche de la meilleure loi ajustée au sens du critère BIC¶

5. Méthode d'estimation à noyau¶

6. Synthèse¶

7. Estimation d'une loi Beta par la méthode du maximum de vraisemblance¶

8. Dessin de la log-vraisemblance¶

9. Estimation de la distribution des paramètres¶

10. Synthèse¶