algo.py

import numpy as np
import numpy.ma as ma


def over_under(P, Q):
    """Element-wise sign: np.where(P < Q, -1, 1) adds a minus sign where P < Q."""
    return np.where(P < Q, -1, 1)


def KLD_distance(P: np.ndarray, Q: np.ndarray) -> np.ndarray:
    """
    Kullback-Leibler distance, normalized so as to be a distance complying with the triangle inequality.
    P represents the data, the observations, or a measured probability distribution.
    Q represents a theory, a model, a description or an approximation of P.
    The prefactor P - Q (instead of P) makes the regular KL divergence symmetric in P and Q
    and turns it into a distance complying with the triangle inequality.
    Each element-wise term is >= 0, and 0 only where P == Q; no summation is performed.
    The np.where avoids log2(0): where P == 0 the term evaluates to (0 - Q) * log2(Q / Q) = 0.
    """
    return (P - Q) * np.log2(np.where(P == 0, Q, P) / Q)
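
# Illustrative example (added, not in the original file): for P = [0.5, 0.5] and
# Q = [0.25, 0.75], KLD_distance(P, Q) returns the element-wise terms
# (P - Q) * log2(P / Q) ≈ [0.25, 0.146]; summing them gives a single symmetric
# distance of roughly 0.40 bits between the two distributions.
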
# def KLD_distance_overused(P: np.ndarray, Q: np.ndarray) -> np.ndarray:
# """
# Kullback-Leibler distance normalized so as to be a distance complying with the triangle inequality.
# P represents the data, the observations, or a measured probability distribution.
# Q represents a theory, a model, a description or an approximation of P.
# P - Q makes the regular KL divergence a distance complying with the triangle inequality.
# `over_under(P,Q)` adds a minus sign if P < Q
# """
# return over_under(P, Q) * (P - Q) * np.log2(np.where(P == 0, Q, P) / Q)


def KL_divergence(P, Q):
    """Kullback-Leibler divergence, element-wise (no summation).
    The np.where ensures each term is 0 in the limit P -> 0."""
    return P * np.log2(np.where(P == 0, Q, P) / Q)
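
# Example (added): for P = [0.5, 0.5] and Q = [0.25, 0.75], KL_divergence(P, Q)
# returns the terms P * log2(P / Q) ≈ [0.5, -0.292]; their sum, ≈ 0.208 bits,
# is the usual (summed) divergence D_KL(P || Q).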


def symmetrized_KLD(P, Q):
    """A symmetrized, signed Kullback-Leibler divergence.
    See https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence.
    P represents the data, the observations, or a measured probability distribution.
    Q represents a theory, a model, a description or an approximation of P.
    P - Q makes the regular KL divergence a distance complying with the triangle inequality;
    `over_under(P, Q)` then adds a minus sign where P < Q."""
    return over_under(P, Q) * KLD_distance(P, Q)
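
# Example (added): with P = [0.5, 0.5] and Q = [0.25, 0.75], symmetrized_KLD(P, Q)
# returns ≈ [0.25, -0.146]: the magnitudes are those of KLD_distance, while the
# sign from over_under is positive where P >= Q and negative where P < Q.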


def entropy(P: np.ndarray) -> np.ndarray:
    """
    Shannon entropy terms -P * log2(P), element-wise, without summation.
    Terms where P is (numerically) zero are set to 0, matching the limit P -> 0.
    """
    # np.log2 with `where` needs an explicit `out`; without it, the skipped
    # entries are left uninitialized.
    logP = np.log2(P, where=P > 1e-10, out=np.zeros_like(P, dtype=float))
    return np.where(P > 1e-10, -P * logP, 0.0)
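
# Example (added): for the uniform distribution P = [0.25, 0.25, 0.25, 0.25],
# entropy(P) returns [0.5, 0.5, 0.5, 0.5] and np.sum(entropy(P)) == 2.0 bits,
# i.e. log2(4).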


def KLD_distance_consecutive(x: np.ndarray) -> np.ndarray:
    """
    Kullback-Leibler distance terms between consecutive observations of x,
    taking each observation as P and its predecessor as Q (element-wise, no summation).
    """
    Q, P = x[:-1], x[1:]
    return (P - Q) * (np.log2(P) - np.log2(Q))


def KLD_divergence_consecutive(x: np.ndarray) -> float:
    """
    Kullback-Leibler divergence between consecutive observations of x,
    taking each observation as P and its predecessor as Q; the terms are summed to a scalar.
    """
    Q, P = x[:-1], x[1:]
    return np.sum(P * (np.log2(P) - np.log2(Q)))
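
# Example (added): for x = [0.2, 0.3, 0.5], KLD_distance_consecutive(x) compares
# (0.3 vs 0.2) and (0.5 vs 0.3) and returns roughly [0.058, 0.147], while
# KLD_divergence_consecutive(x) sums the corresponding terms P * (log2(P) - log2(Q)).
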
# def KLD_distance_overused(P: np.ndarray, Q: np.ndarray) -> np.ndarray:
# """
# Kullback-Leibler distance normalized so as to be a distance complying with the triangle inequality.
# P represents the data, the observations, or a measured probability distribution.
# Q represents a theory, a model, a description or an approximation of P.
# P - Q makes the regular KL divergence a distance complying with the triangle inequality.
# np.where(P < Q, -1, 1) adds a minus sign if P < Q
# """
# return (P - Q) * np.log2(np.where(P == 0, Q, P) / Q)
# arr = np.subtract(P, Q)
# arr2 = np.divide(P, Q)
# np.log2(arr2, out=arr2)
# np.multiply(arr, arr2, out=arr)
# np.multiply(np.where(P < Q, -1, 1), arr, out=arr)
# return arr
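

# Minimal self-check (added; not part of the original file). It exercises the
# functions above on an arbitrary example pair of distributions P and Q.
if __name__ == "__main__":
    P = np.array([0.5, 0.3, 0.2])
    Q = np.array([0.25, 0.25, 0.5])

    print("KL divergence terms:        ", KL_divergence(P, Q))
    print("D_KL(P || Q) =              ", np.sum(KL_divergence(P, Q)))
    print("KLD distance terms:         ", KLD_distance(P, Q))
    print("signed (symmetrized) terms: ", symmetrized_KLD(P, Q))
    print("entropy terms:              ", entropy(P))
    print("H(P) =                      ", np.sum(entropy(P)))
    print("consecutive distance terms: ", KLD_distance_consecutive(P))
    print("consecutive divergence:     ", KLD_divergence_consecutive(P))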