scipy#

稀疏矩阵#

import numpy as np
from scipy import sparse

# A COO (coordinate-format) sparse matrix stores only its non-zero entries,
# each one described by a (row index, column index, value) triplet.
entries = [
    (0, 2, 3),  # value 3 sits at row 0, column 2
    (2, 0, 4),  # value 4 sits at row 2, column 0
    (2, 3, 5),  # value 5 sits at row 2, column 3
    (4, 2, 6),  # value 6 sits at row 4, column 2
]

# Transpose the triplets into three parallel arrays:
# row indices, column indices, and the values stored at those positions.
row, col, data = (np.array(axis) for axis in zip(*entries))

# Assemble the 5x5 sparse matrix from the coordinate arrays.
matrix = sparse.coo_matrix((data, (row, col)), shape=(5, 5))

# Show the dense 2-D equivalent first, then the stored coordinate arrays,
# one per line.
print(matrix.toarray())
print(matrix.row, matrix.col, matrix.data, sep="\n")
[[0 0 3 0 0]
 [0 0 0 0 0]
 [4 0 0 5 0]
 [0 0 0 0 0]
 [0 0 6 0 0]]
[0 2 2 4]
[2 0 3 2]
[3 4 5 6]

概率分布#

以 norm(正态分布)为例

from scipy.stats import norm
# help
print(norm.__doc__)
A normal continuous random variable.

    The location (``loc``) keyword specifies the mean.
    The scale (``scale``) keyword specifies the standard deviation.

    As an instance of the `rv_continuous` class, `norm` object inherits from it
    a collection of generic methods (see below for the full list),
    and completes them with details specific for this particular distribution.
    
    Methods
    -------
    rvs(loc=0, scale=1, size=1, random_state=None)
        Random variates.
    pdf(x, loc=0, scale=1)
        Probability density function.
    logpdf(x, loc=0, scale=1)
        Log of the probability density function.
    cdf(x, loc=0, scale=1)
        Cumulative distribution function.
    logcdf(x, loc=0, scale=1)
        Log of the cumulative distribution function.
    sf(x, loc=0, scale=1)
        Survival function  (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
    logsf(x, loc=0, scale=1)
        Log of the survival function.
    ppf(q, loc=0, scale=1)
        Percent point function (inverse of ``cdf`` --- percentiles).
    isf(q, loc=0, scale=1)
        Inverse survival function (inverse of ``sf``).
    moment(order, loc=0, scale=1)
        Non-central moment of the specified order.
    stats(loc=0, scale=1, moments='mv')
        Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
    entropy(loc=0, scale=1)
        (Differential) entropy of the RV.
    fit(data)
        Parameter estimates for generic data.
        See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
        keyword arguments.
    expect(func, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
        Expected value of a function (of one argument) with respect to the distribution.
    median(loc=0, scale=1)
        Median of the distribution.
    mean(loc=0, scale=1)
        Mean of the distribution.
    var(loc=0, scale=1)
        Variance of the distribution.
    std(loc=0, scale=1)
        Standard deviation of the distribution.
    interval(confidence, loc=0, scale=1)
        Confidence interval with equal areas around the median.

    Notes
    -----
    The probability density function for `norm` is:

    .. math::

        f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}

    for a real number :math:`x`.

    The probability density above is defined in the "standardized" form. To shift
    and/or scale the distribution use the ``loc`` and ``scale`` parameters.
    Specifically, ``norm.pdf(x, loc, scale)`` is identically
    equivalent to ``norm.pdf(y) / scale`` with
    ``y = (x - loc) / scale``. Note that shifting the location of a distribution
    does not make it a "noncentral" distribution; noncentral generalizations of
    some distributions are available in separate classes.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import norm
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1, 1)
    
    Calculate the first four moments:
    
    
    >>> mean, var, skew, kurt = norm.stats(moments='mvsk')
    
    Display the probability density function (``pdf``):
    
    >>> x = np.linspace(norm.ppf(0.01),
    ...                 norm.ppf(0.99), 100)
    >>> ax.plot(x, norm.pdf(x),
    ...        'r-', lw=5, alpha=0.6, label='norm pdf')
    
    Alternatively, the distribution object can be called (as a function)
    to fix the shape, location and scale parameters. This returns a "frozen"
    RV object holding the given parameters fixed.
    
    Freeze the distribution and display the frozen ``pdf``:
    
    >>> rv = norm()
    >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    
    Check accuracy of ``cdf`` and ``ppf``:
    
    >>> vals = norm.ppf([0.001, 0.5, 0.999])
    >>> np.allclose([0.001, 0.5, 0.999], norm.cdf(vals))
    True
    
    Generate random numbers:
    
    >>> r = norm.rvs(size=1000)
    
    And compare the histogram:
    
    >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
    >>> ax.set_xlim([x[0], x[-1]])
    >>> ax.legend(loc='best', frameon=False)
    >>> plt.show()
    

    
# 属性 方法
rv = norm()
print(dir(rv))
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'a', 'args', 'b', 'cdf', 'dist', 'entropy', 'expect', 'interval', 'isf', 'kwds', 'logcdf', 'logpdf', 'logsf', 'mean', 'median', 'moment', 'pdf', 'ppf', 'random_state', 'rvs', 'sf', 'stats', 'std', 'support', 'var']

主要方法#

  • rvs:生成服从该分布的随机样本(随机采样)

  • pdf:概率密度函数。连续分布在单点处的概率为 0,只能讨论区间上的概率,即 pdf 在该区间上的积分;pdf 的值表示该点处的概率密度。

  • cdf:累积分布函数

  • sf:生存函数 (1-CDF)

  • ppf:百分点函数(CDF 的逆函数)

  • isf:逆生存函数(SF 的逆函数)

  • stats:返回均值、方差、(Fisher 的)偏度和/或(Fisher 的)峰度,由 moments 参数选择

  • moment:分布的非中心矩

norm.cdf(0)
np.float64(0.5)
norm.cdf([-1, 0, 1]) # 多个点的cdf
array([0.15865525, 0.5       , 0.84134475])
norm.mean()
np.float64(0.0)
norm.std()
np.float64(1.0)
norm.var()
np.float64(1.0)
norm.ppf(0.5) # 寻找对应分位点
np.float64(0.0)
norm.pdf(0)
np.float64(0.3989422804014327)

生成分布随机变量#

from numpy.random import default_rng
rng = default_rng()
norm.rvs(size = 5, random_state= rng)
array([ 1.54168013,  0.63091442,  0.63829445, -0.40356971, -1.61088559])

移位和缩放#

对正态分布而言,loc 即均值,scale 即标准差。stats 默认返回(均值, 方差),因此下面 scale=4 对应的方差是 16。

norm.stats(loc=3, scale=4)
(np.float64(3.0), np.float64(16.0))

形状参数#

有些分布(例如 gamma 分布)除 loc 和 scale 之外还需要形状参数,如下面的 a。

from scipy.stats import gamma
gamma(a=1, scale=2.)
<scipy.stats._distn_infrastructure.rv_continuous_frozen at 0x1f2f3719d00>

zscore 标准分数(常用于正态分布)#

衡量一个数值在总体中的相对位置,即它偏离均值多少个标准差。

$$ z = \frac{x - \mu}{\sigma} $$

假设一个班的数学成绩均值是 70,标准差是 10。你考了 85 分:$z = \frac{85 - 70}{10} = 1.5$,表示你的成绩比平均水平高 1.5 个标准差。