博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
曲奇饼问题
阅读量:747 次
发布时间:2019-03-22

本文共 42745 字,大约阅读时间需要 142 分钟。

  案例1,假设有两碗曲奇饼,碗1包含30个香草曲奇饼和10个巧克力曲奇饼,碗2有香草曲奇饼10个和巧克力曲奇饼10个。碗1和碗2都放在黑箱中,问从黑箱里的碗1中取到香草曲奇饼的概率?

  解:根据贝叶斯定理:

P(A|B)=P(A)P(B|A)P(B)
  设
B1
=”曲奇饼属于碗1”,
V
=”曲奇饼是香草曲奇饼”
  则从碗1中取到香草曲奇饼的概率为:
P(B1|V)=P(B1)P(V|B1)P(V)
  又
P(B1)
= 1/2 = 0.5
  
P(V|B1)
=30/(30+10) = 3/4 = 0.75
  
P(V)
=(30+20)/(40+40) = 5/8 = 0.625
  即
P(B1|V)=P(B1)P(V|B1)P(V)=0.50.750.625=0.6
  使用Python语言实求条件概率。
  1. 库文件 thinkbayes.py  

"""This file contains class definitions for:Hist: represents a histogram (map from values to integer frequencies).Pmf: represents a probability mass function (map from values to probs)._DictWrapper: private parent class for Hist and Pmf.Cdf: represents a discrete cumulative distribution functionPdf: represents a continuous probability density function"""import bisectimport copyimport loggingimport mathimport numpyimport randomimport scipy.statsfrom scipy.special import erf, erfinv, gammalnROOT2 = math.sqrt(2)def RandomSeed(x):    """Initialize the random and numpy.random generators.    x: int seed    """    random.seed(x)    numpy.random.seed(x)def Odds(p):    """Computes odds for a given probability.    Example: p=0.75 means 75 for and 25 against, or 3:1 odds in favor.    Note: when p=1, the formula for odds divides by zero, which is    normally undefined.  But I think it is reasonable to define Odds(1)    to be infinity, so that's what this function does.    p: float 0-1    Returns: float odds    """    if p == 1:        return float('inf')    return p / (1 - p)def Probability(o):    """Computes the probability corresponding to given odds.    Example: o=2 means 2:1 odds in favor, or 2/3 probability    o: float odds, strictly positive    Returns: float probability    """    return o / (o + 1)def Probability2(yes, no):    """Computes the probability corresponding to given odds.    Example: yes=2, no=1 means 2:1 odds in favor, or 2/3 probability.    yes, no: int or float odds in favor    """    return float(yes) / (yes + no)class Interpolator(object):    """Represents a mapping between sorted sequences; performs linear interp.    Attributes:        xs: sorted list        ys: sorted list    """    def __init__(self, xs, ys):        self.xs = xs        self.ys = ys    def Lookup(self, x):        """Looks up x and returns the corresponding value of y."""        return self._Bisect(x, self.xs, self.ys)    def Reverse(self, y):        """Looks up y and returns the corresponding value of x."""        return self._Bisect(y, self.ys, self.xs)    def _Bisect(self, x, xs, ys):        """Helper function."""        if x <= xs[0]:            return ys[0]        if x >= xs[-1]:            return ys[-1]        i = bisect.bisect(xs, x)        frac = 1.0 * (x - xs[i - 1]) / (xs[i] - xs[i - 1])        y = ys[i - 1] + frac * 1.0 * (ys[i] - ys[i - 1])        return yclass _DictWrapper(object):    """An object that contains a dictionary."""    def __init__(self, values=None, name=''):        """Initializes the distribution.        hypos: sequence of hypotheses        """        self.name = name        self.d = {}        # flag whether the distribution is under a log transform        self.log = False        if values is None:            return        init_methods = [            self.InitPmf,            self.InitMapping,            self.InitSequence,            self.InitFailure,            ]        for method in init_methods:            try:                method(values)                break            except AttributeError:                continue        if len(self) > 0:            self.Normalize()    def InitSequence(self, values):        """Initializes with a sequence of equally-likely values.        values: sequence of values        """        for value in values:            self.Set(value, 1)    def InitMapping(self, values):        """Initializes with a map from value to probability.        values: map from value to probability        """        for value, prob in values.iteritems():            self.Set(value, prob)    def InitPmf(self, values):        """Initializes with a Pmf.        values: Pmf object        """        for value, prob in values.Items():            self.Set(value, prob)    def InitFailure(self, values):        """Raises an error."""        raise ValueError('None of the initialization methods worked.')    def __len__(self):        return len(self.d)    def __iter__(self):        return iter(self.d)    def iterkeys(self):        return iter(self.d)    def __contains__(self, value):        return value in self.d    def Copy(self, name=None):        """Returns a copy.        Make a shallow copy of d.  If you want a deep copy of d,        use copy.deepcopy on the whole object.        Args:            name: string name for the new Hist        """        new = copy.copy(self)        new.d = copy.copy(self.d)        new.name = name if name is not None else self.name        return new    def Scale(self, factor):        """Multiplies the values by a factor.        factor: what to multiply by        Returns: new object        """        new = self.Copy()        new.d.clear()        for val, prob in self.Items():            new.Set(val * factor, prob)        return new    def Log(self, m=None):        """Log transforms the probabilities.        Removes values with probability 0.        Normalizes so that the largest logprob is 0.        """        if self.log:            raise ValueError("Pmf/Hist already under a log transform")        self.log = True        if m is None:            m = self.MaxLike()        for x, p in self.d.iteritems():            if p:                self.Set(x, math.log(p / m))            else:                self.Remove(x)    def Exp(self, m=None):        """Exponentiates the probabilities.        m: how much to shift the ps before exponentiating        If m is None, normalizes so that the largest prob is 1.        """        if not self.log:            raise ValueError("Pmf/Hist not under a log transform")        self.log = False        if m is None:            m = self.MaxLike()        for x, p in self.d.iteritems():            self.Set(x, math.exp(p - m))    def GetDict(self):        """Gets the dictionary."""        return self.d    def SetDict(self, d):        """Sets the dictionary."""        self.d = d    def Values(self):        """Gets an unsorted sequence of values.        Note: one source of confusion is that the keys of this        dictionary are the values of the Hist/Pmf, and the        values of the dictionary are frequencies/probabilities.        """        return self.d.keys()    def Items(self):        """Gets an unsorted sequence of (value, freq/prob) pairs."""        return self.d.items()    def Render(self):        """Generates a sequence of points suitable for plotting.        Returns:            tuple of (sorted value sequence, freq/prob sequence)        """        return zip(*sorted(self.Items()))    def Print(self):        """Prints the values and freqs/probs in ascending order."""        for val, prob in sorted(self.d.iteritems()):            print val, prob    def Set(self, x, y=0):        """Sets the freq/prob associated with the value x.        Args:            x: number value            y: number freq or prob        """        self.d[x] = y    def Incr(self, x, term=1):        """Increments the freq/prob associated with the value x.        Args:            x: number value            term: how much to increment by        """        self.d[x] = self.d.get(x, 0) + term    def Mult(self, x, factor):        """Scales the freq/prob associated with the value x.        Args:            x: number value            factor: how much to multiply by        """        self.d[x] = self.d.get(x, 0) * factor    def Remove(self, x):        """Removes a value.        Throws an exception if the value is not there.        Args:            x: value to remove        """        del self.d[x]    def Total(self):        """Returns the total of the frequencies/probabilities in the map."""        total = sum(self.d.itervalues())        return total    def MaxLike(self):        """Returns the largest frequency/probability in the map."""        return max(self.d.itervalues())class Hist(_DictWrapper):    """Represents a histogram, which is a map from values to frequencies.    Values can be any hashable type; frequencies are integer counters.    """    def Freq(self, x):        """Gets the frequency associated with the value x.        Args:            x: number value        Returns:            int frequency        """        return self.d.get(x, 0)    def Freqs(self, xs):        """Gets frequencies for a sequence of values."""        return [self.Freq(x) for x in xs]    def IsSubset(self, other):        """Checks whether the values in this histogram are a subset of        the values in the given histogram."""        for val, freq in self.Items():            if freq > other.Freq(val):                return False        return True    def Subtract(self, other):        """Subtracts the values in the given histogram from this histogram."""        for val, freq in other.Items():            self.Incr(val, -freq)class Pmf(_DictWrapper):    """Represents a probability mass function.    Values can be any hashable type; probabilities are floating-point.    Pmfs are not necessarily normalized.    """    def Prob(self, x, default=0):        """Gets the probability associated with the value x.        Args:            x: number value            default: value to return if the key is not there        Returns:            float probability        """        return self.d.get(x, default)    def Probs(self, xs):        """Gets probabilities for a sequence of values."""        return [self.Prob(x) for x in xs]    def MakeCdf(self, name=None):        """Makes a Cdf."""        return MakeCdfFromPmf(self, name=name)    def ProbGreater(self, x):        """Probability that a sample from this Pmf exceeds x.        x: number        returns: float probability        """        t = [prob for (val, prob) in self.d.iteritems() if val > x]        return sum(t)    def ProbLess(self, x):        """Probability that a sample from this Pmf is less than x.        x: number        returns: float probability        """        t = [prob for (val, prob) in self.d.iteritems() if val < x]        return sum(t)    def __lt__(self, obj):        """Less than.        obj: number or _DictWrapper        returns: float probability        """        if isinstance(obj, _DictWrapper):            return PmfProbLess(self, obj)        else:            return self.ProbLess(obj)    def __gt__(self, obj):        """Greater than.        obj: number or _DictWrapper        returns: float probability        """        if isinstance(obj, _DictWrapper):            return PmfProbGreater(self, obj)        else:            return self.ProbGreater(obj)    def __ge__(self, obj):        """Greater than or equal.        obj: number or _DictWrapper        returns: float probability        """        return 1 - (self < obj)    def __le__(self, obj):        """Less than or equal.        obj: number or _DictWrapper        returns: float probability        """        return 1 - (self > obj)    def __eq__(self, obj):        """Equal to.        obj: number or _DictWrapper        returns: float probability        """        if isinstance(obj, _DictWrapper):            return PmfProbEqual(self, obj)        else:            return self.Prob(obj)    def __ne__(self, obj):        """Not equal to.        obj: number or _DictWrapper        returns: float probability        """        return 1 - (self == obj)    def Normalize(self, fraction=1.0):        """Normalizes this PMF so the sum of all probs is fraction.        Args:            fraction: what the total should be after normalization        Returns: the total probability before normalizing        """        if self.log:            raise ValueError("Pmf is under a log transform")        total = self.Total()        if total == 0.0:            raise ValueError('total probability is zero.')            logging.warning('Normalize: total probability is zero.')            return total        factor = float(fraction) / total        for x in self.d:            self.d[x] *= factor        return total    def Random(self):        """Chooses a random element from this PMF.        Returns:            float value from the Pmf        """        if len(self.d) == 0:            raise ValueError('Pmf contains no values.')        target = random.random()        total = 0.0        for x, p in self.d.iteritems():            total += p            if total >= target:                return x        # we shouldn't get here        assert False    def Mean(self):        """Computes the mean of a PMF.        Returns:            float mean        """        mu = 0.0        for x, p in self.d.iteritems():            mu += p * x        return mu    def Var(self, mu=None):        """Computes the variance of a PMF.        Args:            mu: the point around which the variance is computed;                if omitted, computes the mean        Returns:            float variance        """        if mu is None:            mu = self.Mean()        var = 0.0        for x, p in self.d.iteritems():            var += p * (x - mu) ** 2        return var    def MaximumLikelihood(self):        """Returns the value with the highest probability.        Returns: float probability        """        prob, val = max((prob, val) for val, prob in self.Items())        return val    def CredibleInterval(self, percentage=90):        """Computes the central credible interval.        If percentage=90, computes the 90% CI.        Args:            percentage: float between 0 and 100        Returns:            sequence of two floats, low and high        """        cdf = self.MakeCdf()        return cdf.CredibleInterval(percentage)    def __add__(self, other):        """Computes the Pmf of the sum of values drawn from self and other.        other: another Pmf        returns: new Pmf        """        try:            return self.AddPmf(other)        except AttributeError:            return self.AddConstant(other)    def AddPmf(self, other):        """Computes the Pmf of the sum of values drawn from self and other.        other: another Pmf        returns: new Pmf        """        pmf = Pmf()        for v1, p1 in self.Items():            for v2, p2 in other.Items():                pmf.Incr(v1 + v2, p1 * p2)        return pmf    def AddConstant(self, other):        """Computes the Pmf of the sum a constant and  values from self.        other: a number        returns: new Pmf        """        pmf = Pmf()        for v1, p1 in self.Items():            pmf.Set(v1 + other, p1)        return pmf    def __sub__(self, other):        """Computes the Pmf of the diff of values drawn from self and other.        other: another Pmf        returns: new Pmf        """        pmf = Pmf()        for v1, p1 in self.Items():            for v2, p2 in other.Items():                pmf.Incr(v1 - v2, p1 * p2)        return pmf    def Max(self, k):        """Computes the CDF of the maximum of k selections from this dist.        k: int        returns: new Cdf        """        cdf = self.MakeCdf()        cdf.ps = [p ** k for p in cdf.ps]        return cdfclass Joint(Pmf):    """Represents a joint distribution.    The values are sequences (usually tuples)    """    def Marginal(self, i, name=''):        """Gets the marginal distribution of the indicated variable.        i: index of the variable we want        Returns: Pmf        """        pmf = Pmf(name=name)        for vs, prob in self.Items():            pmf.Incr(vs[i], prob)        return pmf    def Conditional(self, i, j, val, name=''):        """Gets the conditional distribution of the indicated variable.        Distribution of vs[i], conditioned on vs[j] = val.        i: index of the variable we want        j: which variable is conditioned on        val: the value the jth variable has to have        Returns: Pmf        """        pmf = Pmf(name=name)        for vs, prob in self.Items():            if vs[j] != val: continue            pmf.Incr(vs[i], prob)        pmf.Normalize()        return pmf    def MaxLikeInterval(self, percentage=90):        """Returns the maximum-likelihood credible interval.        If percentage=90, computes a 90% CI containing the values        with the highest likelihoods.        percentage: float between 0 and 100        Returns: list of values from the suite        """        interval = []        total = 0        t = [(prob, val) for val, prob in self.Items()]        t.sort(reverse=True)        for prob, val in t:            interval.append(val)            total += prob            if total >= percentage / 100.0:                break        return intervaldef MakeJoint(pmf1, pmf2):    """Joint distribution of values from pmf1 and pmf2.    Args:        pmf1: Pmf object        pmf2: Pmf object    Returns:        Joint pmf of value pairs    """    joint = Joint()    for v1, p1 in pmf1.Items():        for v2, p2 in pmf2.Items():            joint.Set((v1, v2), p1 * p2)    return jointdef MakeHistFromList(t, name=''):    """Makes a histogram from an unsorted sequence of values.    Args:        t: sequence of numbers        name: string name for this histogram    Returns:        Hist object    """    hist = Hist(name=name)    [hist.Incr(x) for x in t]    return histdef MakeHistFromDict(d, name=''):    """Makes a histogram from a map from values to frequencies.    Args:        d: dictionary that maps values to frequencies        name: string name for this histogram    Returns:        Hist object    """    return Hist(d, name)def MakePmfFromList(t, name=''):    """Makes a PMF from an unsorted sequence of values.    Args:        t: sequence of numbers        name: string name for this PMF    Returns:        Pmf object    """    hist = MakeHistFromList(t)    d = hist.GetDict()    pmf = Pmf(d, name)    pmf.Normalize()    return pmfdef MakePmfFromDict(d, name=''):    """Makes a PMF from a map from values to probabilities.    Args:        d: dictionary that maps values to probabilities        name: string name for this PMF    Returns:        Pmf object    """    pmf = Pmf(d, name)    pmf.Normalize()    return pmfdef MakePmfFromItems(t, name=''):    """Makes a PMF from a sequence of value-probability pairs    Args:        t: sequence of value-probability pairs        name: string name for this PMF    Returns:        Pmf object    """    pmf = Pmf(dict(t), name)    pmf.Normalize()    return pmfdef MakePmfFromHist(hist, name=None):    """Makes a normalized PMF from a Hist object.    Args:        hist: Hist object        name: string name    Returns:        Pmf object    """    if name is None:        name = hist.name    # make a copy of the dictionary    d = dict(hist.GetDict())    pmf = Pmf(d, name)    pmf.Normalize()    return pmfdef MakePmfFromCdf(cdf, name=None):    """Makes a normalized Pmf from a Cdf object.    Args:        cdf: Cdf object        name: string name for the new Pmf    Returns:        Pmf object    """    if name is None:        name = cdf.name    pmf = Pmf(name=name)    prev = 0.0    for val, prob in cdf.Items():        pmf.Incr(val, prob - prev)        prev = prob    return pmfdef MakeMixture(metapmf, name='mix'):    """Make a mixture distribution.    Args:      metapmf: Pmf that maps from Pmfs to probs.      name: string name for the new Pmf.    Returns: Pmf object.    """    mix = Pmf(name=name)    for pmf, p1 in metapmf.Items():        for x, p2 in pmf.Items():            mix.Incr(x, p1 * p2)    return mixdef MakeUniformPmf(low, high, n):    """Make a uniform Pmf.    low: lowest value (inclusive)    high: highest value (inclusize)    n: number of values    """    pmf = Pmf()    for x in numpy.linspace(low, high, n):        pmf.Set(x, 1)    pmf.Normalize()    return pmfclass Cdf(object):    """Represents a cumulative distribution function.    Attributes:        xs: sequence of values        ps: sequence of probabilities        name: string used as a graph label.    """    def __init__(self, xs=None, ps=None, name=''):        self.xs = [] if xs is None else xs        self.ps = [] if ps is None else ps        self.name = name    def Copy(self, name=None):        """Returns a copy of this Cdf.        Args:            name: string name for the new Cdf        """        if name is None:            name = self.name        return Cdf(list(self.xs), list(self.ps), name)    def MakePmf(self, name=None):        """Makes a Pmf."""        return MakePmfFromCdf(self, name=name)    def Values(self):        """Returns a sorted list of values.        """        return self.xs    def Items(self):        """Returns a sorted sequence of (value, probability) pairs.        Note: in Python3, returns an iterator.        """        return zip(self.xs, self.ps)    def Append(self, x, p):        """Add an (x, p) pair to the end of this CDF.        Note: this us normally used to build a CDF from scratch, not        to modify existing CDFs.  It is up to the caller to make sure        that the result is a legal CDF.        """        self.xs.append(x)        self.ps.append(p)    def Shift(self, term):        """Adds a term to the xs.        term: how much to add        """        new = self.Copy()        new.xs = [x + term for x in self.xs]        return new    def Scale(self, factor):        """Multiplies the xs by a factor.        factor: what to multiply by        """        new = self.Copy()        new.xs = [x * factor for x in self.xs]        return new    def Prob(self, x):        """Returns CDF(x), the probability that corresponds to value x.        Args:            x: number        Returns:            float probability        """        if x < self.xs[0]: return 0.0        index = bisect.bisect(self.xs, x)        p = self.ps[index - 1]        return p    def Value(self, p):        """Returns InverseCDF(p), the value that corresponds to probability p.        Args:            p: number in the range [0, 1]        Returns:            number value        """        if p < 0 or p > 1:            raise ValueError('Probability p must be in range [0, 1]')        if p == 0: return self.xs[0]        if p == 1: return self.xs[-1]        index = bisect.bisect(self.ps, p)        if p == self.ps[index - 1]:            return self.xs[index - 1]        else:            return self.xs[index]    def Percentile(self, p):        """Returns the value that corresponds to percentile p.        Args:            p: number in the range [0, 100]        Returns:            number value        """        return self.Value(p / 100.0)    def Random(self):        """Chooses a random value from this distribution."""        return self.Value(random.random())    def Sample(self, n):        """Generates a random sample from this distribution.        Args:            n: int length of the sample        """        return [self.Random() for i in range(n)]    def Mean(self):        """Computes the mean of a CDF.        Returns:            float mean        """        old_p = 0        total = 0.0        for x, new_p in zip(self.xs, self.ps):            p = new_p - old_p            total += p * x            old_p = new_p        return total    def CredibleInterval(self, percentage=90):        """Computes the central credible interval.        If percentage=90, computes the 90% CI.        Args:            percentage: float between 0 and 100        Returns:            sequence of two floats, low and high        """        prob = (1 - percentage / 100.0) / 2        interval = self.Value(prob), self.Value(1 - prob)        return interval    def _Round(self, multiplier=1000.0):        """        An entry is added to the cdf only if the percentile differs        from the previous value in a significant digit, where the number        of significant digits is determined by multiplier.  The        default is 1000, which keeps log10(1000) = 3 significant digits.        """        # TODO(write this method)        raise UnimplementedMethodException()    def Render(self):        """Generates a sequence of points suitable for plotting.        An empirical CDF is a step function; linear interpolation        can be misleading.        Returns:            tuple of (xs, ps)        """        xs = [self.xs[0]]        ps = [0.0]        for i, p in enumerate(self.ps):            xs.append(self.xs[i])            ps.append(p)            try:                xs.append(self.xs[i + 1])                ps.append(p)            except IndexError:                pass        return xs, ps    def Max(self, k):        """Computes the CDF of the maximum of k selections from this dist.        k: int        returns: new Cdf        """        cdf = self.Copy()        cdf.ps = [p ** k for p in cdf.ps]        return cdfdef MakeCdfFromItems(items, name=''):    """Makes a cdf from an unsorted sequence of (value, frequency) pairs.    Args:        items: unsorted sequence of (value, frequency) pairs        name: string name for this CDF    Returns:        cdf: list of (value, fraction) pairs    """    runsum = 0    xs = []    cs = []    for value, count in sorted(items):        runsum += count        xs.append(value)        cs.append(runsum)    total = float(runsum)    ps = [c / total for c in cs]    cdf = Cdf(xs, ps, name)    return cdfdef MakeCdfFromDict(d, name=''):    """Makes a CDF from a dictionary that maps values to frequencies.    Args:       d: dictionary that maps values to frequencies.       name: string name for the data.    Returns:        Cdf object    """    return MakeCdfFromItems(d.iteritems(), name)def MakeCdfFromHist(hist, name=''):    """Makes a CDF from a Hist object.    Args:       hist: Pmf.Hist object       name: string name for the data.    Returns:        Cdf object    """    return MakeCdfFromItems(hist.Items(), name)def MakeCdfFromPmf(pmf, name=None):    """Makes a CDF from a Pmf object.    Args:       pmf: Pmf.Pmf object       name: string name for the data.    Returns:        Cdf object    """    if name == None:        name = pmf.name    return MakeCdfFromItems(pmf.Items(), name)def MakeCdfFromList(seq, name=''):    """Creates a CDF from an unsorted sequence.    Args:        seq: unsorted sequence of sortable values        name: string name for the cdf    Returns:       Cdf object    """    hist = MakeHistFromList(seq)    return MakeCdfFromHist(hist, name)class UnimplementedMethodException(Exception):    """Exception if someone calls a method that should be overridden."""class Suite(Pmf):    """Represents a suite of hypotheses and their probabilities."""    def Update(self, data):        """Updates each hypothesis based on the data.        data: any representation of the data        returns: the normalizing constant        """        for hypo in self.Values():            like = self.Likelihood(data, hypo)            self.Mult(hypo, like)        return self.Normalize()    def LogUpdate(self, data):        """Updates a suite of hypotheses based on new data.        Modifies the suite directly; if you want to keep the original, make        a copy.        Note: unlike Update, LogUpdate does not normalize.        Args:            data: any representation of the data        """        for hypo in self.Values():            like = self.LogLikelihood(data, hypo)            self.Incr(hypo, like)    def UpdateSet(self, dataset):        """Updates each hypothesis based on the dataset.        This is more efficient than calling Update repeatedly because        it waits until the end to Normalize.        Modifies the suite directly; if you want to keep the original, make        a copy.        dataset: a sequence of data        returns: the normalizing constant        """        for data in dataset:            for hypo in self.Values():                like = self.Likelihood(data, hypo)                self.Mult(hypo, like)        return self.Normalize()    def LogUpdateSet(self, dataset):        """Updates each hypothesis based on the dataset.        Modifies the suite directly; if you want to keep the original, make        a copy.        dataset: a sequence of data        returns: None        """        for data in dataset:            self.LogUpdate(data)    def Likelihood(self, data, hypo):        """Computes the likelihood of the data under the hypothesis.        hypo: some representation of the hypothesis        data: some representation of the data        """        raise UnimplementedMethodException()    def LogLikelihood(self, data, hypo):        """Computes the log likelihood of the data under the hypothesis.        hypo: some representation of the hypothesis        data: some representation of the data        """        raise UnimplementedMethodException()    def Print(self):        """Prints the hypotheses and their probabilities."""        for hypo, prob in sorted(self.Items()):            print hypo, prob    def MakeOdds(self):        """Transforms from probabilities to odds.        Values with prob=0 are removed.        """        for hypo, prob in self.Items():            if prob:                self.Set(hypo, Odds(prob))            else:                self.Remove(hypo)    def MakeProbs(self):        """Transforms from odds to probabilities."""        for hypo, odds in self.Items():            self.Set(hypo, Probability(odds))def MakeSuiteFromList(t, name=''):    """Makes a suite from an unsorted sequence of values.    Args:        t: sequence of numbers        name: string name for this suite    Returns:        Suite object    """    hist = MakeHistFromList(t)    d = hist.GetDict()    return MakeSuiteFromDict(d)def MakeSuiteFromHist(hist, name=None):    """Makes a normalized suite from a Hist object.    Args:        hist: Hist object        name: string name    Returns:        Suite object    """    if name is None:        name = hist.name    # make a copy of the dictionary    d = dict(hist.GetDict())    return MakeSuiteFromDict(d, name)def MakeSuiteFromDict(d, name=''):    """Makes a suite from a map from values to probabilities.    Args:        d: dictionary that maps values to probabilities        name: string name for this suite    Returns:        Suite object    """    suite = Suite(name=name)    suite.SetDict(d)    suite.Normalize()    return suitedef MakeSuiteFromCdf(cdf, name=None):    """Makes a normalized Suite from a Cdf object.    Args:        cdf: Cdf object        name: string name for the new Suite    Returns:        Suite object    """    if name is None:        name = cdf.name    suite = Suite(name=name)    prev = 0.0    for val, prob in cdf.Items():        suite.Incr(val, prob - prev)        prev = prob    return suiteclass Pdf(object):    """Represents a probability density function (PDF)."""    def Density(self, x):        """Evaluates this Pdf at x.        Returns: float probability density        """        raise UnimplementedMethodException()    def MakePmf(self, xs, name=''):        """Makes a discrete version of this Pdf, evaluated at xs.        xs: equally-spaced sequence of values        Returns: new Pmf        """        pmf = Pmf(name=name)        for x in xs:            pmf.Set(x, self.Density(x))        pmf.Normalize()        return pmfclass GaussianPdf(Pdf):    """Represents the PDF of a Gaussian distribution."""    def __init__(self, mu, sigma):        """Constructs a Gaussian Pdf with given mu and sigma.        mu: mean        sigma: standard deviation        """        self.mu = mu        self.sigma = sigma    def Density(self, x):        """Evaluates this Pdf at x.        Returns: float probability density        """        return EvalGaussianPdf(x, self.mu, self.sigma)class EstimatedPdf(Pdf):    """Represents a PDF estimated by KDE."""    def __init__(self, sample):        """Estimates the density function based on a sample.        sample: sequence of data        """        self.kde = scipy.stats.gaussian_kde(sample)    def Density(self, x):        """Evaluates this Pdf at x.        Returns: float probability density        """        return self.kde.evaluate(x)    def MakePmf(self, xs, name=''):        ps = self.kde.evaluate(xs)        pmf = MakePmfFromItems(zip(xs, ps), name=name)        return pmfdef Percentile(pmf, percentage):    """Computes a percentile of a given Pmf.    percentage: float 0-100    """    p = percentage / 100.0    total = 0    for val, prob in pmf.Items():        total += prob        if total >= p:            return valdef CredibleInterval(pmf, percentage=90):    """Computes a credible interval for a given distribution.    If percentage=90, computes the 90% CI.    Args:        pmf: Pmf object representing a posterior distribution        percentage: float between 0 and 100    Returns:        sequence of two floats, low and high    """    cdf = pmf.MakeCdf()    prob = (1 - percentage / 100.0) / 2    interval = cdf.Value(prob), cdf.Value(1 - prob)    return intervaldef PmfProbLess(pmf1, pmf2):    """Probability that a value from pmf1 is less than a value from pmf2.    Args:        pmf1: Pmf object        pmf2: Pmf object    Returns:        float probability    """    total = 0.0    for v1, p1 in pmf1.Items():        for v2, p2 in pmf2.Items():            if v1 < v2:                total += p1 * p2    return totaldef PmfProbGreater(pmf1, pmf2):    """Probability that a value from pmf1 is less than a value from pmf2.    Args:        pmf1: Pmf object        pmf2: Pmf object    Returns:        float probability    """    total = 0.0    for v1, p1 in pmf1.Items():        for v2, p2 in pmf2.Items():            if v1 > v2:                total += p1 * p2    return totaldef PmfProbEqual(pmf1, pmf2):    """Probability that a value from pmf1 equals a value from pmf2.    Args:        pmf1: Pmf object        pmf2: Pmf object    Returns:        float probability    """    total = 0.0    for v1, p1 in pmf1.Items():        for v2, p2 in pmf2.Items():            if v1 == v2:                total += p1 * p2    return totaldef RandomSum(dists):    """Chooses a random value from each dist and returns the sum.    dists: sequence of Pmf or Cdf objects    returns: numerical sum    """    total = sum(dist.Random() for dist in dists)    return totaldef SampleSum(dists, n):    """Draws a sample of sums from a list of distributions.    dists: sequence of Pmf or Cdf objects    n: sample size    returns: new Pmf of sums    """    pmf = MakePmfFromList(RandomSum(dists) for i in xrange(n))    return pmfdef EvalGaussianPdf(x, mu, sigma):    """Computes the unnormalized PDF of the normal distribution.    x: value    mu: mean    sigma: standard deviation    returns: float probability density    """    return scipy.stats.norm.pdf(x, mu, sigma)def MakeGaussianPmf(mu, sigma, num_sigmas, n=201):    """Makes a PMF discrete approx to a Gaussian distribution.    mu: float mean    sigma: float standard deviation    num_sigmas: how many sigmas to extend in each direction    n: number of values in the Pmf    returns: normalized Pmf    """    pmf = Pmf()    low = mu - num_sigmas * sigma    high = mu + num_sigmas * sigma    for x in numpy.linspace(low, high, n):        p = EvalGaussianPdf(x, mu, sigma)        pmf.Set(x, p)    pmf.Normalize()    return pmfdef EvalBinomialPmf(k, n, p):    """Evaluates the binomial pmf.    Returns the probabily of k successes in n trials with probability p.    """    return scipy.stats.binom.pmf(k, n, p)def EvalPoissonPmf(k, lam):    """Computes the Poisson PMF.    k: number of events    lam: parameter lambda in events per unit time    returns: float probability    """    return scipy.stats.poisson.pmf(k, lam)def MakePoissonPmf(lam, high, step=1):    """Makes a PMF discrete approx to a Poisson distribution.    lam: parameter lambda in events per unit time    high: upper bound of the Pmf    returns: normalized Pmf    """    pmf = Pmf()    for k in xrange(0, high + 1, step):        p = EvalPoissonPmf(k, lam)        pmf.Set(k, p)    pmf.Normalize()    return pmfdef EvalExponentialPdf(x, lam):    """Computes the exponential PDF.    x: value    lam: parameter lambda in events per unit time    returns: float probability density    """    return lam * math.exp(-lam * x)def EvalExponentialCdf(x, lam):    """Evaluates CDF of the exponential distribution with parameter lam."""    return 1 - math.exp(-lam * x)def MakeExponentialPmf(lam, high, n=200):    """Makes a PMF discrete approx to an exponential distribution.    lam: parameter lambda in events per unit time    high: upper bound    n: number of values in the Pmf    returns: normalized Pmf    """    pmf = Pmf()    for x in numpy.linspace(0, high, n):        p = EvalExponentialPdf(x, lam)        pmf.Set(x, p)    pmf.Normalize()    return pmfdef StandardGaussianCdf(x):    """Evaluates the CDF of the standard Gaussian distribution.    See http://en.wikipedia.org/wiki/Normal_distribution    #Cumulative_distribution_function    Args:        x: float    Returns:        float    """    return (erf(x / ROOT2) + 1) / 2def GaussianCdf(x, mu=0, sigma=1):    """Evaluates the CDF of the gaussian distribution.    Args:        x: float        mu: mean parameter        sigma: standard deviation parameter    Returns:        float    """    return StandardGaussianCdf(float(x - mu) / sigma)def GaussianCdfInverse(p, mu=0, sigma=1):    """Evaluates the inverse CDF of the gaussian distribution.    See http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function      Args:        p: float        mu: mean parameter        sigma: standard deviation parameter    Returns:        float    """    x = ROOT2 * erfinv(2 * p - 1)    return mu + x * sigmaclass Beta(object):    """Represents a Beta distribution.    See http://en.wikipedia.org/wiki/Beta_distribution    """    def __init__(self, alpha=1, beta=1, name=''):        """Initializes a Beta distribution."""        self.alpha = alpha        self.beta = beta        self.name = name    def Update(self, data):        """Updates a Beta distribution.        data: pair of int (heads, tails)        """        heads, tails = data        self.alpha += heads        self.beta += tails    def Mean(self):        """Computes the mean of this distribution."""        return float(self.alpha) / (self.alpha + self.beta)    def Random(self):        """Generates a random variate from this distribution."""        return random.betavariate(self.alpha, self.beta)    def Sample(self, n):        """Generates a random sample from this distribution.        n: int sample size        """        size = n,        return numpy.random.beta(self.alpha, self.beta, size)    def EvalPdf(self, x):        """Evaluates the PDF at x."""        return x ** (self.alpha - 1) * (1 - x) ** (self.beta - 1)    def MakePmf(self, steps=101, name=''):        """Returns a Pmf of this distribution.        Note: Normally, we just evaluate the PDF at a sequence        of points and treat the probability density as a probability        mass.        But if alpha or beta is less than one, we have to be        more careful because the PDF goes to infinity at x=0        and x=1.  In that case we evaluate the CDF and compute        differences.        """        if self.alpha < 1 or self.beta < 1:            cdf = self.MakeCdf()            pmf = cdf.MakePmf()            return pmf        xs = [i / (steps - 1.0) for i in xrange(steps)]        probs = [self.EvalPdf(x) for x in xs]        pmf = MakePmfFromDict(dict(zip(xs, probs)), name)        return pmf    def MakeCdf(self, steps=101):        """Returns the CDF of this distribution."""        xs = [i / (steps - 1.0) for i in xrange(steps)]        ps = [scipy.special.betainc(self.alpha, self.beta, x) for x in xs]        cdf = Cdf(xs, ps)        return cdfclass Dirichlet(object):    """Represents a Dirichlet distribution.    See http://en.wikipedia.org/wiki/Dirichlet_distribution    """    def __init__(self, n, conc=1, name=''):        """Initializes a Dirichlet distribution.        n: number of dimensions        conc: concentration parameter (smaller yields more concentration)        name: string name        """        if n < 2:            raise ValueError('A Dirichlet distribution with '                             'n<2 makes no sense')        self.n = n        self.params = numpy.ones(n, dtype=numpy.float) * conc        self.name = name    def Update(self, data):        """Updates a Dirichlet distribution.        data: sequence of observations, in order corresponding to params        """        m = len(data)        self.params[:m] += data    def Random(self):        """Generates a random variate from this distribution.        Returns: normalized vector of fractions        """        p = numpy.random.gamma(self.params)        return p / p.sum()    def Likelihood(self, data):        """Computes the likelihood of the data.        Selects a random vector of probabilities from this distribution.        Returns: float probability        """        m = len(data)        if self.n < m:            return 0        x = data        p = self.Random()        q = p[:m] ** x        return q.prod()    def LogLikelihood(self, data):        """Computes the log likelihood of the data.        Selects a random vector of probabilities from this distribution.        Returns: float log probability        """        m = len(data)        if self.n < m:            return float('-inf')        x = self.Random()        y = numpy.log(x[:m]) * data        return y.sum()    def MarginalBeta(self, i):        """Computes the marginal distribution of the ith element.        See http://en.wikipedia.org/wiki/Dirichlet_distribution        #Marginal_distributions        i: int        Returns: Beta object        """        alpha0 = self.params.sum()        alpha = self.params[i]        return Beta(alpha, alpha0 - alpha)    def PredictivePmf(self, xs, name=''):        """Makes a predictive distribution.        xs: values to go into the Pmf        Returns: Pmf that maps from x to the mean prevalence of x        """        alpha0 = self.params.sum()        ps = self.params / alpha0        return MakePmfFromItems(zip(xs, ps), name=name)def BinomialCoef(n, k):    """Compute the binomial coefficient "n choose k".    n: number of trials    k: number of successes    Returns: float    """    return scipy.misc.comb(n, k)def LogBinomialCoef(n, k):    """Computes the log of the binomial coefficient.    http://math.stackexchange.com/questions/64716/    approximating-the-logarithm-of-the-binomial-coefficient    n: number of trials    k: number of successes    Returns: float    """    return n * log(n) - k * log(k) - (n - k) * log(n - k)

  2. 调用函数

  //Charter2.py  

#-*- coding: UTF-8 -*-from thinkbayes import Pmfpmf = Pmf()pmf.Set('Bow1',0.5)pmf.Set("Bow2",0.5)pmf.Mult('Bow1',0.75)pmf.Mult('Bow2',0.5)#归一化处理pmf.Normalize()#输出概率print pmf.Prob('Bow1')

  效果如下:

  

这里写图片描述
图(1) 使用贝叶斯公式,计算条件概率

你可能感兴趣的文章