def parameterLearning():
    """Demonstrate CPD (parameter) estimation for a small fruit/size -> tasty
    Bayesian network using pgmpy.

    Walks through three estimators on the same 14-row data set:
      1. ParameterEstimator   -- raw state counts per variable,
      2. MaximumLikelihoodEstimator -- relative-frequency CPDs,
      3. BayesianEstimator    -- CPDs smoothed with a BDeu prior,
    then fits all CPDs at once with ``model.fit``.

    Returns:
        None. All results are printed to stdout.
    """
    data = pd.DataFrame(data={
        'fruit': ["banana", "apple", "banana", "apple", "banana", "apple",
                  "banana", "apple", "apple", "apple", "banana", "banana",
                  "apple", "banana"],
        'tasty': ["yes", "no", "yes", "yes", "yes", "yes", "yes", "yes",
                  "yes", "yes", "yes", "no", "no", "no"],
        'size': ["large", "large", "large", "small", "large", "large",
                 "large", "small", "large", "large", "large", "large",
                 "small", "small"],
    })
    # 'tasty' has two parents: 'fruit' and 'size'.
    model = BayesianNetwork([('fruit', 'tasty'), ('size', 'tasty')])
    print("========================================================")
    # State counts: for 'tasty' these are conditioned on its parent states.
    pe = ParameterEstimator(model, data)
    print("\n", pe.state_counts('fruit'))
    print("\n", pe.state_counts('size'))
    print("\n", pe.state_counts('tasty'))
    print("========================================================")
    # Maximum-likelihood estimation: CPDs are the observed relative frequencies.
    mle = MaximumLikelihoodEstimator(model, data)
    print(mle.estimate_cpd('fruit'))
    print(mle.estimate_cpd('tasty'))
    print("========================================================")
    # Bayesian estimation smooths the MLE toward a uniform BDeu prior;
    # equivalent_sample_size controls the prior's weight.
    est = BayesianEstimator(model, data)
    print(est.estimate_cpd('tasty', prior_type='BDeu', equivalent_sample_size=10))
    print("========================================================")
    # Convenience path: estimate every CPD in one call and attach them to the model.
    model.fit(data, estimator=MaximumLikelihoodEstimator)
    print("========================================================")
def structuralLearning_Score():
    """Demonstrate score-based structure learning with pgmpy.

    Techniques covered:
      * score-based structure learning (BDeu / K2 / BIC scores),
      * exhaustive search over all DAGs (feasible only for tiny networks),
      * hill-climb search for larger networks.
    (Constraint-based learning and the hybrid combination of both are shown
    in ``structuralLearning_Hybrid``.)

    Returns:
        None. All results are printed to stdout.
    """
    print("===================基于评分=================================")
    # Synthetic data where Z = X + Y, so edges X->Z and Y->Z should score well.
    data = pd.DataFrame(np.random.randint(0, 4, size=(5000, 2)), columns=list('XY'))
    data['Z'] = data['X'] + data['Y']
    bdeu = BDeuScore(data, equivalent_sample_size=5)
    k2 = K2Score(data)
    bic = BicScore(data)
    # model1 matches the generating process; model2 does not.
    model1 = BayesianNetwork([('X', 'Z'), ('Y', 'Z')])
    model2 = BayesianNetwork([('X', 'Z'), ('X', 'Y')])
    print("==========基于评分===model1===============")
    print(bdeu.score(model1))
    print(k2.score(model1))
    print(bic.score(model1))
    print("==========基于评分===model2===============")
    print(bdeu.score(model2))
    print(k2.score(model2))
    print(bic.score(model2))
    print("==========基于评分===局部评分==============")
    # Local scores: adding both true parents of Z should improve the score.
    print(bdeu.local_score('Z', parents=[]))
    print(bdeu.local_score('Z', parents=['X']))
    print(bdeu.local_score('Z', parents=['X', 'Y']))
    print("==========基于评分===穷举搜索算法==============")
    # Exhaustive search enumerates every DAG; only tractable for few variables.
    es = ExhaustiveSearch(data, scoring_method=bic)
    best_model = es.estimate()
    print(best_model.edges())
    print("\n 遍历所有的分数:")
    for score, dag in reversed(es.all_scores()):
        print(score, dag.edges())
    print("==========基于评分===爬山搜索算法==============")
    # Hill climbing scales to more variables; data built with known dependencies
    # A <- B, C and H <- G, A.
    data = pd.DataFrame(np.random.randint(0, 3, size=(2500, 8)), columns=list('ABCDEFGH'))
    data['A'] += data['B'] + data['C']
    data['H'] = data['G'] - data['A']
    hc = HillClimbSearch(data, scoring_method=BicScore(data))
    best_model = hc.estimate()
    print(best_model.edges())
def structuralLearning_Hybrid():
    """Demonstrate hybrid structure learning with the MMHC algorithm.

    The MMHC algorithm [3] combines constraint-based and score-based methods.
    It has two parts:
      1. learn an undirected graph skeleton with the constraint-based
         construction procedure MMPC;
      2. score-based optimization restricted to that skeleton
         (BDeu score + modified hill climbing).

    Returns:
        None. All results are printed to stdout.
    """
    print("===================混合方法=================================")
    # Synthetic data with known dependencies: A <- B, C; H <- G, A; E <- F.
    data = pd.DataFrame(np.random.randint(0, 3, size=(2500, 8)), columns=list('ABCDEFGH'))
    data['A'] += data['B'] + data['C']
    data['H'] = data['G'] - data['A']
    data['E'] *= data['F']
    # Part 1: constraint-based skeleton via MMPC.
    mmhc = MmhcEstimator(data)
    skeleton = mmhc.mmpc()
    print("Part 1) Skeleton: ", skeleton.edges())
    # Part 2: hill climbing, but only over orientations allowed by the skeleton
    # (white_list); tabu_length discourages revisiting recent moves.
    hc = HillClimbSearch(data, scoring_method=BDeuScore(data))
    model = hc.estimate(tabu_length=10, white_list=skeleton.to_directed().edges())
    print("Part 2) Model: ", model.edges())
    print("===================两步划为一步=================================")