1、机器学习实验报告材料机器学习课实验报告(1) ID算法实现决策树2015 - 2016学年第2学期专业:智能科学与技术班级:智能1301班06133029学号:争辉一、 实验目的:理解ID3算法的基本原理,并且编程实现。二、 实验要求:使用C/C+/MATLAB实现ID3算法。输入:若干行,每行5个字符串,表示Outlook Temperature Humidity Wind Play ball 如上表。输出:决策树。实验结果如下:输入:SunnyHotHighWeak 1NoSunnyHotHighStro ngNoOvercastHotHighWeakYesRai nMildHighWea
2、k YesRai nCoolNormalWeakYesRai nCoolNormalStro ngNoOvercastCoolNormalStro ng YesSunnyMildHighWeakNoSunnyCoolNormalWeakYesRai nMildNormalWeakYesSunnyMildNormalStro ngYesOvercastMildHighStro ngYesOvercastHotNormalWeak YesRai nMildHighStro ngNo输出:OutlookRai nWindStro ngNoWeakYesOvercast YesSunny Humidi
3、tyNormalHighYesNo三、具体实现:实现算法如下:#i nclude #in elude #in elude #in elude using n amespace std;#define ROW 14#define COL 5#define log2 0.69314718055typedef struct TNodechar data15;char weight15;TNode * firstchild,* nextsibli ng; *tree;typedef struct LNodechar OutLook15;char Temperature15;char Humidity1
4、5;char Wind15;char PlayTennis5;LNode *n ext;*li nk;typedef struct AttrNodechar attributes15; 属性int attr_Num;属性的个数AttrNode *n ext;*Attributes;char * ExamplesROWCOL = /OverCast,Cool,High,Stro ng,No, / Rai n,Hot,Normal,Stro ng,Yes,Sunn y,Hot,High,Weak,No, Sunn y,Hot,High,Stro ng,No, OverCast,Hot,High,W
5、eak,Yes,Rain,Mild,High,Weak,Yes,Rain,Cool,Normal,Weak,Yes, Rain,Cool,Normal,Strong,No, OverCast,Cool,Normal,Strong,Yes, Sunny,Mild,High,Weak,No, Sunny,Cool,Normal,Weak,Yes, Rain,Mild,Normal,Weak,Yes, Sunny,Mild,Normal,Strong,Yes, OverCast,Mild,Normal,Strong,Yes, OverCast,Hot,Normal,Weak,Yes, Rain,Mi
6、ld,High,Strong,No ;char * Attributes_kind4 = OutLook,Temperature,Humidity,Wind; int Attr_kind4 = 3,3,2,2;char * OutLook_kind3 = Sunny,OverCast,Rain;char * Temperature_kind3 = Hot,Mild,Cool;char * Humidity_kind2 = High,Normal;char * Wind_kind2 = Weak,Strong;/*int i_Exampple145 = 0,0,0,0,1,0,0,0,1,1,1
7、,0,0,1,0,2,1,0,0,0,2,2,1,0,0,2,2,1,1,1,1,2,1,1,0,0,1,0,0,1,0,2,1,0,0,2,1,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,2,1,0,0,1;*/void treelists(tree T);void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind);void InitLink(link &L,char * ExamplesCOL);void ID3(tree &T,link L,link Target_Attr
8、,Attributes attr);void PN_Num(link L,int &positve,int &negative);double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L);void main()link LL,p; Attributes attr_L,q;tree T;T = new TNode;T-firstchild = T-nextsibling = NULL; strcpy(T-weight,);strcpy(T-data,);attr_L = new AttrNod
9、e; attr_L-next = NULL;LL = new LNode;LL-next = NULL;/成功建立两个链表InitLink(LL,Examples);InitAttr(attr_L,Attributes_kind,Attr_kind);ID3(T,LL,NULL,attr_L);coutvv决策树以广义表形式输出如下:e ndl; treelists(T);/以广义表的形式输出树/ coutGain(9,5,OutLook,LL,attr_L)endl;coutendl;/以广义表的形式输出树void treelists(tree T)tree p;if(!T)return;c
10、outweight;coutdata;p = T-firstchild;if (p)coutnextsibling;if (p)cout,; cout);void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind) Attributes p;for (int i =0;i next = NULL;strcpy(p-attributes,Attributes_kindi); p-attr_Num = Attr_kindi;p-next = attr_link-next; attr_link-next = p;v
11、oid InitLink(link &LL,char * ExamplesCOL)link p;for (int i = 0;i next = NULL;strcpy(p-OutLook,Examplesi0); strcpy(p-Temperature,Examplesi1); strcpy(p-Humidity,Examplesi2); strcpy(p-Wind,Examplesi3); strcpy(p-PlayTennis,Examplesi4);p-next = LL-next;LL-next = p;void PN_Num(link L,int &positve,int &neg
12、ative)positve = 0; negative = 0; link p;p = L-next;while (p)if (strcmp(p-PlayTennis,No) = 0) negative+;else if(strcmp(p-PlayTennis,Yes) = 0) positve+;p = p-next;/计算信息增益/link L: 样本集合 Sattr_L :属性集合double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L) int atrr_ki nds;每个属性中的值的
13、个数Attributes p = attr_L-next; link q = L-next;int attr_th = 0;/第几个属性while (p)if (strcmp(p-attributes,atrribute) = 0) atrr_kinds = p-attr_Num; break;p = p-next;attr_th+;double entropy,gain=0;double p1 = 1.0*positive/(positive + negative);double p2 = 1.0*negative/(positive + negative); entropy = -p1*l
14、og(p1)/log2 - p2*log(p2)/log2;/ 集合熵 gain = entropy;/获取每个属性值在训练样本中出现的个数/获取每个属性值所对应的正例和反例的个数/声明一个 3*atrr_kinds 的数组int * kinds= new int * 3;for (int j =0;j 3;j+)kindsj = new intatrr_kinds;/ 保存每个属性值在训练样本中出现的个数/初始化for (int j = 0;j 3;j+)for (int i =0;i atrr_kinds;i+)kindsji = 0;while (q)if (strcmp(OutLook
15、,atrribute) = 0)for (int i = 0;i OutLook,OutLook_kindi) = 0)kinds0i+;if(strcmp(q-PlayTennis,Yes) = 0)kinds1i+;elsekinds2i+;else if (strcmp(Temperature,atrribute) = 0)for (int i = 0;i Temperature,Temperature_kindi) = 0) kinds0i+;if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+;else kinds2i+;else if (strcmp(
16、Humidity,atrribute) = 0)for (int i = 0;i Humidity,Humidity_kindi) = 0) kinds0i+;if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+;/else kinds2i+;else if (strcmp(Wind,atrribute) = 0)for (int i = 0;i Wind,Wind_kindi) = 0) kinds0i+;if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+;else kinds2i+;q = q-next;/计算信息增益 doub
17、le * gain_kind = new doubleatrr_kinds;int positive_kind = 0,negative_kind = 0;for (int j = 0;j next;Link-next = NULL;while (p)q = p;p = p-next; free(q);void ID3(tree &T,link L,link Target_Attr,Attributes attr)Attributes p,max,attr_child,p1;link q,link_child,q1;tree r,tree_p;int positive =0,negative
18、=0;PN_Num(L,positive,negative);/初始化两个子集合attr_child = new AttrNode; attr_child-next = NULL;link_child = new LNode; link_child-next = NULL;if (positive = 0)/ 全是反例strcpy(T-data,No);return;else if( negative = 0)/全是正例strcpy(T-data,Yes);return;p = attr-next; /属性链表double gain,g = 0;/*/* 建立属性子集合与训练样本子集合有两个方
19、案: 一:在原来链表的基础上进行删除; 二:另外申请空间进行存储子集合; 采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间 的应用混乱*/*/if(p)while (p)gain = Gain(positive,negative,p-attributes,L,attr); coutattributes gain g)g = gain;max = p;寻找信息增益最大的属性 p = p-next;strcpy(T-data,max-attributes);/增加决策树的节点coutattributes = attributesendlnext;while (p)if (strcmp
20、(p-attributes,max-attributes) != 0)p1 = new AttrNode; strcpy(p1-attributes,p-attributes); p1-attr_Num = p-attr_Num; p1-next = NULL;p1-next = attr_child-next; attr_child-next = p1;p = p-next;/需要区分出是哪一种属性 /建立每一层的第一个节点if (strcmp(OutLook,max-attributes) = 0)r = new TNode;r-firstchild = r-nextsibling = N
21、ULL; strcpy(r-weight,OutLook_kind0); T-firstchild = r;获取与属性值相关的训练样例 Example(vi),建立一个新的训练样本链表 link_childq = L-next; while (q)if (strcmp(q-OutLook,OutLook_kind0) = 0)q1 = new LNode; strcpy(q1-OutLook,q-OutLook); strcpy(q1-Humidity,q-Humidity);strcpy(q1-Temperature,q-Temperature);strcpy(q1-Wind,q-Wind)
22、;strcpy(q1-PlayTennis,q-PlayTennis);q1-next = NULL;q1-next = link_child-next;link_child-next = q1;q = q-next;else if (strcmp(Temperature,max-attributes) = 0)r = new TNode;r-firstchild = r-nextsibling = NULL;strcpy(r-weight,Temperature_kind0);T-firstchild = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本 链表
23、 link_childq = L-next;while (q)if (strcmp(q-Temperature,Temperature_kind0) = 0)q1 = new LNode;strcpy(q1-OutLook,q-OutLook);strcpy(q1-Humidity,q-Humidity);strcpy(q1-Temperature,q-Temperature);strcpy(q1-Wind,q-Wind);strcpy(q1-PlayTennis,q-PlayTennis);q1-next = NULL;q1-next = link_child-next;link_child
24、-next = q1;q = q-next;else if (strcmp(Humidity,max-attributes) = 0)r = new TNode;r-firstchild = r-nextsibling = NULL;strcpy(r-weight,Humidity_kind0);T-firstchild = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本 链表 link_childq = L-next;while (q)if (strcmp(q-Humidity,Humidity_kind0) = 0)q1 = new LNode;strcpy(q1-OutLook,q-OutLook);strcpy(q1-Humidity,q-Humidity);strcpy(q1-Temperature,q-Temperature);strcpy(q1-Wind,q-Wind);strcpy(q1-PlayTennis,q-PlayTennis);q1-next = NULL;q1-next = link_child-next;link_child-next = q1;q = q-