Wednesday, 15 June 2016

PRISM Algorithm in R


Hello everyone,

Alas! After a long time I am writing a post:

This post is about the PRISM Algorithm which is used for generation of rules. PRISM Algorithm is available in Weka, but I wasn't able to find an R code for it, so I just wrote one.

PRISM algorithm dates back to 1987. This is the original paper on PRISM algorithm. PRISM is explained very well here.

The R code for PRISM Algorithm:


## Load the Contact-lenses data from Rweka
prism_data<- data.frame(matrix(as.character(sapply(c("young","myope","no","reduced","none
","young","myope","no","normal","soft","young","myope","yes","reduced","none
                                        ","young","myope","yes","normal","hard
                                        ","young","hypermetrope","no","reduced","none
                                        ","young","hypermetrope","no","normal","soft
                                        ","young","hypermetrope","yes","reduced","none
                                        ","young","hypermetrope","yes","normal","hard
                                        ","pre-presbyopic","myope","no","reduced","none
                                        ","pre-presbyopic","myope","no","normal","soft
                                        ","pre-presbyopic","myope","yes","reduced","none
                                        ","pre-presbyopic","myope","yes","normal","hard
                                        ","pre-presbyopic","hypermetrope","no","reduced","none
                                        ","pre-presbyopic","hypermetrope","no","normal","soft
                                        ","pre-presbyopic","hypermetrope","yes","reduced","none
                                        ","pre-presbyopic","hypermetrope","yes","normal","none
                                        ","presbyopic","myope","no","reduced","none
                                        ","presbyopic","myope","no","normal","none
                                        ","presbyopic","myope","yes","reduced","none
                                        ","presbyopic","myope","yes","normal","hard
                                        ","presbyopic","hypermetrope","no","reduced","none
                                        ","presbyopic","hypermetrope","no","normal","soft
                                        ","presbyopic","hypermetrope","yes","reduced","none
                                        ","presbyopic","hypermetrope","yes","normal","none"),function(x)gsub('\\s+|\\s+$', '',x)))
            ,ncol = 5,nrow=24,dimnames = list(NULL,list("age","spectacle-prescrip","astigmatism","tear-prod-rate","contact-lenses"
)),byrow = T))


Ajit_Prism<- function(inpdata,Yname)
{
  
  # Give the required exception conditions:
  
  if(sum(is.na(inpdata))>0){
    return(cat("The data contains missing values. Input the data without NA values.","\n"))
  }

  if(sum(!sapply(inpdata,class)%in%"factor")>0){
    return(cat("The data contains variables which dont have data type as factor","\n",
               "Input the data with factor variables only.","\n"))
  }

  # Initiliaze the data
    
  row.names(inpdata)<- 1:nrow(inpdata)
  Ydata<- inpdata[,Yname]
  Xdata<- inpdata[,-which(colnames(inpdata)%in%Yname)]
  Yclass<- as.character(unique(Ydata))
  rule_list<- NULL
  rule_count<- 0

  # Run the loops for each class of the Y variable until the PRISM conditions are not exhausted
  
  for(i in Yclass){

    meta_inpdata<- inpdata
    
    while(sum(meta_inpdata[,Yname]==i)>0){
    
    cat("Obtaining rule for ",Yname,"=",i,"......","\n",sep = "")
    
    data_sub<- meta_inpdata
    meta_rules<- data.frame(matrix(nrow = 0,ncol = 3))
    colnames(meta_rules)<- c("variable","level","confidence")
    var_max<- NULL
    coverage<- 0
    
    while(coverage!=1){
    pt<- NULL
    pt_df<- data.frame()
    for(j in colnames(data_sub[,-(which(names(data_sub)%in%c(Yname,var_max)))])){
      
      for(k in unique(data_sub[,j])){
        metaData<- data_sub[data_sub[,j]==k,]
        pt<- length(metaData[metaData[,Yname]==i,Yname])/nrow(metaData)
        pt_df<- rbind(pt_df,cbind(j,k,pt,nrow(metaData)))
      }
    }
    pt_df$pt<- as.numeric(as.character(pt_df$pt))
    pt_df$V4<- as.numeric(as.character(pt_df$V4))
    
    var_max_meta<-if(max(pt_df$pt)==1){
      var_max_meta<- as.character(pt_df[which.max(pt_df$V4),1])
    }else {as.character(pt_df[which.max(pt_df$pt),1])}

    var_max<- c(var_max,var_max_meta)
    
    level_max<- if(max(pt_df$pt)==1)
      {as.character(pt_df[which.max(pt_df$V4),2])
    }else {as.character(pt_df[which.max(pt_df$pt),2])}
    
    confidence_val<-  if(max(pt_df$pt)==1)
    {as.character(pt_df[which.max(pt_df$V4),3])
    }else {as.character(pt_df[which.max(pt_df$pt),3])}
    
    meta_df<- cbind(var_max_meta,level_max,confidence_val)
    colnames(meta_df)<- c("variable","level","confidence")
    meta_rules<- rbind(meta_rules,meta_df)
    colnames(meta_rules)<- c("variable","level","confidence")
    coverage<- max(pt_df$pt)
    data_sub<- data_sub[data_sub[,var_max_meta]==level_max,]
    
    
    }
    rule_count<- rule_count + 1
    rule_list[[paste("Rules No",rule_count,":",Yname,"=",i,sep = " ")]]<- meta_rules
    mdata<- meta_inpdata
    for(m in 1:nrow(meta_rules)){
    mdata<- mdata[mdata[,as.character(meta_rules$variable[m])]==as.character(meta_rules$level[m]),]
    }
    meta_inpdata<- meta_inpdata[-which(as.numeric(row.names(meta_inpdata))%in%as.numeric(row.names(mdata))),]
    
  }

  }
  
  cat("\n","The Final Set of Rules:","\n")
  
  return(rule_list)
  
  
}


# Run the function:
Ajit_Prism(inpdata = prism_data,Yname = "contact.lenses")

No comments:

Post a Comment