package example; import Tests.VIF_Tolerance; /** * @author Marios Michailidis 2013 * @Goal The purpose of this example class is to show you * how to derive VIF (Variance Inflation Factor) and Tolerance from a set of variables based on * the KazAClasses package. When it comes to modeling, on many * occasions we do not want to have variables that are highly * correlated with each other (e.g. higher age tends to go with higher income) * because they add little value (i.e. they bring no new information). Too much * multicollinearity might cause specific algorithms to crash, * as there are infinitely many combinations of values (e.g. in coefficients) that could * work and the algorithm cannot decide where to stop. Again we will use * three variables as a set for each person per observation: *
1)The Age, * 2),The Income and * 3) A flag that shows whether they are married or not. */ public class Multicolinearity_Example { /** * @param args This is the main method that runs the example */ public static void main(String[] args) { /* * In this example we will * Initialize the names of the predictors we are going to use, * Namely : Age, Income, if they are married. */ String [] Name_of_predictors = {"Age","Income","Marital Status"}; /* * Initialize a double Array for the predictors again * displaying Age, Income and a binary target that when it is 1=Are married * and when it is 0= they are not. */ double predictors_array [][]= {{23,15000,0}, {24,16000,0},{25,17000,0},{26,18000,0},{27,19000,1},{28,20000,0}, {29,21000,0},{30,22000,1},{31,23000.34,0},{32,24000,0},{33,25000,1}, {34,26000,1},{35,27000,1},{36,28000,0},{37,29000.78,0},{38,30000,1}, {39,31000,1},{40,32000,0},{41,33000,1},{42,34000,0},{43,35000,1}, {44,36000,0},{45,37000,1},{46,38000.17,0},{47,39000,1},{48,40000,0}, {49,41000,1},{50,42000,0},{51,43000,1},{52,44000,0},{53,45000,1}, {54,46000,1},{55,47000,1},{56,48000,0},{57,49000,1},{58,50000,1}, {59,51000,0},{60,52000,1},{61,53000,1},{62,54340,0},{63,30034.78,1}, {64,56000,1},{65,57000,0},{66,58000,0},{67,59000,1},{68,60000,0}, {69,61000,1},{70,62000,1},{71,63000,1},{72,64000,0},{73,65000,0}, {74,66000,1},{75,67000,1},{76,68000,0},{77,69000,1},{78,70000,0}, {79,71000,1},{80,72000,1},{81,73000,0},{82,74000,1},{83,75000,0}, {84,76000,1},{85,77000,0},{86,78000,0},{87,79000,0},{88,80000,1}, {89,81000,1},{90,82000,1},{91,83000,0},{92,84000,1},{93,48000,0}, {94,49000,1},{95,50000,1},{96,51000,0},{97,52000,1},{98,53000,0}, {99,54000,1},{100,55000,1},{101,55000,1},{23,27000,0},{24,28000,0}, {25,29000,1},{26,30000,0},{27,31000,0},{28,32000,1},{29,33000,1}, {30,34000,0},{31,35000,0},{32,36000,0},{33,37000,1},{34,38000,1}, {35,39000,0},{36,40000,1},{37,38000,0},{38,39000,1},{39,40000,1}, {40,38000,0},{41,39000,0},{42,40000,1},{43,41000,1},{44,42000,1}, 
{45,43000,0}, }; /* * Initiate the VIF_Tolerance class */ VIF_Tolerance vif_tol= new VIF_Tolerance(); /* * Initiate the VIF_Tolerance main method * that computes all the important statistics */ vif_tol.get_VIF_TOL(predictors_array); /* * Now we can get the Vif and Tolerance double arrays . * Generally, have in mind that According to Bowerman & O Connell 1990, * an average of VIF substantially higher than 1, it would have been a concern. * According to Myers 1990 a VIF value higher than 10 would have been an issue. * Also according to Menard (1995), tolerance below 0.2 would have been a problem. */ double tolerance []=vif_tol.get_TOL(); double Vif []=vif_tol.get_VIF(); /* * Print the results */ for (int i=0; i