From f3d845cb66abaa1e88928137d3ad4448c2997a58 Mon Sep 17 00:00:00 2001 From: Pierre Ratinaud Date: Thu, 24 Mar 2016 10:18:22 +0100 Subject: [PATCH] chi2 de McNemar --- iramuteq.py | 10 +- openanalyse.py | 2 +- tabchi2mcnemar.py | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 413 insertions(+), 2 deletions(-) create mode 100644 tabchi2mcnemar.py diff --git a/iramuteq.py b/iramuteq.py index ef99d20..acbff9e 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -1,7 +1,7 @@ #!/bin/env python # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#Copyright (c) 2008-2012, Pierre Ratinaud +#Copyright (c) 2008-2016, Pierre Ratinaud #License: GNU GPL from optparse import OptionParser @@ -36,6 +36,7 @@ from tableau import Tableau from dialog import PrefDialog from tabfrequence import Frequences, FreqMultiple from tabchi2 import ChiSquare +from tabchi2mcnemar import McNemar #from tabstudent import MakeStudent from tabchddist import ChdCluster from tabafcm import DoAFCM @@ -68,6 +69,7 @@ ID_OpenText = wx.NewId() ID_OnOpenAnalyse = wx.NewId() ID_Freq = wx.NewId() ID_Chi2 = wx.NewId() +ID_Chi2mc = wx.NewId() ID_Student = wx.NewId() ID_CHDSIM = wx.NewId() ID_CHDReinert = wx.NewId() @@ -185,6 +187,7 @@ images_analyses = { 'freq' : 'frequences.png', 'freqmulti' : 'frequences.png', 'chi2' : 'chi2.png', + 'chi2mcnemar' : 'chi2.png', 'reinertmatrix' : 'reinertmatrix.png', 'simimatrix' : 'simimatrix.png', 'simiclustermatrix' : 'simimatrix.png', @@ -328,6 +331,7 @@ class IraFrame(wx.Frame): matanalyses = [[ID_Freq, _(u"Frequencies").decode('utf8'), 'freq'], [ID_FreqMulti, _(u"Multiple Frequencies").decode('utf8'), 'freqmulti'], [ID_Chi2, _(u"Chi2").decode('utf8'), 'chi2'], + [ID_Chi2mc, _(u"Chi2 McNemar").decode('utf8'), 'chi2mcnemar'], {'name' : _(u"Clustering").decode('utf8'), 'content' : [[ID_CHDReinert, _(u"Reinert's Method").decode('utf8'), 'reinertmatrix']]}, [ID_SIMI, _(u"Similarities Analysis").decode('utf8'), 'simimatrix'], @@ -582,6 +586,7 @@ class IraFrame(wx.Frame): self.Bind(wx.EVT_MENU, self.OnFreq, id=ID_Freq) self.Bind(wx.EVT_MENU, self.OnFreqMulti, id=ID_FreqMulti) self.Bind(wx.EVT_MENU, self.OnChi2, id=ID_Chi2) + self.Bind(wx.EVT_MENU, self.OnChi2McNemar, id=ID_Chi2mc) self.Bind(wx.EVT_MENU, self.OnStudent, id=ID_Student) self.Bind(wx.EVT_MENU, self.OnCHDSIM, id=ID_CHDSIM) self.Bind(wx.EVT_MENU, self.OnCHDReinert, id=ID_CHDReinert) @@ -969,6 +974,9 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis.""" def OnChi2(self, event, matrix = None): self.analyse_matrix(ChiSquare, matrix = matrix, analyse_type = 'chi2', dlgnb = 3) + def OnChi2McNemar(self, event, matrix = None): + self.analyse_matrix(McNemar, matrix = matrix, analyse_type = 'chi2mcnemar', dlgnb = 3) + def OnSimiTab(self, event, matrix = None): self.analyse_matrix(DoSimi, matrix = matrix, analyse_type = 'simimatrix', dlgnb = 5) diff --git a/openanalyse.py b/openanalyse.py index c8089d2..a80670e 100644 --- a/openanalyse.py +++ b/openanalyse.py @@ -170,6 +170,6 @@ class OpenAnalyse(): MatLayout(self.parent, corpus) elif self.conf['type'] == 'freq' or self.conf['type'] == 'freqmulti': FreqLayout(self.parent, corpus, self.conf) - elif self.conf['type'] == 'chi2' : + elif self.conf['type'] == 'chi2' or self.conf['type'] == 'chi2mcnemar': Chi2Layout(self.parent, corpus, self.conf) diff --git a/tabchi2mcnemar.py b/tabchi2mcnemar.py new file mode 100644 index 0000000..7270ac3 --- /dev/null +++ b/tabchi2mcnemar.py @@ -0,0 +1,403 @@ +#!/bin/env python +# -*- coding: utf-8 -*- +#Author: Pierre Ratinaud +#Copyright (c) 2016 Pierre Ratinaud +#License: GNU/GPL + +import HTML +import os +import string +import wx +import os +import sys +import tempfile +from chemins import ffr,FFF +import wx.lib.sized_controls as sc +from time import sleep +from functions import exec_rcode, check_Rresult +from dialog import ChiDialog, PrefChi +from analysematrix import AnalyseMatrix + +def make_res(line) : + if float(line[5]) <= 0.05 and line[6] != 'warning': + line.append('green') + elif float(line[5]) <= 0.05 and line[6] == 'warning': + line.append('blue') + else : + line.append('red') + return line + +def clean_line(result) : + return [[val for val in line if val != '**'] for line in result] + +def make_table(tabs, tab_title, res) : + return ['
'.join(['%s' % (res[i][-1], tab_title), HTML.table(tab)]) for i,tab in enumerate(tabs)] + +def make_restab(res) : + return ['
'.join(['%s'% (line[-1], u'Résultats'),HTML.table([['chi', line[3]],['p', line[5]]])]) for i,line in enumerate(res)] + +def make_htmlgraphs(graphs) : + return ['' % os.path.basename(val) for val in graphs] + +def make_link_list(res, text) : + return ['%s' % (i, i, chi[-1], text[i]) for i, chi in enumerate(res)] + +def make_title(res, text) : + return ['
%s
retour
' % (i, val[-1], text[i], i) for i, val in enumerate(res)] + + +chioption = { 'valobs' : True, + 'valtheo' : True, + 'resi' : False, + 'contrib' : True, + 'pourcent' : False, + 'pourcentl' : True, + 'pourcentc' : True, + 'graph' : True, + 'bw' : False, + } + +class McNemar(AnalyseMatrix): + def doparametres(self, dlg = None): + if dlg is None : + return + dial = ChiDialog(self.parent, -1, u"Chi2 McNemar", chioption, self.tableau, size=(400, 350), + style = wx.DEFAULT_DIALOG_STYLE + ) + dial.CenterOnParent() + val = dial.ShowModal() + if val==wx.ID_OK : + self.colsel1 = dial.list_box_1.GetSelections() + self.colsel2 = dial.list_box_2.GetSelections() + if dial.chiopt : + chioption['valobs'] = dial.dial.check1.GetValue() + chioption['valtheo'] = dial.dial.check2.GetValue() + chioption['resi'] = dial.dial.check3.GetValue() + chioption['contrib'] = dial.dial.check4.GetValue() + chioption['pourcent'] = dial.dial.check5.GetValue() + chioption['pourcentl'] = dial.dial.check6.GetValue() + chioption['pourcentc'] = dial.dial.check7.GetValue() + chioption['graph'] = dial.dial.check8.GetValue() + chioption['bw'] = dial.dial.checkbw.GetValue() + dial.dial.Destroy() + dial.Destroy() + self.parametres.update(chioption) + self.chioption = chioption + else : + if dial.chiopt : + dial.dial.Destroy() + dial.Destroy() + self.parametres = None + + def doanalyse(self): + self.count = 1 + keepGoing = self.dlg.Update(self.count,u"Analyse dans R...") + self.OutFrame=tempfile.mktemp(dir=self.parent.TEMPDIR) + self.encode=self.parent.encode + self.TEMPDIR=self.parent.TEMPDIR + self.RPath=self.parent.PathPath.get('PATHS','rpath') + self.TextCroise=[] + for i in self.colsel1 : + for j in self.colsel2 : + self.TextCroise.append(self.tableau.colnames[i] + ' / ' + self.tableau.colnames[j]) + rchioption = {} + for val in self.chioption : + if self.chioption[val]: + rchioption[val] = 'TRUE' + else : + rchioption[val] = 'FALSE' + txt=""" + source("%s") + """%ffr(self.parent.RscriptsPath['Rfunct']) + txt += """ + source("%s") + """ % ffr(self.parent.RscriptsPath['Rgraph']) + txt += """ + doobs <- %s + doexp <- %s + docontrib <- %s + doresi <- %s + dopr <- %s + doprl <- %s + doprc <- %s + dograph <- %s + bw <- %s + """ % (rchioption['valobs'], rchioption['valtheo'], rchioption['contrib'], rchioption['resi'], rchioption['pourcent'], rchioption['pourcentl'], rchioption['pourcentc'], rchioption['graph'], rchioption['bw']) + txt+=""" + datadm <- read.csv2("%s", encoding="%s", header = TRUE, row.names = 1, sep='\\t', quote = '"', na.string = '') + listres<-list() + listcol<-list() + cont<-1 + """%(ffr(self.tableau.parametres['csvfile']), self.tableau.parametres['syscoding']) + if len(self.colsel1)==1: + strsel1=str(self.colsel1).replace(',','') + else: + strsel1=str(self.colsel1) + if len(self.colsel2)==1: + strsel2=str(self.colsel2).replace(',','') + else: + strsel2=str(self.colsel2) + txt+=""" + for (i in c%s) {""" % strsel1 + txt+=""" + for (j in c%s) {""" % strsel2 + txt+=""" + tab<-table(datadm[,i+1],datadm[,j+1]) + if (min(dim(tab)) != 1) { + chi <- mcnemar.test(tab) + #chi<-chisq.test(tab) + CS<-colSums(tab) + RS<-rowSums(tab) + GT<-sum(tab) + chi$observed <- tab + chi$expected <- tab + chi$contrib <- tab + chi$residuals <- tab + #chi$contrib<-(tab-chi$expected)/sqrt(chi$expected * ((1 - RS/GT) %%*%% t(1 - CS/GT))) + listres[[cont]]<-chi + listcol[[cont]]<-ncol(tab) + cont<-cont+1 + } else { + chi <- list(observed = tab, residuals = tab, contrib = tab, statistic = 0, p.value = 1, expected = tab, message = 'pas de calcul') + listres[[cont]] <- chi + listcol[[cont]]<-ncol(tab) + cont <- cont + 1 + } + } + } + maxcol<-max(unlist(listcol))+1 + if (maxcol<7) {maxcol<-7} + frameout<-matrix('*',1,maxcol) + count<-0 + for (chi in listres) { + if (min(chi$expected)<5) { + att<-"warning" + } else { + att<-"" + } + if ('message' %%in%% attributes(chi)$names) { + att <- "Ce chi2 n\'a pas été calculé" + nom_colresi<-colnames(chi$observed) + chi$prl <- chi$expected + chi$prc <- chi$expected + st <- sum(chi$observed) + } else { + nom_colresi<-colnames(chi$observed) + st <- sum(chi$observed) + sc <- colSums(chi$observed) + sr <- rowSums(chi$observed) + chi$prl <- round((chi$observed/sr)*100,2) + chi$prc <- t(round((t(chi$observed)/sc)*100,2)) + } + fileout<-paste('histo_',count,sep='') + fileout<-paste(fileout,'.png',sep='') + count<-count+1 + fileout<-file.path("%s",fileout) + if (max(nchar(colnames(chi$observed)))>15) { + leg <- 1:length(colnames(chi$observed)) + } else { + leg <- colnames(chi$observed) + } + if (dograph) { + width<-ncol(chi$observed)*100 + if (width < 350) {width <- 350} + open_file_graph(fileout,width = width, height = 300) + par(mar=c(0,0,0,0)) + layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,1)) + par(mar=c(2,2,1,0)) + par(cex=0.8) + if (!bw) colors <- rainbow(length(rownames(chi$observed))) + else colors <- gray.colors(length(rownames(chi$observed))) + barplot(chi$prl,names.arg = leg, beside=TRUE,border=NA, col=colors) + par(mar=c(0,0,0,0)) + par(cex=0.8) + plot(0, axes = FALSE, pch = '') + legend(x = 'center' , rownames(chi$observed), fill = colors) + dev.off() + } + chi$prl <- cbind(chi$prl, total = rowSums(chi$prl)) + chi$prc <- rbind(chi$prc, total = colSums(chi$prc)) + chi$observed<-rbind(chi$observed,total=colSums(chi$observed)) + chi$observed<-cbind(chi$observed,total=rowSums(chi$observed)) + chi$pr <- round((chi$observed/st)*100,2) + chi$expected<-rbind(chi$expected,total=colSums(chi$expected)) + chi$expected<-cbind(chi$expected,total=rowSums(chi$expected)) + chi$expected<-round(chi$expected,digits=2) + chi$residuals<-round(chi$residuals,digits=2) + chi$contrib<-round(chi$contrib, digits=2) + nom_col<-colnames(chi$observed) + + if (ncol(chi$observed)\n + \n + \n +

Test du Chi2

\n +
+
+ Légende :
+ p <= 0.05
+ p <= 0.05 mais il y a des valeurs théoriques < 5
+ p > 0.05 +


+ """%self.parent.SysEncoding + + pretxt = '
\n'.join(links)+'


\n' + txt = '


\n'.join(['

'.join([tab[i] for tab in allhtml]) for i,val in enumerate(res)]) + txt = header + pretxt + txt + '\n' + + fileout=os.path.join(self.parametres['pathout'],'resultats-chi2.html') + with open(fileout, 'w') as f : + f.write(txt) + ListFile.append(fileout) + return ListFile \ No newline at end of file -- 2.7.4