font size...
[iramuteq] / Rscripts / chdfunct.R
1 #datadm<-read.table('/home/pierre/.hippasos/corpus_agir_CHDS_16/fileACTtemp.csv', header=TRUE,sep=';', quote='\"',row.names = 1, na.strings = 'NA')
2 library(cluster)
3 #dissmat<-daisy(dataact, metric = 'gower', stand = FALSE)
4 #chd<-diana(dissmat,diss=TRUE,)
5 #height<-chd$height
6 #sortheight<-sort(height,decreasing=TRUE)
7 FindBestCluster<-function (x,Max=15) {
8     i<-1
9     j<-1
10     ListClasseOk<-list()
11     while (i < Max) {
12         if (x[i]==1){
13             while (x[i]==1) {
14             i<-i+1
15             }
16             ListClasseOk[[j]]<-i
17             j<-j+1
18         }
19         if (x[i]==x[i+1]) {
20             i<-i+1
21         }
22         else {
23             ListClasseOk[[j]]<-i+1
24             i<-i+1
25             j<-j+1
26         }
27     }
28         unlist(ListClasseOk)
29 }
30 #BestCLusterNb<-FindBestCluster(sortheight)
31 #classes<-as.data.frame(cutree(as.hclust(chd), k=6))[,1]
32 #datadm<-cbind(datadm,classes)
33 #clusplot(datadm,classes,shade=TRUE,color=TRUE,labels=4)
34
35 BuildContTable<- function (x) {
36         afctable<-NULL
37         for (i in 1:(ncol(x)-1)) {
38             coltable<-table(x[,i],x$classes)
39             afctable<-rbind(afctable,coltable)
40         }
41         afctable
42 }
43
44 PrintProfile<- function(dataclasse,profileactlist,profileetlist,antiproact,antiproet,clusternb,profileout,antiproout,profilesuplist=NULL,antiprosup=NULL) {
45     prolist<-list()
46     profile<-matrix(,0,6)
47     antipro<-matrix(,0,6)
48         cltot<-as.data.frame(dataclasse[dataclasse[,ncol(dataclasse)]!=0,])
49         cltot<-as.data.frame(as.character(cltot[,ncol(cltot)]))
50         tot<-nrow(cltot)
51     classes<-as.data.frame(as.character(dataclasse[,ncol(dataclasse)]))
52     classes.s<-as.data.frame(summary(cltot[,1],maxsum=500))
53     profile<-rbind(profile,c('***','nb classes',clusternb,'***','',''))
54     antipro<-rbind(antipro,c('***','nb classes',clusternb,'***','',''))
55     for(i in 1:clusternb) {
56         profile<-rbind(profile,c('**','classe',i,'**','',''))
57         nbind<-classes.s[which(rownames(classes.s)==i),1]
58         pr<-round((nbind/tot)*100,digits=2)
59         profile<-rbind(profile,c('****',nbind,tot,pr,'****',''))
60                 if (length(profileactlist[[1]][[i]])!=0){
61                 profile<-rbind(profile,as.matrix(profileactlist[[1]][[i]]))
62                 }
63                 if (!is.null(profilesuplist)) {
64                         profile<-rbind(profile,c('*****','*','*','*','*','*'))
65                         if (length(profilesuplist[[1]][[i]])!=0) {
66                                 profile<-rbind(profile,as.matrix(profilesuplist[[1]][[i]]))
67                         }               
68                 }
69         if (!is.null(profileetlist)) {
70             profile<-rbind(profile,c('*','*','*','*','*','*'))
71                     if (length(profileetlist[[1]][[i]])!=0) {
72                     profile<-rbind(profile,as.matrix(profileetlist[[1]][[i]]))
73                     }
74         }
75         antipro<-rbind(antipro,c('**','classe',i,'**','',''))
76         antipro<-rbind(antipro,c('****',nbind,tot,pr,'****',''))
77                 if (length(antiproact[[1]][[i]])!=0) {
78                 antipro<-rbind(antipro,as.matrix(antiproact[[1]][[i]]))
79                 }
80                 if (!is.null(profilesuplist)) {
81                         antipro<-rbind(antipro,c('*****','*','*','*','*','*'))
82                         if (length(antiprosup[[1]][[i]])!=0) {
83                                 antipro<-rbind(antipro,as.matrix(antiprosup[[1]][[i]]))
84                         }
85                 }
86         if (!is.null(profileetlist)) {
87             antipro<-rbind(antipro,c('*','*','*','*','*','*'))
88             if (length(antiproet[[1]][[i]])!=0) {
89                             antipro<-rbind(antipro,as.matrix(antiproet[[1]][[i]]))
90                     }
91         }
92         }
93     write.csv2(profile,file=profileout,row.names=FALSE)
94     write.csv2(antipro,file=antiproout,row.names=FALSE)
95 }
96
97 AddCorrelationOk<-function(afc) {
98         rowcoord<-afc$rowcoord
99         colcoord<-afc$colcoord
100         factor <- ncol(rowcoord)
101         
102         hypo<-function(rowcoord,ligne) {
103                 somme<-0
104                 for (i in 1:factor) {
105                         somme<-somme+(rowcoord[ligne,i])^2
106                 }
107                 sqrt(somme)
108         }
109         cor<-function(d,hypo) {
110                 d/hypo
111         }
112         CompCrl<-function(rowcol) {
113                 out<-rowcol
114                 for (i in 1:factor) {
115                         for(ligne in 1:nrow(rowcol)) {      
116                                 out[ligne,i]<-cor(rowcol[ligne,i],hypo(rowcol,ligne))
117                         }
118                 }
119         out
120         }
121
122         get.corr <- function(rowcol) {
123                 sqrowcol <- rowcol^2
124                 sqrowcol <- sqrt(rowSums(sqrowcol))
125                 corr <- rowcol / sqrowcol
126                 corr
127         }
128         #afc$rowcrl<-CompCrl(rowcoord)
129         afc$rowcrl <- get.corr(rowcoord)
130         #afc$colcrl<-CompCrl(colcoord)
131         afc$colcrl<-get.corr(colcoord)
132         afc
133 }
134
135 AsLexico<- function(x) {
136         x<-as.matrix(x)
137         sumcol<-colSums(x)
138     sumrow<-rowSums(x)
139         tot<-sum(sumrow)
140         tablesqr<-x
141         tablep<-x
142         mod.names<-rownames(x)
143         #problem exemple aurelia
144         for (classe in 1:ncol(x)) {
145                 print(classe)
146                 for (ligne in 1:nrow(x)) {
147                         conttable<-matrix(0,2,2)
148                         conttable[1,1]<-as.numeric(x[ligne,classe])
149                         conttable[1,2]<-sumrow[ligne]-conttable[1,1]
150                         conttable[2,1]<-sumcol[classe]-conttable[1,1]
151                         conttable[2,2]<-tot-sumrow[ligne]-conttable[2,1]
152                         chiresult<-chisq.test(conttable,correct=TRUE)
153                         if (is.na(chiresult$p.value)) {
154                                 chiresult$p.value<-1
155                                 chiresult$statistic<-0
156                         }
157                         obsv<-chiresult$expected
158                         pval<-as.character(format(chiresult$p.value,scientific=TRUE))
159                         spval<-strsplit(pval,'e')
160                         if (is.na(spval)) {
161                                 print(spval)
162                         }
163                         if (conttable[1,1]>obsv[1,1]) {
164                                 tablep[ligne,classe]<-as.numeric(spval[[1]][2])*(-1)
165                                 tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
166                         }
167                         else if (conttable[1,1]<obsv[1,1]){
168                                 tablep[ligne,classe]<-as.numeric(spval[[1]][2])
169                                 tablesqr[ligne,classe]<--round(chiresult$statistic,digits=3)
170                         }
171                 }
172         }
173         output<-list()
174         eff_relatif<-(x/sumcol)*1000
175         output[[1]]<-tablep
176         output[[2]]<-tablesqr
177         output[[3]]<-eff_relatif
178         output
179 }               
180
181 MyChiSq<-function(x){
182         sr<-rowSums(x)
183     sc<-colSums(x)
184     n <- sum(x)
185         E <- outer(sr, sc, "*")/n
186         STAT<-sum((abs(x - E))^2/E)
187     PVAL <- pchisq(STAT, 1, lower.tail = FALSE)
188         chi<-list(statistic = STAT, expected = E, p.value = PVAL)
189     chi
190 }
191
192 AsLexico2<- function(mat, chip = FALSE) {
193         mat<-as.matrix(mat)
194         sumcol<-colSums(mat)
195         sumrow<-rowSums(mat)
196         tot<-sum(sumrow)
197         tablesqr<-mat
198         tablep<-mat
199     contcs <- mat
200     for (i in 1:nrow(contcs)) {
201         contcs[i,] <- sumcol
202     }
203     contrs <- mat
204     contrs[,1:ncol(contrs)] <- sumrow
205     conttot <- matrix(tot, nrow = nrow(mat), ncol = ncol(mat))
206     cont12 <- contrs - mat
207     cont21 <- contcs - mat
208     cont22 <- conttot - contrs - cont21
209         mod.names<-rownames(mat)
210     make_chi_lex <- function(x) {
211         tb<-matrix(0,2,2)
212         tb[1,1] <- mat[x]
213         tb[1,2] <- cont12[x]
214         tb[2,1] <- cont21[x]
215         tb[2,2] <- cont22[x]
216         chiresult<-MyChiSq(tb)
217         #chiresult$statistic
218         if (is.na(chiresult$p.value)) {
219                         chiresult$p.value<-1
220                         chiresult$statistic<-0
221                 }
222                 obsv<-chiresult$expected
223                 pval<-as.character(format(chiresult$p.value,scientific=TRUE))
224                 spval<-strsplit(pval,'e')
225                 if (is.na(spval)) {
226                         print(spval)
227                 }
228                 if (tb[1,1]>obsv[1,1]) {
229                         as.numeric(spval[[1]][2])*(-1)
230                 }
231                 else if (tb[1,1]<obsv[1,1]){
232                         as.numeric(spval[[1]][2])
233                 } else {
234             0
235         }
236     }
237     make_chi_p <- function(x) {
238         tb<-matrix(0,2,2)
239         tb[1,1] <- mat[x]
240         tb[1,2] <- cont12[x]
241         tb[2,1] <- cont21[x]
242         tb[2,2] <- cont22[x]
243         chiresult<-MyChiSq(tb)
244         #chiresult$statistic
245         if (is.na(chiresult$p.value)) {
246                         chiresult$p.value<-1
247                         chiresult$statistic<-0
248                 }
249                 obsv<-chiresult$expected
250                 if (tb[1,1]>obsv[1,1]) {
251                         chiresult$p.value
252                 }
253                 else if (tb[1,1]<obsv[1,1]){
254                         1
255                 } else {
256             1
257         }
258     }
259     make_chi <- function(x) {
260         tb<-matrix(0,2,2)
261         tb[1,1] <- mat[x]
262         tb[1,2] <- cont12[x]
263         tb[2,1] <- cont21[x]
264         tb[2,2] <- cont22[x]
265         chiresult<-MyChiSq(tb)
266         #chiresult$statistic
267         if (is.na(chiresult$p.value)) {
268                         chiresult$p.value<-1
269                         chiresult$statistic<-0
270                 }
271                 obsv<-chiresult$expected
272                 if (tb[1,1]>obsv[1,1]) {
273                         chiresult$statistic
274                 }
275                 else if (tb[1,1]<obsv[1,1]){
276                         0
277                 } else {
278             0
279         }
280     }
281
282     res <- matrix(sapply(1:length(mat), make_chi_lex), ncol = ncol(mat))
283     rownames(res)<-mod.names
284     colnames(res) <- colnames(mat)
285     eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2)
286     rownames(eff_relatif)<-mod.names
287     colnames(eff_relatif) <- colnames(mat)
288     if (chip) {
289         reschip <- matrix(sapply(1:length(mat), make_chi_p), ncol = ncol(mat))
290         rownames(reschip)<- mod.names
291         colnames(reschip) <- colnames(mat)
292         reschi <- matrix(sapply(1:length(mat), make_chi), ncol = ncol(mat))
293         rownames(reschip)<- mod.names
294         colnames(reschip) <- colnames(mat)
295     }
296     out <-list()
297     out[[1]]<-res
298     out[[3]]<-eff_relatif
299     if (chip) {
300         out[[2]] <- reschip
301         out[[4]] <- reschi
302     }
303     out
304 }
305
306 make.spec.hypergeo <- function(mat) {
307     library(textometry)
308     spec <- specificities(mat)
309         sumcol<-colSums(mat)
310     eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2)
311     colnames(eff_relatif) <- colnames(mat)
312     out <-list()
313     out[[1]]<-spec
314     out[[3]]<-eff_relatif
315     out
316 }
317
318 BuildProf01<-function(x,classes) {
319         #x : donnees en 0/1
320         #classes : classes de chaque lignes de x
321         dm<-cbind(x,cl=classes)
322         clnb=length(summary(as.data.frame(as.character(classes)),max=100))
323         mat<-matrix(0,ncol(x),clnb)
324         rownames(mat)<-colnames(x)
325         for (i in 1:clnb) {
326                 dtmp<-dm[which(dm$cl==i),]
327                 for (j in 1:(ncol(dtmp)-1)) {
328                         mat[j,i]<-sum(dtmp[,j])
329                 }
330         }
331         mat
332 }
333
334 build.prof.tgen <- function(x) {
335     nbst <- sum(x[nrow(x),])
336     totcl <- x[nrow(x),]
337     tottgen <- rowSums(x)
338     nbtgen <- nrow(x) - 1
339     chi2 <- x[1:(nrow(x)-1),]
340     pchi2 <- chi2
341     for (classe in 1:ncol(x)) {
342         for (tg in 1:nbtgen) {
343             cont <- c(x[tg, classe], tottgen[tg] - x[tg, classe], totcl[classe] - x[tg, classe], (nbst - totcl[classe]) - (tottgen[tg] - x[tg, classe]))
344             cont <- matrix(unlist(cont), nrow=2)
345             chiresult<-chisq.test(cont,correct=FALSE)
346             if (is.na(chiresult$p.value)) {
347                 chiresult$p.value<-1
348                 chiresult$statistic<-0
349             }
350             if (chiresult$expected[1,1] > cont[1,1]) {
351                 chiresult$statistic <- chiresult$statistic * -1
352             }
353             chi2[tg,classe] <- chiresult$statistic
354             pchi2[tg,classe] <- chiresult$p.value
355         }
356     }
357     res <- list(chi2 = chi2, pchi2 = pchi2)
358 }
359
360
361 new.build.prof <- function(x,dataclasse,clusternb,lim=2) {
362         cl <- dataclasse[,ncol(dataclasse)]
363         nst <- length(which(cl != 0))
364         rs <- rowSums(x)
365         mod.names<-rownames(x)
366         lnbligne <- list()
367         lchi <- list()
368         prof <- list()
369         aprof <- list()
370         for (classe in 1:clusternb) {
371                 lnbligne[[classe]]<-length(which(cl==classe))
372                 tmpprof <- data.frame()
373                 tmpanti <- data.frame()
374                 obs1 <- x[,classe] #1,1
375                 obs2 <- rs - obs1 #1,2
376             obs3 <- lnbligne[[classe]] - obs1   #2,1
377                 obs4 <- nst - (obs1 + obs2 + obs3) #2,2
378                 exp1 <- ((obs1 + obs3) * (obs1 + obs2)) / nst
379                 exp2 <- ((obs2 + obs1) * (obs2 + obs4)) / nst
380                 exp3 <- ((obs3 + obs4) * (obs3 + obs1)) / nst
381                 exp4 <- ((obs4 + obs3) * (obs4 + obs2)) / nst
382                 chi1 <- ((obs1 - exp1)^2) / exp1
383                 chi2 <- ((obs2 - exp2)^2) / exp2
384                 chi3 <- ((obs3 - exp3)^2) / exp3
385                 chi4 <- ((obs4 - exp4)^2) / exp4
386                 chi <- chi1 + chi2 + chi3 + chi4        
387                 chi[which(is.na(chi)==T)] <- 0
388                 tochange <- ifelse(obs1 > exp1, 1, -1)
389                 lchi[[classe]] <- chi * tochange
390                 tokeep <- which(lchi[[classe]] > lim)
391                 if (length(tokeep)) {
392                         tmpprof[1:length(tokeep),1] <- obs1[tokeep]
393                         tmpprof[,2] <- rs[tokeep]
394                         tmpprof[,3] <- round((obs1/rs)*100, digits=2)[tokeep]
395                         tmpprof[,4] <- round(lchi[[classe]], digits=3)[tokeep]
396                         tmpprof[,5] <- mod.names[tokeep] 
397                         tmpprof[,6] <- pchisq(lchi[[classe]] ,1, lower.tail=F)[tokeep]
398                 }
399                 prof[[classe]] <- tmpprof
400                 toanti <- which(lchi[[classe]] < -lim)
401                 if (length(toanti)) {
402                         tmpanti[1:length(toanti),1] <- obs1[toanti]
403                         tmpanti[,2] <- rs[toanti]
404                         tmpanti[,3] <- round((obs1/rs)*100, digits=2)[toanti]
405                         tmpanti[,4] <- round(lchi[[classe]], digits=3)[toanti]
406                         tmpanti[,5] <- mod.names[toanti] 
407                         tmpanti[,6] <- pchisq(-lchi[[classe]] ,1, lower.tail=F)[toanti]
408                 }
409                 aprof[[classe]] <- tmpanti
410                 if (length(prof[[classe]])!=0) {
411                         prof[[classe]]<-prof[[classe]][order(prof[[classe]][,4],decreasing=TRUE),]
412                 }
413                 if (length(aprof[[classe]])!=0) {
414                         aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),]   
415                 }
416         }
417     tablechi <- do.call(cbind, lchi)
418         tablep <- pchisq(tablechi,1, lower.tail=F)
419         tablep <- round(tablep, digits=3)
420         tablechi <- round(tablechi, digits=3)
421         out <- list()
422         out[[1]] <- tablep
423         out[[2]] <- tablechi
424         out[[3]] <- cbind(x, rowSums(x))
425     out[[4]] <- prof    
426         out[[5]] <- aprof
427         out
428 }
429
430
431 BuildProf<- function(x,dataclasse,clusternb,lim=2) {
432         print('build prof')
433         ####
434         #r.names<-rownames(x)
435         #x<-as.matrix(x)
436         #rownames(x)<-r.names
437         ####
438         #nuce<-nrow(dataclasse)
439     sumcol<-paste(NULL,1:nrow(x))
440         colclasse<-dataclasse[,ncol(dataclasse)]
441         nuce <- length(which(colclasse != 0))
442 #       for (i in 1:nrow(x)) {
443 #               sumcol[i]<-sum(x[i,])
444 #       }
445 #       afctablesum<-cbind(x,sumcol)
446     afctablesum <- cbind(x, rowSums(x))
447     #dataclasse<-as.data.frame(dataclasse[dataclasse[,ncol(dataclasse)]!=0,])
448         dataclasse<-as.matrix(dataclasse[dataclasse[,ncol(dataclasse)]!=0,])
449         tablesqr<-x
450         tablep<-x
451         x<-afctablesum
452         listprofile<-list()
453         listantipro<-list()
454         mod.names<-rownames(x)
455         prof<-list()
456         aprof<-list()
457         lnbligne<-matrix()
458         for (classe in 1:clusternb) {
459                 lnbligne[classe]<-length(colclasse[colclasse==classe])
460                 prof[[classe]]<-data.frame()
461                 aprof[[classe]]<-data.frame()
462         }
463         
464         for (ligne in 1:nrow(x)) {
465                 for (classe in 1:clusternb) {
466                         nbligneclasse<-lnbligne[classe]
467                         conttable<-data.frame()
468                         conttable[1,1]<-as.numeric(x[ligne,classe])
469                         conttable[1,2]<-as.numeric(as.vector(x[ligne,ncol(x)]))-as.numeric(x[ligne,classe])
470                         conttable[2,1]<-nbligneclasse-as.numeric(x[ligne,classe])
471                         conttable[2,2]<-nrow(dataclasse)-as.numeric(as.vector(x[ligne,ncol(x)]))-conttable[2,1]
472                         chiresult<-chisq.test(conttable,correct=FALSE)
473                         if (is.na(chiresult$p.value)) {
474                                 chiresult$p.value<-1
475                                 chiresult$statistic<-0
476                                 china=TRUE
477                         }
478                         obsv<-chiresult$expected
479                         tablep[ligne,classe]<-round(chiresult$p.value,digits=3)
480                         #tablep[ligne,classe]<-format(chiresult$p.value, scientific=TRUE)
481             if (chiresult$statistic>=lim) {
482                                 if (conttable[1,1]>obsv[1,1]) {
483                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
484                                         prof[[classe]][nrow(prof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
485                                         prof[[classe]][nrow(prof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
486                                         prof[[classe]][nrow(prof[[classe]]),3]<-round((as.numeric(as.vector(x[ligne,classe]))/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
487                                         prof[[classe]][nrow(prof[[classe]]),4]<-round(chiresult$statistic,digits=2)
488                                         prof[[classe]][nrow(prof[[classe]]),5]<-mod.names[ligne]
489                                         prof[[classe]][nrow(prof[[classe]]),6]<-chiresult$p.value
490                                 }
491                                 else if (conttable[1,1]<obsv[1,1]){
492                                         tablesqr[ligne,classe]<--round(chiresult$statistic,digits=3)
493                                         aprof[[classe]][nrow(aprof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
494                                         aprof[[classe]][nrow(aprof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
495                                         aprof[[classe]][nrow(aprof[[classe]]),3]<-round((as.numeric(x[ligne,classe])/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
496                                         aprof[[classe]][nrow(aprof[[classe]]),4]<--round(chiresult$statistic,digits=2)
497                                         aprof[[classe]][nrow(aprof[[classe]]),5]<-mod.names[ligne]
498                                         aprof[[classe]][nrow(aprof[[classe]]),6]<-chiresult$p.value
499                                 }
500                                 #pour gerer le cas avec une seule v et par exemple
501                                 else if (conttable[1,1]==obsv[1,1]) {
502                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
503                                         prof[[classe]][nrow(prof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
504                                         prof[[classe]][nrow(prof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
505                                         prof[[classe]][nrow(prof[[classe]]),3]<-round((as.numeric(as.vector(x[ligne,classe]))/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
506                                         prof[[classe]][nrow(prof[[classe]]),4]<-round(chiresult$statistic,digits=2)
507                                         prof[[classe]][nrow(prof[[classe]]),5]<-mod.names[ligne]
508                                         prof[[classe]][nrow(prof[[classe]]),6]<-chiresult$p.value
509                                         aprof[[classe]][nrow(aprof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
510                                         aprof[[classe]][nrow(aprof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
511                                         aprof[[classe]][nrow(aprof[[classe]]),3]<-round((as.numeric(x[ligne,classe])/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
512                                         aprof[[classe]][nrow(aprof[[classe]]),4]<--round(chiresult$statistic,digits=2)
513                                         aprof[[classe]][nrow(aprof[[classe]]),5]<-mod.names[ligne]
514                                         aprof[[classe]][nrow(aprof[[classe]]),6]<-chiresult$p.value
515                                 }
516                         }else {
517                                 if (conttable[1,1]>obsv[1,1]) {
518                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
519                                 } else if (conttable[1,1]<obsv[1,1]){
520                                         tablesqr[ligne,classe]<--round(chiresult$statistic,digits=3)
521                                 }
522                                 #pour gerer le cas avec une seule v et par exemple
523                                 else if (conttable[1,1]==obsv[1,1]) {
524                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
525                                 }
526                         }
527                         #                               }
528                 }
529         }
530         for (classe in 1:clusternb) {
531                 if (length(prof[[classe]])!=0) {
532                         prof[[classe]]<-prof[[classe]][order(prof[[classe]][,4],decreasing=TRUE),]
533                 }
534                 if (length(aprof[[classe]])!=0) {
535                         aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),]   
536                 }
537         }
538         print('fini build prof')
539         output<-list()
540         output[[1]]<-tablep
541         output[[2]]<-tablesqr
542         output[[3]]<-afctablesum
543         output[[4]]<-prof
544         output[[5]]<-aprof
545         output
546 }
547
548
549 build.pond.prof <- function(mat, lim = 2) {
550         mat<-as.matrix(mat)
551         mod.names<-rownames(mat)
552         scol<-colSums(mat)
553         srow<-rowSums(mat)
554         tot<-sum(srow)
555         tablesqr<-mat
556         tablep<-mat
557         prof<-list()
558         aprof<-list()
559     clusternb <- ncol(mat)
560     x <- cbind(mat, rowSums(mat))
561         for (classe in 1:clusternb) {
562                 prof[[classe]]<-data.frame()
563                 aprof[[classe]]<-data.frame()
564         }
565
566     for (ligne in 1:nrow(mat)) {
567         for(classe in 1:ncol(mat)) {
568             tb <- matrix(0,2,2)
569             tb[1,1] <- mat[ligne,classe]
570             tb[1,2] <- srow[ligne] - tb[1,1]
571             tb[2,1] <- scol[classe] - tb[1,1]
572             tb[2,2] <- tot - srow[ligne] - tb[2,1]
573             chiresult <- MyChiSq(tb)
574             if (is.na(chiresult$p.value)) {
575                                 chiresult$p.value<-1
576                                 chiresult$statistic<-0
577                         }
578             tablep[ligne,classe]<-round(chiresult$p.value,digits=3)
579             conttable<-tb
580             obsv <- chiresult$expected
581             if (chiresult$statistic>=lim) {
582                                 if (conttable[1,1]>obsv[1,1]) {
583                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
584                                         prof[[classe]][nrow(prof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
585                                         prof[[classe]][nrow(prof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
586                                         prof[[classe]][nrow(prof[[classe]]),3]<-round((as.numeric(as.vector(x[ligne,classe]))/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
587                                         prof[[classe]][nrow(prof[[classe]]),4]<-round(chiresult$statistic,digits=2)
588                                         prof[[classe]][nrow(prof[[classe]]),5]<-mod.names[ligne]
589                                         prof[[classe]][nrow(prof[[classe]]),6]<-chiresult$p.value
590                                 }
591                                 else if (conttable[1,1]<obsv[1,1]){
592                                         tablesqr[ligne,classe]<--round(chiresult$statistic,digits=3)
593                                         aprof[[classe]][nrow(aprof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
594                                         aprof[[classe]][nrow(aprof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
595                                         aprof[[classe]][nrow(aprof[[classe]]),3]<-round((as.numeric(x[ligne,classe])/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
596                                         aprof[[classe]][nrow(aprof[[classe]]),4]<--round(chiresult$statistic,digits=2)
597                                         aprof[[classe]][nrow(aprof[[classe]]),5]<-mod.names[ligne]
598                                         aprof[[classe]][nrow(aprof[[classe]]),6]<-chiresult$p.value
599                                 }
600                                 #pour gerer le cas avec une seule v et par exemple
601                                 else if (conttable[1,1]==obsv[1,1]) {
602                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
603                                         prof[[classe]][nrow(prof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
604                                         prof[[classe]][nrow(prof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
605                                         prof[[classe]][nrow(prof[[classe]]),3]<-round((as.numeric(as.vector(x[ligne,classe]))/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
606                                         prof[[classe]][nrow(prof[[classe]]),4]<-round(chiresult$statistic,digits=2)
607                                         prof[[classe]][nrow(prof[[classe]]),5]<-mod.names[ligne]
608                                         prof[[classe]][nrow(prof[[classe]]),6]<-chiresult$p.value
609                                         aprof[[classe]][nrow(aprof[[classe]])+1,1]<-as.numeric(x[ligne,classe])
610                                         aprof[[classe]][nrow(aprof[[classe]]),2]<-as.numeric(as.vector(x[ligne,ncol(x)]))
611                                         aprof[[classe]][nrow(aprof[[classe]]),3]<-round((as.numeric(x[ligne,classe])/as.numeric(as.vector(x[ligne,ncol(x)])))*100,digits=2)
612                                         aprof[[classe]][nrow(aprof[[classe]]),4]<--round(chiresult$statistic,digits=2)
613                                         aprof[[classe]][nrow(aprof[[classe]]),5]<-mod.names[ligne]
614                                         aprof[[classe]][nrow(aprof[[classe]]),6]<-chiresult$p.value
615                                 }
616                         }else {
617                                 if (conttable[1,1]>obsv[1,1]) {
618                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
619                                 } else if (conttable[1,1]<obsv[1,1]){
620                                         tablesqr[ligne,classe]<--round(chiresult$statistic,digits=3)
621                                 }
622                                 #pour gerer le cas avec une seule v et par exemple
623                                 else if (conttable[1,1]==obsv[1,1]) {
624                                         tablesqr[ligne,classe]<-round(chiresult$statistic,digits=3)
625                                 }
626                         }
627
628         }
629     }
630         for (classe in 1:clusternb) {
631                 if (length(prof[[classe]])!=0) {
632                         prof[[classe]]<-prof[[classe]][order(prof[[classe]][,4],decreasing=TRUE),]
633                 }
634                 if (length(aprof[[classe]])!=0) {
635                         aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),]   
636                 }
637         }
638         output<-list()
639         output[[1]]<-tablep
640         output[[2]]<-tablesqr
641         output[[3]]<-x
642         output[[4]]<-prof
643         output[[5]]<-aprof
644         output
645 }
646
647     
648