1 # -*- coding: utf-8 -*-
2 # Author: Pierre Ratinaud
4 # copyright : 2014 (c) Pierre Ratinaud
8 from analysetxt import AnalyseText
9 from OptionAlceste import OptionAlc
10 from PrintRScript import RchdTxt, ReinertTxtProf, TgenProfScript, ReDoProfScript
11 from layout import PrintRapport
12 from chemins import ChdTxtPathOut, PathOut
13 from functions import DoConf, print_liste, TGen
# Text analysis built on AnalyseText. The visible code tags its parameters
# with type 'alceste', writes sparse matrices from the corpus, and runs two
# generated R scripts through self.doR.
# NOTE(review): this listing has lost its indentation and some lines; in
# particular the `def` header of the method whose body follows the class
# line is not visible, and the nesting under if/elif is inferred from the
# keywords only -- confirm against the original file.
16 class Reinert(AnalyseText) :
# Record the analysis type and hand the ChdTxtPathOut table to the
# output-path helper (presumably registering output file names -- confirm).
18 self.parametres['type'] = 'alceste'
19 self.pathout.basefiles(ChdTxtPathOut)
# Ask the corpus for the active forms, passing 'max_actives' and the
# constant 1 (meaning of the constant not visible here -- confirm). The
# second returned value is stored as 'eff_min_forme', the number of active
# forms as 'nbactives'.
20 self.actives, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 1)
21 self.parametres['eff_min_forme'] = lim
22 self.parametres['nbactives'] = len(self.actives)
# Write the input matrices according to 'classif_mode':
#   0 -> two tables (TableUc1 / TableUc2) built from "uc" with sizes
#        'tailleuc1' / 'tailleuc2'; the two returned lengths are stored;
#   1 -> a single table built from "uces";
#   2 -> a single table built from "uci".
24 if self.parametres['classif_mode'] == 0 :
25 lenuc1, lenuc2 = self.corpus.make_and_write_sparse_matrix_from_uc(self.actives, self.parametres['tailleuc1'], self.parametres['tailleuc2'], self.pathout['TableUc1'], self.pathout['TableUc2'], self.pathout['listeuce1'], self.pathout['listeuce2'])
26 self.parametres['lenuc1'] = lenuc1
27 self.parametres['lenuc2'] = lenuc2
28 elif self.parametres['classif_mode'] == 1 :
29 self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
30 elif self.parametres['classif_mode'] == 2 :
31 self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
# Generate the first R script and run it.
# NOTE(review): `result` is assigned but not used in the visible lines.
33 Rscript = self.printRscript()
34 result = self.doR(Rscript, dlg=self.dlg, message='CHD...')
# Load the class assignment from the file at pathout['uce'], then write
# three profile tables: active forms, the forms returned by
# make_actives_nb(..., 2) (stored as self.sup), and the "et" profile.
# NOTE(review): `uci` is not defined in the visible lines -- confirm where
# it comes from.
37 self.corpus.make_ucecl_from_R(self.pathout['uce'])
38 self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout'], uci = uci)
39 self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2)
40 self.corpus.make_and_write_profile(self.sup, self.corpus.lc, self.pathout['ContSupOut'], uci = uci)
41 self.corpus.make_and_write_profile_et(self.corpus.lc, self.pathout['ContEtOut'], uci = uci)
# The number of classes is the length of corpus.lc; it is needed by
# printRscript2, which reads self.clnb.
42 self.clnb = len(self.corpus.lc)
43 self.parametres['clnb'] = self.clnb
# Generate and run the second R script.
44 Rscript = self.printRscript2()
45 self.doR(Rscript, dlg=self.dlg, message='profils et A.F.C. ...')
# Elapsed time since self.t1, stored as an "Xh Ym Zs" string.
# NOTE(review): `time` and `self.t1` are not defined in the visible lines.
46 self.time = time() - self.t1
47 minutes, seconds = divmod(self.time, 60)
48 hours, minutes = divmod(minutes, 60)
49 self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds)
# Write the graph lists and the report.
50 self.print_graph_files()
# Load the saved 'ALCESTE' options, show the OptionAlc dialog, copy the
# dialog values into the options, save them back to the same config file,
# merge them into self.parametres and return self.parametres.
# NOTE(review): indentation is lost in this listing, so it is not visible
# which statements after `if self.dialok == 5100` belong to its body, nor
# what is returned when the dialog is not accepted -- confirm against the
# original file.
52 def preferences(self) :
# Debug output of the incoming parameters and of the options read from the
# 'reinert' config file.
53 print 'parametres en entree config alceste', self.parametres
54 parametres = DoConf(self.parent.ConfigPath['reinert']).getoptions('ALCESTE')
55 print 'parametres apres doconf', parametres
# The dialog receives the corpus object, the output-path object and the
# current 'lem' setting alongside the saved options.
56 parametres['corpus'] = self.corpus
57 parametres['pathout'] = self.pathout
58 parametres['lem'] = self.parametres['lem']
59 self.dial = OptionAlc(self.parent, parametres)
60 self.dial.CenterOnParent()
61 self.dialok = self.dial.ShowModal()
# 5100 is presumably wx.ID_OK (dialog accepted) -- confirm.
62 if self.dialok == 5100 :
63 parametres['classif_mode'] = self.dial.radio_box_2.GetSelection()
64 parametres['tailleuc1'] = self.dial.spin_ctrl_1.GetValue()
65 parametres['tailleuc2'] = self.dial.spin_ctrl_2.GetValue()
66 parametres['mincl'] = self.dial.spin_ctrl_4.GetValue()
67 parametres['minforme'] = self.dial.spin_ctrl_5.GetValue()
68 parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue()
69 parametres['max_actives'] = self.dial.spin_max_actives.GetValue()
# The corpus object is replaced by an empty string and (below) the path
# object by its directory before the options are saved -- presumably so only
# plain values reach the config file; confirm.
70 parametres['corpus'] = ''
71 parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()]
72 parametres['pathout'] = self.pathout.dirout
73 parametres['mode.patate'] = self.dial.check_patate.GetValue()
74 DoConf(self.parent.ConfigPath['reinert']).makeoptions(['ALCESTE'], [parametres])
# Merge the chosen options into the analysis parameters and hand them back.
77 self.parametres.update(parametres)
78 return self.parametres
def printRscript(self):
    """Call RchdTxt for this analysis and return self.pathout['Rchdtxt'].

    Positional values come from self.pathout, self.parent.RscriptsPath and
    self.parametres ('mincl', 'classif_mode'). Keyword values come from
    self.parametres and from the 'iramuteq' section of the parent's
    preferences. 'nbt' is 'nbcl_p1' minus one and R_max_mem is always False.
    """
    out_paths = self.pathout
    scripts_dir = self.parent.RscriptsPath
    settings = self.parametres
    min_cluster = settings['mincl']
    mode = settings['classif_mode']
    # Keyword arguments are gathered in the same order the call lists them.
    keywords = dict(
        nbt=settings['nbcl_p1'] - 1,
        svdmethod=settings['svdmethod'],
        libsvdc=self.parent.pref.getboolean('iramuteq', 'libsvdc'),
        libsvdc_path=self.parent.pref.get('iramuteq', 'libsvdc_path'),
        R_max_mem=False,
        mode_patate=settings['mode.patate']
    )
    RchdTxt(out_paths, scripts_dir, min_cluster, mode, **keywords)
    return self.pathout['Rchdtxt']
def printRscript2(self):
    """Call ReinertTxtProf and return self.pathout['RTxtProfGraph'].

    ReinertTxtProf receives the output paths, the parent's R scripts path,
    the number of classes (self.clnb) and the constant 0.9.
    """
    out_paths = self.pathout
    scripts_dir = self.parent.RscriptsPath
    class_count = self.clnb
    # NOTE(review): the meaning of 0.9 is not visible here -- confirm
    # against ReinertTxtProf.
    ReinertTxtProf(out_paths, scripts_dir, class_count, 0.9)
    return self.pathout['RTxtProfGraph']
def print_graph_files(self):
    """Write the AFC and CHD graph lists, then produce the report.

    Each list entry is [file base name, caption]. The AFC list always has
    four entries. The CHD list gains the 'dendro2' and 'arbre2' entries
    only when 'classif_mode' is 0. Both lists are written with print_liste
    and PrintRapport is called last.
    """
    mess_afc = u"La position des points n'est peut être pas exacte"
    afc_entries = [
        ('AFC2DL_OUT', u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc),
        ('AFC2DSL_OUT', u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc),
        ('AFC2DEL_OUT', u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2 - %s' % mess_afc),
        ('AFC2DCL_OUT', u'Classes - Coordonnées - facteur 1 / 2'),
    ]
    afc_graph_list = [[os.path.basename(self.pathout[key]), caption]
                      for key, caption in afc_entries]

    # Mode 0 has a second classification, hence a second dendrogram and a
    # second tree.
    double_chd = self.parametres['classif_mode'] == 0
    chd_entries = [('dendro1', u'dendrogramme à partir de chd1')]
    if double_chd:
        chd_entries.append(('dendro2', u'dendrogramme à partir de chd2'))
    chd_entries.append(('arbre1', u'chd1'))
    if double_chd:
        chd_entries.append(('arbre2', u'chd2'))
    chd_graph_list = [[os.path.basename(self.pathout[key]), caption]
                      for key, caption in chd_entries]

    print_liste(self.pathout['liste_graph_afc'], afc_graph_list)
    print_liste(self.pathout['liste_graph_chd'], chd_graph_list)
    PrintRapport(self, self.corpus, self.parametres)
# Analysis that writes per-cluster tables for a TGen definition to CSV
# files and then runs a generated R script.
# NOTE(review): this listing has lost its indentation and some lines, so
# the boundary between the constructor and the analysis code below is not
# visible -- confirm against the original file.
107 class TgenProf(AnalyseText):
# NOTE(review): the `cluster_size` argument is not used in the visible
# lines; self.cluster_size is recomputed from corpus.lc instead.
108 def __init__(self, ira, corpus, parametres, cluster_size):
111 self.parametres = parametres
112 self.pathout = PathOut(dirout = self.parametres['pathout'])
# One size per class: the length of each entry of corpus.lc.
113 self.cluster_size = [len(classe) for classe in corpus.lc]
# Read the TGen definition from 'tgenpath' using the application's system
# encoding.
117 self.tgen = TGen(path = self.parametres['tgenpath'], encoding = self.ira.syscoding)
118 self.tgen.read(self.tgen.path)
119 #self.parametres['etoiles'].sort()
# Table 1 (tgeneff.csv): rows returned by make_tgen_profile, re-keyed so
# that each row's first element maps to {cluster name: value}.
120 self.parametres['tgeneff'] = os.path.join(self.parametres['pathout'], 'tgeneff.csv')
121 tgenst = self.corpus.make_tgen_profile(self.tgen.tgen, self.corpus.lc)
# Cluster names are 'cluster_001', 'cluster_002', ...; `et` maps each name
# to its size.
122 clnames = ['cluster_%03d' % i for i in range(1, len(self.cluster_size) + 1)]
123 et = dict(zip(clnames, self.cluster_size))
124 tgenst = dict([[line[0], dict(zip(clnames, line[1:]))] for line in tgenst])
125 self.tgen.writetable(self.parametres['tgeneff'], tgenst, et)
# Output paths for the remaining tables. 'tgenspec' and 'tgenlemspec' are
# only stored here (presumably consumed by the R script -- confirm).
126 self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenchi2.csv')
127 self.parametres['tgenlemeff'] = os.path.join(self.parametres['pathout'], 'tgenlemeff.csv')
128 self.parametres['tgenlemspec'] = os.path.join(self.parametres['pathout'], 'tgenlemchi2.csv')
# Table 2 (tgenlemeff.csv): built from corpus.tgenlem, one row per key.
129 tgenlemeff = dict([[lem, dict(zip(clnames, self.corpus.tgenlem[lem]))] for lem in self.corpus.tgenlem])
130 self.tgen.writetable(self.parametres['tgenlemeff'], tgenlemeff, et)
# Build the R script and run it without a progress dialog (dlg = False).
131 self.Rscript = TgenProfScript(self)
132 self.Rscript.make_script()
134 self.doR(self.Rscript.scriptout, dlg = False, message = 'R...')
# Analysis that builds a ReDoProfScript from its own state.
# NOTE(review): only part of this class is visible in this listing
# (indentation and some lines are missing, and it may continue past the
# last line shown) -- confirm against the original file.
136 class ReDoProfile(AnalyseText):
# NOTE(review): the parameter is named `analyses`, but the assignment below
# reads `analyse`. Unless a module-level `analyse` exists, that line raises
# NameError -- confirm which name is intended.
137 def __init__(self, ira, corpus, analyses, parametres):
140 self.parametres = parametres
141 self.analyse = analyse
# Output paths are rooted at the 'pathout' directory from the parameters.
142 self.pathout = PathOut(dirout = self.parametres['pathout'])
143 #self.cluster_size = [len(classe) for classe in corpus.lc]
# NOTE(review): `script` is a local name; its use is not visible here.
147 script = ReDoProfScript(self)