1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2020 Pierre Ratinaud
4 #modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
7 #------------------------------------
8 # import des modules python
9 #------------------------------------
12 from operator import itemgetter
16 #------------------------------------
17 # import des modules wx
18 #------------------------------------
21 #------------------------------------
22 # import des fichiers du projet
23 #------------------------------------
24 from chemins import ffr, simipath
25 from analysetxt import AnalyseText
26 from guifunct import PrepSimi
27 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste, DoConf, exec_rcode, check_Rresult
28 from PrintRScript import PrintSimiScript
# Module-level logger registered under the name 'iramuteq.textsimi'.
31 log = logging.getLogger('iramuteq.textsimi')
# SimiTxt: similarity analysis on a text corpus, derived from AnalyseText.
# NOTE(review): the leading integers appear to be the line numbers of the
# original file. They jump (34 -> 37, 40 -> 43, 53 -> 55, 75 -> 77, ...), so the
# method header and several branch lines are not visible here. The statements
# below presumably form SimiTxt.doanalyse, by analogy with
# SimiFromCluster.doanalyse -- confirm against the full file.
34 class SimiTxt(AnalyseText):
# Tag the analysis as 'simitxt', pass simipath to pathout.basefiles() and keep
# a reference to the similarity indices table.
37 self.parametres['type'] = 'simitxt'
38 self.pathout.basefiles(simipath)
39 self.indices = indices_simi
# The body of this branch is not visible in this extract.
40 if self.dlg : # what is the link ???
# Active forms come from corpus.make_actives_limit(3).
43 self.actives = self.corpus.make_actives_limit(3)
# dictcol maps each position in self.actives to [form, corpus.getlemeff(form)].
44 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
# The list returned by corpus.make_etoiles() is copied twice: once onto
# self.stars and once into the parameters under 'stars'.
47 self.listet = self.corpus.make_etoiles()
49 self.stars = copy(self.listet)
50 self.parametres['stars'] = copy(self.listet)
51 self.parametres['sfromchi'] = False
# PrepSimi receives the current parameters, the selected.csv path, the active
# forms and dictcol; presumably it is the settings dialog -- TODO confirm.
52 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
# Continue only when prep.val equals wx.ID_OK.
53 if prep.val == wx.ID_OK :
# Adopt the parameter set held by prep.
55 self.parametres = prep.parametres
56 # self.dlg = progressbar(self.ira, 4)
# Build [index, form, getlemeff(form)] triples and sort them on the third item,
# largest first.
60 order_actives = [[i, act, self.corpus.getlemeff(act)] for i, act in enumerate(self.actives)]
61 order_actives = sorted(order_actives, key=itemgetter(2), reverse = True)
# parametres['selected'] holds positions in the sorted list; write the original
# index (item 0) of each selected entry to selected.csv, one repr() per line.
62 with open(self.pathout['selected.csv'], 'w') as f :
63 f.write('\n'.join([repr(order_actives[val][0]) for val in self.parametres['selected']]))
# Build the R script object from this analysis and run it through self.doR.
67 script = PrintSimiScript(self)
# The statements executed when doR returns a falsy value are not visible here.
69 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
# For type_graph == 1, record the produced graph in the 'liste_graph' file.
72 if self.parametres['type_graph'] == 1:
# When parametres['svg'] is truthy the extension of script.filename is replaced
# by '.svg'.
73 if self.parametres['svg'] :
74 filename, ext = os.path.splitext(script.filename)
75 fileout = filename + '.svg'
# NOTE(review): presumably the else branch of the svg test (the 'else' line is
# not visible) -- confirm.
77 fileout = script.filename
# Append [basename, script.txtgraph] to the list read from liste_graph when
# that file already exists.
78 if os.path.exists(self.pathout['liste_graph']):
79 graph_simi = read_list_file(self.pathout['liste_graph'])
80 graph_simi.append([os.path.basename(fileout), script.txtgraph])
# NOTE(review): presumably the else branch (file absent): start a new
# one-entry list -- confirm.
82 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
# Write the list to the liste_graph path through print_liste.
83 print_liste(self.pathout['liste_graph'], graph_simi)
# makesimiparam: store a dict of parameter values on self.paramsimi and merge
# it into self.parametres.
# NOTE(review): the embedded line numbers jump from 88 to 103, 110, 115 and 122,
# so most entries of the dict literal (and its closing brace) are not visible
# in this extract -- consult the full file for the complete set.
87 def makesimiparam(self) :
88 self.paramsimi = {'coeff' : 0,
103 'cexfromchi' : False,
110 'cola' : (200,200,200),
115 'keep_coord' : False,
# Disabled entry: the 'ira' key is not part of the dict.
122 #'ira' : self.pathout['Analyse.ira']
# Keys already present in self.parametres are overwritten by dict.update().
124 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3):
    """Prepare the input files for the similarity computation.

    Stores ``lim`` under 'eff_min_forme', the number of active forms under
    'nbactives' and False under 'fromprof' in ``self.parametres``; asks the
    corpus to write the sparse matrix ('mat01.csv') and the 'listeuce1.csv'
    file; then writes the active forms to 'actives.csv', one per line.

    ``self.actives`` must already be set by the caller; it is not computed
    here.
    """
    params = self.parametres
    params['eff_min_forme'] = lim
    params['nbactives'] = len(self.actives)
    params['fromprof'] = False

    self.corpus.make_and_write_sparse_matrix_from_uces(
        self.actives,
        self.pathout['mat01.csv'],
        self.pathout['listeuce1.csv'],
    )

    # The last line gets no trailing newline: forms are joined, not terminated.
    with open(self.pathout['actives.csv'], 'w') as handle:
        handle.write('\n'.join(self.actives))
# SimiFromCluster: similarity analysis for a single cluster, derived from
# SimiTxt.
136 class SimiFromCluster(SimiTxt) :
# Constructor: keeps the active forms and the cluster number, names the
# analysis, then delegates to SimiTxt.__init__.
# NOTE(review): the embedded line numbers skip 141, 142 and 144, so some
# statements are not visible here. lfreq and lchi are presumably stored on
# self on those lines (makefiles reads self.lfreq / self.lchi) -- confirm.
# NOTE(review): parametres defaults to None but is indexed below; unless a
# hidden line replaces it, calling without parametres would raise TypeError.
# NOTE(review): in the visible lines the dlg argument is not forwarded;
# SimiTxt.__init__ is always called with dlg=True.
138 def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) :
139 self.actives = actives
140 self.numcluster = numcluster
# The analysis name uses a 1-based cluster number (numcluster + 1).
143 parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
145 SimiTxt.__init__(self, ira, corpus, parametres, dlg=True, lemdial = False)
def preferences(self):
    """Return the parameters already stored on the instance, untouched."""
    current = self.parametres
    return current
# doanalyse: run the similarity analysis for the cluster held in
# self.numcluster.
# NOTE(review): the embedded line numbers jump (153 -> 156, 158 -> 163,
# 168 -> 172, 177 -> 179, ...), so several statements and branch lines
# (else / return) are not visible in this extract.
150 def doanalyse(self) :
# Tag the analysis as 'clustersimitxt', pass simipath to pathout.basefiles()
# and keep a reference to the similarity indices table.
151 self.parametres['type'] = 'clustersimitxt'
152 self.pathout.basefiles(simipath)
153 self.indices = indices_simi
# Drop the 'bystar' parameter when it is present.
156 if 'bystar' in self.parametres :
157 del self.parametres['bystar']
# dictcol maps each position in self.actives to
# [form, corpus.getlemclustereff(form, numcluster)].
158 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
163 self.parametres['stars'] = 0
164 self.parametres['sfromchi'] = 1
# PrepSimi receives the current parameters, the selected.csv path, the active
# forms and dictcol; presumably it is the settings dialog -- TODO confirm.
165 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
# Continue only when prep.val equals wx.ID_OK.
166 if prep.val == wx.ID_OK :
# Adopt the parameter set held by prep.
168 self.parametres = prep.parametres
172 self.dlg = progressbar(self.parent, 3)
# The type is set again after self.parametres has been replaced by
# prep.parametres.
174 self.parametres['type'] = 'clustersimitxt'
# Build the R script object from this analysis and run it through self.doR.
175 script = PrintSimiScript(self)
# The statements executed when doR returns a falsy value are not visible here.
177 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# For type_graph == 1, record the produced graph in the 'liste_graph' file.
179 if self.parametres['type_graph'] == 1:
# When parametres['svg'] is truthy the extension of script.filename is replaced
# by '.svg'.
180 if self.parametres['svg'] :
181 filename, ext = os.path.splitext(script.filename)
182 fileout = filename + '.svg'
# NOTE(review): presumably the else branch of the svg test (the 'else' line is
# not visible) -- confirm.
184 fileout = script.filename
# Append [basename, script.txtgraph] to the list read from liste_graph when
# that file already exists.
185 if os.path.exists(self.pathout['liste_graph']):
186 graph_simi = read_list_file(self.pathout['liste_graph'])
187 graph_simi.append([os.path.basename(fileout), script.txtgraph])
# NOTE(review): presumably the else branch (file absent): start a new
# one-entry list -- confirm.
189 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
# Write the list to the liste_graph path through print_liste.
190 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self):
    """Prepare the input files for the similarity computation on one cluster.

    Stores 3 under 'eff_min_forme', the number of active forms under
    'nbactives' and True under 'fromprof' in ``self.parametres``; asks the
    corpus to write the sparse matrix ('mat01.csv') for the entry
    ``corpus.lc[numcluster]``; then writes three files, one item per line:
    'actives.csv' (the forms), 'actives_nb.csv' (repr of each ``self.lfreq``
    item) and 'actives_chi.csv' (repr of each ``self.lchi`` item).
    """
    params = self.parametres
    params['eff_min_forme'] = 3
    params['nbactives'] = len(self.actives)
    params['fromprof'] = True

    self.corpus.make_and_write_sparse_matrix_from_classe(
        self.actives,
        self.corpus.lc[self.numcluster],
        self.pathout['mat01.csv'],
    )

    # Items are joined, not terminated: no trailing newline in any file.
    with open(self.pathout['actives.csv'], 'w') as handle:
        handle.write('\n'.join(self.actives))
    with open(self.pathout['actives_nb.csv'], 'w') as handle:
        handle.write('\n'.join(map(repr, self.lfreq)))
    with open(self.pathout['actives_chi.csv'], 'w') as handle:
        handle.write('\n'.join(map(repr, self.lchi)))