1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2013 Pierre Ratinaud
6 from chemins import ffr, simipath
8 from analysetxt import AnalyseText
9 from guifunct import PrepSimi
10 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste, DoConf, exec_rcode, check_Rresult
11 from PrintRScript import PrintSimiScript
14 from operator import itemgetter
# Module-level logger, registered under the name 'iramuteq.textsimi'.
18 log = logging.getLogger('iramuteq.textsimi')
# Similarity analysis on a text corpus; subclass of AnalyseText.
20 class SimiTxt(AnalyseText):
# Tag the analysis type as 'simitxt', pass simipath to pathout.basefiles()
# and expose indices_simi on the instance.
22 self.parametres['type'] = 'simitxt'
23 self.pathout.basefiles(simipath)
24 self.indices = indices_simi
# Active forms come from corpus.make_actives_limit(3).  dictcol maps the
# position of each form to [form, corpus.getlemeff(form)]; it is given to
# the PrepSimi dialog below as its word list.
28 self.actives = self.corpus.make_actives_limit(3)
29 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
# The result of corpus.make_etoiles() is kept in self.listet; separate copies
# go to self.stars and to the 'stars' parameter.
32 self.listet = self.corpus.make_etoiles()
34 self.stars = copy(self.listet)
35 self.parametres['stars'] = copy(self.listet)
36 self.parametres['sfromchi'] = False
# Build the PrepSimi dialog; its result (prep.val) is compared with wx.ID_OK.
38 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
39 if prep.val == wx.ID_OK :
# Adopt the parameters held by the dialog object and create the progress
# dialog with progressbar(self.ira, 4).
41 self.parametres = prep.parametres
42 self.dlg = progressbar(self.ira, 4)
# Rank the active forms by decreasing getlemeff() value.  Each entry of
# parametres['selected'] is used as a rank in that ordering; the repr of the
# form's original position is written to selected.csv, one per line.
46 order_actives = [[i, act, self.corpus.getlemeff(act)] for i, act in enumerate(self.actives)]
47 order_actives = sorted(order_actives, key=itemgetter(2), reverse = True)
48 with open(self.pathout['selected.csv'], 'w') as f :
49 f.write('\n'.join([`order_actives[val][0]` for val in self.parametres['selected']]))
# Build the R script object and hand script.scriptout to self.doR() together
# with the progress dialog; a falsy return value of doR() is tested here.
54 script = PrintSimiScript(self)
56 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
# Bookkeeping done when type_graph == 1: fileout is script.filename, or the
# same name with a '.svg' extension in the branch guarded by the 'svg'
# parameter.
59 if self.parametres['type_graph'] == 1:
60 if self.parametres['svg'] :
61 filename, ext = os.path.splitext(script.filename)
62 fileout = filename + '.svg'
64 fileout = script.filename
# The pair [basename of fileout, script.txtgraph] is appended to the list
# read from the 'liste_graph' file when that file exists; a one-entry list
# is also built from the same pair.  graph_simi is then passed to
# print_liste() with the 'liste_graph' path.
65 if os.path.exists(self.pathout['liste_graph']):
66 graph_simi = read_list_file(self.pathout['liste_graph'])
67 graph_simi.append([os.path.basename(fileout), script.txtgraph])
69 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
70 print_liste(self.pathout['liste_graph'], graph_simi)
# Build the similarity parameter dict (self.paramsimi) and merge it into
# self.parametres.
74 def makesimiparam(self) :
# Literal values for the similarity settings, e.g. 'coeff' is 0, 'cola' is
# the triple (200,200,200) and 'keep_coord' is False.
75 self.paramsimi = {'coeff' : 0,
97 'cola' : (200,200,200),
102 'keep_coord' : False,
109 #'ira' : self.pathout['Analyse.ira']
# update() copies every paramsimi entry into self.parametres.
# NOTE(review): assuming parametres is a plain dict, keys already present
# are overwritten - confirm this is intended.
111 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3) :
    """Record the run parameters and write the matrix and active-form files.

    Three entries of self.parametres are set: 'eff_min_forme' (to lim),
    'nbactives' (the number of active forms) and 'fromprof' (False).  The
    corpus is then asked to write the sparse matrix and the uce list to
    the 'mat01.csv' and 'listeuce1.csv' paths, and the active forms are
    written one per line to 'actives.csv', encoded with
    self.ira.syscoding.

    lim -- value stored under 'eff_min_forme' (default 3).
    """
    settings = self.parametres
    settings['eff_min_forme'] = lim
    settings['nbactives'] = len(self.actives)
    settings['fromprof'] = False
    matrix_path = self.pathout['mat01.csv']
    uce_list_path = self.pathout['listeuce1.csv']
    self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, matrix_path, uce_list_path)
    with open(self.pathout['actives.csv'], 'w') as out :
        # Joined and encoded only once the file is open, so a failure here
        # still leaves the (empty) file behind, as before.
        joined = '\n'.join(self.actives)
        out.write(joined.encode(self.ira.syscoding))
# Similarity analysis tied to one cluster (self.numcluster); subclass of
# SimiTxt.
122 class SimiFromCluster(SimiTxt) :
# actives and numcluster are stored on the instance; ira, corpus and
# parametres are forwarded to SimiTxt.__init__.
# NOTE(review): the dlg argument is not forwarded - the call below always
# passes dlg=True.  Confirm this is intended.
123 def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) :
124 self.actives = actives
125 self.numcluster = numcluster
# The analysis name carries the cluster number shifted by one.
# NOTE(review): parametres defaults to None but is indexed here; presumably
# callers always pass a dict - TODO confirm.
128 parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
130 SimiTxt.__init__(self, ira, corpus, parametres, dlg=True, lemdial = False)
def preferences(self) :
    """Return the current parameters object (self.parametres) as it is."""
    current = self.parametres
    return current
# Similarity analysis for the cluster designated by self.numcluster.
135 def doanalyse(self) :
# Tag the analysis type as 'clustersimitxt', pass simipath to
# pathout.basefiles() and expose indices_simi on the instance.
136 self.parametres['type'] = 'clustersimitxt'
137 self.pathout.basefiles(simipath)
138 self.indices = indices_simi
# Remove any 'bystar' entry from the parameters.
141 if 'bystar' in self.parametres :
142 del self.parametres['bystar']
# dictcol maps the position of each active form to
# [form, corpus.getlemclustereff(form, numcluster)].
143 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
# No star list is kept here: self.stars is empty, the 'stars' parameter is 0
# and 'sfromchi' is set to 1.
148 self.stars = []#copy(self.listet)
149 self.parametres['stars'] = 0#copy(self.listet)
150 self.parametres['sfromchi'] = 1
# Build the PrepSimi dialog; its result (prep.val) is compared with wx.ID_OK.
151 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
152 if prep.val == wx.ID_OK :
154 self.parametres = prep.parametres
# NOTE(review): the progress dialog is created from self.parent here, whereas
# SimiTxt passes self.ira to progressbar() - confirm which is intended.
158 self.dlg = progressbar(self.parent, 3)
# The type is set again because self.parametres was replaced by the dialog's
# parameters above.
160 self.parametres['type'] = 'clustersimitxt'
# Build the R script object and hand script.scriptout to self.doR() together
# with the progress dialog; a falsy return value of doR() is tested here.
161 script = PrintSimiScript(self)
163 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# Bookkeeping done when type_graph == 1: fileout is script.filename, or the
# same name with a '.svg' extension in the branch guarded by the 'svg'
# parameter.
165 if self.parametres['type_graph'] == 1:
166 if self.parametres['svg'] :
167 filename, ext = os.path.splitext(script.filename)
168 fileout = filename + '.svg'
170 fileout = script.filename
# The pair [basename of fileout, script.txtgraph] is appended to the list
# read from the 'liste_graph' file when that file exists; a one-entry list
# is also built from the same pair.  graph_simi is then passed to
# print_liste() with the 'liste_graph' path.
171 if os.path.exists(self.pathout['liste_graph']):
172 graph_simi = read_list_file(self.pathout['liste_graph'])
173 graph_simi.append([os.path.basename(fileout), script.txtgraph])
175 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
176 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self, lim=3) :
    """Record the run parameters and write the input files for one cluster.

    Three entries of self.parametres are set: 'eff_min_forme' (to lim),
    'nbactives' (the number of active forms) and 'fromprof' (True).  The
    corpus is asked to write the sparse matrix for
    self.corpus.lc[self.numcluster] to the 'mat01.csv' path, then three
    line-oriented files are written:

    actives.csv     -- the active forms, encoded with self.ira.syscoding
    actives_nb.csv  -- repr() of each item of self.lfreq
    actives_chi.csv -- repr() of each item of self.lchi

    lim -- value stored under 'eff_min_forme'.  It defaults to 3, the
           value that was previously hard-coded, and gives this override
           the same signature as SimiTxt.makefiles so that a call passing
           lim works on either class.
    """
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])
    with open(self.pathout['actives.csv'], 'w') as f :
        f.write('\n'.join(self.actives).encode(self.ira.syscoding))
    # repr() is the function form of the backtick operator and yields the
    # same text; backticks are only accepted by Python 2.
    with open(self.pathout['actives_nb.csv'], 'w') as f :
        f.write('\n'.join([repr(val) for val in self.lfreq]))
    with open(self.pathout['actives_chi.csv'], 'w') as f :
        f.write('\n'.join([repr(val) for val in self.lchi]))