1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2013 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 #from ConfigParser import RawConfigParser
11 #from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 #from tableau import Tableau
16 #from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
# Module-level logger registered under the name 'iramuteq.textsimi'.
23 log = logging.getLogger('iramuteq.textsimi')
# Similarity analysis on a text corpus, built on top of AnalyseText.
# NOTE(review): in this excerpt the embedded line numbers skip entries and the
# indentation is gone, so the `def` line that owns the statements below and
# some branch lines (`else`, early returns) are not visible here - confirm
# against the complete file before relying on these comments.
25 class SimiTxt(AnalyseText):
# Tag the run as a 'simitxt' analysis and set up the output paths from simipath.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Active forms as returned by the corpus for a limit of 3.
32 self.actives = self.corpus.make_actives_limit(3)
# Map position -> [form, corpus.getlemeff(form)]; handed to PrepSimi as `wordlist`.
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
36 #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
# self.stars and parametres['stars'] each receive their own shallow copy of
# the list returned by corpus.make_etoiles().
38 self.listet = self.corpus.make_etoiles()
40 self.stars = copy(self.listet)
41 self.parametres['stars'] = copy(self.listet)
42 self.parametres['sfromchi'] = False
# PrepSimi receives the current parameters, the 'selected.csv' path, the
# active forms and the index list; its `val` is compared with wx.ID_OK
# (presumably: the user confirmed the preparation dialog - verify).
43 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
44 if prep.val == wx.ID_OK :
# Adopt the parameters carried by the PrepSimi object.
46 self.parametres = prep.parametres
# Build the script object and pass its `scriptout` to self.doR.
49 script = PrintSimiScript(self)
51 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
# For type_graph == 1 the produced graph file is registered in 'liste_graph'.
54 if self.parametres['type_graph'] == 1:
# With the 'svg' parameter set, the registered name is the script's filename
# with its extension replaced by '.svg'.
55 if self.parametres['svg'] :
56 filename, ext = os.path.splitext(script.filename)
57 fileout = filename + '.svg'
# NOTE(review): presumably the else branch of the 'svg' test - its `else`
# line is not visible in this excerpt.
59 fileout = script.filename
# Extend the existing graph list when the list file exists; the single-entry
# list further down is presumably the fallback when it does not.
60 if os.path.exists(self.pathout['liste_graph']):
61 graph_simi = read_list_file(self.pathout['liste_graph'])
62 graph_simi.append([os.path.basename(fileout), script.txtgraph])
64 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
# Write the [basename, txtgraph] entries back to the 'liste_graph' file.
65 print_liste(self.pathout['liste_graph'], graph_simi)
69 # def preferences(self) :
70 # dial = StatDialog(self, self.parent)
71 # dial.CenterOnParent()
72 # val = dial.ShowModal()
74 # if dial.radio_lem.GetSelection() == 0 :
78 # self.parametres['lem'] = lem
80 # return self.parametres
# Build the default parameter dictionary `self.paramsimi` and merge it into
# `self.parametres` with update(), so keys defined here replace same-named
# keys already present in `self.parametres`.
# NOTE(review): only a few entries of the literal are visible in this excerpt
# (the embedded line numbers jump from 86 to 122); the remaining keys and the
# closing brace are not shown.
85 def makesimiparam(self) :
86 self.paramsimi = {'coeff' : 0,
101 'cexfromchi' : False,
# 'cola' holds a 3-tuple; presumably an RGB colour - verify against its consumer.
108 'cola' : (200,200,200),
120 #'ira' : self.pathout['Analyse.ira']
122 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3):
    """Record bookkeeping parameters and write the matrix and active-form files.

    lim -- value stored under ``parametres['eff_min_forme']`` (default 3).

    ``self.actives`` must already be set: it is read here, not computed.
    An earlier variant, kept in the history as a commented-out line, derived
    it in place through ``corpus.make_actives_nb``.
    """
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = False
    # Hand the active forms and both target paths to the corpus writer.
    self.corpus.make_and_write_sparse_matrix_from_uces(
        self.actives,
        self.pathout['mat01.csv'],
        self.pathout['listeuce1.csv'],
    )
    # One active form per line, encoded with the application's system coding.
    with open(self.pathout['actives.csv'], 'w') as out:
        joined = '\n'.join(self.actives)
        out.write(joined.encode(self.ira.syscoding))
# Variant of SimiTxt whose constructor additionally takes a cluster number
# (`numcluster`) together with the `actives`, `lfreq` and `lchi` sequences.
133 class SimiFromCluster(SimiTxt) :
def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) :
    """Prepare a similarity analysis limited to one cluster.

    ira        -- application object, forwarded to ``SimiTxt.__init__``
    corpus     -- corpus object, forwarded to ``SimiTxt.__init__``
    actives    -- active forms, stored as ``self.actives``
    lfreq      -- values written later to 'actives_nb.csv' by ``makefiles``
    lchi       -- values written later to 'actives_chi.csv' by ``makefiles``
    numcluster -- zero-based cluster index; the analysis name uses index + 1
    parametres -- parameter mapping; it is updated in place with the 'name'
                  key.  When omitted, a fresh dict is created.
    dlg        -- forwarded unchanged to ``SimiTxt.__init__``
    """
    self.actives = actives
    self.numcluster = numcluster
    # makefiles() reads self.lfreq / self.lchi, so they must live on the instance.
    self.lfreq = lfreq
    self.lchi = lchi
    # The declared default is None; subscripting None would raise TypeError.
    if parametres is None :
        parametres = {}
    parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
    SimiTxt.__init__(self, ira, corpus, parametres, dlg, lemdial = False)
def preferences(self):
    """Return the current parameter mapping as-is; this body opens no dialog."""
    current = self.parametres
    return current
# Run the similarity analysis for the cluster selected by self.numcluster.
# NOTE(review): several lines of this method are not visible in this excerpt
# (the embedded numbering skips e.g. 162, 164-167, 176, 181), including the
# `else` branches and any early returns - confirm against the complete file.
145 def doanalyse(self) :
# Tag the run as 'clustersimitxt' and set up the output paths from simipath.
146 self.parametres['type'] = 'clustersimitxt'
147 self.pathout.basefiles(simipath)
148 self.indices = indices_simi
# Drop a 'bystar' entry if one is present in the parameters.
150 if 'bystar' in self.parametres :
151 del self.parametres['bystar']
# Map position -> [form, corpus.getlemclustereff(form, numcluster)]; handed
# to PrepSimi as `wordlist`.
152 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
155 #self.listet = self.corpus.make_etoiles()
# No star list here: self.stars is empty and parametres['stars'] is False;
# 'sfromchi' is switched on instead.
157 self.stars = []#copy(self.listet)
158 self.parametres['stars'] = False#copy(self.listet)
159 self.parametres['sfromchi'] = True
# PrepSimi receives the current parameters, the 'selected.csv' path, the
# active forms and the index list; its `val` is compared with wx.ID_OK
# (presumably: the user confirmed the preparation dialog - verify).
160 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
161 if prep.val == wx.ID_OK :
# Adopt the parameters carried by the PrepSimi object.
163 self.parametres = prep.parametres
# Build the script object and pass its `scriptout` to self.doR.
168 script = PrintSimiScript(self)
170 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# For type_graph == 1 the produced graph file is registered in 'liste_graph'.
172 if self.parametres['type_graph'] == 1:
# With the 'svg' parameter set, the registered name is the script's filename
# with its extension replaced by '.svg'.
173 if self.parametres['svg'] :
174 filename, ext = os.path.splitext(script.filename)
175 fileout = filename + '.svg'
# NOTE(review): presumably the else branch of the 'svg' test - its `else`
# line is not visible in this excerpt.
177 fileout = script.filename
# Extend the existing graph list when the list file exists; the single-entry
# list further down is presumably the fallback when it does not.
178 if os.path.exists(self.pathout['liste_graph']):
179 graph_simi = read_list_file(self.pathout['liste_graph'])
180 graph_simi.append([os.path.basename(fileout), script.txtgraph])
182 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
# Write the [basename, txtgraph] entries back to the 'liste_graph' file.
183 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self, lim=3) :
    """Record bookkeeping parameters and write the per-cluster input files.

    lim -- value stored under ``parametres['eff_min_forme']``.  It defaults
           to 3, the value this method used to hard-code, and mirrors the
           ``lim`` keyword of ``SimiTxt.makefiles`` so both can be called
           the same way.

    Reads ``self.actives``, ``self.lfreq``, ``self.lchi`` and
    ``self.numcluster``.  Calls
    ``corpus.make_and_write_sparse_matrix_from_classe`` with the active
    forms, ``corpus.lc[numcluster]`` and the 'mat01.csv' path, then writes
    'actives.csv', 'actives_nb.csv' and 'actives_chi.csv', one entry per line.
    """
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])
    # Active forms, encoded with the application's system coding.
    with open(self.pathout['actives.csv'], 'w') as f :
        f.write('\n'.join(self.actives).encode(self.ira.syscoding))
    # repr() is the function spelling of the legacy backtick operator: it
    # yields the same text and is not Python-2-only syntax.
    with open(self.pathout['actives_nb.csv'], 'w') as f :
        f.write('\n'.join(repr(val) for val in self.lfreq))
    with open(self.pathout['actives_chi.csv'], 'w') as f :
        f.write('\n'.join(repr(val) for val in self.lchi))