1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2013 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 #from ConfigParser import RawConfigParser
11 #from guifunct import getPage, getCorpus
12 from guifunct import PrepSimi
13 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
14 #from tableau import Tableau
15 #from tabsimi import DoSimi
16 from PrintRScript import PrintSimiScript
# Module-level logger, registered under the name 'iramuteq.textsimi'.
22 log = logging.getLogger('iramuteq.textsimi')
# Similarity analysis run on a text corpus; subclass of AnalyseText.
# NOTE(review): this listing is missing lines (the embedded numbers skip), so
# the `def` header that owns the statements below and several `else`/`return`
# lines are not visible. The comments describe only the visible statements.
24 class SimiTxt(AnalyseText):
# Tag the run as 'simitxt', hand simipath to pathout.basefiles and expose the
# similarity indices on the instance.
26 self.parametres['type'] = 'simitxt'
27 self.pathout.basefiles(simipath)
28 self.indices = indices_simi
# Active forms come from corpus.make_actives_limit(3)
# (presumably a minimum-frequency threshold of 3 -- TODO confirm).
32 self.actives = self.corpus.make_actives_limit(3)
# index -> [form, corpus.getlemeff(form)]; passed to PrepSimi as `wordlist`.
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
# Keep the list returned by make_etoiles() and store two independent shallow
# copies of it: one on the instance, one in the parameters.
36 self.listet = self.corpus.make_etoiles()
38 self.stars = copy(self.listet)
39 self.parametres['stars'] = copy(self.listet)
40 self.parametres['sfromchi'] = False
# PrepSimi exposes `.val`, compared with wx.ID_OK below (presumably a modal
# settings dialog -- TODO confirm). When accepted, its parameters replace ours
# and a progress bar is created with progressbar(self.ira, 4).
42 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
43 if prep.val == wx.ID_OK :
45 self.parametres = prep.parametres
46 self.dlg = progressbar(self.ira, 4)
# Build the script object and pass script.scriptout to self.doR; the body of
# the failure branch is not visible in this listing.
51 script = PrintSimiScript(self)
53 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
# For type_graph == 1, record the produced graph file in the 'liste_graph'
# file: with 'svg' set, the script's filename gets a '.svg' extension; line 61
# uses script.filename as is (presumably the `else` branch -- that line is
# missing from the listing).
56 if self.parametres['type_graph'] == 1:
57 if self.parametres['svg'] :
58 filename, ext = os.path.splitext(script.filename)
59 fileout = filename + '.svg'
61 fileout = script.filename
# Append [basename, script.txtgraph] to the existing list when the file exists;
# line 66 starts a fresh one-entry list (presumably the `else` branch). The
# list is then written back with print_liste.
62 if os.path.exists(self.pathout['liste_graph']):
63 graph_simi = read_list_file(self.pathout['liste_graph'])
64 graph_simi.append([os.path.basename(fileout), script.txtgraph])
66 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
67 print_liste(self.pathout['liste_graph'], graph_simi)
# Build the default similarity settings in self.paramsimi and merge them into
# self.parametres.
# NOTE(review): most entries of the dict literal, and its closing brace, are
# missing from this listing; only 'coeff' and 'cola' are visible.
71 def makesimiparam(self) :
72 self.paramsimi = {'coeff' : 0,
# presumably an RGB colour triple -- TODO confirm
94 'cola' : (200,200,200),
106 #'ira' : self.pathout['Analyse.ira']
# Merge the defaults into self.parametres via its update() method.
108 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3):
    """Record the run settings and write the input files for the analysis.

    Stores ``lim`` as ``eff_min_forme``, the number of active forms as
    ``nbactives`` and ``fromprof = False`` in ``self.parametres``, asks the
    corpus to write the sparse matrix ('mat01.csv') and the companion
    'listeuce1.csv' file, then writes the active forms, one per line, to
    'actives.csv' encoded with ``self.ira.syscoding``.

    :param lim: value stored under ``eff_min_forme`` (default 3).
    """
    active_forms = self.actives
    settings = self.parametres
    settings['eff_min_forme'] = lim
    settings['nbactives'] = len(active_forms)
    settings['fromprof'] = False

    # Resolve both output paths in the original order before delegating.
    matrix_path = self.pathout['mat01.csv']
    uce_list_path = self.pathout['listeuce1.csv']
    self.corpus.make_and_write_sparse_matrix_from_uces(
        active_forms, matrix_path, uce_list_path)

    with open(self.pathout['actives.csv'], 'w') as handle:
        joined = '\n'.join(active_forms)
        handle.write(joined.encode(self.ira.syscoding))
# Similarity analysis restricted to one cluster; subclass of SimiTxt.
119 class SimiFromCluster(SimiTxt) :
# Store the active forms and the cluster index, name the analysis after the
# cluster, then delegate to SimiTxt.__init__ with lemdial=False.
# NOTE(review): original lines 123-124 are missing from this listing;
# presumably they store lfreq and lchi, which makefiles() reads as self.lfreq
# and self.lchi -- TODO confirm. `parametres` defaults to None but is indexed
# on line 125, so callers apparently always pass a dict -- verify.
120 def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) :
121 self.actives = actives
122 self.numcluster = numcluster
# The name uses numcluster + 1, i.e. a 1-based cluster number.
125 parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
126 SimiTxt.__init__(self, ira, corpus, parametres, dlg, lemdial = False)
def preferences(self):
    """Return the analysis parameters exactly as currently stored."""
    current = self.parametres
    return current
# Run the similarity analysis for the cluster selected at construction time.
# NOTE(review): lines are missing from this listing (the embedded numbers
# skip), so some `else`/`return` lines and branch bodies are not visible. The
# comments describe only the visible statements.
131 def doanalyse(self) :
# Tag the run as 'clustersimitxt', hand simipath to pathout.basefiles and
# expose the similarity indices on the instance.
132 self.parametres['type'] = 'clustersimitxt'
133 self.pathout.basefiles(simipath)
134 self.indices = indices_simi
# Remove any 'bystar' entry from the parameters.
137 if 'bystar' in self.parametres :
138 del self.parametres['bystar']
# index -> [form, corpus.getlemclustereff(form, cluster)]; passed to PrepSimi
# as `wordlist`.
139 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
142 #self.listet = self.corpus.make_etoiles()
# Unlike SimiTxt, no star list is used here: stars is empty, the 'stars'
# parameter is 0 and 'sfromchi' is 1.
144 self.stars = []#copy(self.listet)
145 self.parametres['stars'] = 0#copy(self.listet)
146 self.parametres['sfromchi'] = 1
# PrepSimi exposes `.val`, compared with wx.ID_OK below (presumably a modal
# settings dialog -- TODO confirm). When accepted, its parameters replace ours.
147 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
148 if prep.val == wx.ID_OK :
150 self.parametres = prep.parametres
# 'type' is set again here (presumably because self.parametres was just
# replaced -- TODO confirm).
155 self.parametres['type'] = 'clustersimitxt'
# Build the script object and pass script.scriptout to self.doR; the body of
# the failure branch is not visible in this listing.
156 script = PrintSimiScript(self)
158 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# For type_graph == 1, record the produced graph file in the 'liste_graph'
# file: with 'svg' set, the script's filename gets a '.svg' extension; line 165
# uses script.filename as is (presumably the `else` branch -- that line is
# missing from the listing).
160 if self.parametres['type_graph'] == 1:
161 if self.parametres['svg'] :
162 filename, ext = os.path.splitext(script.filename)
163 fileout = filename + '.svg'
165 fileout = script.filename
# Append [basename, script.txtgraph] to the existing list when the file exists;
# line 170 starts a fresh one-entry list (presumably the `else` branch). The
# list is then written back with print_liste.
166 if os.path.exists(self.pathout['liste_graph']):
167 graph_simi = read_list_file(self.pathout['liste_graph'])
168 graph_simi.append([os.path.basename(fileout), script.txtgraph])
170 graph_simi = [[os.path.basename(fileout), script.txtgraph]]
171 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self, lim=3):
    """Record the run settings and write the input files for one cluster.

    Stores ``lim`` as ``eff_min_forme``, the number of active forms as
    ``nbactives`` and ``fromprof = True`` in ``self.parametres``, asks the
    corpus to write the sparse matrix for ``self.corpus.lc[self.numcluster]``
    to 'mat01.csv', then writes three one-value-per-line files:

    * 'actives.csv'     -- the active forms, encoded with ``ira.syscoding``
    * 'actives_nb.csv'  -- ``repr()`` of each entry of ``self.lfreq``
    * 'actives_chi.csv' -- ``repr()`` of each entry of ``self.lchi``

    ``self.lfreq`` and ``self.lchi`` must already be set on the instance.

    :param lim: value stored under ``eff_min_forme``. Defaults to 3, the
        value this method previously hard-coded, so existing ``makefiles()``
        calls behave the same; the parameter mirrors
        ``SimiTxt.makefiles(self, lim=3)`` so the override accepts the same
        arguments as the method it replaces.
    """
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_classe(
        self.actives,
        self.corpus.lc[self.numcluster],
        self.pathout['mat01.csv'])
    with open(self.pathout['actives.csv'], 'w') as f:
        f.write('\n'.join(self.actives).encode(self.ira.syscoding))
    # repr() yields the same text as the old backtick syntax, which is
    # deprecated and rejected outright by Python 3 parsers.
    with open(self.pathout['actives_nb.csv'], 'w') as f:
        f.write('\n'.join(repr(val) for val in self.lfreq))
    with open(self.pathout['actives_chi.csv'], 'w') as f:
        f.write('\n'.join(repr(val) for val in self.lchi))