2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2010 Pierre Ratinaud
8 from optparse import OptionParser
# Python 2 idiom: make the locale's preferred encoding the interpreter-wide
# default string encoding.
# NOTE(review): `sys` and `locale` are not imported on any line visible in
# this listing, and sys.setdefaultencoding is normally only reachable after
# reload(sys) -- presumably those lines exist in the full file; confirm.
13 sys.setdefaultencoding(locale.getpreferredencoding())
14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath, PathOut
15 from functions import ReadLexique, DoConf, History, ReadDicoAsDico
16 from ConfigParser import *
17 #######################################
18 #from textchdalc import AnalyseAlceste
19 #from textdist import PamTxt
20 #from textafcuci import AfcUci
21 from textreinert import Reinert
22 from corpus import Corpus, copycorpus, BuildFromAlceste, BuildSubCorpus
23 from textaslexico import Lexico
24 from textstat import Stat
25 from tools import SubCorpus
26 from textsimi import SimiTxt
28 ######################################
# Module-level logger named 'iramuteq', with a formatter that prints the
# timestamp, logger name, level and message.
30 log = logging.getLogger('iramuteq')
31 formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Stream handler (stderr by default) that uses the formatter above.
32 ch = logging.StreamHandler()
33 ch.setFormatter(formatter)
# NOTE(review): no log.addHandler(ch) is visible in this listing, so as shown
# `ch` is never attached to `log` -- confirm against the full file.
# Let every record from DEBUG upwards through the logger.
35 log.setLevel(logging.DEBUG)
36 #######################################
40 #cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste'])
# Absolute directory of the launched script (sys.argv[0]), symlinks resolved.
42 AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
# Locate the user's home directory from the HOME environment variable.
43 if os.getenv('HOME') != None:
44 user_home = os.getenv('HOME')
# NOTE(review): an `else:` presumably precedes the next line (HOMEPATH used
# only when HOME is unset); it is not visible in this listing -- confirm.
46 user_home = os.getenv('HOMEPATH')
# Per-user configuration directory: <user_home>/.iramuteq
47 UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
# Command-line driver: sets up paths and configuration, parses the options,
# builds or reads a corpus and instantiates the requested text analysis.
#
# Parameters:
#   args       -- option list given to parser.parse_args(args); None by default.
#   AppliPath  -- application directory, passed to ConstructDicoPath and
#                 ConstructRscriptsPath.
#   parametres -- forwarded as `parametres` to SimiTxt for the 'simitxt' analysis.
#
# NOTE(review): indentation and several lines (e.g. `try:` / `else:`) are
# absent from this listing, so the nesting described in the comments below is
# inferred -- confirm against the full file.
50 def __init__(self, args = None, AppliPath = None, parametres = None) :
# Resource locations: dictionaries, per-user configuration files, R scripts,
# plus a fresh temporary directory created with the 'iramuteq' suffix.
52 self.DictPath = ConstructDicoPath(AppliPath)
53 self.ConfigPath = ConstructConfigPath(UserConfigPath)
54 self.syscoding = sys.getdefaultencoding()
55 self.TEMPDIR = tempfile.mkdtemp('iramuteq')
56 self.RscriptsPath = ConstructRscriptsPath(AppliPath)
# Read option 'rpath' from section [PATHS] of the 'path' configuration file.
57 self.PathPath = ConfigParser()
58 self.PathPath.read(self.ConfigPath['path'])
59 self.RPath = self.PathPath.get('PATHS', 'rpath')
# Load the 'preferences' configuration file.
60 self.pref = RawConfigParser()
61 self.pref.read(self.ConfigPath['preferences'])
# History object built on history.db inside the user configuration directory.
62 self.history = History(os.path.join(UserConfigPath, 'history.db'))
64 # self.history.clean()
# Declare the command-line options (help strings are in French).
66 parser = OptionParser()
68 parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
69 parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False)
70 parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None)
71 parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None)
72 parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
73 parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
74 parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False)
75 parser.add_option("-b", "--build", action="store_true", dest="build", help = "construire un corpus", default = False)
# Parse sys.argv or the explicit `args` list.
# NOTE(review): both calls appear back to back; presumably an
# `if args is None: ... else: ...` around them is missing from this listing.
78 (options, args) = parser.parse_args()
80 (options, args) = parser.parse_args(args)
# Pick the analysis configuration: an explicit -c file is used when given,
# otherwise the configuration file registered in ConfigPath for the -t type.
84 if options.configfile is not None:
85 config = DoConf(os.path.abspath(options.configfile)).getoptions()
86 elif options.filename and options.type_analyse :
87 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
88 elif options.read and options.type_analyse :
89 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
# NOTE(review): the body of this branch is not visible in this listing.
92 elif options.filename and options.build :
# Corpus handling: entered when a corpus file (-f) or a saved corpus (-r) is given.
98 if options.filename or options.read :
99 self.corpus_encodage = options.encodage
100 self.corpus_lang = options.language
101 self.keys = DoConf(self.ConfigPath['key']).getoptions()
# Load the lexicon and the expressions dictionary for the corpus language.
104 ReadLexique(self, lang = options.language)
# NOTE(review): the second argument of .get() is the literal string
# 'french_exp'; if DictPath is a dict this falls back to that string rather
# than to the path stored under the 'french_exp' key -- confirm intent.
105 self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp'))
# Split the entries of self.keys by value: 1 -> gramact, 2 -> gramsup
# (presumably active vs. supplementary grammatical categories -- confirm).
106 gramact = [k for k in self.keys if self.keys[k] == 1]
107 gramsup = [k for k in self.keys if self.keys[k] == 2]
# Build a new corpus from the file given with -f.
109 if options.filename :
110 self.filename = os.path.abspath(options.filename)
# Corpus parameters come from the -d file when given, otherwise from the
# 'corpus' configuration file.
# NOTE(review): the `else:` between the two assignments is not visible here.
111 if options.corpusconfigfile is not None :
112 corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus')
114 corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions()
# Record file name, original path, encodings and output directory.
115 dire, corpus_parametres['filename'] = os.path.split(self.filename)
116 corpus_parametres['originalpath'] = self.filename
117 corpus_parametres['encoding'] = self.corpus_encodage
118 corpus_parametres['syscoding'] = locale.getpreferredencoding()
119 corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout()
# NOTE(review): the `try:` that pairs with the `except` below is not visible
# in this listing. Any exception raised while building is only logged at INFO.
121 corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus
122 except Exception, txt:
123 log.info('probleme lors de la construction: %s' %txt)
# Register the built corpus in the history, then take a copy of it.
127 self.history.add(corpus.parametres)
128 corpus = copycorpus(corpus)
# Read an existing corpus from the configuration file given with -r
# (presumably the branch taken when -f is absent -- confirm).
130 corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read)
131 corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read))
# NOTE(review): `pathout` is not used on any line visible in this listing.
132 pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read)))
# NOTE(review): hard-coded absolute path on a developer machine and a fixed
# theme; this looks like leftover experiment code -- confirm before reuse.
135 corpus.parametres['pathout'] = '/home/pierre/fac/etudiant/verdier/corpus20_corpus_2/test2'
136 BuildSubCorpus(corpus, parametres = {'fromthem' : True, 'theme' : [u'-*thématique_idéal']})
# Run the analysis only when a corpus object is available.
138 if corpus is not None :
140 #corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7])
# Apply the gramact / gramsup key lists to the corpus.
143 corpus.parse_active(gramact, gramsup)
144 #print corpus.getlemconcorde('de').fetchall()
145 # log.warning('ATTENTION gethapaxuces')
146 # MakeUciStat(corpus)
147 #corpus.gethapaxuces()
148 # ucisize = corpus.getucisize()
149 # ucisize = [`val` for val in ucisize]
150 #uciet = [uci.etoiles[1:] for uci in corpus.ucis]
151 #uceet = [corpus.ucis[uce.uci].etoiles[1:] for uci in corpus.ucis for uce in uci.uces]
154 # print '\t'.join(line)
155 #res = zip(uciet, ucisize)
156 # res = [uciet[i] + [ucisize[i]] for i, val in enumerate(uciet)]
158 #ucesize = corpus.getucesize()
160 #with open('sentences_size.csv', 'w') as f :
161 # f.write('\n'.join([`val` for val in ucesize]))
162 # self.content = f.read()
163 #self.content = self.content.replace('\r','')
# Dispatch on the -t analysis type; the created analysis is kept in self.Text.
164 if options.type_analyse == 'alceste' :
165 log.debug('ATTENTION : ANALYSE NG')
166 #print corpus.make_etoiles()
168 #corpus.read_corpus()
169 #corpus.parse_active(gramact, gramsup)
# NOTE(review): `config` is only assigned in the option branches above; with
# -b and no -c it may be unbound here -- confirm.
170 config['type'] = 'alceste'
171 self.Text = Reinert(self, corpus, parametres = config)
172 # self.Text = AnalyseAlceste(self, cmd = True, big = True)
173 #self.Text = AnalyseAlceste(self, cmd = True)
# NOTE(review): the imports of PamTxt and AfcUci are commented out at the top
# of this listing, so the next two branches would raise NameError unless the
# names are provided elsewhere -- confirm.
174 elif options.type_analyse == 'pam' :
175 self.Text = PamTxt(self, cmd = True)
176 elif options.type_analyse == 'afcuci' :
177 self.Text = AfcUci(self, cmd = True)
178 elif options.type_analyse == 'stat' :
179 self.Text = Stat(self, corpus, parametres = {'type':'stat'})
# Lexico receives its settings through `config=` rather than `parametres=`.
180 elif options.type_analyse == 'spec' :
181 self.Text = Lexico(self, corpus, config = {'type' : 'spec'})
# 'simitxt' uses the `parametres` argument of __init__, not the parsed config.
182 elif options.type_analyse == 'simitxt' :
183 self.Text = SimiTxt(self, corpus, parametres = parametres)
184 #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
185 # self.Text.corpus.make_colored_corpus('colored.html')
# Script entry point: when run directly, instantiate CmdLine with the
# module-level application directory; options are then taken from sys.argv
# because no explicit `args` list is passed.
# NOTE(review): the `class CmdLine` header is not visible in this listing.
187 if __name__ == '__main__':
189 CmdLine(AppliPath = AppliPath)