2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2010 Pierre Ratinaud
8 from optparse import OptionParser
13 sys.setdefaultencoding(locale.getpreferredencoding())
14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath, ChdTxtPathOut
15 from functions import ReadLexique
16 from ConfigParser import *
17 #######################################
18 from textchdalc import AnalyseAlceste
19 from textdist import PamTxt
20 #from textafcuci import AfcUci
21 from textstat import Stat
22 from corpus import Corpus
25 from word_stat import *
26 from textclassechd import ClasseCHD
# Directory that contains the launched script (symlinks resolved). It is the
# value handed to ConstructDicoPath and ConstructRscriptsPath further down.
AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))

# Home directory of the current user: HOME when it is set, otherwise HOMEPATH.
# The two lookups are chained so that HOMEPATH never overrides a valid HOME.
user_home = os.getenv('HOME')
if user_home is None:
    user_home = os.getenv('HOMEPATH')
if user_home is None:
    # Neither variable is defined: os.path.join(None, ...) would raise, so
    # let the standard library resolve the home directory instead.
    user_home = os.path.expanduser('~')

# Per-user configuration directory of the application.
UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
# Fragment of an initialiser body: the enclosing `class`/`def` lines and the
# original indentation are not visible in this excerpt.
# NOTE(review): the integer at the start of each line looks like a line number
# left over from an extraction step -- confirm against the original file.
#
# Keep on the instance whatever ConstructDicoPath / ConstructConfigPath return
# for the application directory and the per-user configuration directory.
37 self.DictPath = ConstructDicoPath(AppliPath)
38 self.ConfigPath = ConstructConfigPath(UserConfigPath)
# Command-line interface. The help strings are French text emitted at runtime
# and are therefore left unchanged.
40 parser = OptionParser()
# -f/--file: path of the corpus; `options.filename` is False when omitted.
42 parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
# -t/--type: kind of analysis to run; defaults to 'alceste'.
43 parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default='alceste')
# -c/--conf: path of a configuration file; False when omitted.
45 parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=False)
# -e/--enc: encoding of the corpus; defaults to the locale's preferred encoding.
46 parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
# -l/--lang: language of the corpus; defaults to 'french'.
47 parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
# Called without arguments, parse_args() reads the process command line.
48 (options, args) = parser.parse_args()
# Fragment of an initialiser body that treats the -f argument as a
# configuration file. The statement that selects this branch and the original
# indentation are not visible in this excerpt, so the nesting of the lines
# below is unknown -- TODO confirm against the original file.
# NOTE(review): the leading integers look like leftover line numbers.
52 self.filename = os.path.abspath(options.filename)
# Parse the file given with -f as an INI-style configuration.
53 self.conf = RawConfigParser()
54 self.conf.read(self.filename)
# Python 2 print statement: dumps the section names to stdout.
55 print self.conf.sections()
57 if 'analyse' in self.conf.sections() :
# Output paths are computed from the directory holding the configuration file.
59 DictPathOut=ChdTxtPathOut(os.path.dirname(self.filename))
60 self.pathout = os.path.dirname(self.filename)
61 self.DictPathOut=DictPathOut
# Build a Corpus bound to this object and load it from the location stored
# under the 'db' key of the output paths.
62 self.corpus = Corpus(self)
63 self.corpus.dictpathout = self.DictPathOut
64 self.corpus.read_corpus_from_shelves(self.DictPathOut['db'])
# Force the analysis type recorded in the corpus parameters.
65 self.corpus.parametre['analyse'] = 'alceste'
66 self.corpus.make_lem_type_list()
# Instantiate ClasseCHD for i = 1..17; the instances are discarded, so only
# the constructor's side effects matter.
# NOTE(review): presumably i is a class number and 17 is specific to one
# corpus -- verify before reusing.
67 for i in range(1,18) :
68 ClasseCHD(self, self.corpus, i, True)
# Everything below is commented-out experimental code, kept as found.
70 #ll = self.corpus.find_segments_doublon(15,1000000)
71 #with open('extrait_doublons.csv' ,'w') as f :
72 # f.write('\n'.join([';'.join([`v[0]`,v[1]]) for v in ll]))
74 #self.corpus.count_uci_from_list('/home/pierre/fac/lerass/bouquin_indentite/liste_mot_chercher_uci.txt')
76 #output = open('testpickle.pkl', 'r')
77 #pickle.dump(self.corpus.formes, output, -1)
78 #formes = pickle.load(output)
80 #print 'finish pickle'
82 #listin = '/home/pierre/fac/identite/Personnages.csv'
83 #with codecs.open(listin, 'r', 'cp1252') as f :
85 #content = content.replace('"','').splitlines()
87 #self.corpus.make_and_write_sparse_matrix_from_uce_list(content, '/home/pierre/fac/identite/personnages.mm')
90 # self.corpus.extractnr()
91 #listin = [u'droit', u'devoir']
92 #make_word_stat(self.corpus, listin)
# Fragment of an initialiser body that treats the -f argument as a corpus and
# runs the analysis selected with -t. The statement that selects this branch
# and the original indentation are not visible in this excerpt -- TODO confirm
# the nesting against the original file.
# NOTE(review): the leading integers look like leftover line numbers.
95 self.filename = os.path.abspath(options.filename)
96 self.corpus_encodage = options.encodage
97 self.corpus_lang = options.language
# A configuration file given with -c replaces the configuration path
# registered for the selected analysis type.
98 if options.configfile :
99 self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile)
# NOTE(review): the first positional argument of tempfile.mkdtemp is the
# suffix, so 'iramuteq' ends the directory name. Nothing visible here removes
# the directory afterwards.
100 self.TEMPDIR = tempfile.mkdtemp('iramuteq')
101 self.RscriptsPath = ConstructRscriptsPath(AppliPath)
# Read the 'rpath' option of the [PATHS] section from the file registered
# under ConfigPath['path'].
102 self.PathPath = ConfigParser()
103 self.PathPath.read(self.ConfigPath['path'])
104 self.RPath = self.PathPath.get('PATHS', 'rpath')
# Load the preferences file.
105 self.pref = RawConfigParser()
106 self.pref.read(self.ConfigPath['preferences'])
107 #print 'PAS DE CODECS POUR CABLE'
# Decode the whole corpus with the requested encoding and drop carriage
# returns.
108 with codecs.open(self.filename, 'r', self.corpus_encodage) as f:
109 self.content = f.read()
110 self.content = self.content.replace('\r','')
111 ReadLexique(self, lang = options.language)
# Dispatch on the -t value. No visible branch handles any other value, in
# which case self.Text is not assigned here.
112 if options.type_analyse == 'alceste' :
113 # print 'ATTENTION : BIGGGGGGGGGGGGGGGGGGG'
114 # self.Text = AnalyseAlceste(self, cmd = True, big = True)
115 self.Text = AnalyseAlceste(self, cmd = True)
116 elif options.type_analyse == 'pam' :
117 self.Text = PamTxt(self, cmd = True)
118 elif options.type_analyse == 'afcuci' :
# NOTE(review): `from textafcuci import AfcUci` is commented out at the top of
# the file; unless one of the star imports provides AfcUci, this branch raises
# NameError -- confirm.
119 self.Text = AfcUci(self, cmd = True)
120 elif options.type_analyse == 'stat' :
121 self.Text = Stat(self, cmd = True)
122 #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
123 # self.Text.corpus.make_colored_corpus('colored.html')
125 if __name__ == '__main__':