# NOTE(review): every line of this listing is prefixed with its line number
# in the original file, and the lines in between are absent from this extract.
# Path of the corpus text file that is read below.
7 corpus_out = 'corpus.txt'
# Open the corpus for reading, decoding it as UTF-8.
9 with codecs.open(corpus_out ,'r', 'utf8') as f:
# Build a three-level nested list: `content` is split on `sep` into ucis, each
# uci is split on the literal '$$$' into paragraphs, and each paragraph is
# split into lines (uces) with splitlines().
# NOTE(review): `content` and `sep` are bound on lines not shown here --
# presumably `content` is the text read from `f`; confirm.
12 ucis_paras_uces = [[[uce for uce in para.splitlines()] for para in uci.split(u'$$$')] for uci in content.split(sep)]
# Debug output (Python 2 print statement): the paragraphs/uces of the first uci.
14 print ucis_paras_uces[0]
# Disabled SQLite code: connection, text factory and creation of the `uce` table.
17 #conn = sqlite3.connect(db)
19 #conn.text_factory = str
21 #c.execute('''CREATE TABLE if not exists uce (id INTEGER PRIMARY KEY, iduci INTEGER, idpara INTEGER, content TEXT)''')
def addforme(word, formes, iduce):
    """Record one occurrence of ``word`` inside the uce ``iduce``.

    ``formes`` maps each word to a two-item list ``[count, per_uce]``; a word
    seen for the first time is stored as ``[1, {iduce: 1}]``.  For a word that
    is already present, ``per_uce[iduce]`` is incremented (starting from 1 for
    a uce not seen before for that word).

    The mapping is modified in place and nothing is returned.

    NOTE(review): the branch lines of this function (the ``if word in formes``
    test, the two ``else`` lines and the increment of the first list element)
    were missing from the listing this was taken from.  They are reconstructed
    here from the surviving statements -- confirm against the original file.
    """
    if word in formes:
        entry = formes[word]
        # Overall occurrence count of the word (reconstructed, see note above).
        entry[0] += 1
        # Per-uce occurrence count; a uce not seen yet for this word starts at 1.
        per_uce = entry[1]
        per_uce[iduce] = per_uce.get(iduce, 0) + 1
    else:
        formes[word] = [1, {iduce: 1}]
# Walk every uci of the parsed corpus; `i` is the uci's index.
# NOTE(review): the nested loops that bind `idpara`, `uce`, `iduce` and `word`
# are on lines not present in this extract -- confirm their shape against the
# original file.
38 for i, uci in enumerate(ucis_paras_uces) :
# Remember, for the uce identified by `iduce`, its uci index and paragraph id.
44 uce_uci_para[iduce] = [i, idpara]
# One output file per uce, named '<iduce>.txt' under the relative directory 'uce'.
# NOTE(review): nothing visible here creates that directory -- presumably it
# must already exist; verify.
45 fileout = os.path.join('uce', '%i.txt' % iduce)
46 with open(fileout, 'w') as f :
# Encode the uce text as UTF-8 before writing it out.
47 f.write(uce.encode('utf8'))
# Count this occurrence of `word` for the current uce in the `formes` index.
50 addforme(word, formes, iduce)
# Open the shelve database 'shelves.db'.
# NOTE(review): what is stored in or read from the shelf, and whether it is
# closed before being opened again below, is not visible in this extract.
53 d = shelve.open('shelves.db')
59 d = shelve.open('shelves.db')
# For every uce id found in word[1], look up the index of the uci it belongs to.
# NOTE(review): `word` here looks like a `formes` entry ([count, {iduce: n}])
# rather than a string -- confirm.
65 ucis = [uce_uci_para[iduce][0] for iduce in word[1]]
# Disabled SQLite insert of one uce row (id, uci index, paragraph id, text).
69 #c.execute('INSERT INTO uce values (?, ?, ?, ?)', (iduce, i, idpara, uce))