Python Examples of codecs.open - ProgramCreek.com

文章推薦指數: 80 %
投票人數:10人

This page shows Python examples of codecs.open. ... sentences=[] try: fp=open(filename,'r',encoding='utf-8') lines=fp.readlines() except: fp=open(filename ... SearchbyModule SearchbyWords SearchProjects MostPopular TopPythonAPIs PopularProjects Java Python JavaScript TypeScript C++ Scala Blog reportthisad Morefromcodecs .open() .lookup() .getwriter() .getreader() .BOM_UTF16_BE .BOM_UTF16_LE .BOM_UTF8 .utf_8_decode() .utf_16_be_decode() .getencoder() .utf_16_le_decode() .BOM_UTF32_BE .BOM_UTF32_LE .StreamReader() .register_error() .register() .getdecoder() .StreamWriter() .getincrementalencoder() .latin_1_decode() reportthisad RelatedMethods os.path.join() sys.exit() sys.argv() re.compile() os.path.abspath() os.environ() time.time() os.listdir() unittest.TestCase() re.sub() time.sleep() re.search() os.remove() os.makedirs() logging.getLogger() sys.version_info() json.loads() json.dumps() argparse.ArgumentParser() setuptools.find_packages() RelatedModules os sys re time logging datetime random os.path math subprocess shutil json collections argparse setuptools Pythoncodecs.open() Examples Thefollowingare30 codeexamplesofcodecs.open(). Youcanvoteuptheonesyoulikeorvotedowntheonesyoudon'tlike, andgototheoriginalprojectorsourcefilebyfollowingthelinksaboveeachexample. Youmayalsowanttocheckoutallavailablefunctions/classesofthemodule codecs ,ortrythesearchfunction . Example#1 SourceProject: Financial-NLP   Author:Coldog2333   File: NLP.py   License: ApacheLicense2.0 7 votes deftxt2sentence(self,filename): """ readaandreturnaniteratorsentences (thatisalistofsomelists,andthesecond'list'isalistofwords). """ sentences=[] try: fp=open(filename,'r',encoding='utf-8') lines=fp.readlines() except: fp=open(filename,'r',encoding='gbk') lines=fp.readlines() forlineinlines: line=line.strip() iflen(line)<=1: continue line=line.replace('\n','').replace('\r','').split('') sentences.append(line) returnsentences Example#2 SourceProject: Financial-NLP   Author:Coldog2333   File: NLP.py   License: ApacheLicense2.0 6 votes defloadWordNet(self): """ loadzh_wordnetintotheobject. 将cow-not-full文件中的数据集整合成set """ f=codecs.open(self.wordnet_txt,"rb","utf-8") self.known=dict() #self.known=set() forlinf: ifl.startswith('\ufeff#')ornotl.strip(): continue row=l.strip().split("\t") (synset,lemma)=row #iflen(row)==2: #(synset,lemma)=row #eliflen(row)==3: #(synset,lemma,status)=row#根本就没有三个东西的项 #else: #print("illformedline:",l.strip()) #ifnot(synset.strip(),lemma.strip())inself.known: #self.known.add((synset.strip(),lemma.strip())) ifnotlemma.strip()inself.known.keys(): self.known[lemma.strip()]=[] self.known[lemma.strip()].append(synset) Example#3 SourceProject: Att-ChemdNER   Author:lingluodlut   File: utils.py   License: ApacheLicense2.0 6 votes defget_perf(filename): '''runconlleval.plperlscripttoobtain precision/recallandF1score''' _conlleval=PREFIX+'conlleval' ifnotisfile(_conlleval): #download('http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl') os.system('wgethttps://www.comp.nus.edu.sg/%7Ekanmy/courses/practicalNLP_2008/packages/conlleval.pl') chmod('conlleval.pl',stat.S_IRWXU)#givetheexecutepermissions out=[] proc=subprocess.Popen(["perl",_conlleval],stdin=subprocess.PIPE,stdout=subprocess.PIPE) stdout,_=proc.communicate(open(filename).read()) forlineinstdout.split('\n'): if'accuracy'inline: out=line.split() break #out=['accuracy:','16.26%;','precision:','0.00%;','recall:','0.00%;','FB1:','0.00'] precision=float(out[3][:-2]) recall=float(out[5][:-2]) f1score=float(out[7]) return{'p':precision,'r':recall,'f1':f1score} Example#4 SourceProject: Att-ChemdNER   Author:lingluodlut   File: model.py   License: ApacheLicense2.0 6 votes defsave_mappings(self,id_to_word,id_to_char,id_to_tag): #{{{ """ Weneedtosavethemappingsifwewanttousethemodellater. """ self.id_to_word=id_to_word self.id_to_char=id_to_char self.id_to_tag=id_to_tag withopen(self.mappings_path,'wb')asf: mappings={ 'id_to_word':self.id_to_word, 'id_to_char':self.id_to_char, 'id_to_tag':self.id_to_tag, } cPickle.dump(mappings,f) #}}} Example#5 SourceProject: Att-ChemdNER   Author:lingluodlut   File: loader.py   License: ApacheLicense2.0 6 votes defload_sentences(path,lower,zeros): #{{{ """ Loadsentences.Alinemustcontainatleastawordanditstag. Sentencesareseparatedbyemptylines. """ sentences=[] sentence=[] forlineincodecs.open(path,'r','utf8'): line=zero_digits(line.rstrip())ifzeroselseline.rstrip() ifnotline: iflen(sentence)>0: if'DOCSTART'notinsentence[0][0]: sentences.append(sentence) sentence=[] else: word=line.split() assertlen(word)>=2 sentence.append(word) iflen(sentence)>0: if'DOCSTART'notinsentence[0][0]: sentences.append(sentence) returnsentences #}}} Example#6 SourceProject: L.E.S.M.A   Author:NatanaelAntonioli   File: setup.py   License: ApacheLicense2.0 6 votes deffind_version(*file_paths): #OpeninLatin-1sothatweavoidencodingerrors. #Usecodecs.openforPython2compatibility try: f=codecs.open(os.path.join(here,*file_paths),'r','latin1') version_file=f.read() f.close() except: raiseRuntimeError("Unabletofindversionstring.") #Theversionlinemusthavetheform #__version__='ver' version_match=re.search(r"^__version__=['\"]([^'\"]*)['\"]", version_file,re.M) ifversion_match: returnversion_match.group(1) raiseRuntimeError("Unabletofindversionstring.") #Getthelongdescriptionfromtherelevantfile Example#7 SourceProject: ciocheck   Author:ContinuumIO   File: formatters.py   License: MITLicense 6 votes def_add_missing_init_py(self,paths): """Addmissing__init__.pyfilesinthemodulesubdirectories.""" results=[] folders=[os.path.dirname(p)forpinpaths] #Avoidaddinganinitonrepolevelifsetup.pyorotherscriptonthe #toplevelhaschanged ifself.cmd_rootinfolders: folders.remove(self.cmd_root) forfolderinfolders: init_py=os.path.join(folder,"__init__.py") exists=os.path.exists(init_py) ifnotexists: withcodecs.open(init_py,'w','utf-8')ashandle: handle.flush() result={ 'path':init_py, 'created':notexists, 'diff':diff('',''), 'error':None, } results.append(result) returnresults Example#8 SourceProject: text-rank   Author:ouprince   File: Segmentation.py   License: MITLicense 6 votes def__init__(self,stop_words_file=None,allow_speech_tags=util.allow_speech_tags): """ Keywordarguments: stop_words_file--保存停止词的文件路径,utf8编码,每行一个停止词。

若不是str类型,则使用默认的停止词 allow_speech_tags--词性列表,用于过滤 """ allow_speech_tags=[util.as_text(item)foriteminallow_speech_tags] self.default_speech_tag_filter=allow_speech_tags self.stop_words=set() self.stop_words_file=get_default_stop_words_file() iftype(stop_words_file)isstr: self.stop_words_file=stop_words_file forwordincodecs.open(self.stop_words_file,'r','utf-8','ignore'): self.stop_words.add(word.strip()) Example#9 SourceProject: open-sesame   Author:swabhs   File: preprocess.py   License: ApacheLicense2.0 6 votes defwrite_to_conll(outf,fsp,firstex,sentid): mode="a" iffirstex: mode="w" withcodecs.open(outf,mode,"utf-8")asoutf: foriinxrange(fsp.sent.size()): token,postag,nltkpostag,nltklemma,lu,frm,role=fsp.info_at_idx(i) outf.write(str(i+1)+"\t")#ID=0 outf.write(token.encode('utf-8')+"\t")#FORM=1 outf.write("_\t"+nltklemma+"\t")#LEMMAPLEMMA=2,3 outf.write(postag+"\t"+nltkpostag+"\t")#POSPPOS=4,5 outf.write(str(sentid-1)+"\t_\t")#FEATPFEAT=6,7~replacingFEATwithsentencenumber outf.write("_\t_\t")#HEADPHEAD=8,9 outf.write("_\t_\t")#DEPRELPDEPREL=10,11 outf.write(lu+"\t"+frm+"\t")#FILLPREDPRED=12,13 outf.write(role+"\n")#APREDS=14 outf.write("\n")#endofsentence outf.close() Example#10 SourceProject: dynamic-training-with-apache-mxnet-on-aws   Author:awslabs   File: data_helpers.py   License: ApacheLicense2.0 6 votes defload_data_and_labels(): """ LoadsMRpolaritydatafromfiles,splitsthedataintowordsandgenerateslabels. Returnssplitsentencesandlabels. """ #downloaddataset get_chinese_text() #Loaddatafromfiles positive_examples=list(codecs.open("./data/pos.txt","r","utf-8").readlines()) positive_examples=[s.strip()forsinpositive_examples] positive_examples=[peforpeinpositive_examplesiflen(pe)<100] negative_examples=list(codecs.open("./data/neg.txt","r","utf-8").readlines()) negative_examples=[s.strip()forsinnegative_examples] negative_examples=[neforneinnegative_examplesiflen(ne)<100] #Splitbywords x_text=positive_examples+negative_examples #x_text=[clean_str(sent)forsentinx_text] x_text=[list(s)forsinx_text] #Generatelabels positive_labels=[[0,1]for_inpositive_examples] negative_labels=[[1,0]for_innegative_examples] y=np.concatenate([positive_labels,negative_labels],0) return[x_text,y] Example#11 SourceProject: CyberTK-Self   Author:CyberTKR   File: Self.py   License: GNUGeneralPublicLicensev2.0 6 votes defsendImage(self,to_,path): M=Message(to=to_,contentType=1) M.contentMetadata=None M.contentPreview=None M_id=self.Talk.client.sendMessage(0,M).id files={ 'file':open(path,'rb'), } params={ 'name':'media', 'oid':M_id, 'size':len(open(path,'rb').read()), 'type':'image', 'ver':'1.0', } data={ 'params':json.dumps(params) } r=self.post_content('https://os.line.naver.jp/talk/m/upload.nhn',data=data,files=files) ifr.status_code!=201: raiseException('Uploadimagefailure.') returnTrue Example#12 SourceProject: CyberTK-Self   Author:CyberTKR   File: Self.py   License: GNUGeneralPublicLicensev2.0 6 votes defsendAudio(self,to_,path): M=Message(to=to_,text=None,contentType=3) M_id=self.Talk.client.sendMessage(0,M).id files={ 'file':open(path,'rb'), } params={ 'name':'media', 'oid':M_id, 'size':len(open(path,'rb').read()), 'type':'audio', 'ver':'1.0', } data={ 'params':json.dumps(params) } r=self.post_content('https://os.line.naver.jp/talk/m/upload.nhn',data=data,files=files) printr ifr.status_code!=201: raiseException('Uploadaudiofailure.') Example#13 SourceProject: CyberTK-Self   Author:CyberTKR   File: Self.py   License: GNUGeneralPublicLicensev2.0 6 votes defsendVoice(self,to_,path): M=Message(to=to_,text=None,contentType=3) M.contentPreview=None M_id=self._client.sendMessage(0,M).id files={ 'file':open(path,'rb'), } params={ 'name':'voice_message', 'oid':M_id, 'size':len(open(path,'rb').read()), 'type':'audio', 'ver':'1.0', } data={ 'params':json.dumps(params) } r=self.post_content('https://os.line.naver.jp/talk/m/upload.nhn',data=data,files=files) ifr.status_code!=201: raiseException('Uploadvoicefailure.') returnTrue Example#14 SourceProject: DOTA_models   Author:ringringyi   File: utils.py   License: ApacheLicense2.0 6 votes defbod2darknet(subpath,label,extractclassname): labelpath=os.path.join(subpath,label) filelist=GetFileFromThisRootDir(labelpath) outpath=r'/home/dj/data/bod-subset/labels' forfullnameinfilelist: objects=parse_bod_poly(fullname) name=os.path.splitext(os.path.basename(fullname))[0] withopen(os.path.join(outpath,name+'.txt'),'w')asf_out: forobjinobjects: poly=obj['poly'] bbox=np.array(dots4ToRecC(poly))/1024 if(sum(bbox<=0)+sum(bbox>=1))>=1: continue if(obj['name']inextractclassname): id=extractclassname.index(obj['name']) else: continue outline=str(id)+''+''.join(list(map(str,bbox))) f_out.write(outline+'\n') Example#15 SourceProject: DOTA_models   Author:ringringyi   File: utils.py   License: ApacheLicense2.0 6 votes defbodpolyToRec(self,label): Recpath=os.path.join(self.basepath,r'ReclabelTxt') forbasenameinself.namelist: #objects=parse_bod_poly(os.path.join(self.labelpath,basename+'.txt')) objects=parse_bod_poly(os.path.join(self.basepath,label,basename+'.txt')) f_out=codecs.open(os.path.join(Recpath,basename+'.txt'),'w','utf_16') forobjinobjects: bbox=dots4ToRec8(obj['poly']) name=obj['name'] difficult=obj['difficult'] bbox=list(map(str,bbox)) outline=''.join(bbox) outline=outline+''+name ifdifficult: outline=outline+''+str(difficult) f_out.write(outline+'\n') Example#16 SourceProject: DOTA_models   Author:ringringyi   File: utils.py   License: ApacheLicense2.0 6 votes defTransTo15Word_gt(self): dstpath=r'wordlabel' self.ParseTxtAndWrite(self.labelpath,dstpath,datamap_15) #defTransTo15class(self,path): #filelist=GetFileFromThisRootDir(self.labelpath) #forfullnameinfilelist: #objects=parse_bod_poly2(fullname) #name=mybasename(fullname) #outname=os.path.join(self.basepath,path,name+'.txt') #f_out=codecs.open(outname,'w','utf_16') # #forobjinobjects: #ifobj['name']inclassname_15: #ifpath=='wordlabel': #outline=''.join(map(str,obj['poly']))+''+datamap_15[obj['name']]+''+str(obj['difficult']) #print('outline:',outline) ##f_out.write(outline+'\n') #elifpath=='label15Txt': #outline=''.join(map(str,obj['poly']))+''+obj['name']+''+str(obj['difficult']) #print('outline:',outline) #f_out.write(outline+'\n') Example#17 SourceProject: DOTA_models   Author:ringringyi   File: utils.py   License: ApacheLicense2.0 6 votes defwordlabel2dark(self): filelist=GetFileFromThisRootDir(self.wordlabelpath) #print(filelist) forfullnameinfilelist: objects=parse_bod_poly(fullname) name=mybasename(fullname) withopen(os.path.join(self.darkpath,name+'.txt'),'w')asf_out: forobjinobjects: poly=obj['poly'] bbox=np.array(dots4ToRecC(poly))/1024 ##note:theboxisx_center,y_center,w,h,thatmeansthewholeboxcanbeoutofborder if(str(obj['difficult'])=='1'): continue if(sum(bbox<=0)+sum(bbox>=1))>=1: continue if(obj['name']inwordname_15): id=wordname_15.index(obj['name']) else: continue outline=str(id)+''+''.join(list(map(str,bbox))) f_out.write(outline+'\n') Example#18 SourceProject: DOTA_models   Author:ringringyi   File: utils.py   License: ApacheLicense2.0 6 votes defbodpolyToRec(srcpath,dstpath): #dstpath=os.path.join(r'E:\bod-dataset\patches\subcategorylabel\results\ReclabelTxt') filelist=GetFileFromThisRootDir(srcpath) namelist=[mybasename(x.strip())forxinfilelist] forbasenameinnamelist: #objects=parse_bod_poly(os.path.join(self.labelpath,basename+'.txt')) objects=parse_bod_poly(os.path.join(srcpath,basename+'.txt')) f_out=codecs.open(os.path.join(dstpath,basename+'.txt'),'w','utf_16') forobjinobjects: bbox=dots4ToRec8(obj['poly']) name=obj['name'] difficult=obj['difficult'] bbox=list(map(str,bbox)) outline=''.join(bbox) outline=outline+''+name ifdifficult: outline=outline+''+str(difficult) f_out.write(outline+'\n') Example#19 SourceProject: trip-advisor-crawler   Author:aesuli   File: trip-advisor-crawler.py   License: GNUGeneralPublicLicensev3.0 6 votes defgetreview(domain,cityid,activity,reviewid,timeout,maxretries,basepath,force,pause): baseurl='http://www.tripadvisor.'+domain+'/ShowUserReviews-g' reviewurl='%s%s-d%s-r%s'%(baseurl,cityid,activity,reviewid) path=os.sep.join((basepath,domain,str(cityid),str(activity))) filename=os.sep.join((path,str(reviewid)+'.html')) ifforceornotos.path.exists(filename): htmlpage=download_page(reviewurl,maxretries,timeout,pause) ifhtmlpageisNone: print('ErrordownloadingthereviewURL:'+reviewurl) else: ifnotos.path.exists(path): os.makedirs(path) withcodecs.open(filename,mode='w',encoding='utf8')asfile: file.write(htmlpage.decode('utf-8')) Example#20 SourceProject: keras-gpt-2   Author:CyberZHG   File: bpe.py   License: MITLicense 6 votes defget_bpe_from_files(encoder_path,vocab_path): """GetinitializedBPE. :paramencoder_path:Pathto'encoder.json'. :paramvocab_path:Pathto'vocab.bpe' :return:Theobjectfromencodeanddecodestrings. """ withcodecs.open(encoder_path,'r','utf8')asreader: token_dict=json.load(reader) bpe_rank={} withcodecs.open(vocab_path,'r','utf8')asreader: reader.readline() forrank,lineinenumerate(reader): line=line.strip() ifline: bpe_rank[tuple(line.split())]=rank returnBytePairEncoding(token_dict,bpe_rank) Example#21 SourceProject: lyrebird-api-coverage   Author:Meituan-Dianping   File: load_base.py   License: MITLicense 6 votes defauto_load_base(): lyrebird_conf=lyrebird.context.application.conf #读取指定base文件,写入到base.json iflyrebird_conf.get('hunter.base'): base_path=lyrebird_conf.get('hunter.base') base=codecs.open(base_path,'r','utf-8').read() f=codecs.open(DEFAULT_BASE,'w','utf-8') f.write(base) f.close() app_context.base_sha1=get_file_sha1(DEFAULT_BASE) returnjson.loads(base) #通过本地默认base文件获取base elifnotos.path.exists(DEFAULT_BASE): copy_file(DEFAULT_BASE) withcodecs.open(DEFAULT_BASE,'r','utf-8')asf: json_obj=json.load(f) app_context.base_sha1=get_file_sha1(DEFAULT_BASE) returnjson_obj Example#22 SourceProject: mutatest   Author:EvanKepner   File: conf.py   License: MITLicense 5 votes defread(*parts): """ Buildanabsolutepathfrom*parts*andandreturnthecontentsofthe resultingfile.AssumeUTF-8encoding. """ withcodecs.open(os.path.join(HERE,*parts),"rb","utf-8")asf: returnf.read() Example#23 SourceProject: mutatest   Author:EvanKepner   File: setup.py   License: MITLicense 5 votes defread(*parts): """ Buildanabsolutepathfrom*parts*andandreturnthecontentsofthe resultingfile.AssumeUTF-8encoding. """ withcodecs.open(os.path.join(HERE,*parts),"rb","utf-8")asf: returnf.read() Example#24 SourceProject: Financial-NLP   Author:Coldog2333   File: NLP.py   License: ApacheLicense2.0 5 votes defloadstopwords(self): """ loadstopwordsintotheobject. """ self.stop_words=list() stop_f=open(self.stopwords_txt,'r',encoding='utf-8') forlineinstop_f.readlines(): line=line.strip() ifnotlen(line): continue self.stop_words.append(line) stop_f.close() Example#25 SourceProject: Financial-NLP   Author:Coldog2333   File: NLP.py   License: ApacheLicense2.0 5 votes deftxt2wordbag(self,origin_file,cutflag=False,remove_stopwords=True):#testing """ pleaseremembertosetacorrespondingprocessingfile. """ iforigin_file.split('.')[0][-3:]!='cut': cut_file=self.cut(origin_file,remove_stopwords=True,swith_to_newtxt=True) else: cut_file=origin_file try: fp=open(cut_file,'r',encoding='utf-8') rawtxt=fp.read() except: fp=open(cut_file,'r',encoding='gbk') rawtxt=fp.read() words_list=rawtxt.split('') new_words_list=[] forwordinwords_list: ifword==''or(ord(word[0])<1024): continue else: new_words_list.append(word) ifnew_words_list=='\u3000': returnnew_words_list[1:] else: returnnew_words_list Example#26 SourceProject: Att-ChemdNER   Author:lingluodlut   File: tagger.py   License: ApacheLicense2.0 5 votes defload_sentences(path): sentences=[] forlineincodecs.open(path,'r','utf8'): sentence=[]; line=line.rstrip() ifline: word=line.split() foreleminword: sentence.append([elem]); sentences.append(sentence) returnsentences Example#27 SourceProject: Att-ChemdNER   Author:lingluodlut   File: utils.py   License: ApacheLicense2.0 5 votes deffindNotSame(fNameX,fNameY): #{{{ """ verifytwofileissameornot """ space='space'; defloadFile(fName): word=[]; importcodecs; forlineincodecs.open(fName,'r','utf8'): line=line.rstrip(); iflen(line)>0: word.append(line[0]); else: word.append(space); returnword; word1=loadFile(fNameX); word2=loadFile(fNameY); i=0; j=0; whilei



請為這篇文章評分?