#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build word clouds for a Japanese and an English text corpus.

The input text files are tokenized with MeCab (wakati output), the tokenized
text is written to ./mecab_out/, and one word cloud per corpus is rendered
to a PNG file and shown in a matplotlib window.

Based on: http://pynote.hatenablog.com/entry/python-wordcloud
"""
import codecs
import os
import sys
from sys import argv

import MeCab
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import wordcloud as wc
from wordcloud import STOPWORDS, WordCloud

# Font file handed to WordCloud for rendering the words.
FONT = "ipaexg.ttf"


def _read_utf8(path):
    """Return the full contents of the UTF-8 encoded text file at *path*."""
    with codecs.open(path, "r", "utf-8") as handle:
        return handle.read()


def _tokenize_to_file(tagger, raw_text, out_path):
    """Tokenize *raw_text* with *tagger*, write it to *out_path*, return it.

    The tokenized text is round-tripped through Shift_JIS with errors
    ignored, so every character Shift_JIS cannot encode is silently dropped.
    NOTE(review): presumably this filters out unsupported characters on
    purpose -- confirm the intent before changing it.
    """
    tokens = tagger.parse(raw_text)
    tokens = tokens.encode("Shift_jis", 'ignore').decode("Shift_jis")

    # Create the output directory on demand so a fresh checkout does not
    # fail when the tokenized text is written.
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with codecs.open(out_path, "w", 'utf-8') as handle:
        handle.write(tokens)
    return tokens


def _build_wordcloud(stopwords):
    """Return a WordCloud with the settings shared by both corpora."""
    return wc.WordCloud(
        font_path=FONT,
        width=800,
        height=600,
        background_color="white",
        max_words=50,
        min_font_size=10,
        max_font_size=96,
        collocations=False,
        stopwords=stopwords,
    )


def plot_wordcloud(wordcloud, title="WordCloud", pngfile=None):
    """Draw a word cloud in a matplotlib figure.

    Parameters
    ----------
    wordcloud : wordcloud object
        The word cloud to draw.
    title : str
        Title displayed above the image.
    pngfile : str (or None)
        If not None (and not empty), the figure is also saved to a file
        with this name.
    """
    # Convert the word cloud into an image array that imshow can draw.
    image = wordcloud.to_array()

    fig = plt.figure()
    left, width = 0.01, 0.98
    bottom, height = 0.01, 0.90
    ax = fig.add_axes([left, bottom, width, height])

    ax.imshow(image, interpolation="bilinear")
    ax.axis("off")
    ax.set_title(title, fontsize=18)

    # Save before showing, so the file does not depend on the figure
    # still being intact after the interactive window is closed.
    if pngfile:
        fig.savefig(pngfile)

    plt.show()


# --- Load the raw corpora -------------------------------------------------
# An earlier, disabled variant of this script skipped MeCab and read
# already-tokenized text from ./wakachidata/ instead.
data01 = _read_utf8(u'./textdata/touhoku_r.txt')
data02 = _read_utf8(u'./textdata/yobo3_1_6_r.txt')
data03 = _read_utf8(u'./textdata/ds_dourokyousonsyou_r.txt')
data04 = _read_utf8(u'./textdata/ks0829_r.txt')
# NOTE(review): data05 is read but not included in data1 below -- confirm
# whether leaving it out is intentional.
data05 = _read_utf8(u'./textdata/ks0381_r.txt')
data06 = _read_utf8(u'./textdata/output_america.txt')

data1 = data01 + data02 + data03 + data04
data2 = data06

# --- Tokenize --------------------------------------------------------------
# "-Owakati" makes MeCab output only the space-separated words.
mecab = MeCab.Tagger("-Owakati")

file_name1 = "./mecab_out/output_after_mecab.txt"
text1 = _tokenize_to_file(mecab, data1, file_name1)

file_name2 = "./mecab_out/output_after_mecab2.txt"
text2 = _tokenize_to_file(mecab, data2, file_name2)

# --- Render the word clouds -----------------------------------------------
wc1 = _build_wordcloud(
    {u'について', u'ある', u'もらい', u'さん', u'たい', u'なっ', u'いる',
     u'から', u'ます', u'その', u'する', u'あり', u'ので', u'まで', u'こと',
     u'にて', u'でき', u'なかっ', u'ため'}
)
wc1.generate(text1)
wc1.to_file('wordcloud_japan.png')
plot_wordcloud(wc1, title="Japan")

wc2 = _build_wordcloud(
    {u'and', u'the', u'Figure', u'for', u'of', u'to', u'in', u'that', u'or',
     u'are', u'with', u'a', u'an', u'as', u'may', u'can', u'This', u'also',
     u'at', u'is', u'by', u'be', u'not', u'if', u'it', u'other', u'these',
     u'when', u'have'}
)
wc2.generate(text2)
wc2.to_file('wordcloud_usa.png')
plot_wordcloud(wc2, title="USA")