Python数据可视化词云图绘制词云的方法总结

相关文章推荐

痛苦的红金鱼 · 不同类型可视化图表的适用场景_智能商业分析 ...· 1 月前 ·

眉毛粗的豆浆 · 2023 年 Databricks SQL ...· 2 周前 ·

霸气的胡萝卜 · 中国石油大学（北京）信息中心· 1 年前 ·

活泼的青蛙 · mysql中SELECT INTO 和 ...· 1 年前 ·

爱热闹的松鼠 · Kafka学习（三）-------- ...· 1 年前 ·

另类的路灯 · javascript - Using ...· 1 年前 ·

腾讯云

备案控制台

开发者社区

TVP

文章/答案/技术大牛

写文章

专栏首页 Python进阶之路 Python数据可视化词云图绘制词云的方法总结

4 2

分享

pip install wordcloud -i http://pypi.douban.com/simple --trusted-host pypi.douban.com

import jieba
import collections
import re
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Load the raw review corpus (the article describes it as 958 review comments).
# NOTE(review): no encoding is passed, so decoding follows the platform
# default -- confirm the actual encoding of data.txt.
with open('data.txt') as corpus_file:
    data = corpus_file.read()

# Pre-processing: keep only runs of Chinese characters (U+4E00..U+9FA5),
# dropping everything else, and re-join the runs with single spaces.
chinese_runs = re.findall('[\u4e00-\u9fa5]+', data, re.S)
new_data = " ".join(chinese_runs)

# Word segmentation. cut_all=True selects jieba's full mode; the call returns
# a lazy generator that is consumed by the filtering step below.
seg_list_exact = jieba.cut(new_data, cut_all=True)

# Stop words: one entry per line, with the trailing newline removed.
with open('stop_words.txt', encoding='utf-8') as stop_file:
    stop_words = {entry.replace("\n", "") for entry in stop_file.readlines()}

# Keep tokens longer than one character that are not stop words.
result_list = [
    token
    for token in seg_list_exact
    if len(token) > 1 and token not in stop_words
]
print(result_list)

# Count the surviving tokens and show the 100 most frequent ones.
word_counts = collections.Counter(result_list)
word_counts_top100 = word_counts.most_common(100)
print(word_counts_top100)

# Configure the word cloud renderer.
cloud_settings = dict(
    background_color='white',  # canvas background (library default is black)
    width=900,
    height=600,
    max_words=100,             # upper bound on the number of words drawn
    font_path='simhei.ttf',    # a font with Chinese glyphs is required
    max_font_size=99,          # largest font size used
    min_font_size=16,          # smallest font size used
    random_state=50,           # fixed seed so the rendering is reproducible
)
# The full counter is passed in; max_words limits how many entries are drawn.
my_cloud = WordCloud(**cloud_settings).generate_from_frequencies(word_counts)

# Display the generated image without axes.
plt.imshow(my_cloud, interpolation='bilinear')
plt.axis('off')
plt.show()

# class pyecharts.charts.WordCloud
# Constructor signature shown for reference only (not runnable as written).
class WordCloud(
    # Initialization options; see `global_options.InitOpts`
    init_opts: opts.InitOpts = opts.InitOpts()
)

# func pyecharts.charts.WordCloud.add
# Method signature shown for reference only (not runnable as written).
def add(
    # Series name, used for tooltip display and legend filtering.
    series_name: str,
    # Series data items, [(word1, count1), (word2, count2)]
    data_pair: Sequence,
    # Outline of the word cloud; one of 'circle', 'cardioid', 'diamond', 'triangle-forward', 'triangle', 'pentagon', 'star'
    shape: str = "circle",
    # Custom image (jpg, jpeg, png and ico are currently supported; other image formats are untested)
    # This parameter accepts:
    # 1. base64 (the data header must be supplied);
    # 2. a local file path (relative or absolute)
    # Note: after using mask_image the first render may come out blank; refreshing once fixes it (an Echarts issue)
    # Echarts Issue: https://github.com/ecomfe/echarts-wordcloud/issues/74
    mask_image: types.Optional[str] = None,
    # Gap between words
    word_gap: Numeric = 20,
    # Range of word font sizes
    word_size_range=None,
    # Rotation angle step for words
    rotate_step: Numeric = 45,
    # Distance from the left side
    pos_left: types.Optional[str] = None,
    # Distance from the top
    pos_top: types.Optional[str] = None,
    # Distance from the right side
    pos_right: types.Optional[str] = None,
    # Distance from the bottom
    pos_bottom: types.Optional[str] = None,
    # Width of the word cloud
    width: types.Optional[str] = None,
    # Height of the word cloud
    height: types.Optional[str] = None,
    # Allow word cloud data to be drawn outside the canvas bounds
    is_draw_out_of_bound: bool = False,
    # Tooltip component options; see `series_options.TooltipOpts`
    tooltip_opts: Union[opts.TooltipOpts, dict, None] = None,
    # Text style configuration for the word cloud
    textstyle_opts: types.TextStyle = None,
    # Extent of the word cloud text shadow
    emphasis_shadow_blur: types.Optional[types.Numeric] = None,
    # Colour of the word cloud text shadow
    emphasis_shadow_color: types.Optional[str] = None,
)

import jieba
import collections
import re
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType
from pyecharts import options as opts
from pyecharts.globals import ThemeType, CurrentConfig
# Override the host pyecharts loads its assets from.
# NOTE(review): presumably a local checkout of pyecharts-assets -- the path is
# machine-specific and must exist on the machine that opens the chart.
CurrentConfig.ONLINE_HOST = 'D:/python/pyecharts-assets-master/assets/'

# Load the raw review corpus (the article describes it as 958 review comments).
# NOTE(review): no encoding is passed, so decoding follows the platform
# default -- confirm the actual encoding of data.txt.
with open('data.txt') as corpus_file:
    data = corpus_file.read()

# Pre-processing: keep only runs of Chinese characters and join them with spaces.
chinese_runs = re.findall('[\u4e00-\u9fa5]+', data, re.S)
new_data = " ".join(chinese_runs)

# Word segmentation. cut_all=True selects jieba's full mode (not the precise
# mode); the returned generator is consumed by the filter below.
seg_list_exact = jieba.cut(new_data, cut_all=True)

# Stop words: one entry per line, with the trailing newline removed.
with open('stop_words.txt', encoding='utf-8') as stop_file:
    stop_words = {entry.replace("\n", "") for entry in stop_file.readlines()}

# Keep tokens longer than one character that are not stop words.
result_list = [
    token
    for token in seg_list_exact
    if len(token) > 1 and token not in stop_words
]
print(result_list)

# Count the surviving tokens and print the 100 most frequent (word, count) pairs.
word_counts = collections.Counter(result_list)
word_counts_top100 = word_counts.most_common(100)
print(word_counts_top100)

# Build the chart: canvas size and theme first, then the data series.
chart_init = opts.InitOpts(width='1350px', height='750px', theme=ThemeType.MACARONS)
word1 = WordCloud(init_opts=chart_init)
word1.add(
    '词频',
    data_pair=word_counts_top100,
    word_size_range=[15, 108],
    textstyle_opts=opts.TextStyleOpts(font_family='cursive'),
    shape=SymbolType.DIAMOND,
)

# Title, a vertical toolbox and a red/yellow tooltip for the whole chart.
chart_title = opts.TitleOpts('商品评论词云图')
chart_toolbox = opts.ToolboxOpts(is_show=True, orient='vertical')
chart_tooltip = opts.TooltipOpts(is_show=True, background_color='red', border_color='yellow')
word1.set_global_opts(
    title_opts=chart_title,
    toolbox_opts=chart_toolbox,
    tooltip_opts=chart_tooltip,
)

# Write the chart out as an HTML file.
word1.render("商品评论词云图.html")

pip install stylecloud -i http://pypi.douban.com/simple --trusted-host pypi.douban.com

from stylecloud import gen_stylecloud
import jieba
import re
import random
# Read the raw text corpus.
with open('datas.txt', encoding='utf-8') as f:
    data = f.read()

# Pre-processing: keep only runs of Chinese characters and join them with "/".
new_data = re.findall('[\u4e00-\u9fa5]+', data, re.S)
new_data = "/".join(new_data)

# Word segmentation. cut_all=True selects jieba's full mode; the returned
# generator is consumed by the filter below.
seg_list_exact = jieba.cut(new_data, cut_all=True)

# Stop words: one entry per line, with the trailing newline removed.
with open('stop_words.txt', encoding='utf-8') as f:
    stop_words = {line.rstrip("\n") for line in f}

# Keep tokens longer than one character that are not stop words.
result_list = [
    word
    for word in seg_list_exact
    if word not in stop_words and len(word) > 1
]
print(result_list)

# The palettable colour schemes (1587 of them, per the author) were dumped
# into a local txt file, one name per line.  Blank lines are skipped and the
# last line is kept whether or not the file ends with a newline, so
# random.choice never sees an empty name and never misses the final palette.
with open('palettable配色方案.txt') as f:
    stripped_lines = (line.strip() for line in f)
    choices = [name for name in stripped_lines if name]
    print(choices)

# Palettes the author personally recommends:
# colorbrewer.qualitative.Dark2_7
# cartocolors.qualitative.Bold_5
# colorbrewer.qualitative.Set1_8
gen_stylecloud(
    text=' '.join(result_list),               # text data
    size=600,                                 # size of the word cloud image
    # A font with Chinese glyphs is required.  Keep this literal free of
    # invisible characters (e.g. U+202A picked up when copying a path from
    # the Windows properties dialog), otherwise the font file is not found.
    font_path=r'C:\Windows\Fonts\msyh.ttc',
    output_name='词云.png',                   # output image file name
    icon_name='fas fa-grin-beam',             # icon
    palette=random.choice(choices)            # pick a colour scheme at random
)

叶庭云

Python数据可视化 词云图 绘制词云的方法总结

Python数据可视化 词云图 绘制词云的方法总结

文章目录

一、词云图

二、wordcloud库绘制词云

三、pyecharts库的WordCloud绘制词云

四、stylecloud库绘制词云

1. stylecloud简介

2. 蒙版图片

3. 配色

4. 绘制词云

5. 参考文章

Python爬虫，看看我最近博客都写了啥，带你制作高逼格的数据聚合云图

Python爬取自己微信好友信息，并制作好友签名词云

我分析了《用商业案例学R语言数据挖掘》书评，告诉你R有多火

【数据可视化】发送文字图片，帮你私人订制云词可视化

利用pandas+python制作100G亚马逊用户评论数据词云

Python生成词云图，TIIDF方法文本挖掘: 词频统计，词云图

Python爬淘宝——300W淘宝文胸说明了什么

Python使用wordcloud+pillow基于给定图像制作词云

【编程课堂】词云 wordcloud

Python数据可视化词云图绘制词云的方法总结