numpy如何实现同时使用多个索引值进行操作?
- 内容介绍
- 文章标签
- 相关推荐
本文共计163个文字,预计阅读时间需要1分钟。
python
import numpy as np
from pypinyin import pinyin
import pandas as pd
set_word_list=['乱', '湿', '坡', '了', '给', '争', '事', '二']
import numpy as np
import pandas as pd
from pypinyin import pinyin

set_word_list = ['乱', '乳', '乾', '了', '予', '争', '事', '二']
# The list was originally spelled `set_word_lsit`; keep that name as an alias
# so any code that still refers to it continues to work.
set_word_lsit = set_word_list

# Convert every character to pinyin, taking the first reading pypinyin returns.
# The original sliced the list with [29:], which is empty for this 8-item
# list and left the DataFrame (and the whole demo) without any rows.
word_to_pinyin = {char: pinyin(char)[0][0] for char in set_word_list}
pinyin_df = pd.DataFrame({
    "hanzi": list(word_to_pinyin.keys()),
    "pinyin": list(word_to_pinyin.values()),
})

# Collect, for each character, the other characters sharing its pinyin.
pinyin_df["tongyin"] = ""
for _, group in pinyin_df.groupby("pinyin"):
    chars = group["hanzi"].tolist()
    size = len(chars)
    if size > 1:
        # One row per character, each row listing the whole group.
        grid = np.array([chars] * size)
        # Index with two arrays at once to hit the diagonal: row i, column i
        # is the character itself, so blank it out of its own row.
        diagonal = np.arange(size)
        grid[diagonal, diagonal] = ""
        # Joining a row skips the blanked cell, leaving only the homophones.
        # Write through .loc: chained `df[col][mask] = ...` assignment is not
        # guaranteed to modify the original frame.
        pinyin_df.loc[group.index, "tongyin"] = ["".join(row) for row in grid]
    else:
        # Singleton group: store the character itself (a plain string rather
        # than the nested list the original assigned).
        pinyin_df.loc[group.index, "tongyin"] = chars
# pinyin_df now holds the homophone table.
本文共计163个文字,预计阅读时间需要1分钟。
python
import numpy as np
from pypinyin import pinyin
import pandas as pd
set_word_list=['乱', '湿', '坡', '了', '给', '争', '事', '二']
import numpy as np
import pandas as pd
from pypinyin import pinyin

set_word_list = ['乱', '乳', '乾', '了', '予', '争', '事', '二']
# The list was originally spelled `set_word_lsit`; keep that name as an alias
# so any code that still refers to it continues to work.
set_word_lsit = set_word_list

# Convert every character to pinyin, taking the first reading pypinyin returns.
# The original sliced the list with [29:], which is empty for this 8-item
# list and left the DataFrame (and the whole demo) without any rows.
word_to_pinyin = {char: pinyin(char)[0][0] for char in set_word_list}
pinyin_df = pd.DataFrame({
    "hanzi": list(word_to_pinyin.keys()),
    "pinyin": list(word_to_pinyin.values()),
})

# Collect, for each character, the other characters sharing its pinyin.
pinyin_df["tongyin"] = ""
for _, group in pinyin_df.groupby("pinyin"):
    chars = group["hanzi"].tolist()
    size = len(chars)
    if size > 1:
        # One row per character, each row listing the whole group.
        grid = np.array([chars] * size)
        # Index with two arrays at once to hit the diagonal: row i, column i
        # is the character itself, so blank it out of its own row.
        diagonal = np.arange(size)
        grid[diagonal, diagonal] = ""
        # Joining a row skips the blanked cell, leaving only the homophones.
        # Write through .loc: chained `df[col][mask] = ...` assignment is not
        # guaranteed to modify the original frame.
        pinyin_df.loc[group.index, "tongyin"] = ["".join(row) for row in grid]
    else:
        # Singleton group: store the character itself (a plain string rather
        # than the nested list the original assigned).
        pinyin_df.loc[group.index, "tongyin"] = chars
# pinyin_df now holds the homophone table.

