Python
读取excel文件某一列
import pandas as pd

# Load only the 'KolName' column of the 'NOT EXIST' sheet; the first
# spreadsheet row (header=0) supplies the column names.
names = pd.read_excel(
    'NOT_EXIST_List.xlsx',
    sheet_name='NOT EXIST',
    header=0,
    usecols=['KolName'],
)

# Walk the column cell by cell. .tolist() hands back plain Python scalars,
# which is what .values.item() on a one-row slice produced.
for name in names['KolName'].tolist():
    pass  # `name` holds one cell value per iteration; process it here

# De-duplicate a list
import pandas as pd

# `List` must already hold the values to de-duplicate.
# pd.unique keeps the first occurrence of each value in order of appearance;
# .tolist() converts the resulting array back to a plain Python list.
List = pd.unique(List).tolist()

# Parse an API response as JSON
import json

import requests  # the package is `requests`; `import request` fails

# `url`, `payload`, `headers` and `querystring` must be defined beforehand.
response = requests.request(
    "GET",
    url,
    data=payload,
    headers=headers,
    params=querystring,
)

# Decode the response body text into Python objects (dict / list / ...).
json_response = json.loads(response.text)

# Remove punctuation from a string
def removePunctuation(text):
    """Return *text* with every ASCII punctuation character removed.

    Only the characters in ``string.punctuation`` are dropped; letters,
    digits, whitespace and non-ASCII punctuation are kept unchanged.

    Args:
        text: The string (or any iterable of characters) to clean.

    Returns:
        A new string made of the remaining characters, in order.
    """
    # Imported here so the snippet works without a file-level import.
    import string

    return ''.join(c for c in text if c not in string.punctuation)

# Read and write a JSON file
import json

# Read: parse the whole file as a single JSON document. The `with` block
# closes the handle even if parsing fails.
with open('XXX.json', 'r', encoding='utf-8') as file:
    XXX = json.load(file)

# Write: mode 'w' replaces the previous content. Appending ('a') would put
# a second document after the first and leave the file unparseable as JSON.
with open('XXX.json', 'w', encoding='utf-8') as file:
    json.dump(XXX, file, indent=2)

# Write a list to an Excel file
# `List` is expected to hold two-item rows matching the columns below —
# confirm against the code that builds it.
df = pd.DataFrame(List, columns=['name', 'url'])

# The context manager saves and closes the workbook on exit;
# ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('remain.xlsx') as writer:
    df.to_excel(writer, sheet_name='remain')

# Sort a dict by key
def sortdict(data):
    """Return the items of *data* as an ``OrderedDict`` sorted by key.

    Args:
        data: A mapping whose keys can be compared with each other.

    Returns:
        A new ``collections.OrderedDict`` holding the same key/value pairs
        in ascending key order. *data* itself is not modified.
    """
    # Imported here so the snippet works without a file-level import.
    from collections import OrderedDict

    # Sort on the key only, so values never need to be comparable.
    return OrderedDict(sorted(data.items(), key=lambda item: item[0]))

# Append rows to an existing Excel file
# NOTE(review): `open_workbook` and `copy` look like xlrd.open_workbook and
# xlutils.copy.copy — confirm against the file's imports.
# `title`, `channel`, `video_url` and `url` must be defined beforehand.
rexcel = open_workbook("ad_match_new.xls")
# Row count of the first sheet doubles as the index of the first free row.
rows = rexcel.sheets()[0].nrows
# Edit a copy of the workbook that was just read, then save it back.
excel = copy(rexcel)
table = excel.get_sheet(0)
row = rows
# One record per row: title, channel, video_url, url in columns 0-3.
for col, value in enumerate((title, channel, video_url, url)):
    table.write(row, col, value)
row += 1
excel.save("ad_match_new.xls")
好处是在爬取数据或者使用接口时可以动态保存数据,不需要全部爬取完后再一次性存储,避免中途出错导致前功尽弃
PyTorch
- 得到一个张量中每一行的第一个非零元素的索引
# Assumes `data` is a 2-D tensor (rows x columns) — confirm at the call site.
# Descending weights n, n-1, ..., 1: earlier columns get larger weights.
idx = torch.arange(data.shape[1], 0, -1, device=data.device)
# Reduce to a 0/1 mask first so the magnitude of a value cannot outrank
# its position (e.g. [0, 1, 3] must give column 1, not column 2).
tmp2 = (data != 0) * idx
# The largest weight in each row marks its first non-zero column.
# A row with no non-zero element yields index 0.
indices = torch.argmax(tmp2, 1, keepdim=True)