Python如何仅需两秒获取最新大学排名信息?

2026-06-11 00:561阅读0评论SEO基础
  • 内容介绍
  • 文章标签
  • 相关推荐

本文共计354个文字,预计阅读时间需要2分钟。

Python如何仅需两秒获取最新大学排名信息?

python导入 traceback,requests 和 xlsxwriter 模块从 pyquery 模块导入 PyQuery 为 pq

import traceback
import requests
import xlsxwriter
from pyquery import PyQuery as pq

#init workBook
def __initWorkBook():
workBook = xlsxwriter.Workbook('QSUniversities.xlsx')
print("[info]: workBook initialized")
return workBook

# init sheet
def __initSheet(workBook):
Sheet = workBook.add_worksheet()
#initial the table head
Sheet.write(0, 0, 'Rank')
Sheet.write(0, 1, 'University')
Sheet.write(0, 2, 'Location')
Sheet.write(0, 3, 'Overall Score')
Sheet.write(0, 4, 'Academic Reputation')
Sheet.write(0, 5, 'Employer Reputation')
Sheet.write(0, 6, 'Citations per Faculty')
Sheet.write(0, 7, 'Faculty Student Ratio')
Sheet.write(0, 8, 'International Students Ratio')
Sheet.write(0, 9, 'International Faculty Ratio')
Sheet.write(0, 10, 'International Research Network')
Sheet.write(0, 11, 'Employment Outcomes')
print("[info]: sheet initialized")
return Sheet


def __closeWorkBook(workBook):
workBook.close()
print("[info]: WorkBook closed")

def record():
workBook = __initWorkBook()
sheet = __initSheet(workBook=workBook)
queryList = ["overall_rank_dis", "uni", "city+location", "overall", "ind_76", "ind_77", "ind_73",
"ind_36", "ind_14", "ind_18", "ind_15", "ind_3819456"]
try:
ret = requests.get("www.topuniversities.com/sites/default/files/qs-rankings-data/en/3816281_indicators.txt?rd6fcf")
data = ret.json().get("data")
curLine = 1
for item in data:
for i in range(len(queryList)):
if i == 2:
qs = queryList[2].split("+")
city = item.get(qs[0])
location = item.get(qs[1])
if city != "":
city = pq(city).text()
if location != "":
location = pq(location).text()
sheet.write(curLine, 2, city + location)
elif item.get(queryList[i]) != "":
sheet.write(curLine, i, pq(item.get(queryList[i])).text())
else:
sheet.write(curLine, i, "")
print(f"[info]: {curLine} processed...")
curLine = curLine + 1
except:
traceback.print_exc()
__closeWorkBook(workBook=workBook)


if __name__ == "__main__":
record()

结果如下:


Python如何仅需两秒获取最新大学排名信息?

本文共计354个文字,预计阅读时间需要2分钟。

Python如何仅需两秒获取最新大学排名信息?

python导入 traceback,requests 和 xlsxwriter 模块从 pyquery 模块导入 PyQuery 为 pq

import traceback
import requests
import xlsxwriter
from pyquery import PyQuery as pq

#init workBook
def __initWorkBook():
workBook = xlsxwriter.Workbook('QSUniversities.xlsx')
print("[info]: workBook initialized")
return workBook

# init sheet
def __initSheet(workBook):
Sheet = workBook.add_worksheet()
#initial the table head
Sheet.write(0, 0, 'Rank')
Sheet.write(0, 1, 'University')
Sheet.write(0, 2, 'Location')
Sheet.write(0, 3, 'Overall Score')
Sheet.write(0, 4, 'Academic Reputation')
Sheet.write(0, 5, 'Employer Reputation')
Sheet.write(0, 6, 'Citations per Faculty')
Sheet.write(0, 7, 'Faculty Student Ratio')
Sheet.write(0, 8, 'International Students Ratio')
Sheet.write(0, 9, 'International Faculty Ratio')
Sheet.write(0, 10, 'International Research Network')
Sheet.write(0, 11, 'Employment Outcomes')
print("[info]: sheet initialized")
return Sheet


def __closeWorkBook(workBook):
workBook.close()
print("[info]: WorkBook closed")

def record():
workBook = __initWorkBook()
sheet = __initSheet(workBook=workBook)
queryList = ["overall_rank_dis", "uni", "city+location", "overall", "ind_76", "ind_77", "ind_73",
"ind_36", "ind_14", "ind_18", "ind_15", "ind_3819456"]
try:
ret = requests.get("www.topuniversities.com/sites/default/files/qs-rankings-data/en/3816281_indicators.txt?rd6fcf")
data = ret.json().get("data")
curLine = 1
for item in data:
for i in range(len(queryList)):
if i == 2:
qs = queryList[2].split("+")
city = item.get(qs[0])
location = item.get(qs[1])
if city != "":
city = pq(city).text()
if location != "":
location = pq(location).text()
sheet.write(curLine, 2, city + location)
elif item.get(queryList[i]) != "":
sheet.write(curLine, i, pq(item.get(queryList[i])).text())
else:
sheet.write(curLine, i, "")
print(f"[info]: {curLine} processed...")
curLine = curLine + 1
except:
traceback.print_exc()
__closeWorkBook(workBook=workBook)


if __name__ == "__main__":
record()

结果如下:


Python如何仅需两秒获取最新大学排名信息?