随手搓了一个随时统计洛谷做题数据的程序
  • 板块灌水区
  • 楼主dongzhen
  • 当前回复3
  • 已保存回复3
  • 发布时间2025/2/5 08:38
  • 上次更新2025/2/5 12:20:14
查看原帖
随手搓了一个随时统计洛谷做题数据的程序
1145602
dongzhen楼主2025/2/5 08:38

源码地址

from selenium import webdriver
from bs4 import BeautifulSoup  # 网页解析,获取数据
import re  # 正则表达式,进行文字匹配`
import urllib.error  # 制定URL,获取网页数据
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

import pickle  # 登录

# Tally of accepted problems per difficulty. Slots 0-6 line up with the
# labels in `dificulty`; slot 7 has no matching label.
cnt = [0] * 8
# Problem ids that have already been handed to solve_problem.
vis = dict()
# Difficulty labels searched for in a problem page, one per tally slot.
dificulty = [
    r"入门",
    r"普及−",
    r"普及/提高−",
    r"普及+/提高",
    r"提高+/省选−",
    r"省选/NOI−",
    r"NOI/NOI+/CTSC",
]

# Local files: the chromedriver binary and the pickled login cookies.
driver_path = r"C:\Program Files\Google\Chrome\Application\chromedriver.exe"
cookie_path = r"C:\Users\W1986\Documents\Projects\py\cookies.pkl"

# URL pieces. Replace "你的uid" with your own Luogu uid before running.
problem_path = r"https://www.luogu.com.cn/problem/"
www_path = r"https://www.luogu.com.cn/record/list?pid="
user_path = r"&user=你的uid"
record_path = r"https://www.luogu.com.cn/record/list?user=你的uid&page="


def dificulty_measure(date):
    """Return the index of the difficulty label found in a problem page.

    Only the text before the first "查看题解" marker is searched. If the
    marker is missing, the whole page is searched.

    Args:
        date: Page source of a problem page, as a string.

    Returns:
        The highest index ``i`` such that ``dificulty[i]`` occurs in the
        searched text, or 7 when no label is found.
    """
    ed = date.find("查看题解")
    if ed == -1:
        # Passing -1 as the end bound to str.find means "stop before the last
        # character", which silently drops the end of the page. Use the full
        # length instead.
        ed = len(date)
    # Scan from the last label to the first so the highest index wins.
    for i in reversed(range(len(dificulty))):
        if date.find(dificulty[i], 0, ed) != -1:
            return i
    return 7


def is_accepted(date):
    """Tell whether "Accepted" shows up a second time in the page text.

    The second search starts 10 characters after the start of the first
    match (or at offset 9 when there is no first match).

    Args:
        date: Page source of a record list, as a string.

    Returns:
        True if that second search finds "Accepted", otherwise False.
    """
    keyword = "Accepted"
    resume_at = date.find(keyword) + 10
    return date.find(keyword, resume_at) >= 0


# Initialise the browser sessions.
# Two Chrome sessions are created from the same options and service:
# `driver` is used by main() to walk the record-list pages, and `p` is used
# by solve_problem() to open per-problem pages.
# NOTE(review): both sessions share one Service object — confirm this is
# supported by the installed Selenium version.
options = Options()
service = Service(executable_path=driver_path)
driver = webdriver.Chrome(service=service, options=options)
p = webdriver.Chrome(service=service, options=options)


def update_cookies(driver):
    """Load the saved cookies into a browser session and refresh the page.

    The cookies are read from the pickle file at ``cookie_path`` and added
    one by one to ``driver``; the page is then refreshed so the logged-in
    state takes effect.

    Args:
        driver: The Selenium WebDriver session to add the cookies to.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file.
    # Only use a cookie file that you created yourself.
    with open(cookie_path, 'rb') as cookie_file:
        cookies = pickle.load(cookie_file)

    for cookie in cookies:
        driver.add_cookie(cookie)

    driver.refresh()  # Refresh so the logged-in state takes effect.


def solve_problem(problem):
    """Count one problem in ``cnt`` if the user has an accepted submission.

    Problems whose id starts with "U" or "T" are ignored. Otherwise the
    problem is marked in ``vis``, the user's submissions for it are checked
    with ``is_accepted`` (retrying up to 5 times), and if accepted the
    problem page is loaded to determine its difficulty. The difficulty label
    is printed, the matching slot of ``cnt`` is incremented, and a line is
    appended to the result file.

    Args:
        problem: Problem id as it appears in a ``/problem/<id>`` link.
    """
    max_retries = 5
    retry_delay = 3  # seconds to wait after each reload

    if not problem or problem[0] in ("U", "T"):
        return
    vis[problem] = 1

    submissions_url = www_path + problem + user_path
    p.get(submissions_url)
    if not is_accepted(p.page_source):
        for _ in range(max_retries):
            p.get(submissions_url)
            time.sleep(retry_delay)
            if is_accepted(p.page_source):
                break
        else:
            # Never saw an accepted submission: do not count this problem.
            return

    statement_url = problem_path + problem
    p.get(statement_url)
    ans = dificulty_measure(p.page_source)
    # Retry a bounded number of times. A page that never shows one of the
    # known labels would otherwise keep this loop running forever.
    retries = 0
    while ans == 7 and retries < max_retries:
        p.get(statement_url)
        time.sleep(retry_delay)
        ans = dificulty_measure(p.page_source)
        retries += 1

    # Slot 7 of cnt has no entry in dificulty, so give it its own label.
    label = dificulty[ans] if ans < len(dificulty) else "暂无评定"
    print(label)
    cnt[ans] += 1

    # Append one line per problem. Mode 'w' would truncate the file on every
    # call and leave only the last problem in it.
    with open(r"C:\Users\W1986\Documents\Projects\py\example.txt",
              'a',
              encoding='utf-8') as file:
        file.write(problem + "       " + label + "\n")


def solve_record(date):
    """Extract problem ids from a record-list page and process new ones.

    Every occurrence of ``/problem/`` in ``date`` is followed up to the next
    ``?`` or ``"`` (or the end of the text) to obtain a problem id. Ids that
    are empty or start with "l" (e.g. ``/problem/list``) are skipped. Each
    remaining id not yet marked in ``vis`` is printed and passed to
    ``solve_problem``, after which the current ``cnt`` is printed.

    Args:
        date: Page source of a record-list page, as a string.
    """
    marker = "/problem/"
    size = len(date)
    now = date.find(marker)
    while now != -1:
        start = now + len(marker)
        end = start
        # Stop at the end of the text too, so a link cut off at the end of
        # the page cannot raise IndexError.
        while end < size and date[end] != "?" and date[end] != "\"":
            end += 1
        problem = date[start:end]

        # An empty id (e.g. `/problem/"`) has no first character to inspect.
        if problem and problem[0] != "l":
            if vis.get(problem, 0) == 0:
                print(problem, end="     ")
                solve_problem(problem)
                print(cnt)

        now = date.find(marker, end)


# Open the first record-list page in each session, then load the saved
# cookies into it. The page is loaded before update_cookies() is called
# (presumably because Selenium only accepts cookies for the domain that is
# currently open — confirm against the Selenium docs).
driver.get(record_path + '1')
driver.implicitly_wait(10)  # implicit wait of 10 for this session
update_cookies(driver)

# Same steps for the second session `p`.
p.get(record_path + '1')
p.implicitly_wait(10)
update_cookies(p)

# Leftover manual check against a single problem page:
# p.get(r"https://www.luogu.com.cn/problem/P7073")
# p.refresh()


def main():
    """Walk the user's record-list pages and process every page in turn.

    Pages are loaded with ``driver`` starting from page 1. After each load
    the function waits 6 seconds, takes one snapshot of the page source, and
    stops when that snapshot contains "没有找到结果"; otherwise the snapshot
    is passed to ``solve_record`` and the next page is loaded.
    """
    page = 1
    while True:
        driver.get(record_path + str(page))
        time.sleep(6)  # give the page time to render its content
        # Take a single snapshot so the end-of-list check and the parsing
        # both see exactly the same page text.
        date = driver.page_source

        if date.find(r"没有找到结果") != -1:
            break

        solve_record(date)

        page += 1


# Start the crawl as soon as the script runs.
main()

# Disabled helper lines: when enabled, they would wait 20 seconds and then
# pickle the cookies of `driver` into cookie_path (presumably used once to
# log in by hand and create the cookie file — confirm with the author).
# time.sleep(20)

# pickle.dump(driver.get_cookies(), open(cookie_path, 'wb'))

2025/2/5 08:38
加载中...