Pygithub: 검색 속도 제한 지원

에 만든 2017년 04월 10일  ·  13코멘트  ·  출처: PyGithub/PyGithub

get_rate_limit ํ•จ์ˆ˜๋Š” Github์ด '์ฝ”์–ด' ์†๋„ ์ œํ•œ์œผ๋กœ ๊ฐ„์ฃผํ•˜๋Š” ๊ฒƒ์„ ๋ฐ˜ํ™˜ํ•  ๊ฒƒ ๊ฐ™์Šต๋‹ˆ๋‹ค. ๊ทธ๋Ÿฌ๋‚˜ ์ฝ”๋“œ ๊ฒ€์ƒ‰์—๋Š” ๋‹ค๋ฅธ ์†๋„ ์ œํ•œ์ด ์žˆ์Šต๋‹ˆ๋‹ค. ์—ฌ๊ธฐ๋ฅผ ์ฐธ์กฐํ•˜์‹ญ์‹œ์˜ค.

지금 당장은 내가 말할 수 있는 한 검색 코드 속도 제한을 얻을 수 있는 방법이 없습니다.

feature request

가장 유용한 댓글

๊ฒ€์ƒ‰ ์—”์ง„์—์„œ ์—ฌ๊ธฐ์— ๋„์ฐฉํ•˜๋Š” ์‚ฌ๋žŒ๋“ค์„ ์œ„ํ•ด @bbi-yggy์˜ ๊ธฐ๋Šฅ์„ ์•ฝ๊ฐ„ ์ˆ˜์ •ํ–ˆ์Šต๋‹ˆ๋‹ค.

from datetime import datetime, timezone

def rate_limited_retry(github):
    """Build a decorator that retries a call when the rate limit is exceeded.

    The wrapped function is attempted up to three times. Whenever an attempt
    raises ``RateLimitExceededException`` the wrapper asks ``github`` for its
    current limits, reports how long remains until the *search* limit resets
    and, if another attempt follows, sleeps until that moment.

    Args:
        github: Client object whose ``get_rate_limit()`` result exposes
            ``search.reset`` as a datetime (interpreted as UTC here).

    Returns:
        A decorator that adds the retry behaviour to a function.

    Raises:
        Exception: Raised by the wrapped call when every attempt hit the rate
            limit; the last rate-limit error is chained as its cause.
    """
    import functools  # local import: the surrounding snippet does not import it

    max_attempts = 3

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def ret(*args, **kwargs):
            last_error = None
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except RateLimitExceededException as error:
                    # ``error`` is unbound when the except block ends, so keep
                    # a reference for exception chaining below.
                    last_error = error
                    limits = github.get_rate_limit()
                    # Force UTC on the reset time so the subtraction never
                    # mixes naive and aware datetimes.
                    reset = limits.search.reset.replace(tzinfo=timezone.utc)
                    now = datetime.now(timezone.utc)
                    seconds = (reset - now).total_seconds()
                    print("Rate limit exceeded")
                    print(f"Reset is in {seconds:.3g} seconds.")
                    # After the final attempt nothing is retried, so sleeping
                    # would only delay the failure.
                    if attempt < max_attempts and seconds > 0.0:
                        print(f"Waiting for {seconds:.3g} seconds...")
                        time.sleep(seconds)
                        print("Done waiting - resume!")
            raise Exception("Failed too many times") from last_error
        return ret
    return decorator

이 기능은 다음과 같이 사용할 수 있습니다.

@rate_limited_retry(github)
def run_query(import_string):
    """Search GitHub code for Python files containing the quoted string.

    The search result is turned into a list before returning, so iterating
    over it happens inside this function (and therefore inside the retry
    wrapper applied above).
    """
    matches = github.search_code(f"language:Python \"{import_string}\"")
    return list(matches)

results = run_query(import_string)

모든 13 댓글

나는 같은 문제를 본다. 다음은 문제를 예시하는 작은 스크립트입니다.

import os
from datetime import datetime
from github import Github

# Login
TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")
github = Github(TOKEN)

# Get initial rate limit and reset time
rl1 = github.get_rate_limit().rate
print("RL1 | Limit: {}, Remaining: {}, Reset: {}.".format(
    rl1.limit, rl1.remaining, rl1.reset))
# RL1 | Limit: 5000, Remaining: 5000, Reset: 2017-09-22 17:26:35.

# Perform a search
results = github.search_code("Hello World")

# Rate limit of Github instance is unchanged after a search
rl2 = github.get_rate_limit().rate
print("RL2 | Limit: {}, Remaining: {}, Reset: {}.".format(
    rl2.limit, rl2.remaining, rl2.reset))
# RL2 | Limit: 5000, Remaining: 5000, Reset: 2017-09-22 17:26:35.

# The PaginatedList instance has a Requestor with the same info.
# NOTE: rate_limiting is a (remaining, limit) tuple, so the limit is index 1
# and the remaining count is index 0 -- the same order the RL4 lookup below
# uses. (This relies on a name-mangled private attribute of PaginatedList.)
rl3 = results._PaginatedList__requester.rate_limiting
rl3_reset = datetime.utcfromtimestamp(int(
        results._PaginatedList__requester.rate_limiting_resettime))
print("RL3 | Limit: {}, Remaining: {}, Reset: {}.".format(
    rl3[1], rl3[0], rl3_reset))
# RL3 | Limit: 5000, Remaining: 5000, Reset: 2017-09-22 17:26:35.

# However, the actual ContentFile results show a different limit
# The Requester of each individual result ...
result = results[0]
rl4 = result._requester.rate_limiting
rl4_reset = datetime.utcfromtimestamp(int(
        result._requester.rate_limiting_resettime))
print("RL4 | Limit: {}, Remaining: {}, Reset: {}.".format(
    rl4[1], rl4[0], rl4_reset))
# RL4 | Limit: 30, Remaining: 29, Reset: 2017-09-22 16:27:36.

# ... and headers stored in the content file directly show a different rate limit.
rl5_limit = result._headers['x-ratelimit-limit']
rl5_remaining = result._headers['x-ratelimit-remaining']
rl5_reset = datetime.utcfromtimestamp(int(
        result._headers['x-ratelimit-reset']))
print("RL5 | Limit: {}, Remaining: {}, Reset: {}.".format(
    rl5_limit, rl5_remaining, rl5_reset))
# RL5 | Limit: 30, Remaining: 29, Reset: 2017-09-22 16:27:36.

# In the end, the main Github instance still shows the original full rate limit
rl6 = github.get_rate_limit().rate
print("RL6 | Limit: {}, Remaining: {}, Reset: {}.".format(
    rl6.limit, rl6.remaining, rl6.reset))
# RL6 | Limit: 5000, Remaining: 5000, Reset: 2017-09-22 17:26:35.

+1 이 기능은 내가 구축하려는 애플리케이션에 필요합니다.

@brentshermana ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜์˜ ๊ฒฝ์šฐ ์†๋„ ์ œํ•œ ํ—ค๋”(๋งˆ์ง€๋ง‰ ์‘๋‹ต, ์œ„์˜ ์˜ˆ ์ฐธ์กฐ)๋ฅผ ๊ฒ€์‚ฌํ•˜๊ฑฐ๋‚˜ /rate_limit ์—”๋“œํฌ์ธํŠธ ๋ฅผ ์ง์ ‘ ํด๋งํ•˜๋Š” ๊ฒƒ์„ ๊ณ ๋ คํ•˜์‹ญ์‹œ์˜ค. ์—ฌ๊ธฐ์—๋Š” ๋ชจ๋“  ์ข…๋ฅ˜์˜ ์†๋„ ์ œํ•œ์— ๋Œ€ํ•œ ์ •๋ณด๊ฐ€ ํฌํ•จ๋˜๋ฉฐ ์†๋„ ์ œํ•œ์— ํฌํ•จ๋˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.

๊ฒฐ๊ตญ PyGithub๊ฐ€ rate ๋ฟ๋งŒ ์•„๋‹ˆ๋ผ /rate_limit resources ๋„ ๊ตฌ๋ฌธ ๋ถ„์„ํ•˜๋ฉด ์ข‹์„ ๊ฒƒ์ž…๋‹ˆ๋‹ค. ์ •๋ณด๋Š” ์žˆ์ง€๋งŒ ๋ถˆํ–‰ํžˆ๋„ ๋„์„œ๊ด€ ์†Œ๋น„์ž์—๊ฒŒ๋Š” ์ œ๊ณต๋˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.

๋˜ํ•œ ํŽ˜์ด์ง€๊ฐ€ ๋งค๊ฒจ์ง„ ๋ชฉ๋ก์€ ์ด๋Ÿฌํ•œ ๊ฒ€์ƒ‰์˜ ๊ฒฐ๊ณผ, ์ฆ‰ _headers['x-ratelimit-*'] ์— ์ €์žฅ๋œ ๋ชจ๋“  ํ•ญ๋ชฉ์„ ๋ฐ˜ํ™˜ํ•˜๋Š” ๊ฒฝ์šฐ ์ฝ”๋“œ ๊ฒ€์ƒ‰์— ๋Œ€ํ•œ ์†๋„ ์ œํ•œ์„ ๋ฐ˜ํ™˜ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.

btw: ๋ฐฉ๊ธˆ $#$ /rate_limit $#$์—์„œ ๋ฐ˜ํ™˜๋œ JSON์˜ rate ํ•„๋“œ๋Š” ๋” ์ด์ƒ ์‚ฌ์šฉ๋˜์ง€ ์•Š์œผ๋ฉฐ resources ์˜ ์ •๋ณด๊ฐ€ ๊ถŒ์žฅ๋˜๋Š” ๋Œ€์•ˆ์ž„์„ ์•Œ์•˜์Šต๋‹ˆ๋‹ค. https://developer.github.com/ v3/rate_limit/#deprecation -๊ณต์ง€

๋‚ด๊ฐ€ ์ •ํ™•ํžˆ ํ•˜๊ณ  ์žˆ์–ด์š”. ๋ˆ„๊ตฌ๋“ ์ง€ ์ด๊ฒƒ์„ ์ ์šฉํ•˜๊ณ  ํ’€ ๋ฆฌํ€˜์ŠคํŠธ๋ฅผ ์‹œ๋„ํ•˜๊ณ  ์‹ถ๋‹ค๋ฉด ์ œ ์ถ•๋ณต์ด ์žˆ์Šต๋‹ˆ๋‹ค.

def wait(seconds):
    """Announce and perform a blocking pause of ``seconds`` seconds.

    A negative duration (for example a reset time that has already passed) is
    treated as zero, because ``time.sleep`` raises ``ValueError`` for negative
    values.
    """
    # max(seconds, 0) returns ``seconds`` itself whenever it is non-negative,
    # so the printed value is unchanged for valid inputs.
    delay = max(seconds, 0)
    print("Waiting for {} seconds ...".format(delay))
    time.sleep(delay)
    print("Done waiting - resume!")

def api_wait():
    """Pause until the reset time when the core or search quota is nearly used up.

    Fetches ``https://api.github.com/rate_limit`` and inspects the ``core``
    and ``search`` entries under ``resources``. If core has 10 or fewer
    requests remaining, waits until 10 seconds past the core reset;
    otherwise, if search has 2 or fewer remaining, waits until 10 seconds
    past the search reset. Returns nothing.
    """
    url = 'https://api.github.com/rate_limit'
    # NOTE(review): the request carries no credentials, so the figures
    # presumably describe the unauthenticated quota -- confirm this is intended.
    # The ``with`` block closes the HTTP response instead of leaking it.
    with urlopen(url) as response:
        data = json.loads(response.read().decode())
    resources = data['resources']
    if resources['core']['remaining'] <= 10:  # extra margin of safety
        wait(resources['core']['reset'] - time.time() + 10)
    elif resources['search']['remaining'] <= 2:
        wait(resources['search']['reset'] - time.time() + 10)

1869๊ฐœ์˜ ๊ฒฐ๊ณผ๊ฐ€ ์žˆ์–ด์•ผ ํ•˜๋Š”๋ฐ 1020๊ฐœ์˜ ๊ฒฐ๊ณผ ์ดํ›„์— search_issues์˜ ๊ฒฐ๊ณผ์— ๋Œ€ํ•œ ๋ฐ˜๋ณต์ด ์ค‘์ง€๋˜๋Š” ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‚ด ์Šคํฌ๋ฆฝํŠธ๋Š” ๋งค๋ฒˆ ๊ฐ™์€ ์ง€์ ์—์„œ ๋ฉˆ์ถฅ๋‹ˆ๋‹ค. ์ด๊ฒƒ์ด ์†๋„ ์ œํ•œ ๋ฌธ์ œ๊ฐ€ ๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๊นŒ?

์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ•˜์ง€ ์•Š๊ณ  ๊ฒฐ๊ณผ๋งŒ ๋‚˜์˜ต๋‹ˆ๋‹ค. ๋‚ด ์ฟผ๋ฆฌ ๋ฌธ์ž์—ด์„ GitHub ์›น ์ธํ„ฐํŽ˜์ด์Šค์— ์ง์ ‘ ๋„ฃ์œผ๋ฉด ์˜ˆ์ƒ๋Œ€๋กœ 1869๊ฐœ์˜ ๊ฒฐ๊ณผ๊ฐ€ ๋ชจ๋‘ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค. 1020์€ 30์˜ ๋ฐฐ์ˆ˜์ธ๋ฐ ํŽ˜์ด์ง€ ๋งค๊น€ ๋ฌธ์ œ์ธ์ง€ ๊ถ๊ธˆํ•ฉ๋‹ˆ๋‹ค.

코드는 다음과 같습니다.

# Search closed pull requests in the given window, ordered by ascending
# update time, and print each one's number and title.
querystring = "type:pr is:closed repo:xxxx closed:2017-07-01..2018-06-30"
issues = git.search_issues(query=querystring, sort="updated", order="asc")
for issue in issues:
    pull = issue.as_pull_request()
    # The parenthesised single-argument form prints the same text on Python 2
    # and Python 3; the bare ``print`` statement is a SyntaxError on Python 3.
    print("%s: %s" % (pull.number, pull.title))

์—ฌ๊ธฐ์„œ ๋ฌด์—‡์ด ์ž˜๋ชป๋  ์ˆ˜ ์žˆ๋Š”์ง€์— ๋Œ€ํ•ด ๊ณต์œ ํ•  ์ˆ˜ ์žˆ๋Š” ๋ชจ๋“  ํŒ์— ๋Œ€ํ•ด ๋งŽ์€ ๊ฐ์‚ฌ๋ฅผ ๋“œ๋ฆฝ๋‹ˆ๋‹ค.

๋˜ํ•œ issues.reversed ๋ฅผ ๋ฐ˜๋ณตํ•˜์—ฌ ์˜ˆ์ƒํ•œ 1869 ๊ฒฐ๊ณผ์˜ ๋์—์„œ ์‹œ์ž‘๋˜๋Š”์ง€ ํ™•์ธํ–ˆ์Šต๋‹ˆ๋‹ค. ๊ทธ๋Ÿฌ๋‚˜ ์ด ๊ฒฝ์šฐ ๊ฒฐ๊ณผ์˜ ์ฒซ ๋ฒˆ์งธ ํŽ˜์ด์ง€์—์„œ 30๊ฐœ์˜ ๋ฌธ์ œ๋งŒ ์–ป์Šต๋‹ˆ๋‹ค.

추가 조사에서 검색당 결과 1000개 제한에 도달한 것으로 보입니다.

๊ธฐ์กด get_rate_limit() ๊ฐ€ Github์—์„œ ์ œ์•ˆํ•œ ์ตœ์‹  "์ฝ”์–ด" ์†๋„ ์ œํ•œ์„ ๊ตฌ๋ฌธ ๋ถ„์„ํ•˜๋Š” ๋™์•ˆ ๊ฒ€์ƒ‰ ์†๋„ ์ œํ•œ์— ๋Œ€ํ•ด get_search_rate_limit() ๋ฐฉ๋ฒ•์„ ํ•˜๋‚˜ ๋” ์ œ๊ณตํ•˜๋Š” ๊ฒƒ์€ ์–ด๋–ป์Šต๋‹ˆ๊นŒ: https://developer.github.com/ v3/rate_limit/

검색 API 속도 제한 및 GraphQL 속도 제한을 지금 사용할 수 있습니다. 모두를 위한 하나의 방법입니다.

기본적으로 "코어" 비율 제한이 표시됩니다. 각 속성에 액세스하여 search/graphql 속도 제한을 얻을 수도 있습니다.

>>> r = g.get_rate_limit()
>>> r
RateLimit(core=Rate(remaining=4923, limit=5000))
>>> r.search
Rate(remaining=30, limit=30)
>>> r.graphql
Rate(remaining=5000, limit=5000)

멋지네요, 감사합니다 @sfdye!

검색 속도 제한 문제를 피하기 위해 @brentshermana 의 대기 기능을 에뮬레이트하려면 이제 다음과 같이 할 수 있습니다.

from datetime import datetime

def api_wait_search(git):
  """Sleep until the search rate limit resets when it is nearly exhausted.

  Does nothing while more than two search requests remain, or when the
  reported reset time has already passed.

  Args:
    git: Client object whose ``get_rate_limit()`` result exposes
      ``search.remaining`` and ``search.reset`` (a datetime; interpreted
      as UTC here).
  """
  # Local import: the snippet's own import line only brings in ``datetime``.
  from datetime import datetime, timezone

  limits = git.get_rate_limit()
  if limits.search.remaining > 2:
    return
  # Compare in UTC on both sides; subtracting local ``datetime.now()`` from
  # the reset time is off by the machine's UTC offset.
  reset = limits.search.reset.replace(tzinfo=timezone.utc)
  seconds = (reset - datetime.now(timezone.utc)).total_seconds()
  if seconds <= 0:
    # Already reset; ``time.sleep`` would raise ValueError on a negative value.
    return
  print("Waiting for %d seconds ..." % (seconds))
  time.sleep(seconds)
  print("Done waiting - resume!")

get_rate_limit() 를 호출하면 약간의 지연이 발생하므로 호출 빈도를 최소화하는 것이 좋습니다.

๊ฒ€์ƒ‰ ์—”์ง„์—์„œ ์—ฌ๊ธฐ์— ๋„์ฐฉํ•˜๋Š” ์‚ฌ๋žŒ๋“ค์„ ์œ„ํ•ด @bbi-yggy์˜ ๊ธฐ๋Šฅ์„ ์•ฝ๊ฐ„ ์ˆ˜์ •ํ–ˆ์Šต๋‹ˆ๋‹ค.

from datetime import datetime, timezone

def rate_limited_retry(github):
    """Build a decorator that retries a call when the rate limit is exceeded.

    The wrapped function is attempted up to three times. Whenever an attempt
    raises ``RateLimitExceededException`` the wrapper asks ``github`` for its
    current limits, reports how long remains until the *search* limit resets
    and, if another attempt follows, sleeps until that moment.

    Args:
        github: Client object whose ``get_rate_limit()`` result exposes
            ``search.reset`` as a datetime (interpreted as UTC here).

    Returns:
        A decorator that adds the retry behaviour to a function.

    Raises:
        Exception: Raised by the wrapped call when every attempt hit the rate
            limit; the last rate-limit error is chained as its cause.
    """
    import functools  # local import: the surrounding snippet does not import it

    max_attempts = 3

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def ret(*args, **kwargs):
            last_error = None
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except RateLimitExceededException as error:
                    # ``error`` is unbound when the except block ends, so keep
                    # a reference for exception chaining below.
                    last_error = error
                    limits = github.get_rate_limit()
                    # Force UTC on the reset time so the subtraction never
                    # mixes naive and aware datetimes.
                    reset = limits.search.reset.replace(tzinfo=timezone.utc)
                    now = datetime.now(timezone.utc)
                    seconds = (reset - now).total_seconds()
                    print("Rate limit exceeded")
                    print(f"Reset is in {seconds:.3g} seconds.")
                    # After the final attempt nothing is retried, so sleeping
                    # would only delay the failure.
                    if attempt < max_attempts and seconds > 0.0:
                        print(f"Waiting for {seconds:.3g} seconds...")
                        time.sleep(seconds)
                        print("Done waiting - resume!")
            raise Exception("Failed too many times") from last_error
        return ret
    return decorator

이 기능은 다음과 같이 사용할 수 있습니다.

@rate_limited_retry(github)
def run_query(import_string):
    """Search GitHub code for Python files containing the quoted string.

    The search result is turned into a list before returning, so iterating
    over it happens inside this function (and therefore inside the retry
    wrapper applied above).
    """
    matches = github.search_code(f"language:Python \"{import_string}\"")
    return list(matches)

results = run_query(import_string)

core/search/graphql์„ ๊ณ ๋ คํ•˜๋„๋ก ์œ„์˜ pokey ๋ฐ์ฝ”๋ ˆ์ดํ„ฐ๋ฅผ ์ˆ˜์ •ํ–ˆ์Šต๋‹ˆ๋‹ค.
๋˜ํ•œ Github์ด ์ง€์ •๋œ ์‹œ๊ฐ„์— ์†๋„ ์ œํ•œ์„ ์ •ํ™•ํ•˜๊ฒŒ ์žฌ์„ค์ •ํ•˜์ง€ ์•Š๊ธฐ ๋•Œ๋ฌธ์— 30์ดˆ ์ง€์—ฐ์ด ์ถ”๊ฐ€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.

def rate_limited_retry():
    """Build a decorator that retries a call once the exhausted limit resets.

    The wrapped function is attempted up to three times. Whenever an attempt
    raises ``RateLimitExceededException`` the wrapper reads the current
    limits from the module-level ``gh`` client, prints them, picks the
    resource to wait on (search, then graphql, otherwise core) and, if
    another attempt follows, sleeps until that resource's reset time plus a
    30-second margin.

    Returns:
        A decorator that adds the retry behaviour to a function.

    Raises:
        Exception: Raised by the wrapped call when every attempt hit the rate
            limit; the last rate-limit error is chained as its cause.
    """
    import functools  # local import: the surrounding snippet does not import it

    max_attempts = 3
    reset_slack = 30  # extra seconds added on top of the reported reset time

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def ret(*args, **kwargs):
            last_error = None
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except RateLimitExceededException as error:
                    # ``error`` is unbound when the except block ends, so keep
                    # a reference for exception chaining below.
                    last_error = error
                    limits = gh.get_rate_limit()
                    print("Rate limit exceeded")
                    print("Search:", limits.search, "Core:", limits.core, "GraphQl:", limits.graphql)

                    # Wait on the first resource with nothing remaining;
                    # fall back to core when neither search nor graphql is at 0.
                    if limits.search.remaining == 0:
                        limited = limits.search
                    elif limits.graphql.remaining == 0:
                        limited = limits.graphql
                    else:
                        limited = limits.core
                    # Force UTC on the reset time so the subtraction never
                    # mixes naive and aware datetimes.
                    reset = limited.reset.replace(tzinfo=timezone.utc)
                    now = datetime.now(timezone.utc)
                    seconds = (reset - now).total_seconds() + reset_slack
                    print(f"Reset is in {seconds} seconds.")
                    # After the final attempt nothing is retried, so sleeping
                    # would only delay the failure.
                    if attempt < max_attempts and seconds > 0.0:
                        print(f"Waiting for {seconds} seconds...")
                        time.sleep(seconds)
                        print("Done waiting - resume!")
            raise Exception("Failed too many times") from last_error
        return ret
    return decorator

이 페이지가 도움이 되었나요?
0 / 5 - 0 등급