-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: always select fallback if available #422
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,60 +38,87 @@ def _get_installation_weight(installation: GithubAppInstallation) -> int: | |
return MAX_GITHUB_APP_SELECTION_WEIGHT | ||
seconds_in_hour = 3600 | ||
age_hours = (age.seconds // seconds_in_hour) + age.days * 24 | ||
return age_hours + 2**age.days | ||
# Prevent clock differences from making the weight negative | ||
return max(1, age_hours + 2**age.days) | ||
|
||
|
||
def _can_use_this_app( | ||
app: GithubAppInstallation, installation_name: str, repository: Optional[Repository] | ||
) -> bool: | ||
return ( | ||
app.name == installation_name | ||
# We ignore apps that are not configured because those can't be used | ||
and app.is_configured() | ||
and ( | ||
# If there is a repo we want only the apps that cover said repo | ||
(repository and app.is_repo_covered_by_integration(repository)) | ||
# If there is no repo we still need some true value | ||
or (not repository) | ||
) | ||
) | ||
|
||
|
||
def _get_apps_from_weighted_selection( | ||
owner: Owner, installation_name: str, repository: Optional[Repository] | ||
) -> List[GithubAppInstallation]: | ||
ghapp_installations_filter: List[GithubAppInstallation] = list( | ||
filter( | ||
lambda obj: ( | ||
obj.name == installation_name | ||
and obj.is_configured() | ||
and ( | ||
# If there is a repo we want only the apps that cover said repo | ||
(repository and obj.is_repo_covered_by_integration(repository)) | ||
# If there is no repo we still need some true value | ||
or (not repository) | ||
) | ||
), | ||
"""This function returns an ordered list of GithubAppInstallations that can be used to communicate with GitHub | ||
in behalf of the owner. The list is ordered in such a way that the 1st element is the app to be used in Torngit, | ||
and the subsequent apps are selected as fallbacks. | ||
|
||
IF the repository is provided, the selected apps also cover the repo. | ||
IF installation_name is not the default one, than the default codecov installation | ||
is also selected as a possible fallback app. | ||
|
||
Apps are selected randomly but assigned weights based on how recently they were created. | ||
This means that older apps are selected more frequently as the main app than newer ones. | ||
(up to 10 days, when the probability of being chosen is the same) | ||
The random selection is done so we can distribute request load more evenly among apps. | ||
""" | ||
# Map GithubAppInstallation.id --> GithubAppInstallation | ||
ghapp_installations_filter: Dict[int, GithubAppInstallation] = { | ||
obj.id: obj | ||
for obj in filter( | ||
lambda obj: _can_use_this_app(obj, installation_name, repository), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is much more readable now! Thanks! |
||
owner.github_app_installations or [], | ||
) | ||
) | ||
} | ||
# We assign weights to the apps based on how long ago they were created. | ||
# The idea is that there's a greater chance that a change misconfigured the app, | ||
# So apps recently created are selected less frequently than older apps | ||
keys = list(ghapp_installations_filter.keys()) | ||
weights = [ | ||
min(MAX_GITHUB_APP_SELECTION_WEIGHT, _get_installation_weight(obj)) | ||
for obj in ghapp_installations_filter | ||
min( | ||
MAX_GITHUB_APP_SELECTION_WEIGHT, | ||
_get_installation_weight(ghapp_installations_filter[key]), | ||
) | ||
for key in keys | ||
] | ||
# Random selection of size 3. | ||
# If all apps have roughly the same probability of being selected, the array would have different entries. | ||
# If 1 app dominates the probability of selection than it would probably be that app repeated 3 times, BUT | ||
# from time to time the less frequent one would be selected. | ||
apps_to_consider = ( | ||
random.choices(ghapp_installations_filter, weights=weights, k=3) | ||
if len(ghapp_installations_filter) > 0 | ||
else [] | ||
) | ||
# We pick apps one by one until all apps have been selected | ||
# Obviously apps with a higher weight have a higher change of being selected as the main app (1st selection) | ||
# But it's important that others are also selected so we can use them as fallbacks | ||
apps_to_consider = [] | ||
apps_to_select = len(keys) | ||
selections = 0 | ||
while selections < apps_to_select: | ||
selected_app_id = random.choices(keys, weights, k=1)[0] | ||
apps_to_consider.append(ghapp_installations_filter[selected_app_id]) | ||
# random.choices chooses with replacement | ||
# which we are trying to avoid here. So we remove the key selected and its weight from the population. | ||
key_idx = keys.index(selected_app_id) | ||
keys.pop(key_idx) | ||
weights.pop(key_idx) | ||
selections += 1 | ||
if installation_name != GITHUB_APP_INSTALLATION_DEFAULT_NAME: | ||
# Add the default app as the last fallback if the owner is using a different app for the task | ||
default_apps = filter( | ||
lambda obj: obj.name == GITHUB_APP_INSTALLATION_DEFAULT_NAME, | ||
lambda obj: _can_use_this_app( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just double checking that this is OK. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I actually noticed when refactoring that the repo verification was missing from this filter. I think it is necessary to have that too, because if the repo is not covered we can't use the app. Of course, considering the default installation, we expect all repos (that Codecov has access to) to be covered, but it's better for the logic to be consistent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. About the |
||
obj, GITHUB_APP_INSTALLATION_DEFAULT_NAME, repository | ||
), | ||
owner.github_app_installations, | ||
) | ||
if default_apps: | ||
apps_to_consider.extend(default_apps) | ||
# Now we de-duplicate the apps_to_consider list before returning | ||
seen_ids = dict() | ||
list_to_return = [] | ||
for app in apps_to_consider: | ||
if seen_ids.get(app.id, False): | ||
continue | ||
seen_ids[app.id] = True | ||
list_to_return.append(app) | ||
return list_to_return | ||
return apps_to_consider | ||
|
||
|
||
def get_owner_installation_id( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it might be useful to add some function documentation around what it's returning because it looks like we're significantly changing the functionality. If I understand the code correctly, we're trying to get an ordered list of apps that's random based on the weighting (so that we don't always use the highest weighted one). The return is no longer just a "random subset of the apps" but an ordered list of apps (with some randomness).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is true... good callout here.