New strategy to select underperformance on BeatLeader songs.

This commit is contained in:
Brian Lee 2024-11-23 13:21:40 -08:00
parent 9c48760f42
commit 41090ca480
3 changed files with 300 additions and 15 deletions

View File

@ -32,3 +32,15 @@ from helpers.BeatSaverAPI import BeatSaverAPI
beatsaver_api = BeatSaverAPI()
map_data = beatsaver_api.get_maps(year=2024, month=9)
```
## SimpleBeatLeaderAPI
```python
from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
player_id = "76561199407393962"
beatleader_api = SimpleBeatLeaderAPI()
data = beatleader_api.get_player_accgraph(player_id)
data[0]
filtered_data = [{'acc': item['acc'], 'stars': item['stars'], 'hash': item['hash']} for item in data]
filtered_data[0]
```

View File

@ -176,4 +176,46 @@ class SimpleBeatLeaderAPI:
return leaderboard_data.get('data', [])
except requests.exceptions.RequestException as e:
logging.error(f"Error fetching leaderboard for hash {hash}, diff {diff}, mode {mode}: {e}")
return None
def get_player_accgraph(self, player_id, use_cache=True, context="general", include_unranked=False, type="acc"):
    """
    Retrieve graph data for a specific player.

    A cached copy is returned when `use_cache` is true and `self._is_cache_valid`
    accepts the cache file. Otherwise the data is requested from
    `{BASE_URL}/player/{player_id}/accgraph` and written back to the cache.
    A cache file that cannot be read or parsed is ignored and the data is
    fetched again instead of aborting the caller.

    :param player_id: ID of the player
    :param use_cache: Whether to use cached data if available (default: True)
    :param context: Leaderboard context, eg. nopause, nomods, golf, scpm (default: "general")
    :param include_unranked: Whether to include unranked maps (default: False)
    :param type: Type of graph data to retrieve: 'acc', 'graph', or 'rank' (default: "acc").
        The name shadows the builtin but is kept for backward compatibility.
    :return: Parsed JSON graph data (list of data points), or None if the request failed
    """
    cache_file = os.path.join(self.CACHE_DIR, f"player_{player_id}_{type}graph_{context}.json")

    if use_cache and self._is_cache_valid(cache_file):
        logging.debug(f"Using cached {type} graph data for player {player_id}")
        try:
            with open(cache_file, 'r') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors; a truncated
            # or otherwise unreadable cache should trigger a refetch, not a crash.
            logging.warning(f"Ignoring unreadable cache file {cache_file}: {e}")

    logging.debug(f"Fetching fresh {type} graph data for player {player_id}")
    url = f"{self.BASE_URL}/player/{player_id}/accgraph"
    params = {
        "leaderboardContext": context,
        "type": type,
        "no_unranked_stars": not include_unranked
    }

    try:
        response = self.session.get(url, params=params)
        response.raise_for_status()
        graph_data = response.json()
    except requests.exceptions.RequestException as e:
        logging.error(f"Error fetching acc graph for player {player_id}: {e}")
        return None

    # Cache the results; failing to write the cache must not lose the fetched data.
    try:
        with open(cache_file, 'w') as f:
            json.dump(graph_data, f)
    except OSError as e:
        logging.warning(f"Could not write cache file {cache_file}: {e}")

    # Pause after every live request (presumably to stay polite to the API).
    sleep(1)
    return graph_data

View File

@ -1,14 +1,15 @@
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from helpers.BeatSaverAPI import BeatSaverAPI
from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
from statistics import mean
from statistics import mean, median
from typing import Dict, Any, List
import argparse
import json
import logging
import os
import sys
import math
from dotenv import load_dotenv
load_dotenv()
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
@ -39,7 +40,7 @@ def load_history() -> Dict[str, Any]:
history = json.load(f)
history.setdefault('playlist_counts', {})
return history
return {'highest_accuracy': {}, 'playlist_counts': {}}
return {'scoresaver': {}, 'playlist_counts': {}}
def save_history(history: Dict[str, Any]) -> None:
"""
@ -114,6 +115,139 @@ def infer_beatleader_leaderboard_id(song_id: str, difficulty: str) -> str:
return f"{song_id}{difficulty_map[difficulty]}1"
"""
def playlist_strategy_beatleader_accuracy_gaps(
    api: SimpleBeatLeaderAPI,
    song_count: int = 40,
    bin_size: float = 0.25,
    bin_sort: bool = False
) -> List[Dict[str, Any]]:
    """
    Build a playlist of songs where the player's accuracy is furthest below the median accuracy
    for their star rating range. Songs are grouped into bins by star rating to ensure fair comparison.

    Entries lacking a star rating or an accuracy cannot be compared against a bin median and
    are left out. The history file is only written when at least one song was selected.

    :param api: SimpleBeatLeaderAPI instance for making API calls
    :param song_count: Number of songs to include in the playlist
    :param bin_size: Size of star rating bins for grouping similar difficulty songs; must be positive
    :param bin_sort: Whether to sort the bins by star rating.
        NOTE(review): accepted but currently has no effect - confirm the intended behaviour.
    :return: A tuple containing (list of song dictionaries, playlist title string).
        NOTE(review): the return annotation only names the list; the second element is the title.
    :raises ValueError: if bin_size is not positive
    """
    if bin_size <= 0:
        raise ValueError(f"bin_size must be positive, got {bin_size}")

    player_id = prompt_for_player_id()
    history = load_history()
    history.setdefault('beatleader_accgraph_diff', {})
    history.setdefault('playlist_counts', {})

    # Get the current count and increment it
    count_key = 'beatleader_accgraph_diff'
    new_count = history['playlist_counts'].get(count_key, 0) + 1
    history['playlist_counts'][count_key] = new_count

    # Fetch accuracy graph data
    all_scores = api.get_player_accgraph(player_id)
    if not all_scores:
        logging.warning(f"No accgraph data found for player ID {player_id} on BeatLeader.")
        return [], ""
    logging.debug(f"Found {len(all_scores)} accgraph entries for player ID {player_id} on BeatLeader.")

    # Collect all star ratings
    star_ratings = [entry['stars'] for entry in all_scores if entry.get('stars') is not None]
    if not star_ratings:
        logging.warning("No star ratings found in accgraph data.")
        return [], ""
    min_stars = min(star_ratings)
    max_stars = max(star_ratings)

    def bin_index_for(stars: float) -> int:
        # Bins are bin_size wide and start at the lowest star rating seen.
        return int((stars - min_stars) / bin_size)

    # The highest-rated entry defines the last bin, so the count is its index + 1.
    num_bins = bin_index_for(max_stars) + 1
    logging.info(f"Using bin size: {bin_size}, resulting in {num_bins} bins.")

    # Only entries with both a star rating and an accuracy can be compared.
    comparable = [
        entry for entry in all_scores
        if entry.get('stars') is not None and entry.get('acc') is not None
    ]
    if len(comparable) != len(all_scores):
        logging.debug(f"Ignoring {len(all_scores) - len(comparable)} entries without stars or accuracy.")

    # Group accuracies by bins
    bin_to_accuracies = defaultdict(list)
    for entry in comparable:
        bin_to_accuracies[bin_index_for(entry['stars'])].append(entry['acc'])

    # Calculate median accuracy for each bin
    bin_to_median = {}
    for bin_index, accs in bin_to_accuracies.items():
        bin_to_median[bin_index] = median(accs)
        bin_start = min_stars + bin_index * bin_size
        bin_end = bin_start + bin_size
        logging.debug(f"Median accuracy for bin {bin_index} (stars {bin_start:.2f} to {bin_end:.2f}): {bin_to_median[bin_index]:.4f}")

    # Pair each score with its difference from the bin median, without mutating the
    # (possibly cached) API entries, then sort ascending: most below median first.
    ranked = [
        (entry['acc'] - bin_to_median[bin_index_for(entry['stars'])], entry)
        for entry in comparable
    ]
    ranked.sort(key=lambda pair: pair[0])

    used = history['beatleader_accgraph_diff']
    playlist_data = []
    for diff_from_median, score_entry in ranked:
        if len(playlist_data) >= song_count:
            break

        song_hash = score_entry.get('hash')
        if not song_hash:
            logging.debug(f"Skipping entry due to missing hash or stars: {score_entry}")
            continue
        acc = score_entry['acc']
        stars = score_entry['stars']

        # Use stars as a proxy for difficulty; adjust if you have actual difficulty levels
        difficulty = f"Stars_{stars:.2f}"

        # Avoid reusing the same song+difficulty
        if difficulty in used.get(song_hash, []):
            logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
            continue

        playlist_data.append({
            'hash': song_hash,
            'difficulties': [
                {
                    'name': difficulty,
                    'characteristic': 'Standard'  # Adjust if different characteristics are needed
                }
            ]
        })
        logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, "
                      f"Accuracy={acc*100:.2f}%, Diff from Median={diff_from_median*100:.2f}%")

        # Update history
        used.setdefault(song_hash, []).append(difficulty)

    if not playlist_data:
        logging.info("No new songs found to add to the playlist based on history for BeatLeader accgraph difference.")
    else:
        for song in playlist_data:
            song_hash = song['hash']
            difficulty = song['difficulties'][0]['name']
            logging.info(f"Song added: Hash={song_hash}, Difficulty={difficulty}")
        logging.info(f"Total songs added to playlist from BeatLeader accgraph difference: {len(playlist_data)}")
        # Persist only when a playlist was produced, so the counter is not consumed by empty runs.
        save_history(history)

    return playlist_data, f"beatleader_accgraph_diff-{new_count:02d}"
def playlist_strategy_scoresaber_oldscores(
api: ScoreSaberAPI,
song_count: int = 20
@ -548,7 +682,7 @@ def map_leaders_by_month(month: int = 9, year: int = 2024, game_modes: List[str]
logging.info(f"Collected leaderboards for {len(collected_data)} map+difficulty combinations, orderable by average accuracy of top ten plays for {month}/{year}.")
return collected_data
def playlist_strategy_highest_accuracy(
def playlist_strategy_scoresaver_acc(
song_count: int = 40
) -> List[Dict[str, Any]]:
"""
@ -560,7 +694,7 @@ def playlist_strategy_highest_accuracy(
:return: A list of dictionaries containing song information for the playlist.
"""
history = load_history()
history.setdefault('highest_accuracy', {})
history.setdefault('scoresaver', {})
history.setdefault('playlist_counts', {})
# Get last month's date
@ -585,7 +719,7 @@ def playlist_strategy_highest_accuracy(
print("Invalid input. Please enter numbers only.")
# Get the current count for highest accuracy and increment it
count_key = f"highest_accuracy-{year}-{month:02d}"
count_key = f"scoresaver-{year}-{month:02d}"
current_count = history['playlist_counts'].get(count_key, 0)
new_count = current_count + 1
history['playlist_counts'][count_key] = new_count
@ -611,7 +745,7 @@ def playlist_strategy_highest_accuracy(
song_hash = entry['hash']
# Check history to avoid reusing any map, regardless of difficulty
if song_hash in history['highest_accuracy']:
if song_hash in history['scoresaver']:
logging.debug(f"Skipping song {song_hash} as it's in history.")
continue
@ -626,7 +760,7 @@ def playlist_strategy_highest_accuracy(
logging.info(f"Song added: {entry['map_name']} ({difficulty}) - Average Accuracy: {entry['average_accuracy'] * 100:.2f}%")
# Update history (now we're just adding the song hash, not the difficulty)
history['highest_accuracy'][song_hash] = True
history['scoresaver'][song_hash] = True
# Log if no songs were added
if not playlist_data:
@ -636,7 +770,7 @@ def playlist_strategy_highest_accuracy(
save_history(history)
return playlist_data, f"highest_accuracy-{year}-{month:02d}-{new_count:02d}"
return playlist_data, f"scoresaver-{year}-{month:02d}-{new_count:02d}"
def reset_history(strategy: str) -> None:
"""
@ -657,7 +791,15 @@ def reset_history(strategy: str) -> None:
def get_strategy():
parser = argparse.ArgumentParser(description="Generate Beat Saber playlists")
parser.add_argument("-s", "--strategy",
choices=["scoresaber_oldscores", "beatleader_oldscores", "highest_accuracy", "beatleader_lowest_pp", "scoresaber_lowest_pp"],
choices=[
"scoresaber_oldscores",
"beatleader_oldscores",
"scoresaver_acc",
# "beatleader_lowest_pp",
# "scoresaber_lowest_pp",
# "beatleader_lowest_acc",
"beatleader_accuracy_gaps"
],
help="Specify the playlist generation strategy")
parser.add_argument("-r", "--reset",
action="store_true",
@ -680,24 +822,113 @@ def get_strategy():
return args.strategy
def playlist_strategy_beatleader_lowest_acc(
    api: BeatLeaderAPI,
    song_count: int = 20
) -> List[Dict[str, Any]]:
    """
    Selects songs with the lowest accuracy, avoiding reusing the same song+difficulty.

    Scores lacking a song hash or a usable difficulty value are skipped. The history
    file is only written when at least one song was selected.

    :param api: BeatLeaderAPI instance used to fetch the player's scores
    :param song_count: Maximum number of songs to include in the playlist
    :return: A tuple containing (list of song dictionaries, playlist title string).
        NOTE(review): the return annotation only names the list; the second element is the title.
    """
    player_id = prompt_for_player_id()
    history = load_history()
    history.setdefault('beatleader_lowest_acc', {})
    history.setdefault('playlist_counts', {})

    # Get the current count and increment it
    count_key = 'beatleader_lowest_acc'
    new_count = history['playlist_counts'].get(count_key, 0) + 1
    history['playlist_counts'][count_key] = new_count

    # Tolerate a missing response or a missing/None 'playerScores' key.
    scores_data = api.get_player_scores(player_id)
    all_scores = (scores_data or {}).get('playerScores') or []
    if not all_scores:
        logging.warning(f"No scores found for player ID {player_id} on BeatLeader.")
        return [], ""
    logging.debug(f"Found {len(all_scores)} scores for player ID {player_id} on BeatLeader.")

    def accuracy_or_inf(entry: Dict[str, Any]) -> float:
        # Scores without an accuracy sort last instead of breaking the comparison.
        accuracy = entry.get('score', {}).get('accuracy')
        return float('inf') if accuracy is None else accuracy

    # Sort by accuracy in ascending order (lowest first)
    all_scores.sort(key=accuracy_or_inf)

    used = history['beatleader_lowest_acc']
    playlist_data = []
    for score_entry in all_scores:
        if len(playlist_data) >= song_count:
            break

        score = score_entry.get('score', {})
        leaderboard = score_entry.get('leaderboard', {})
        song_hash = leaderboard.get('songHash')
        game_mode = leaderboard.get('modeName', 'Standard')
        accuracy = score.get('accuracy') or 0

        # A missing or non-numeric difficulty must lead to a skip, not a ValueError.
        try:
            difficulty_raw = int(leaderboard.get('difficulty'))
        except (TypeError, ValueError):
            difficulty_raw = 0

        if not song_hash or not difficulty_raw:
            logging.debug(f"Skipping score due to missing song_hash or difficulty_raw: {score_entry}")
            continue

        difficulty = normalize_difficulty_name(difficulty_raw)

        # avoid reusing song+difficulty
        if difficulty in used.get(song_hash, []):
            logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
            continue

        playlist_data.append({
            'hash': song_hash,
            'difficulties': [
                {
                    'name': difficulty,
                    'characteristic': game_mode
                }
            ]
        })
        logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, Accuracy={accuracy*100:.2f}%")

        # Update history
        used.setdefault(song_hash, []).append(difficulty)

    if not playlist_data:
        logging.info("No new songs found to add to the playlist based on history for BeatLeader lowest accuracy.")
    else:
        for song in playlist_data:
            song_hash = song['hash']
            difficulty = song['difficulties'][0]['name']
            logging.info(f"Song added: Hash={song_hash}, Difficulty={difficulty}")
        logging.info(f"Total songs added to playlist from BeatLeader lowest accuracy: {len(playlist_data)}")
        # Persist only when a playlist was produced, so the counter is not consumed by empty runs.
        save_history(history)

    return playlist_data, f"beatleader_lowest_acc-{new_count:02d}"
def saberlist() -> None:
"""
Generate a playlist of songs from a range of difficulties, all with scores previously set a long time ago.
The range of difficulties ensures that the first few songs are good for warming up.
Generate a playlist of songs using a specified strategy.
Avoids reusing the same song+difficulty in a playlist based on history.
"""
strategy = get_strategy()
if strategy == 'scoresaber_oldscores':
playlist_data, playlist_title = playlist_strategy_scoresaber_oldscores(ScoreSaberAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
playlist_builder = PlaylistBuilder()
elif strategy == 'beatleader_oldscores':
playlist_data, playlist_title = playlist_strategy_beatleader_oldscores(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
elif strategy == 'highest_accuracy':
playlist_data, playlist_title = playlist_strategy_highest_accuracy()
playlist_builder = PlaylistBuilder()
elif strategy == 'scoresaver_acc':
playlist_data, playlist_title = playlist_strategy_scoresaver_acc()
playlist_builder = PlaylistBuilder(covers_dir='./covers/scoresavers')
elif strategy == 'beatleader_lowest_pp':
playlist_data, playlist_title = playlist_strategy_beatleader_lowest_pp(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
playlist_builder = PlaylistBuilder(covers_dir='./covers/beatleader')
elif strategy == 'scoresaber_lowest_pp':
playlist_data, playlist_title = playlist_strategy_scoresaber_lowest_pp(ScoreSaberAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
playlist_builder = PlaylistBuilder(covers_dir='./covers/scoresaber')
elif strategy == 'beatleader_lowest_acc':
playlist_data, playlist_title = playlist_strategy_beatleader_lowest_acc(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
playlist_builder = PlaylistBuilder(covers_dir='./covers/kaiju')
elif strategy == 'beatleader_accuracy_gaps':
playlist_data, playlist_title = playlist_strategy_beatleader_accuracy_gaps(SimpleBeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
playlist_builder = PlaylistBuilder(covers_dir='./covers/pajamas')
else:
logging.error(f"Unknown strategy '{strategy}'")
return
@ -706,7 +937,7 @@ def saberlist() -> None:
logging.info("No new scores found to add to the playlist.")
return
PlaylistBuilder().create_playlist(
playlist_builder.create_playlist(
playlist_data,
playlist_title=playlist_title,
playlist_author="SaberList Tool"