New strategy to select songs where the player underperforms on BeatLeader.

2024-11-23 13:21:40 -08:00 · 2024-11-23 13:21:40 -08:00 · 41090ca480
commit 41090ca480
parent 9c48760f42
3 changed files with 300 additions and 15 deletions
--- a/docs/ClientWrapperUsage.md
+++ b/docs/ClientWrapperUsage.md
@ -32,3 +32,15 @@ from helpers.BeatSaverAPI import BeatSaverAPI
 beatsaver_api = BeatSaverAPI()
 map_data = beatsaver_api.get_maps(year=2024, month=9)
 ```
 ## SimpleBeatLeaderAPI
 ```python
 from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
 player_id = "76561199407393962"
 beatleader_api = SimpleBeatLeaderAPI()
 data = beatleader_api.get_player_accgraph(player_id)
 data[0]
 filtered_data = [{'acc': item['acc'], 'stars': item['stars'], 'hash': item['hash']} for item in data]
 filtered_data[0]
 ```
--- a/src/helpers/SimpleBeatLeaderAPI.py
+++ b/src/helpers/SimpleBeatLeaderAPI.py
@ -177,3 +177,45 @@ class SimpleBeatLeaderAPI:
        except requests.exceptions.RequestException as e:
            logging.error(f"Error fetching leaderboard for hash {hash}, diff {diff}, mode {mode}: {e}")
            return None
    def get_player_accgraph(self, player_id, use_cache=True, context="general", include_unranked=False, type="acc"):
        """
        Retrieve graph data for a specific player, using the on-disk cache when it is still valid.
        :param player_id: ID of the player
        :param use_cache: Whether to use cached data if available (default: True)
        :param context: Leaderboard context, eg. nopause, nomods, golf, scpm (default: "general")
        :param include_unranked: Whether to include unranked maps (default: False)
        :param type: Type of graph data to retrieve: 'acc', 'graph', or 'rank' (default: "acc")
        :return: Decoded JSON graph data (cached or freshly fetched), or None if the request failed
        """
        # `type` shadows the builtin, but it is part of the keyword interface and therefore kept.
        # The cache key has to cover every parameter that changes the response. The ranked-only
        # default keeps the historical file name so caches written before this fix stay usable.
        unranked_suffix = "_unranked" if include_unranked else ""
        cache_file = os.path.join(
            self.CACHE_DIR,
            f"player_{player_id}_{type}graph_{context}{unranked_suffix}.json"
        )
        if use_cache and self._is_cache_valid(cache_file):
            logging.debug(f"Using cached {type} graph data for player {player_id}")
            with open(cache_file, 'r') as f:
                return json.load(f)
        logging.debug(f"Fetching fresh {type} graph data for player {player_id}")
        url = f"{self.BASE_URL}/player/{player_id}/accgraph"
        params = {
            "leaderboardContext": context,
            "type": type,
            "no_unranked_stars": not include_unranked
        }
        try:
            response = self.session.get(url, params=params)
            response.raise_for_status()
            graph_data = response.json()
        except requests.exceptions.RequestException as e:
            # Report the graph type that was actually requested, not always "acc".
            logging.error(f"Error fetching {type} graph for player {player_id}: {e}")
            return None
        # Cache the results
        with open(cache_file, 'w') as f:
            json.dump(graph_data, f)
        # Pause after a live request; presumably a courtesy delay for the remote API (confirm).
        sleep(1)
        return graph_data
--- a/src/saberlist/make.py
+++ b/src/saberlist/make.py
@ -1,14 +1,15 @@
 from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 from helpers.BeatSaverAPI import BeatSaverAPI
 from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
-from statistics import mean
+from statistics import mean, median
 from typing import Dict, Any, List
 import argparse
 import json
 import logging
 import os
 import sys
-
+import math
 from dotenv import load_dotenv
 load_dotenv()
 LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
@ -39,7 +40,7 @@ def load_history() -> Dict[str, Any]:
            history = json.load(f)
            history.setdefault('playlist_counts', {})
            return history
-    return {'highest_accuracy': {}, 'playlist_counts': {}}
+    return {'scoresaver': {}, 'playlist_counts': {}}
 def save_history(history: Dict[str, Any]) -> None:
    """
@ -114,6 +115,139 @@ def infer_beatleader_leaderboard_id(song_id: str, difficulty: str) -> str:
    return f"{song_id}{difficulty_map[difficulty]}1"
 """
 def playlist_strategy_beatleader_accuracy_gaps(
    api: SimpleBeatLeaderAPI,
    song_count: int = 40,
    bin_size: float = 0.25,
    bin_sort: bool = False
 ) -> List[Dict[str, Any]]:
    """
    Build a playlist of songs where the player's accuracy is furthest below the median accuracy
    for their star rating range. Songs are grouped into bins by star rating to ensure fair comparison.
    Prompts for the player ID, and loads, updates and saves the playlist history.
    :param api: SimpleBeatLeaderAPI instance for making API calls
    :param song_count: Maximum number of songs to include in the playlist
    :param bin_size: Width (in stars) of the bins that group similar difficulty songs; must be > 0
    :param bin_sort: Currently unused; accepted so existing callers keep working
    :return: A tuple containing (list of song dictionaries, playlist title string);
             ([], "") when no accgraph data or no star ratings are available.
             NOTE(review): the annotation says List, but a tuple is returned.
    :raises ValueError: If bin_size is not greater than zero
    """
    if bin_size <= 0:
        # A zero width would divide by zero below and a negative one yields meaningless bins.
        raise ValueError(f"bin_size must be greater than zero, got {bin_size}")
    player_id = prompt_for_player_id()
    history = load_history()
    seen = history.setdefault('beatleader_accgraph_diff', {})
    history.setdefault('playlist_counts', {})
    # Get the current count and increment it (only persisted if we reach save_history below)
    count_key = 'beatleader_accgraph_diff'
    new_count = history['playlist_counts'].get(count_key, 0) + 1
    history['playlist_counts'][count_key] = new_count
    # Fetch accuracy graph data
    all_scores = api.get_player_accgraph(player_id)
    if not all_scores:
        logging.warning(f"No accgraph data found for player ID {player_id} on BeatLeader.")
        return [], ""
    logging.debug(f"Found {len(all_scores)} accgraph entries for player ID {player_id} on BeatLeader.")
    # Collect all star ratings
    star_ratings = [entry['stars'] for entry in all_scores if entry.get('stars') is not None]
    if not star_ratings:
        logging.warning("No star ratings found in accgraph data.")
        return [], ""
    min_stars = min(star_ratings)
    star_range = max(star_ratings) - min_stars
    def star_bin(stars):
        # Single source of truth for the bin index, used for grouping and for lookup.
        return int((stars - min_stars) / bin_size)
    # The highest-rated entry lands in bin int(range / size), so the span is that index plus one.
    num_bins = star_bin(min_stars + star_range) + 1
    logging.info(f"Using bin size: {bin_size}, resulting in {num_bins} bins.")
    # Group accuracies by bins
    bin_to_accuracies = defaultdict(list)
    for entry in all_scores:
        stars, acc = entry.get('stars'), entry.get('acc')
        if stars is not None and acc is not None:
            bin_to_accuracies[star_bin(stars)].append(acc)
    # Calculate median accuracy for each bin
    bin_to_median = {}
    for bin_index, accs in bin_to_accuracies.items():
        bin_to_median[bin_index] = median(accs)
        bin_start = min_stars + bin_index * bin_size
        bin_end = bin_start + bin_size
        logging.debug(f"Median accuracy for bin {bin_index} (stars {bin_start:.2f} to {bin_end:.2f}): {bin_to_median[bin_index]:.4f}")
    # Compute difference from median for each score
    for entry in all_scores:
        stars, acc = entry.get('stars'), entry.get('acc')
        if stars is None or acc is None:
            entry['diff_from_median'] = float('inf')  # Place entries with missing data at the end
        else:
            # Every complete entry contributed to its own bin above, so the median always exists.
            entry['diff_from_median'] = acc - bin_to_median[star_bin(stars)]
    # Sort scores by difference from median (ascending: most below median first)
    all_scores.sort(key=lambda entry: entry['diff_from_median'])
    playlist_data = []
    for score_entry in all_scores:
        if len(playlist_data) >= song_count:
            break
        # `or 0` also covers an explicit None, which would otherwise break the log formatting below.
        acc = score_entry.get('acc') or 0
        stars = score_entry.get('stars')
        song_hash = score_entry.get('hash')
        if not song_hash or stars is None:
            logging.debug(f"Skipping entry due to missing hash or stars: {score_entry}")
            continue
        # Use stars as a proxy for difficulty; adjust if you have actual difficulty levels.
        # NOTE(review): "Stars_x.xx" is not a real difficulty name; confirm the playlist consumer accepts it.
        difficulty = f"Stars_{stars:.2f}"
        # Avoid reusing the same song+difficulty
        if difficulty in seen.get(song_hash, ()):
            logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
            continue
        playlist_data.append({
            'hash': song_hash,
            'difficulties': [
                {
                    'name': difficulty,
                    'characteristic': 'Standard'  # Adjust if different characteristics are needed
                }
            ]
        })
        logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, "
                      f"Accuracy={acc*100:.2f}%, Diff from Median={score_entry['diff_from_median']*100:.2f}%")
        # Update history
        seen.setdefault(song_hash, []).append(difficulty)
    if not playlist_data:
        logging.info("No new songs found to add to the playlist based on history for BeatLeader accgraph difference.")
    else:
        for song in playlist_data:
            logging.info(f"Song added: Hash={song['hash']}, Difficulty={song['difficulties'][0]['name']}")
        logging.info(f"Total songs added to playlist from BeatLeader accgraph difference: {len(playlist_data)}")
    save_history(history)
    return playlist_data, f"beatleader_accgraph_diff-{new_count:02d}"
 def playlist_strategy_scoresaber_oldscores(
    api: ScoreSaberAPI, 
    song_count: int = 20
@ -548,7 +682,7 @@ def map_leaders_by_month(month: int = 9, year: int = 2024, game_modes: List[str]
    logging.info(f"Collected leaderboards for {len(collected_data)} map+difficulty combinations, orderable by average accuracy of top ten plays for {month}/{year}.")
    return collected_data
-def playlist_strategy_highest_accuracy(
+def playlist_strategy_scoresaver_acc(
    song_count: int = 40
 ) -> List[Dict[str, Any]]:
    """
@ -560,7 +694,7 @@ def playlist_strategy_highest_accuracy(
    :return: A list of dictionaries containing song information for the playlist.
    """
    history = load_history()
-    history.setdefault('highest_accuracy', {})
+    history.setdefault('scoresaver', {})
    history.setdefault('playlist_counts', {})
    # Get last month's date
@ -585,7 +719,7 @@ def playlist_strategy_highest_accuracy(
            print("Invalid input. Please enter numbers only.")
    # Get the current count for highest accuracy and increment it
-    count_key = f"highest_accuracy-{year}-{month:02d}"
+    count_key = f"scoresaver-{year}-{month:02d}"
    current_count = history['playlist_counts'].get(count_key, 0)
    new_count = current_count + 1
    history['playlist_counts'][count_key] = new_count
@ -611,7 +745,7 @@ def playlist_strategy_highest_accuracy(
        song_hash = entry['hash']
        # Check history to avoid reusing any map, regardless of difficulty
-        if song_hash in history['highest_accuracy']:
+        if song_hash in history['scoresaver']:
            logging.debug(f"Skipping song {song_hash} as it's in history.")
            continue
@ -626,7 +760,7 @@ def playlist_strategy_highest_accuracy(
        logging.info(f"Song added: {entry['map_name']} ({difficulty}) - Average Accuracy: {entry['average_accuracy'] * 100:.2f}%")
        # Update history (now we're just adding the song hash, not the difficulty)
-        history['highest_accuracy'][song_hash] = True
+        history['scoresaver'][song_hash] = True
    # Log if no songs were added
    if not playlist_data:
@ -636,7 +770,7 @@ def playlist_strategy_highest_accuracy(
    save_history(history)
-    return playlist_data, f"highest_accuracy-{year}-{month:02d}-{new_count:02d}"
+    return playlist_data, f"scoresaver-{year}-{month:02d}-{new_count:02d}"
 def reset_history(strategy: str) -> None:
    """
@ -657,7 +791,15 @@ def reset_history(strategy: str) -> None:
 def get_strategy():
    parser = argparse.ArgumentParser(description="Generate Beat Saber playlists")
    parser.add_argument("-s", "--strategy", 
-                        choices=["scoresaber_oldscores", "beatleader_oldscores", "highest_accuracy", "beatleader_lowest_pp", "scoresaber_lowest_pp"],
+                        choices=[
                            "scoresaber_oldscores",
                            "beatleader_oldscores",
                            "scoresaver_acc", 
                        #   "beatleader_lowest_pp",
                        #   "scoresaber_lowest_pp",
                        #   "beatleader_lowest_acc",
                            "beatleader_accuracy_gaps"
                        ],
                        help="Specify the playlist generation strategy")
    parser.add_argument("-r", "--reset",
                        action="store_true",
@ -680,24 +822,113 @@ def get_strategy():
    return args.strategy
 def playlist_strategy_beatleader_lowest_acc(
    api: BeatLeaderAPI,
    song_count: int = 20
 ) -> List[Dict[str, Any]]:
    """
    Selects songs with the lowest accuracy, avoiding reusing the same song+difficulty.
    Prompts for the player ID, and loads, updates and saves the playlist history.
    :param api: BeatLeaderAPI instance used to fetch the player's scores
    :param song_count: Maximum number of songs to include in the playlist
    :return: A tuple containing (list of song dictionaries, playlist title string);
             ([], "") when no scores are found.
             NOTE(review): the annotation says List, but a tuple is returned.
    """
    player_id = prompt_for_player_id()
    history = load_history()
    seen = history.setdefault('beatleader_lowest_acc', {})
    history.setdefault('playlist_counts', {})
    # Get the current count and increment it
    count_key = 'beatleader_lowest_acc'
    new_count = history['playlist_counts'].get(count_key, 0) + 1
    history['playlist_counts'][count_key] = new_count
    # Treat a None response like "no scores" instead of crashing on .get().
    # NOTE(review): assumes the wrapper may return None on a failed request - confirm.
    scores_data = api.get_player_scores(player_id) or {}
    all_scores = scores_data.get('playerScores', [])
    if not all_scores:
        logging.warning(f"No scores found for player ID {player_id} on BeatLeader.")
        return [], ""
    logging.debug(f"Found {len(all_scores)} scores for player ID {player_id} on BeatLeader.")
    # Sort by accuracy in ascending order (lowest first); scores without accuracy go last
    all_scores.sort(key=lambda x: x.get('score', {}).get('accuracy', float('inf')))
    playlist_data = []
    for score_entry in all_scores:
        if len(playlist_data) >= song_count:
            break
        score = score_entry.get('score', {})
        leaderboard = score_entry.get('leaderboard', {})
        song_hash = leaderboard.get('songHash')
        try:
            difficulty_raw = int(leaderboard.get('difficulty'))
        except (TypeError, ValueError):
            # Missing or non-numeric difficulty: fall through to the skip below rather than abort the run.
            difficulty_raw = 0
        game_mode = leaderboard.get('modeName', 'Standard')
        # `or 0` also covers an explicit None, which would otherwise break the log formatting below.
        accuracy = score.get('accuracy') or 0
        if not song_hash or not difficulty_raw:
            logging.debug(f"Skipping score due to missing song_hash or difficulty_raw: {score_entry}")
            continue
        difficulty = normalize_difficulty_name(difficulty_raw)
        # avoid reusing song+difficulty
        if difficulty in seen.get(song_hash, ()):
            logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
            continue
        playlist_data.append({
            'hash': song_hash,
            'difficulties': [
                {
                    'name': difficulty,
                    'characteristic': game_mode
                }
            ]
        })
        logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, Accuracy={accuracy*100:.2f}%")
        # Update history
        seen.setdefault(song_hash, []).append(difficulty)
    if not playlist_data:
        logging.info("No new songs found to add to the playlist based on history for BeatLeader lowest accuracy.")
    else:
        for song in playlist_data:
            logging.info(f"Song added: Hash={song['hash']}, Difficulty={song['difficulties'][0]['name']}")
        logging.info(f"Total songs added to playlist from BeatLeader lowest accuracy: {len(playlist_data)}")
    save_history(history)
    return playlist_data, f"beatleader_lowest_acc-{new_count:02d}"
 def saberlist() -> None:
    """
-    Generate a playlist of songs from a range of difficulties, all with scores previously set a long time ago.
+    Generate a playlist of songs using a specified strategy.
    The range of difficulties ensures that the first few songs are good for warming up.
    Avoids reusing the same song+difficulty in a playlist based on history.
    """
    strategy = get_strategy()
    if strategy == 'scoresaber_oldscores':
        playlist_data, playlist_title = playlist_strategy_scoresaber_oldscores(ScoreSaberAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
        playlist_builder = PlaylistBuilder()
    elif strategy == 'beatleader_oldscores':
        playlist_data, playlist_title = playlist_strategy_beatleader_oldscores(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
-    elif strategy == 'highest_accuracy':
+        playlist_builder = PlaylistBuilder()
-        playlist_data, playlist_title = playlist_strategy_highest_accuracy()
+    elif strategy == 'scoresaver_acc':
        playlist_data, playlist_title = playlist_strategy_scoresaver_acc()
        playlist_builder = PlaylistBuilder(covers_dir='./covers/scoresavers')
    elif strategy == 'beatleader_lowest_pp':
        playlist_data, playlist_title = playlist_strategy_beatleader_lowest_pp(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
        playlist_builder = PlaylistBuilder(covers_dir='./covers/beatleader')
    elif strategy == 'scoresaber_lowest_pp':
        playlist_data, playlist_title = playlist_strategy_scoresaber_lowest_pp(ScoreSaberAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
        playlist_builder = PlaylistBuilder(covers_dir='./covers/scoresaber')
    elif strategy == 'beatleader_lowest_acc':
        playlist_data, playlist_title = playlist_strategy_beatleader_lowest_acc(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
        playlist_builder = PlaylistBuilder(covers_dir='./covers/kaiju')
    elif strategy == 'beatleader_accuracy_gaps':
        playlist_data, playlist_title = playlist_strategy_beatleader_accuracy_gaps(SimpleBeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
        playlist_builder = PlaylistBuilder(covers_dir='./covers/pajamas')
    else:
        logging.error(f"Unknown strategy '{strategy}'")
        return
@ -706,7 +937,7 @@ def saberlist() -> None:
        logging.info("No new scores found to add to the playlist.")
        return
-    PlaylistBuilder().create_playlist(
+    playlist_builder.create_playlist(
        playlist_data,
        playlist_title=playlist_title,
        playlist_author="SaberList Tool"