
from collections import defaultdict
from statistics import median
from typing import Dict, Any, List, Tuple
import logging
import math
import os

from dotenv import load_dotenv

load_dotenv()
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
logging.basicConfig(
    format='%(asctime)s %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=LOG_LEVEL
)
from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
from helpers.BeatLeaderAPI import BeatLeaderAPI
from saberlist.utils import prompt_for_player_id, load_history, save_history, normalize_difficulty_name
from helpers.ScoreSaberAPI import ScoreSaberAPI
from clients.scoresaber.models.get_api_player_player_id_scores_sort import GetApiPlayerPlayerIdScoresSort
"""Testing
api = ScoreSaberAPI()
song_count = 40
bin_size = 0.25
bin_sort = False
"""
def playlist_strategy_scoresaber_accuracy_gaps(
    api: ScoreSaberAPI,
    song_count: int = 40,
    bin_size: float = 0.25,
    bin_sort: bool = False
) -> Tuple[List[Dict[str, Any]], str]:
"""
Build a playlist of songs where the player's accuracy is furthest below the median accuracy
for their star rating range. Songs are grouped into bins by star rating to ensure fair comparison.
:param api: ScoreSaberAPI instance for making API calls
:param song_count: Number of songs to include in the playlist
:param bin_size: Size of star rating bins for grouping similar difficulty songs
:param bin_sort: Whether to sort the bins by star rating
:return: A tuple containing (list of song dictionaries, playlist title string)
"""
player_id = prompt_for_player_id()
history = load_history()
history.setdefault('scoresaber_accuracy_gaps', {})
history.setdefault('playlist_counts', {})
# Get the current count and increment it
count_key = 'scoresaber_accuracy_gaps'
current_count = history['playlist_counts'].get(count_key, 0)
new_count = current_count + 1
history['playlist_counts'][count_key] = new_count
# Fetch player scores
scores_data = api.get_player_scores(
player_id=player_id,
use_cache=True,
limit=100, # per page
sort=GetApiPlayerPlayerIdScoresSort.RECENT
)
ranked_scores = [score for score in scores_data.get('playerScores', [])
if score.get('leaderboard', {}).get('stars', 0) != 0]
if not ranked_scores:
logging.warning(f"No ranked scores found for player ID {player_id} on ScoreSaber.")
return [], ""
logging.debug(f"Found {len(ranked_scores)} ranked scores for player ID {player_id} on ScoreSaber.")
# Get min and max star ratings
min_stars = min(score['leaderboard']['stars'] for score in ranked_scores)
max_stars = max(score['leaderboard']['stars'] for score in ranked_scores)
star_range = max_stars - min_stars
# Determine number of bins
num_bins = math.ceil(star_range / bin_size)
logging.info(f"Using bin size: {bin_size}, resulting in {num_bins} bins.")
# Group accuracies by bins
bin_to_accuracies = defaultdict(list)
for score in ranked_scores:
# Calculate accuracy
try:
modified_score = score['score']['modifiedScore']
max_score = score['leaderboard']['maxScore']
accuracy = modified_score / max_score if max_score else 0
score['accuracy'] = accuracy
except Exception as e:
logging.error(f"Error calculating accuracy for score {score}: {e}")
continue
stars = score['leaderboard'].get('stars')
if stars is not None and accuracy is not None:
bin_index = int((stars - min_stars) / bin_size)
bin_to_accuracies[bin_index].append(accuracy)
# Calculate median accuracy for each bin
bin_to_median = {}
for bin_index, accuracies in bin_to_accuracies.items():
bin_to_median[bin_index] = median(accuracies)
bin_start = min_stars + bin_index * bin_size
bin_end = bin_start + bin_size
logging.debug(f"Median accuracy for bin {bin_index} (stars {bin_start:.2f} to {bin_end:.2f}): {bin_to_median[bin_index]:.4f}")
# Compute difference from median for each score
for score in ranked_scores:
stars = score['leaderboard'].get('stars')
accuracy = score.get('accuracy')
if stars is not None and accuracy is not None:
bin_index = int((stars - min_stars) / bin_size)
median_acc = bin_to_median.get(bin_index)
if median_acc is not None:
score['diff_from_median'] = accuracy - median_acc
else:
score['diff_from_median'] = float('inf') # Place entries with missing data at the end
else:
score['diff_from_median'] = float('inf') # Place entries with missing data at the end
# Sort scores by difference from median (ascending: most below median first)
ranked_scores.sort(key=lambda x: x.get('diff_from_median', float('inf')))
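    # e.g. a play 3 points below its bin median (diff_from_median == -0.03) sorts
    # ahead of one only 1 point below (-0.01), so the widest gaps surface first.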
playlist_data = []
for score in ranked_scores:
if len(playlist_data) >= song_count:
break
        accuracy = score.get('accuracy', 0)  # computed and stored on the score dict above
stars = score['leaderboard'].get('stars')
song_hash = score['leaderboard'].get('songHash')
if not song_hash or stars is None:
logging.debug(f"Skipping score due to missing hash or stars: {score}")
continue
difficulty_raw = score['leaderboard']['difficulty'].get('difficultyRaw', '')
game_mode = score['leaderboard']['difficulty'].get('gameMode', 'Standard')
        game_mode = game_mode.removeprefix('Solo')  # e.g. 'SoloStandard' -> 'Standard'
difficulty = normalize_difficulty_name(difficulty_raw)
# Avoid reusing song+difficulty
if song_hash in history['scoresaber_accuracy_gaps'] and difficulty in history['scoresaber_accuracy_gaps'][song_hash]:
logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
continue
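        # Minimal song entry in the shape Beat Saber playlist (.bplist) JSON expects:
        # a map hash plus a list of {name, characteristic} difficulty selectors.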
song_dict = {
'hash': song_hash,
'difficulties': [
{
'name': difficulty,
'characteristic': game_mode
}
]
}
playlist_data.append(song_dict)
song_name = score['leaderboard']['songName']
song_artist = score['leaderboard']['songAuthorName']
logging.debug(f"Selected song for playlist: Name={song_name}, Artist={song_artist}, "
f"Accuracy={accuracy*100:.2f}%, Diff from Median={score['diff_from_median']*100:.2f}%")
# Update history
history['scoresaber_accuracy_gaps'].setdefault(song_hash, []).append(difficulty)
if not playlist_data:
logging.info("No new songs found to add to the playlist based on history for ScoreSaber accuracy gaps.")
else:
for song in playlist_data:
song_hash = song['hash']
difficulty = song['difficulties'][0]['name']
logging.info(f"Song added: Hash={song_hash}, Difficulty={difficulty}")
logging.info(f"Total songs added to playlist from ScoreSaber accuracy gaps: {len(playlist_data)}")
save_history(history)
playlist_title = f"scoresaber_accgraph-{new_count:02d}"
return playlist_data, playlist_title
def playlist_strategy_beatleader_accuracy_gaps(
    api: SimpleBeatLeaderAPI,
    song_count: int = 40,
    bin_size: float = 0.25,
    bin_sort: bool = False
) -> Tuple[List[Dict[str, Any]], str]:
"""
Build a playlist of songs where the player's accuracy is furthest below the median accuracy
for their star rating range. Songs are grouped into bins by star rating to ensure fair comparison.
:param api: SimpleBeatLeaderAPI instance for making API calls
:param song_count: Number of songs to include in the playlist
:param bin_size: Size of star rating bins for grouping similar difficulty songs
:param bin_sort: Whether to sort the bins by star rating
:return: A tuple containing (list of song dictionaries, playlist title string)
"""
player_id = prompt_for_player_id()
history = load_history()
history.setdefault('beatleader_accuracy_gaps', {})
history.setdefault('playlist_counts', {})
# Get the current count and increment it
count_key = 'beatleader_accuracy_gaps'
current_count = history['playlist_counts'].get(count_key, 0)
new_count = current_count + 1
history['playlist_counts'][count_key] = new_count
# Fetch accuracy graph data
all_scores = api.get_player_accgraph(player_id)
if not all_scores:
logging.warning(f"No accgraph data found for player ID {player_id} on BeatLeader.")
return [], ""
logging.debug(f"Found {len(all_scores)} accgraph entries for player ID {player_id} on BeatLeader.")
# Collect all star ratings
star_ratings = [entry['stars'] for entry in all_scores if entry.get('stars') is not None]
if not star_ratings:
logging.warning("No star ratings found in accgraph data.")
return [], ""
min_stars = min(star_ratings)
max_stars = max(star_ratings)
star_range = max_stars - min_stars
    # Determine number of bins
num_bins = math.ceil(star_range / bin_size)
logging.info(f"Using bin size: {bin_size}, resulting in {num_bins} bins.")
# Group accuracies by bins
bin_to_accuracies = defaultdict(list)
for entry in all_scores:
stars = entry.get('stars')
acc = entry.get('acc')
if stars is not None and acc is not None:
bin_index = int((stars - min_stars) / bin_size)
bin_to_accuracies[bin_index].append(acc)
# Calculate median accuracy for each bin
bin_to_median = {}
for bin_index, accs in bin_to_accuracies.items():
bin_to_median[bin_index] = median(accs)
bin_start = min_stars + bin_index * bin_size
bin_end = bin_start + bin_size
logging.debug(f"Median accuracy for bin {bin_index} (stars {bin_start:.2f} to {bin_end:.2f}): {bin_to_median[bin_index]:.4f}")
# Compute difference from median for each score
for entry in all_scores:
stars = entry.get('stars')
acc = entry.get('acc')
if stars is not None and acc is not None:
bin_index = int((stars - min_stars) / bin_size)
median_acc = bin_to_median.get(bin_index)
if median_acc is not None:
entry['diff_from_median'] = acc - median_acc
else:
entry['diff_from_median'] = float('inf') # Place entries with missing data at the end
else:
entry['diff_from_median'] = float('inf') # Place entries with missing data at the end
# Sort scores by difference from median (ascending: most below median first)
all_scores.sort(key=lambda x: x.get('diff_from_median', float('inf')))
playlist_data = []
for score_entry in all_scores:
if len(playlist_data) >= song_count:
break
acc = score_entry.get('acc', 0)
stars = score_entry.get('stars')
song_hash = score_entry.get('hash')
if not song_hash or stars is None:
logging.debug(f"Skipping entry due to missing hash or stars: {score_entry}")
continue
        # Difficulty name and characteristic come straight from the accgraph entry
        difficulty = score_entry.get('diff', '')
        difficulty_characteristic = score_entry.get('mode', 'Standard')
if song_hash in history['beatleader_accuracy_gaps'] and difficulty in history['beatleader_accuracy_gaps'][song_hash]:
logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
continue
song_dict = {
'hash': song_hash,
'difficulties': [
{
'name': difficulty,
'characteristic': difficulty_characteristic
}
]
}
playlist_data.append(song_dict)
logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, "
f"Accuracy={acc*100:.2f}%, Diff from Median={score_entry['diff_from_median']*100:.2f}%")
# Update history
history['beatleader_accuracy_gaps'].setdefault(song_hash, []).append(difficulty)
if not playlist_data:
logging.info("No new songs found to add to the playlist based on history for BeatLeader accuracy gaps.")
else:
for song in playlist_data:
song_hash = song['hash']
difficulty = song['difficulties'][0]['name']
logging.info(f"Song added: Hash={song_hash}, Difficulty={difficulty}")
logging.info(f"Total songs added to playlist from BeatLeader accuracy gaps: {len(playlist_data)}")
save_history(history)
playlist_title = f"accgraph-{new_count:02d}"
return playlist_data, playlist_title
def playlist_strategy_beatleader_lowest_acc(
    api: BeatLeaderAPI,
    song_count: int = 20
) -> Tuple[List[Dict[str, Any]], str]:
    """Select the songs with the lowest accuracy, avoiding reuse of the same song+difficulty."""
    player_id = prompt_for_player_id()
    history = load_history()
    history.setdefault('beatleader_lowest_acc', {})
    history.setdefault('playlist_counts', {})
# Get the current count and increment it
count_key = 'beatleader_lowest_acc'
current_count = history['playlist_counts'].get(count_key, 0)
new_count = current_count + 1
history['playlist_counts'][count_key] = new_count
scores_data = api.get_player_scores(player_id)
all_scores = scores_data.get('playerScores', [])
if not all_scores:
logging.warning(f"No scores found for player ID {player_id} on BeatLeader.")
return [], ""
logging.debug(f"Found {len(all_scores)} scores for player ID {player_id} on BeatLeader.")
# Sort by accuracy in ascending order (lowest first)
all_scores.sort(key=lambda x: x.get('score', {}).get('accuracy', float('inf')))
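    # Entries with no accuracy default to float('inf') and therefore sort last.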
playlist_data = []
for score_entry in all_scores:
if len(playlist_data) >= song_count:
break
score = score_entry.get('score', {})
leaderboard = score_entry.get('leaderboard', {})
song_hash = leaderboard.get('songHash')
        difficulty_raw = leaderboard.get('difficulty')
        game_mode = leaderboard.get('modeName', 'Standard')
        accuracy = score.get('accuracy', 0)
        # Validate before converting: int('') or int(None) would raise
        if not song_hash or not difficulty_raw:
            logging.debug(f"Skipping score due to missing song_hash or difficulty_raw: {score_entry}")
            continue
        difficulty = normalize_difficulty_name(int(difficulty_raw))
# avoid reusing song+difficulty
if song_hash in history['beatleader_lowest_acc'] and difficulty in history['beatleader_lowest_acc'][song_hash]:
logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
continue
song_dict = {
'hash': song_hash,
'difficulties': [
{
'name': difficulty,
'characteristic': game_mode
}
]
}
playlist_data.append(song_dict)
logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, Accuracy={accuracy*100:.2f}%")
# Update history
history['beatleader_lowest_acc'].setdefault(song_hash, []).append(difficulty)
if not playlist_data:
logging.info("No new songs found to add to the playlist based on history for BeatLeader lowest accuracy.")
else:
for song in playlist_data:
song_hash = song['hash']
difficulty = song['difficulties'][0]['name']
logging.info(f"Song added: Hash={song_hash}, Difficulty={difficulty}")
logging.info(f"Total songs added to playlist from BeatLeader lowest accuracy: {len(playlist_data)}")
save_history(history)
return playlist_data, f"beatleader_lowest_acc-{new_count:02d}"
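

if __name__ == "__main__":
    # Minimal usage sketch, mirroring the testing notes at the top of this file.
    # Assumes ScoreSaberAPI() constructs with no arguments (as in those notes);
    # swap in SimpleBeatLeaderAPI() / BeatLeaderAPI() for the BeatLeader strategies.
    api = ScoreSaberAPI()
    songs, title = playlist_strategy_scoresaber_accuracy_gaps(api, song_count=20)
    logging.info(f"Built playlist '{title}' with {len(songs)} song(s).")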