From 41090ca480a6763b0d01f597daecd0dc9e32af00 Mon Sep 17 00:00:00 2001
From: Brian Lee <blee@satstack.cloud>
Date: Sat, 23 Nov 2024 13:21:40 -0800
Subject: [PATCH] New strategy to select underperformance on BeatLeader songs.

---
 docs/ClientWrapperUsage.md         |  12 ++
 src/helpers/SimpleBeatLeaderAPI.py |  42 +++++
 src/saberlist/make.py              | 261 +++++++++++++++++++++++++++--
 3 files changed, 300 insertions(+), 15 deletions(-)

diff --git a/docs/ClientWrapperUsage.md b/docs/ClientWrapperUsage.md
index b49b1d6..ed21fa4 100644
--- a/docs/ClientWrapperUsage.md
+++ b/docs/ClientWrapperUsage.md
@@ -32,3 +32,15 @@ from helpers.BeatSaverAPI import BeatSaverAPI
 beatsaver_api = BeatSaverAPI()
 map_data = beatsaver_api.get_maps(year=2024, month=9)
 ```
+
+## SimpleBeatLeaderAPI
+
+```python
+from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
+player_id = "76561199407393962"
+beatleader_api = SimpleBeatLeaderAPI()
+data = beatleader_api.get_player_accgraph(player_id)
+data[0]
+filtered_data = [{'acc': item['acc'], 'stars': item['stars'], 'hash': item['hash']} for item in data]
+filtered_data[0]
+```
diff --git a/src/helpers/SimpleBeatLeaderAPI.py b/src/helpers/SimpleBeatLeaderAPI.py
index 49770cc..75c75f0 100644
--- a/src/helpers/SimpleBeatLeaderAPI.py
+++ b/src/helpers/SimpleBeatLeaderAPI.py
@@ -176,4 +176,46 @@ class SimpleBeatLeaderAPI:
             return leaderboard_data.get('data', [])
         except requests.exceptions.RequestException as e:
             logging.error(f"Error fetching leaderboard for hash {hash}, diff {diff}, mode {mode}: {e}")
+            return None
+
+    def get_player_accgraph(self, player_id, use_cache=True, context="general", include_unranked=False, type="acc"):
+        """
+        Retrieve graph data for a specific player.
+
+        :param player_id: ID of the player
+        :param use_cache: Whether to use cached data if available (default: True)
+        :param context: Leaderboard context, e.g. nopause, nomods, golf, scpm (default: "general")
+        :param include_unranked: Whether to include unranked maps (default: False)
+        :param type: Type of graph data to retrieve: 'acc', 'graph', or 'rank' (default: "acc")
+        :return: Graph data points decoded from the JSON response, or None if the request fails
+        """
+        # Tag the cache file when unranked maps are included so the two result sets never share a file
+        cache_file = os.path.join(self.CACHE_DIR, f"player_{player_id}_{type}graph_{context}{'_unranked' if include_unranked else ''}.json")
+
+        if use_cache and self._is_cache_valid(cache_file):
+            logging.debug(f"Using cached {type} graph data for player {player_id}")
+            with open(cache_file, 'r') as f:
+                return json.load(f)
+
+        logging.debug(f"Fetching fresh {type} graph data for player {player_id}")
+        url = f"{self.BASE_URL}/player/{player_id}/accgraph"
+
+        params = {
+            "leaderboardContext": context,
+            "type": type,
+            "no_unranked_stars": not include_unranked
+        }
+
+        try:
+            response = self.session.get(url, params=params)
+            response.raise_for_status()
+            graph_data = response.json()
+            # Cache the results
+            with open(cache_file, 'w') as f:
+                json.dump(graph_data, f)
+
+            sleep(1)
+            return graph_data
+        except requests.exceptions.RequestException as e:
+            logging.error(f"Error fetching acc graph for player {player_id}: {e}")
             return None
\ No newline at end of file
diff --git a/src/saberlist/make.py b/src/saberlist/make.py
index 4954255..cf01f5d 100644
--- a/src/saberlist/make.py
+++ b/src/saberlist/make.py
@@ -1,14 +1,15 @@
+from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 from helpers.BeatSaverAPI import BeatSaverAPI
 from helpers.SimpleBeatLeaderAPI import SimpleBeatLeaderAPI
-from statistics import mean
+from statistics import mean, median
 from typing import Dict, Any, List
 import argparse
 import json
 import logging
 import os
 import sys
-
+import math
 from dotenv import load_dotenv
 load_dotenv()
 LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
@@ -39,7 +40,7 @@ def load_history() -> Dict[str, Any]:
             history = json.load(f)
             history.setdefault('playlist_counts', {})
             return history
-    return {'highest_accuracy': {}, 'playlist_counts': {}}
+    return {'scoresaver': {}, 'playlist_counts': {}}
 
 def save_history(history: Dict[str, Any]) -> None:
     """
@@ -114,6 +115,139 @@ def infer_beatleader_leaderboard_id(song_id: str, difficulty: str) -> str:
     return f"{song_id}{difficulty_map[difficulty]}1"
 """
 
+
+def playlist_strategy_beatleader_accuracy_gaps(
+    api: SimpleBeatLeaderAPI,
+    song_count: int = 40,
+    bin_size: float = 0.25,
+    bin_sort: bool = False
+) -> List[Dict[str, Any]]:
+    """
+    Build a playlist of songs where the player's accuracy is furthest below the median accuracy
+    for their star rating range. Songs are grouped into bins by star rating to ensure fair comparison.
+
+    :param api: SimpleBeatLeaderAPI instance for making API calls
+    :param song_count: Number of songs to include in the playlist
+    :param bin_size: Size of star rating bins for grouping similar difficulty songs (must be > 0)
+    :param bin_sort: Currently unused; reserved for sorting the bins by star rating
+    :return: A tuple containing (list of song dictionaries, playlist title string)
+    :raises ValueError: If bin_size is not positive
+    """
+    if bin_size <= 0:
+        raise ValueError(f"bin_size must be positive, got {bin_size}")
+
+    player_id = prompt_for_player_id()
+    history = load_history()
+    history.setdefault('beatleader_accgraph_diff', {})
+    history.setdefault('playlist_counts', {})
+
+    # Get the current count and increment it
+    count_key = 'beatleader_accgraph_diff'
+    new_count = history['playlist_counts'].get(count_key, 0) + 1
+    history['playlist_counts'][count_key] = new_count
+
+    # Fetch accuracy graph data
+    all_scores = api.get_player_accgraph(player_id)
+    if not all_scores:
+        logging.warning(f"No accgraph data found for player ID {player_id} on BeatLeader.")
+        return [], ""
+    logging.debug(f"Found {len(all_scores)} accgraph entries for player ID {player_id} on BeatLeader.")
+
+    # Collect all star ratings
+    star_ratings = [entry['stars'] for entry in all_scores if entry.get('stars') is not None]
+    if not star_ratings:
+        logging.warning("No star ratings found in accgraph data.")
+        return [], ""
+    min_stars = min(star_ratings)
+    max_stars = max(star_ratings)
+    star_range = max_stars - min_stars
+
+    # The top score lands in bin int(star_range / bin_size), so the bin count is that index plus one
+    num_bins = int(star_range / bin_size) + 1
+    logging.info(f"Using bin size: {bin_size}, resulting in {num_bins} bins.")
+
+    # Group accuracies by bins
+    bin_to_accuracies = defaultdict(list)
+    for entry in all_scores:
+        stars = entry.get('stars')
+        acc = entry.get('acc')
+        if stars is not None and acc is not None:
+            bin_index = int((stars - min_stars) / bin_size)
+            bin_to_accuracies[bin_index].append(acc)
+
+    # Calculate median accuracy for each bin
+    bin_to_median = {}
+    for bin_index, accs in bin_to_accuracies.items():
+        bin_to_median[bin_index] = median(accs)
+        bin_start = min_stars + bin_index * bin_size
+        bin_end = bin_start + bin_size
+        logging.debug(f"Median accuracy for bin {bin_index} (stars {bin_start:.2f} to {bin_end:.2f}): {bin_to_median[bin_index]:.4f}")
+
+    # Compute difference from median for each score
+    for entry in all_scores:
+        stars = entry.get('stars')
+        acc = entry.get('acc')
+        if stars is not None and acc is not None:
+            bin_index = int((stars - min_stars) / bin_size)
+            # Every bin holding a score was given a median above, so direct indexing is safe
+            entry['diff_from_median'] = acc - bin_to_median[bin_index]
+        else:
+            entry['diff_from_median'] = float('inf')  # Place entries with missing data at the end
+
+    # Sort scores by difference from median (ascending: most below median first)
+    all_scores.sort(key=lambda x: x.get('diff_from_median', float('inf')))
+
+    playlist_data = []
+    for score_entry in all_scores:
+        if len(playlist_data) >= song_count:
+            break
+
+        acc = score_entry.get('acc')
+        stars = score_entry.get('stars')
+        song_hash = score_entry.get('hash')
+
+        if not song_hash or stars is None or acc is None:
+            logging.debug(f"Skipping entry due to missing hash, stars or acc: {score_entry}")
+            continue
+
+        # Use stars as a proxy for difficulty; adjust if you have actual difficulty levels
+        difficulty = f"Stars_{stars:.2f}"
+
+        # Avoid reusing the same song+difficulty
+        if song_hash in history['beatleader_accgraph_diff'] and difficulty in history['beatleader_accgraph_diff'][song_hash]:
+            logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
+            continue
+
+        song_dict = {
+            'hash': song_hash,
+            'difficulties': [
+                {
+                    'name': difficulty,
+                    'characteristic': 'Standard'  # Adjust if different characteristics are needed
+                }
+            ]
+        }
+
+        playlist_data.append(song_dict)
+        logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, "
+                      f"Accuracy={acc*100:.2f}%, Diff from Median={score_entry['diff_from_median']*100:.2f}%")
+
+        # Update history
+        history['beatleader_accgraph_diff'].setdefault(song_hash, []).append(difficulty)
+
+    if not playlist_data:
+        logging.info("No new songs found to add to the playlist based on history for BeatLeader accgraph difference.")
+    else:
+        for song in playlist_data:
+            song_hash = song['hash']
+            difficulty = song['difficulties'][0]['name']
+            logging.info(f"Song added: Hash={song_hash}, Difficulty={difficulty}")
+        logging.info(f"Total songs added to playlist from BeatLeader accgraph difference: {len(playlist_data)}")
+
+    save_history(history)
+
+    return playlist_data, f"beatleader_accgraph_diff-{new_count:02d}"
+
 def playlist_strategy_scoresaber_oldscores(
     api: ScoreSaberAPI, 
     song_count: int = 20
@@ -548,7 +682,7 @@ def map_leaders_by_month(month: int = 9, year: int = 2024, game_modes: List[str]
     logging.info(f"Collected leaderboards for {len(collected_data)} map+difficulty combinations, orderable by average accuracy of top ten plays for {month}/{year}.")
     return collected_data
 
-def playlist_strategy_highest_accuracy(
+def playlist_strategy_scoresaver_acc(
     song_count: int = 40
 ) -> List[Dict[str, Any]]:
     """
@@ -560,7 +694,7 @@ def playlist_strategy_highest_accuracy(
     :return: A list of dictionaries containing song information for the playlist.
     """
     history = load_history()
-    history.setdefault('highest_accuracy', {})
+    history.setdefault('scoresaver', {})
     history.setdefault('playlist_counts', {})
 
     # Get last month's date
@@ -585,7 +719,7 @@ def playlist_strategy_highest_accuracy(
             print("Invalid input. Please enter numbers only.")
 
     # Get the current count for highest accuracy and increment it
-    count_key = f"highest_accuracy-{year}-{month:02d}"
+    count_key = f"scoresaver-{year}-{month:02d}"
     current_count = history['playlist_counts'].get(count_key, 0)
     new_count = current_count + 1
     history['playlist_counts'][count_key] = new_count
@@ -611,7 +745,7 @@ def playlist_strategy_highest_accuracy(
         song_hash = entry['hash']
 
         # Check history to avoid reusing any map, regardless of difficulty
-        if song_hash in history['highest_accuracy']:
+        if song_hash in history['scoresaver']:
             logging.debug(f"Skipping song {song_hash} as it's in history.")
             continue
 
@@ -626,7 +760,7 @@ def playlist_strategy_highest_accuracy(
         logging.info(f"Song added: {entry['map_name']} ({difficulty}) - Average Accuracy: {entry['average_accuracy'] * 100:.2f}%")
 
         # Update history (now we're just adding the song hash, not the difficulty)
-        history['highest_accuracy'][song_hash] = True
+        history['scoresaver'][song_hash] = True
 
     # Log if no songs were added
     if not playlist_data:
@@ -636,7 +770,7 @@ def playlist_strategy_highest_accuracy(
 
     save_history(history)
 
-    return playlist_data, f"highest_accuracy-{year}-{month:02d}-{new_count:02d}"
+    return playlist_data, f"scoresaver-{year}-{month:02d}-{new_count:02d}"
 
 def reset_history(strategy: str) -> None:
     """
@@ -657,7 +791,15 @@ def reset_history(strategy: str) -> None:
 def get_strategy():
     parser = argparse.ArgumentParser(description="Generate Beat Saber playlists")
     parser.add_argument("-s", "--strategy", 
-                        choices=["scoresaber_oldscores", "beatleader_oldscores", "highest_accuracy", "beatleader_lowest_pp", "scoresaber_lowest_pp"],
+                        choices=[
+                            "scoresaber_oldscores",
+                            "beatleader_oldscores",
+                            "scoresaver_acc", 
+                        #   "beatleader_lowest_pp",
+                        #   "scoresaber_lowest_pp",
+                        #   "beatleader_lowest_acc",
+                            "beatleader_accuracy_gaps"
+                        ],
                         help="Specify the playlist generation strategy")
     parser.add_argument("-r", "--reset",
                         action="store_true",
@@ -680,24 +822,113 @@ def get_strategy():
     
     return args.strategy
 
+def playlist_strategy_beatleader_lowest_acc(
+    api: BeatLeaderAPI,
+    song_count: int = 20
+) -> List[Dict[str, Any]]:
+    """
+    Select the player's lowest-accuracy BeatLeader scores, avoiding reusing the same song+difficulty.
+
+    :param api: BeatLeaderAPI instance used to fetch the player's scores
+    :param song_count: Maximum number of songs to include in the playlist
+    :return: A tuple containing (list of song dictionaries, playlist title string)
+    """
+    player_id = prompt_for_player_id()
+    history = load_history()
+    history.setdefault('beatleader_lowest_acc', {})
+    history.setdefault('playlist_counts', {})
+
+    # Get the current count and increment it
+    count_key = 'beatleader_lowest_acc'
+    new_count = history['playlist_counts'].get(count_key, 0) + 1
+    history['playlist_counts'][count_key] = new_count
+
+    # Tolerate a None result (presumably possible on a failed fetch) by treating it as "no scores"
+    scores_data = api.get_player_scores(player_id)
+    all_scores = (scores_data or {}).get('playerScores', [])
+    if not all_scores:
+        logging.warning(f"No scores found for player ID {player_id} on BeatLeader.")
+        return [], ""
+    logging.debug(f"Found {len(all_scores)} scores for player ID {player_id} on BeatLeader.")
+
+    # Sort by accuracy in ascending order (lowest first)
+    all_scores.sort(key=lambda x: x.get('score', {}).get('accuracy', float('inf')))
+
+    playlist_data = []
+    for score_entry in all_scores:
+        if len(playlist_data) >= song_count:
+            break
+
+        score = score_entry.get('score', {})
+        leaderboard = score_entry.get('leaderboard', {})
+
+        song_hash = leaderboard.get('songHash')
+        # A missing difficulty becomes 0 so the guard below skips it instead of int('') raising
+        difficulty_raw = int(leaderboard.get('difficulty') or 0)
+        game_mode = leaderboard.get('modeName', 'Standard')
+        accuracy = score.get('accuracy', 0)
+
+        if not song_hash or not difficulty_raw:
+            logging.debug(f"Skipping score due to missing song_hash or difficulty_raw: {score_entry}")
+            continue
+
+        difficulty = normalize_difficulty_name(difficulty_raw)
+
+        # avoid reusing song+difficulty
+        if song_hash in history['beatleader_lowest_acc'] and difficulty in history['beatleader_lowest_acc'][song_hash]:
+            logging.debug(f"Skipping song {song_hash} with difficulty {difficulty} as it's in history.")
+            continue
+
+        song_dict = {
+            'hash': song_hash,
+            'difficulties': [{'name': difficulty, 'characteristic': game_mode}],
+        }
+
+        playlist_data.append(song_dict)
+        logging.debug(f"Selected song for playlist: Hash={song_hash}, Difficulty={difficulty}, Accuracy={accuracy*100:.2f}%")
+
+        # Update history
+        history['beatleader_lowest_acc'].setdefault(song_hash, []).append(difficulty)
+
+    if not playlist_data:
+        logging.info("No new songs found to add to the playlist based on history for BeatLeader lowest accuracy.")
+    else:
+        for song in playlist_data:
+            logging.info(f"Song added: Hash={song['hash']}, Difficulty={song['difficulties'][0]['name']}")
+        logging.info(f"Total songs added to playlist from BeatLeader lowest accuracy: {len(playlist_data)}")
+
+    save_history(history)
+
+    return playlist_data, f"beatleader_lowest_acc-{new_count:02d}"
+
 def saberlist() -> None:
     """
-    Generate a playlist of songs from a range of difficulties, all with scores previously set a long time ago.
-    The range of difficulties ensures that the first few songs are good for warming up.
+    Generate a playlist of songs using a specified strategy.
     Avoids reusing the same song+difficulty in a playlist based on history.
     """
     strategy = get_strategy()
 
     if strategy == 'scoresaber_oldscores':
         playlist_data, playlist_title = playlist_strategy_scoresaber_oldscores(ScoreSaberAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
+        playlist_builder = PlaylistBuilder()
     elif strategy == 'beatleader_oldscores':
         playlist_data, playlist_title = playlist_strategy_beatleader_oldscores(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
-    elif strategy == 'highest_accuracy':
-        playlist_data, playlist_title = playlist_strategy_highest_accuracy()
+        playlist_builder = PlaylistBuilder()
+    elif strategy == 'scoresaver_acc':
+        playlist_data, playlist_title = playlist_strategy_scoresaver_acc()
+        playlist_builder = PlaylistBuilder(covers_dir='./covers/scoresavers')
     elif strategy == 'beatleader_lowest_pp':
         playlist_data, playlist_title = playlist_strategy_beatleader_lowest_pp(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
+        playlist_builder = PlaylistBuilder(covers_dir='./covers/beatleader')
     elif strategy == 'scoresaber_lowest_pp':
         playlist_data, playlist_title = playlist_strategy_scoresaber_lowest_pp(ScoreSaberAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
+        playlist_builder = PlaylistBuilder(covers_dir='./covers/scoresaber')
+    elif strategy == 'beatleader_lowest_acc':
+        playlist_data, playlist_title = playlist_strategy_beatleader_lowest_acc(BeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
+        playlist_builder = PlaylistBuilder(covers_dir='./covers/kaiju')
+    elif strategy == 'beatleader_accuracy_gaps':
+        playlist_data, playlist_title = playlist_strategy_beatleader_accuracy_gaps(SimpleBeatLeaderAPI(cache_expiry_days=CACHE_EXPIRY_DAYS))
+        playlist_builder = PlaylistBuilder(covers_dir='./covers/pajamas')
     else:
         logging.error(f"Unknown strategy '{strategy}'")
         return
@@ -706,7 +937,7 @@ def saberlist() -> None:
         logging.info("No new scores found to add to the playlist.")
         return
 
-    PlaylistBuilder().create_playlist(
+    playlist_builder.create_playlist(
         playlist_data,
         playlist_title=playlist_title,
         playlist_author="SaberList Tool"