Improved bot json parsing

This commit is contained in:
Dev 2024-09-14 20:45:27 +01:00
parent 2a2bc4f0c7
commit dd62a3b8af
2 changed files with 87 additions and 81 deletions

View File

@ -7,71 +7,85 @@ using System.IO;
using System.Linq; using System.Linq;
using System.Text.Json; using System.Text.Json;
using System.Threading.Tasks; using System.Threading.Tasks;
using Common.Models.Output;
namespace Common.Bots; namespace Common.Bots;
public static class BotParser public static class BotParser
{ {
static readonly JsonSerializerOptions serialiserOptions = new() { }; private static readonly JsonSerializerOptions serialiserOptions = new() { };
/// <summary>
/// Parses every top-level <c>*.json</c> bot dump found in <paramref name="dumpPath"/> and
/// returns the bots that were kept, with at most one entry per bot id.
/// </summary>
/// <param name="dumpPath">
/// Directory to scan. It is handed to <c>DiskHelpers.CreateDirIfDoesntExist</c> before scanning.
/// </param>
/// <param name="botTypes">
/// Roles to keep. Each bot's role is lower-cased before being looked up in this set.
/// </param>
/// <returns>The values of the id-keyed dictionary filled by <c>ProcessBotFile</c>.</returns>
public static async Task<List<Datum>> ParseAsync(string dumpPath, HashSet<string> botTypes)
{
    var stopwatch = Stopwatch.StartNew();

    DiskHelpers.CreateDirIfDoesntExist(dumpPath);
    var botFiles = Directory.GetFiles(dumpPath, "*.json", SearchOption.TopDirectoryOnly);
    LoggingHelpers.LogToConsole($"{botFiles.Length} bot dump files found");

    // Store bots keyed against their ID so we never get duplicates
    var parsedBotsDict = new ConcurrentDictionary<string, Datum>();

    // ProcessBotFile reads and deserialises its file synchronously (no await is visible in it),
    // so calling it directly would process the files one after another on this thread.
    // Task.Run puts each file on the thread pool, matching the previous Task.Factory.StartNew code.
    //
    // An int argument is copied, so ProcessBotFile cannot update a counter owned by this method.
    // Passing 0 makes each task's result the dupe count of its own file; they are summed below.
    var tasks = new List<Task<int>>(botFiles.Length);
    foreach (var filePath in botFiles)
    {
        tasks.Add(Task.Run(() => ProcessBotFile(botTypes, filePath, parsedBotsDict, 0)));
    }

    int[] dupeCountsPerFile = await Task.WhenAll(tasks);
    int totalDupeCount = dupeCountsPerFile.Sum();
    stopwatch.Stop();

    LoggingHelpers.LogToConsole($"Cleaned and Parsed: {parsedBotsDict.Count} bots. {totalDupeCount} dupes were ignored. Took {LoggingHelpers.LogTimeTaken(stopwatch.Elapsed.TotalSeconds)} seconds");

    return [.. parsedBotsDict.Values];
}
private static async Task<int> ProcessBotFile(
HashSet<string> botTypes,
string filePath,
ConcurrentDictionary<string, Datum> parsedBotsDict,
int totalDupeCount)
{ {
tasks.Add(Task.Factory.StartNew(() => var splitFilePath = filePath.Split("\\");
{
var splitFilePath = file.Split("\\");
int dupeCount = 0; int dupeCount = 0;
var rawInputString = File.ReadAllText(file);
List<Datum> bots = null; List<Datum> bots = [];
try try
{ {
bots = ParseJson(rawInputString).ToList(); // Parse the bots inside the json file
using (var reader = new StreamReader(filePath))
{
var deSerialisedObject = JsonSerializer.Deserialize<Root>(reader.ReadToEnd(), serialiserOptions);
bots.AddRange(deSerialisedObject.data.Where(botData => botTypes.Contains(botData.Info.Settings.Role.ToLower())));
}
} }
catch (Exception ex) catch (Exception ex)
{ {
Console.WriteLine($"File parse fucked up: {file}"); Console.WriteLine($"File parse fucked up: {filePath}");
throw; throw;
} }
if (bots == null || bots.Count == 0) if (bots == null || bots.Count == 0)
{ {
Console.WriteLine($"Skipping file: {splitFilePath.Last()}. no bots found, "); Console.WriteLine($"Skipping file: {splitFilePath.Last()}. no bots found, ");
return; return totalDupeCount;
} }
//Console.WriteLine($"parsing: {bots.Count} bots in file {splitFilePath.Last()}"); //Console.WriteLine($"parsing: {bots.Count} bots in file {splitFilePath.Last()}");
foreach (var bot in bots) foreach (var bot in bots)
{ {
// I have no idea // Bot fucks up something, never allow it in
if (bot._id == "6483938c53cc9087c70eae86") if (bot._id == "6483938c53cc9087c70eae86")
{ {
Console.WriteLine("oh no"); Console.WriteLine("oh no");
}
// We dont know how to parse this bot type, need to add it to types enum
if (!botTypes.Contains(bot.Info.Settings.Role.ToLower()))
{
continue; continue;
} }
lock (dictionaryLock) // null out unnecessary data to save ram
{
bot.Stats = null; bot.Stats = null;
bot.Encyclopedia = null; bot.Encyclopedia = null;
bot.Hideout = null; bot.Hideout = null;
@ -79,30 +93,28 @@ public static class BotParser
bot.Bonuses = null; bot.Bonuses = null;
bot.InsuredItems = null; bot.InsuredItems = null;
// Bot already exists in dictionary, skip // Add bot if not already added
if (parsedBotsDict.TryAdd(bot._id, bot)) if (parsedBotsDict.TryAdd(bot._id, bot))
{ {
// Success - Null out data we don't need for generating bots to save RAM // Success
// Null out more data to save ram
bot.Inventory.items.RemoveAll(x => x.parentId == null);
} }
else else
{ {
//var existingBot = parsedBotsDict.FirstOrDefault(x => x._id == bot._id);
dupeCount++; dupeCount++;
continue;
}
} }
} }
totalDupeCount += dupeCount; totalDupeCount += dupeCount;
})); Console.WriteLine($"Parsed file: {filePath}");
return totalDupeCount;
} }
Task.WaitAll(tasks.ToArray()); private static IEnumerable<Datum> ParseJson(string json)
stopwatch.Stop(); {
var deSerialisedObject = JsonSerializer.Deserialize<Root>(json, serialiserOptions);
LoggingHelpers.LogToConsole($"Cleaned and Parsed: {parsedBotsDict.Count} bots. {totalDupeCount} dupes were ignored. Took {LoggingHelpers.LogTimeTaken(stopwatch.Elapsed.TotalSeconds)} seconds"); return deSerialisedObject.data;
return [.. parsedBotsDict.Values];
} }
private static string PruneMalformedBsgJson(string json, string fileName) private static string PruneMalformedBsgJson(string json, string fileName)
@ -128,10 +140,4 @@ public static class BotParser
return returnString; return returnString;
} }
/// <summary>
/// Deserialises a bot dump document into <c>Root</c> and returns its <c>data</c> entries.
/// </summary>
/// <param name="json">Raw JSON text of one dump file.</param>
/// <returns>
/// The bots held in the document, or an empty sequence when the document (or its
/// <c>data</c> member) deserialises to null.
/// </returns>
private static IEnumerable<Datum> ParseJson(string json)
{
    var deSerialisedObject = JsonSerializer.Deserialize<Root>(json, serialiserOptions);

    // Deserialize returns null for a JSON "null" document; dereferencing it would throw
    // NullReferenceException, so hand back an empty sequence instead.
    return deSerialisedObject?.data ?? [];
}
} }

View File

@ -66,7 +66,7 @@ internal static class Program
// Read raw bot dumps and turn into c# objects // Read raw bot dumps and turn into c# objects
var workingPath = Directory.GetCurrentDirectory(); var workingPath = Directory.GetCurrentDirectory();
var dumpPath = $"{workingPath}//dumps"; var dumpPath = $"{workingPath}//dumps";
var parsedBots = BotParser.ParseAsync(dumpPath, botTypes.ToHashSet()); var parsedBots = await BotParser.ParseAsync(dumpPath, botTypes.ToHashSet());
// Put in dictionary for better use later on // Put in dictionary for better use later on
var rawBotsCache = new Dictionary<string, List<Datum>>(45); var rawBotsCache = new Dictionary<string, List<Datum>>(45);