Improved bot json parsing

This commit is contained in:
Dev 2024-09-14 20:45:27 +01:00
parent 2a2bc4f0c7
commit dd62a3b8af
2 changed files with 87 additions and 81 deletions

View File

@ -7,71 +7,85 @@ using System.IO;
using System.Linq;
using System.Text.Json;
using System.Threading.Tasks;
using Common.Models.Output;
namespace Common.Bots;
public static class BotParser
{
static readonly JsonSerializerOptions serialiserOptions = new() { };
private static readonly JsonSerializerOptions serialiserOptions = new() { };
public static List<Datum> ParseAsync(string dumpPath, HashSet<string> botTypes)
public static async Task<List<Datum>> ParseAsync(string dumpPath, HashSet<string> botTypes)
{
var stopwatch = Stopwatch.StartNew();
DiskHelpers.CreateDirIfDoesntExist(dumpPath);
var botFiles = Directory.GetFiles(dumpPath, "*.json", SearchOption.TopDirectoryOnly).ToList();
LoggingHelpers.LogToConsole($"{botFiles.Count} bot dump files found");
var botFiles = Directory.GetFiles(dumpPath, "*.json", SearchOption.TopDirectoryOnly);
LoggingHelpers.LogToConsole($"{botFiles.Length} bot dump files found");
var parsedBotsDict = new Dictionary<string, Datum>();
var dictionaryLock = new object();
// key = bot type
// Store bots keyed against their ID so we never get duplicates
var parsedBotsDict = new ConcurrentDictionary<string, Datum>();
int totalDupeCount = 0;
var tasks = new List<Task>();
foreach (var filePath in botFiles)
{
tasks.Add(ProcessBotFile(botTypes, filePath, parsedBotsDict, totalDupeCount));
}
var tasks = new List<Task>(45);
foreach (var file in botFiles)
await Task.WhenAll(tasks.ToArray());
stopwatch.Stop();
LoggingHelpers.LogToConsole($"Cleaned and Parsed: {parsedBotsDict.Count} bots. {totalDupeCount} dupes were ignored. Took {LoggingHelpers.LogTimeTaken(stopwatch.Elapsed.TotalSeconds)} seconds");
return [.. parsedBotsDict.Values];
}
private static async Task<int> ProcessBotFile(
HashSet<string> botTypes,
string filePath,
ConcurrentDictionary<string, Datum> parsedBotsDict,
int totalDupeCount)
{
tasks.Add(Task.Factory.StartNew(() =>
{
var splitFilePath = file.Split("\\");
var splitFilePath = filePath.Split("\\");
int dupeCount = 0;
var rawInputString = File.ReadAllText(file);
List<Datum> bots = null;
List<Datum> bots = [];
try
{
bots = ParseJson(rawInputString).ToList();
// Parse the bots inside the json file
using (var reader = new StreamReader(filePath))
{
var deSerialisedObject = JsonSerializer.Deserialize<Root>(reader.ReadToEnd(), serialiserOptions);
bots.AddRange(deSerialisedObject.data.Where(botData => botTypes.Contains(botData.Info.Settings.Role.ToLower())));
}
}
catch (Exception ex)
{
Console.WriteLine($"File parse fucked up: {file}");
Console.WriteLine($"File parse fucked up: {filePath}");
throw;
}
if (bots == null || bots.Count == 0)
{
Console.WriteLine($"Skipping file: {splitFilePath.Last()}. no bots found, ");
return;
return totalDupeCount;
}
//Console.WriteLine($"parsing: {bots.Count} bots in file {splitFilePath.Last()}");
foreach (var bot in bots)
{
// I have no idea
// Bot fucks up something, never allow it in
if (bot._id == "6483938c53cc9087c70eae86")
{
Console.WriteLine("oh no");
}
// We dont know how to parse this bot type, need to add it to types enum
if (!botTypes.Contains(bot.Info.Settings.Role.ToLower()))
{
continue;
}
lock (dictionaryLock)
{
// null out unnecessary data to save ram
bot.Stats = null;
bot.Encyclopedia = null;
bot.Hideout = null;
@ -79,30 +93,28 @@ public static class BotParser
bot.Bonuses = null;
bot.InsuredItems = null;
// Bot already exists in dictionary, skip
// Add bot if not already added
if (parsedBotsDict.TryAdd(bot._id, bot))
{
// Success - Null out data we don't need for generating bots to save RAM
// Success
// Null out more data to save ram
bot.Inventory.items.RemoveAll(x => x.parentId == null);
}
else
{
//var existingBot = parsedBotsDict.FirstOrDefault(x => x._id == bot._id);
dupeCount++;
continue;
}
}
}
totalDupeCount += dupeCount;
}));
Console.WriteLine($"Parsed file: {filePath}");
return totalDupeCount;
}
Task.WaitAll(tasks.ToArray());
stopwatch.Stop();
LoggingHelpers.LogToConsole($"Cleaned and Parsed: {parsedBotsDict.Count} bots. {totalDupeCount} dupes were ignored. Took {LoggingHelpers.LogTimeTaken(stopwatch.Elapsed.TotalSeconds)} seconds");
return [.. parsedBotsDict.Values];
private static IEnumerable<Datum> ParseJson(string json)
{
var deSerialisedObject = JsonSerializer.Deserialize<Root>(json, serialiserOptions);
return deSerialisedObject.data;
}
private static string PruneMalformedBsgJson(string json, string fileName)
@ -128,10 +140,4 @@ public static class BotParser
return returnString;
}
private static IEnumerable<Datum> ParseJson(string json)
{
var deSerialisedObject = JsonSerializer.Deserialize<Root>(json, serialiserOptions);
return deSerialisedObject.data;
}
}

View File

@ -66,7 +66,7 @@ internal static class Program
// Read raw bot dumps and turn into c# objects
var workingPath = Directory.GetCurrentDirectory();
var dumpPath = $"{workingPath}//dumps";
var parsedBots = BotParser.ParseAsync(dumpPath, botTypes.ToHashSet());
var parsedBots = await BotParser.ParseAsync(dumpPath, botTypes.ToHashSet());
// Put in dictionary for better use later on
var rawBotsCache = new Dictionary<string, List<Datum>>(45);