diff --git a/Common.Models/Input/Settings.cs b/Common.Models/Input/Settings.cs index 3e00003..ab98a63 100644 --- a/Common.Models/Input/Settings.cs +++ b/Common.Models/Input/Settings.cs @@ -1,5 +1,4 @@ using Newtonsoft.Json; -using System.Collections.Concurrent; using System.Collections.Generic; using System.Runtime.Serialization; @@ -289,6 +288,24 @@ namespace Common.Models.Input public List InsuredItems { get; set; } public Hideout Hideout { get; set; } public IEnumerable Bonuses { get; set; } + + protected bool Equals(Datum other) + { + return _id == other._id; + } + + public override bool Equals(object obj) + { + if (ReferenceEquals(null, obj)) return false; + if (ReferenceEquals(this, obj)) return true; + if (obj.GetType() != this.GetType()) return false; + return Equals((Datum)obj); + } + + public override int GetHashCode() + { + return (_id != null ? _id.GetHashCode() : 0); + } } public class Root diff --git a/Common/Bot/BotParser.cs b/Common/Bot/BotParser.cs index 744189d..e67617c 100644 --- a/Common/Bot/BotParser.cs +++ b/Common/Bot/BotParser.cs @@ -1,4 +1,5 @@ -using Common.Models.Input; +using System.Collections.Concurrent; +using Common.Models.Input; using Newtonsoft.Json.Linq; using System.Collections.Generic; using System.Diagnostics; @@ -13,7 +14,7 @@ public static class BotParser { static readonly JsonSerializerOptions serialiserOptions = new() { }; - public static async Task> ParseAsync(string dumpPath, string[] botTypes) + public static List ParseAsync(string dumpPath, HashSet botTypes) { var stopwatch = Stopwatch.StartNew(); @@ -22,88 +23,89 @@ public static class BotParser var botFiles = Directory.GetFiles(dumpPath, "*.json", SearchOption.TopDirectoryOnly).ToList(); LoggingHelpers.LogToConsole($"{botFiles.Count} bot dump files found"); - var parsedBotsDict = new Dictionary(10000); + var parsedBotsDict = new HashSet(); + var dictionaryLock = new object(); + int totalDupeCount = 0; - ParallelOptions parallelOptions = new() + var tasks = new List(50); + foreach (var file in botFiles) { - MaxDegreeOfParallelism = 1 - }; - await Parallel.ForEachAsync(botFiles, parallelOptions, async (file, token) => - { - var splitFilePath = file.Split("\\"); - - int dupeCount = 0; - var rawInputString = await ReadFileContentsAsync(file); - - List bots = null; - try + tasks.Add(Task.Factory.StartNew(() => { - bots = ParseJson(rawInputString).ToList(); - } - catch (Exception ex) - { - Console.WriteLine($"File parse fucked up: {file}"); - throw; - } + var splitFilePath = file.Split("\\"); - if (bots == null || bots.Count == 0) - { - Console.WriteLine($"Skipping file: {splitFilePath.Last()}. no bots found, "); - return; - } + int dupeCount = 0; + var rawInputString = File.ReadAllText(file); - //Console.WriteLine($"parsing: {bots.Count} bots in file {splitFilePath.Last()}"); - foreach (var bot in bots) - { - // I have no idea - if (bot._id == "6483938c53cc9087c70eae86") + List bots = null; + try { - Console.WriteLine("oh no"); + bots = ParseJson(rawInputString).ToList(); + } + catch (Exception ex) + { + Console.WriteLine($"File parse fucked up: {file}"); + throw; } - // We dont know how to parse this bot type, need to add it to types enum - if (!botTypes.Contains(bot.Info.Settings.Role.ToLower())) + if (bots == null || bots.Count == 0) { - continue; + Console.WriteLine($"Skipping file: {splitFilePath.Last()}. no bots found, "); + return; } - // Bot already exists in dictionary, skip - if (parsedBotsDict.ContainsKey(bot._id)) + //Console.WriteLine($"parsing: {bots.Count} bots in file {splitFilePath.Last()}"); + foreach (var bot in bots) { - //var existingBot = parsedBotsDict[bot._id]; - dupeCount++; - continue; + // I have no idea + if (bot._id == "6483938c53cc9087c70eae86") + { + Console.WriteLine("oh no"); + } + + // We dont know how to parse this bot type, need to add it to types enum + if (!botTypes.Contains(bot.Info.Settings.Role.ToLower())) + { + continue; + } + + lock (dictionaryLock) + { + // Bot already exists in dictionary, skip + if (parsedBotsDict.Contains(bot)) + { + //var existingBot = parsedBotsDict[bot._id]; + dupeCount++; + continue; + } + + + if (!parsedBotsDict.Contains(bot)) + { + // Null out data we don't need for generating bots to save RAM + bot.Stats = null; + bot.Encyclopedia = null; + bot.Hideout = null; + bot.ConditionCounters = null; + bot.Bonuses = null; + bot.BackendCounters = null; + bot.InsuredItems = null; + parsedBotsDict.Add(bot); + } + } } + totalDupeCount += dupeCount; + })); + } - if (!parsedBotsDict.ContainsKey(bot._id)) - { - // Null out data we don't need for generating bots to save RAM - bot.Stats = null; - bot.Encyclopedia = null; - bot.Hideout = null; - bot.ConditionCounters = null; - bot.Bonuses = null; - bot.BackendCounters = null; - bot.InsuredItems = null; - parsedBotsDict.Add(bot._id, bot); - } - } - - totalDupeCount += dupeCount; - }); - + Task.WaitAll(tasks.ToArray()); stopwatch.Stop(); + LoggingHelpers.LogToConsole($"Cleaned and Parsed: {parsedBotsDict.Count} bots. {totalDupeCount} dupes were ignored. Took {LoggingHelpers.LogTimeTaken(stopwatch.Elapsed.TotalSeconds)} seconds"); - return (parsedBotsDict.Select(x => x.Value)).ToList(); - } - - private static async Task ReadFileContentsAsync(string file) - { - using var reader = File.OpenText(file); - return await reader.ReadToEndAsync(); + return parsedBotsDict.ToList(); } private static string PruneMalformedBsgJson(string json, string fileName) diff --git a/Generator/Program.cs b/Generator/Program.cs index 9bee596..6818514 100644 --- a/Generator/Program.cs +++ b/Generator/Program.cs @@ -50,7 +50,7 @@ internal static class Program // Read raw bot dumps and turn into c# objects var workingPath = Directory.GetCurrentDirectory(); var dumpPath = $"{workingPath}//dumps"; - var parsedBots = await BotParser.ParseAsync(dumpPath, botTypes); + var parsedBots = BotParser.ParseAsync(dumpPath, botTypes.ToHashSet()); if (parsedBots.Count == 0) {